In [3]:
# %load 
import torch as tch
from torch import nn
import torchaudio as ta
import torchaudio.transforms as tatx
import torchaudio.functional as tafn
from pathlib import Path
import plotly.express as px
import pandas as pd
import numpy as np
from IPython.display import Audio

from train_tools import find_lr, check_with_patient, UpdatingPlotlyLines, fit_step, evaluate
from torch.optim import Adam
from torch.nn import functional as nnfn

device = tch.device("cuda")

%load_ext autoreload
%autoreload 2 


In [4]:
from prep_we_sample import CommonVoiceDataset, get_segment, get_middle_segment, get_random_segment, get_middle_segment_jitter, PositiveSampleSet, NegativeSampleSet, MergePosNegSet

In [6]:
we_df = pd.read_csv('temp/we_df.csv')
we_seg_df = pd.read_csv('temp/wee_seg_16000.csv', index_col=0)

In [7]:
full_ds = CommonVoiceDataset('dataset/Common Voice Corpus 1/en/', 16000)
full_ds.use_df(full_ds.get_df('validated'))

we_ds = CommonVoiceDataset('dataset/Common Voice Corpus 1/en/', 16000)
we_ds.use_df(we_df)

In [13]:
class GlobalMaxPool1d(nn.Module):
    def __init__(self, sz=None):
        super().__init__()
       

    def forward(self, x): 
        inp_size = x.size()
        return nn.functional.max_pool1d(input=x,
                  kernel_size= (inp_size[-1]))

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.conv = nn.Sequential(
            nn.Conv1d(1, 32, 16, stride=8), 
            nn.ReLU(), 

            nn.Conv1d(32, 64, 16, stride=8), 
            nn.ReLU(), 

            nn.Conv1d(64, 128, 8, stride=4), 
            nn.ReLU(), 

            nn.Conv1d(128, 256, 4, stride=2), 
            nn.ReLU(), 
            
           
            GlobalMaxPool1d(),

 

            nn.Flatten(),
            nn.Linear(256, 64),
            nn.ReLU(), 

            nn.Linear(64, 1),
            nn.Sigmoid(),
        )
    
    def forward(self, x):
        return tch.squeeze(self.conv(x))


m = Model()#.to('cuda')
optimiser = Adam(m.parameters(), 0.001)
m(get_middle_segment(we_ds, we_seg_df, 7, 400)[None, ...]).shape

torch.Size([])

In [14]:
i = 0
for k, v in m.state_dict().items():
    i += np.prod(v.shape)
i

246881

In [10]:
from torch.utils.data import DataLoader, Dataset, StackDataset


In [21]:
neg = NegativeSampleSet(full_ds, 400)
pos = PositiveSampleSet(we_ds, 400, we_seg_df, 0.2) # make it 0.2 to reduce the confusion
ds2 = MergePosNegSet(pos, neg)
dl = DataLoader(ds2, batch_size=64, shuffle=True)


In [15]:
find_lr(Model, dl, Adam, nnfn.binary_cross_entropy, starting_lr=1e-4)

FigureWidget({
    'data': [{'name': 'train_loss', 'type': 'scatter', 'uid': '50a2310d-fade-48ba-af48-3a0287b1af86', 'x': [], 'y': []},
             {'name': 'lr', 'type': 'scatter', 'uid': '3f203d85-d74b-45a1-b6cc-3f0ce6630e10', 'x': [], 'y': []}],
    'layout': {'template': '...'}
})

FigureWidget({
    'data': [{'name': 'train_loss', 'type': 'scatter', 'uid': 'b9de87fa-7cbe-43be-a865-26430d198611', 'x': [], 'y': []}],
    'layout': {'template': '...'}
})

In [22]:
from torch import optim 
from train_tools import find_lr, check_with_patient, UpdatingPlotlyLines, fit_step, evaluate, evaluate_v2

def fit(
        model: nn.Module, 
        train_dataloader: DataLoader, 
        val_dataloader:DataLoader, 
        optimiser: optim.Optimizer, 
        loss_fn, 
        epochs: int, 
        silent=True
    ):
    """
    reference implementation 
    """

    fig = UpdatingPlotlyLines('epoch', ['train_loss', 'acc'])
    fig.display()
    oop = check_with_patient(10)
    for epoch in range(epochs):
        train_loss = fit_step(model, train_dataloader, optimiser, loss_fn, silent=silent)
        y_pred, y = evaluate_v2(m, dl)

        acc = tch.sum(y == (y_pred>0.5))/len(y)
        

        fig.append(epoch=epoch, train_loss=train_loss, acc=acc.cpu())

        if oop(train_loss):
            return fig

    return fig 

m = Model().to('cuda')
o = Adam(m.parameters(), lr=0.0015,)

fit(m, dl, dl, o, nnfn.binary_cross_entropy, 200 )



FigureWidget({
    'data': [{'name': 'train_loss', 'type': 'scatter', 'uid': 'f41ce38c-d4e1-45b5-9443-889881e9275d', 'x': [], 'y': []},
             {'name': 'acc', 'type': 'scatter', 'uid': '87dd49da-c7ba-4d93-aace-2cc8f5907ad3', 'x': [], 'y': []}],
    'layout': {'template': '...'}
})

<train_tools.UpdatingPlotlyLines at 0x7ef5f0703520>

In [14]:
from train_tools import find_lr, check_with_patient, UpdatingPlotlyLines, fit_step, evaluate, evaluate_v2


In [118]:
y_pred, y = evaluate_v2(m, dl)

In [23]:
def evaluate_v3(model: nn.Module, dataloader: DataLoader):
    device = next(model.parameters()).device
    model.eval()


    fp_xs = []
    fn_xs = []

    fp_ys = []
    fn_ys = []



    with tch.no_grad():
        for batch, (x, y) in enumerate(dataloader):
            x, y = x.to(device), y.to(device)
            out = model(x)
            y_pred = out > 0.5

            y = y.to(bool)

            false_positive = y_pred & (~y)
            false_negative = (~y_pred) & y

            
            fp_xs.append(x[false_positive])
            fp_ys.append(out[false_positive])

            fn_xs.append(x[false_negative])
            fn_ys.append(out[false_negative])

        
    return tch.concat(fp_xs), tch.concat(fn_xs), tch.concat(fp_ys), tch.concat(fn_ys)
fp, fn, fp_ys, fn_ys = evaluate_v3(m, dl)

In [24]:
len(fn)

105

In [25]:
len(fp)

162

In [36]:
Audio(fp[50].cpu(), rate=16000)

In [37]:
Audio(fp[49].cpu(), rate=16000)

In [26]:
Audio(fp[0].cpu(), rate=16000)

In [27]:
Audio(fp[1].cpu(), rate=16000)

In [28]:
Audio(fp[2].cpu(), rate=16000)

In [29]:
Audio(fp[3].cpu(), rate=16000)

In [30]:
Audio(fp[4].cpu(), rate=16000)

In [31]:
Audio(fn[0].cpu(), rate=16000)

In [32]:
Audio(fn[1].cpu(), rate=16000)

In [33]:
Audio(fn[2].cpu(), rate=16000)

In [34]:
Audio(fn[3].cpu(), rate=16000)

In [35]:
Audio(fn[5].cpu(), rate=16000)

In [116]:
fp.shape

torch.Size([206, 1, 6400])

In [117]:
evaluate(m, dl, nnfn.binary_cross_entropy)

0.1399318601384207

In [38]:
y_pred>0.5

tensor([False,  True, False,  ..., False,  True,  True], device='cuda:0')

In [119]:
tch.sum(y == (y_pred>0.5))/len(y)

tensor(0.9507, device='cuda:0')

In [120]:
tch.sum((y==1) & (y_pred>0.5))/tch.sum(y_pred>0.5)

tensor(0.9589, device='cuda:0')

In [121]:
tch.sum((y==0) & (y_pred>0.5))/tch.sum(y_pred>0.5)

tensor(0.0411, device='cuda:0')

In [122]:
tch.sum((y==1) & (y_pred<0.5))/tch.sum(y_pred<0.5)

tensor(0.0572, device='cuda:0')

In [45]:
tch.sum((y==0) & (y_pred<0.5))/tch.sum(y_pred<0.5)

tensor(0.9205, device='cuda:0')