In [None]:
# %load 
import torch as tch
from torch import nn
import torchaudio as ta
import torchaudio.transforms as tatx
import torchaudio.functional as tafn
from pathlib import Path
import plotly.express as px
import pandas as pd
import numpy as np
from IPython.display import Audio

from train_tools import find_lr, check_with_patient, UpdatingPlotlyLines, fit_step, evaluate
from torch.optim import Adam
from torch.nn import functional as nnfn

device = tch.device("cuda")

%load_ext autoreload
%autoreload 2 


In [None]:
from prep_we_sample import CommonVoiceDataset, get_segment, get_middle_segment, get_random_segment, get_middle_segment_jitter, PositiveSampleSet, NegativeSampleSet, MergePosNegSet

In [None]:
# Build the corpus wrapper and fetch the frame returned by get_df('validated')
# (presumably the listing of validated clips — TODO confirm in prep_we_sample).
full_ds = CommonVoiceDataset('dataset/Common Voice Corpus 1/en/', 16000)
full_df = full_ds.get_df('validated')
# Shuffle every row once (sample all rows without replacement).
# NOTE(review): no random_state is given, so the order — and every split derived
# from it — changes on each run.
full_df_shuffled = full_df.sample(len(full_df), replace=False)

In [None]:
# Negative samples: train on everything except the last `neg_val_size` shuffled
# rows and validate on exactly those held-out rows.
neg_val_size=50000
neg_train_ds = CommonVoiceDataset('dataset/Common Voice Corpus 1/en/', 16000)
neg_train_ds.use_df(full_df_shuffled.iloc[:-neg_val_size])

neg_val_ds = CommonVoiceDataset('dataset/Common Voice Corpus 1/en/', 16000)
# Take the tail slice. `iloc[neg_val_size:]` would keep every row except the
# first `neg_val_size`, which overlaps the training slice above and leaks
# training data into validation.
neg_val_ds.use_df(full_df_shuffled.iloc[-neg_val_size:])


In [None]:
# Two tables read from disk; both are later used to build the positive datasets.
we_df = pd.read_csv('temp/we_df.csv')
we_seg_df = pd.read_csv('temp/wee_seg_16000.csv', index_col=0)

# One random permutation of row positions, applied to both frames so they are
# reordered identically.
# NOTE(review): this only keeps the frames paired if the two CSVs are row-aligned — confirm.
idx = np.random.choice(len(we_df), len(we_df), replace=False)
we_df_shuffled = we_df.iloc[idx]
we_seg_df_shuffled = we_seg_df.iloc[idx]



In [None]:
# Positive samples: the last `pos_val_size` shuffled rows are held out for
# validation and the rest are used for training. The segment table is sliced
# with the same bounds as the metadata frame.
pos_val_size = 500
pos_train_ds = CommonVoiceDataset('dataset/Common Voice Corpus 1/en/', 16000)
pos_train_ds.use_df(we_df_shuffled[:-pos_val_size])
we_seg_df_shuffled_train = we_seg_df_shuffled[:-pos_val_size]

pos_val_ds = CommonVoiceDataset('dataset/Common Voice Corpus 1/en/', 16000)
pos_val_ds.use_df(we_df_shuffled[-pos_val_size:])
we_seg_df_shuffled_val = we_seg_df_shuffled[-pos_val_size:]


In [None]:
class GlobalMaxPool1d(nn.Module):
    """Max-pool a tensor over the full extent of its last dimension.

    Args:
        sz: Accepted but not used by this module.
        squeeze: When true, the pooled size-1 last axis is dropped from the result.
    """

    def __init__(self, sz=None, squeeze=False):
        super().__init__()
        self.squeeze = squeeze

    def forward(self, x):
        """Return the maximum of `x` along its last axis."""
        # A pooling window as wide as the input leaves a single value per channel.
        window = x.size()[-1]
        pooled = nn.functional.max_pool1d(input=x, kernel_size=window)
        # Picking the last position of the size-1 axis removes that axis.
        return pooled[..., -1] if self.squeeze else pooled

    

class Model(nn.Module):
    """1-D convolutional stack over single-channel input.

    Four strided Conv1d + ReLU stages widen the channels 1 -> 32 -> 64 -> 128 -> 256,
    after which GlobalMaxPool1d keeps the per-channel maximum over the last axis.

    NOTE(review): with the dense head disabled (see __init__) the result holds 256
    unbounded ReLU activations per input rather than a single probability, while
    other cells in this notebook threshold the output at 0.5 and feed it to
    binary_cross_entropy — confirm which variant is intended.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, kernel_size, stride) of each convolution stage.
        stage_specs = [
            (1, 32, 16, 8),
            (32, 64, 16, 8),
            (64, 128, 8, 4),
            (128, 256, 4, 2),
        ]

        layers = []
        for c_in, c_out, kernel, stride in stage_specs:
            layers.append(nn.Conv1d(c_in, c_out, kernel, stride=stride))
            layers.append(nn.ReLU())
        layers.append(GlobalMaxPool1d(squeeze=True))

        # Disabled head, kept for reference; it would follow the pooling layer:
        #   nn.Dropout(0.4)
        #   nn.Flatten()
        #   nn.Linear(256, 64), nn.ReLU()
        #   nn.Linear(64, 1), nn.Sigmoid()
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the stack and drop every size-1 axis of the result."""
        # NOTE(review): squeeze() without a dim also removes a batch axis of size 1 —
        # confirm that callers expect this for single-sample batches.
        return self.conv(x).squeeze()


# Smoke test: build a model (left on the default device; the move to CUDA is
# commented out) and show the output shape for one random input of 120000 samples.
m = Model()#.to('cuda')
# NOTE(review): `optimiser` is not referenced again at module level in the visible cells.
optimiser = Adam(m.parameters(), 0.001)
m(tch.rand(1, 1, 120000)).shape

In [None]:
# Count every scalar stored in the model's state_dict (parameters plus any buffers).
n_params = sum(t.numel() for t in m.state_dict().values())
n_params

In [None]:
from torch.utils.data import DataLoader


In [None]:
def get_dl(pos_ds, neg_ds, we_seg_df, batch_size=64, shuffle=True):
    """Build a DataLoader over the merged positive and negative sample sets.

    Args:
        pos_ds: Dataset handed to PositiveSampleSet.
        neg_ds: Dataset handed to NegativeSampleSet.
        we_seg_df: Segment table handed to PositiveSampleSet together with `pos_ds`.
        batch_size: Batch size of the returned loader (default 64).
        shuffle: Whether the loader reshuffles on every pass (default True).

    Returns:
        A DataLoader over MergePosNegSet(positives, negatives).
    """
    # Both sample sets receive the same 400 argument (presumably the segment
    # length — TODO confirm in prep_we_sample).
    positives = PositiveSampleSet(pos_ds, 400, we_seg_df, 0.2) # make it 0.2 to reduce the confusion
    negatives = NegativeSampleSet(neg_ds, 400)
    merged = MergePosNegSet(positives, negatives)

    return DataLoader(merged, batch_size=batch_size, shuffle=shuffle)
# One loader per split, each pairing the positive dataset with the matching
# slice of the segment table.
train_dl = get_dl(pos_train_ds, neg_train_ds, we_seg_df_shuffled_train)
val_dl = get_dl(pos_val_ds, neg_val_ds, we_seg_df_shuffled_val)


In [None]:
# Preload the cache: walk the whole training loader once so every sample is
# produced at least one time (presumably the sample sets cache what they load —
# TODO confirm in prep_we_sample). The loop body is a no-op expression.
# Side effect: leaves the last batch bound to the globals `x` and `y` (and `i`).
for i, (x, y) in enumerate(train_dl):
    x, y

In [None]:
# Preload the cache: same single pass as for training, over the validation loader.
# Side effect: leaves the last validation batch bound to the globals `x` and `y`.
for x, y in val_dl: 
    x, y

In [None]:
# Learning-rate search starting at 1e-4. The Model and Adam classes are passed,
# not instances (presumably find_lr constructs them itself — TODO confirm in train_tools).
find_lr(Model, train_dl, Adam, nnfn.binary_cross_entropy, starting_lr=1e-4)

In [None]:
from torch import optim 
from train_tools import find_lr, check_with_patient, UpdatingPlotlyLines, fit_step, evaluate, evaluate_v2
from nb_tools import show_global_variables
@show_global_variables
def fit(
        model: nn.Module, 
        train_dataloader: DataLoader, 
        val_dataloader:DataLoader, 
        optimiser: optim.Optimizer, 
        loss_fn, 
        epochs: int, 
        silent=True
    ):
    """Train `model` for up to `epochs` epochs while plotting live metrics.

    Each epoch runs one `fit_step` over the training loader, then scores the
    validation loader with `evaluate_v2` to get the validation loss and the
    accuracy at a 0.5 threshold. Training ends early as soon as the callable
    returned by `check_with_patient(15)` returns a truthy value for the
    validation loss (presumably a patience-based early-stopping check — TODO
    confirm in train_tools).

    Args:
        model: Network to train.
        train_dataloader: Loader passed to `fit_step`.
        val_dataloader: Loader passed to `evaluate_v2`.
        optimiser: Optimiser passed to `fit_step`.
        loss_fn: Called as `loss_fn(predictions, targets)`.
        epochs: Maximum number of epochs.
        silent: Forwarded to `fit_step`.

    Returns:
        The UpdatingPlotlyLines figure holding the recorded curves.
    """
    plot = UpdatingPlotlyLines('epoch', ['train_loss', 'eval_loss', 'acc'])
    plot.display()
    should_stop = check_with_patient(15)

    for epoch in range(epochs):
        train_loss = fit_step(model, train_dataloader, optimiser, loss_fn, silent=silent)

        preds, targets = evaluate_v2(model, val_dataloader)
        eval_loss = loss_fn(preds, targets)

        # Accuracy: share of targets that equal the prediction thresholded at 0.5.
        hits = targets == (preds > 0.5)
        acc = tch.sum(hits) / len(targets)

        plot.append(
            epoch=epoch,
            train_loss=train_loss,
            eval_loss=eval_loss.cpu(),
            acc=acc.cpu(),
        )

        if should_stop(eval_loss):
            break

    return plot

# Fresh model on the GPU with its own Adam optimiser, trained for at most 200
# epochs with binary cross-entropy as the loss.
m = Model().to('cuda')
o = Adam(m.parameters(), lr=0.0015,)

fit(m, train_dl, val_dl, o, nnfn.binary_cross_entropy, 200 )



In [None]:
from train_tools import find_lr, check_with_patient, UpdatingPlotlyLines, fit_step, evaluate, evaluate_v2


# Error inspection

In [None]:
def evaluate_v3(model: nn.Module, dataloader: DataLoader):
    """Collect the misclassified inputs, and their scores, from one pass over `dataloader`.

    Every batch is scored with `model` under `no_grad`; a score above 0.5 counts
    as a positive prediction. The model is switched to eval mode and left there.

    Args:
        model: Module producing one score per input in the batch. It must own at
            least one parameter, which is used to pick the device.
        dataloader: Iterable of `(x, y)` batches; `y` is converted to bool.

    Returns:
        A tuple `(fp_xs, fn_xs, fp_ys, fn_ys)`: the inputs predicted positive but
        labelled negative, the inputs predicted negative but labelled positive,
        and the model scores for those two groups, each concatenated over batches.

    Raises:
        StopIteration: if `model` has no parameters.
        RuntimeError: from `tch.concat` if `dataloader` yields no batches.
    """
    device = next(model.parameters()).device
    model.eval()

    fp_xs, fn_xs = [], []
    fp_ys, fn_ys = [], []

    with tch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)

            # A model that squeezes its output returns a 0-d tensor for a batch of
            # one, which the 1-d masks below cannot index. Restoring the label
            # shape makes scores and masks line up; it is a no-op for larger batches.
            out = model(x).reshape(y.shape)
            y_pred = out > 0.5

            y = y.to(bool)

            false_positive = y_pred & (~y)
            false_negative = (~y_pred) & y

            fp_xs.append(x[false_positive])
            fp_ys.append(out[false_positive])

            fn_xs.append(x[false_negative])
            fn_ys.append(out[false_negative])

    return tch.concat(fp_xs), tch.concat(fn_xs), tch.concat(fp_ys), tch.concat(fn_ys)
# Misclassified validation inputs (fp = false positives, fn = false negatives)
# and the scores the model gave them.
fp, fn, fp_ys, fn_ys = evaluate_v3(m, val_dl)

In [None]:
len(fn)

In [None]:
len(fp)

In [None]:
Audio(fp[0].cpu(), rate=16000)

In [None]:
Audio(fp[2].cpu(), rate=16000)

In [None]:
Audio(fp[1].cpu(), rate=16000)

In [None]:
Audio(fp[3].cpu(), rate=16000)

In [None]:
Audio(fp[2].cpu(), rate=16000)

In [None]:
Audio(fp[3].cpu(), rate=16000)

In [None]:
Audio(fp[4].cpu(), rate=16000)

In [None]:
Audio(fn[0].cpu(), rate=16000)

In [None]:
Audio(fn[1].cpu(), rate=16000)

In [None]:
Audio(fn[2].cpu(), rate=16000)

In [None]:
# Detached copy of false-negative sample 2 that tracks gradients; the 2:3 slice
# keeps the leading axis so the copy can be fed to the model directly.
x = fn[2:3].clone().detach().requires_grad_(True)

In [None]:
# Optimise the input tensor `x` itself; no model weights are given to this optimiser.
# NOTE(review): this rebinds `o`, which earlier held the optimiser used for training.
o = Adam([x], lr=0.0001)


In [None]:

# Nudge the input `x` for ten steps so that the model's output for it moves towards 1.
for _ in range(10):
    # backward() adds to x.grad, so clear the previous step's gradient first;
    # otherwise every update is driven by the sum of all earlier gradients.
    o.zero_grad()
    l = 1-m(x)
    l.backward()
    o.step()

In [None]:
m(x)

In [None]:
m(fn[2:3])

In [None]:
px.line((fn[2:3].cpu()-x.clone().detach().cpu())[0,0])

In [None]:
px.line((x.clone().detach().cpu())[0,0])

In [None]:
px.line(y=[(fn[2:3].cpu())[0,0], (x.clone().detach().cpu())[0,0]])

In [None]:
Audio((fn[2:3].cpu()-x.clone().detach().cpu())[0], rate=16000)

In [None]:
l = 1-m(x)

In [None]:
# x.grad still holds the gradient left by earlier backward() calls; clear it so
# the optimiser step that follows uses only the gradient of this loss.
o.zero_grad()
l.backward()

In [None]:
o.step()

In [None]:
Audio(x[0, 0].clone().detach().cpu(), rate=16000)

In [None]:
Audio(fn[2].cpu(), rate=16000)

In [None]:
# Spectrogram is a transform module: construct it with its default settings,
# then call it on the waveform. Passing the tensor to the constructor would use
# it as `n_fft` instead of transforming it. The waveform is moved to the CPU to
# match the freshly built transform.
tatx.Spectrogram()(fn[0, 0].cpu())

In [None]:
Audio(fn[5].cpu(), rate=16000)

In [None]:
fp.shape

In [None]:
# `dl` exists only as a local inside get_dl(), so it is undefined at this level;
# score the validation loader instead.
# Also bind the predictions and labels that the metric cells after this one read
# (`y_pred` is not defined anywhere else at module level).
y_pred, y = evaluate_v2(m, val_dl)
evaluate(m, val_dl, nnfn.binary_cross_entropy)

In [None]:
y_pred>0.5

In [None]:
# Accuracy: fraction of labels that equal the prediction thresholded at 0.5.
tch.sum(y == (y_pred>0.5))/len(y)

In [None]:
# Of the samples scored above 0.5, the fraction labelled 1 (precision).
tch.sum((y==1) & (y_pred>0.5))/tch.sum(y_pred>0.5)

In [None]:
# Of the samples scored above 0.5, the fraction labelled 0 (false positives among predicted positives).
tch.sum((y==0) & (y_pred>0.5))/tch.sum(y_pred>0.5)

In [None]:
# Of the samples scored below 0.5, the fraction labelled 1 (missed positives among predicted negatives).
tch.sum((y==1) & (y_pred<0.5))/tch.sum(y_pred<0.5)

In [None]:
# Of the samples scored below 0.5, the fraction labelled 0.
tch.sum((y==0) & (y_pred<0.5))/tch.sum(y_pred<0.5)

# Export

In [None]:
# Export the model to ONNX, using a random (1, 1, 6400) CUDA tensor as the
# example input, and write the result to model_v0.onnx. The same 6400 is later
# passed to RunModel when this file is loaded.
onnx_program = tch.onnx.dynamo_export(m, tch.rand(1, 1, 6400).to('cuda'))
onnx_program.save('model_v0.onnx')

# Validation 2

In [None]:
# Load the clip together with its native sample rate, then resample row 0 of the
# loaded tensor (the first channel) to 16000 Hz. `sig` is 1-D afterwards.
sig, fs = ta.load('./dataset/weeeee.mp3')
sig = tafn.resample(sig[0], fs, 16000)

In [None]:
Audio(sig[23000:23000+6400], rate=16000)

In [None]:
x = sig

In [None]:
sig

In [None]:
x[None, None, :].numpy()

In [None]:
from wecognition_pipeline import RunModel 

 

# Load the exported ONNX file through the project's RunModel wrapper.
# NOTE(review): this rebinds `m` (previously the torch Model) and `x`; earlier
# cells that use those names behave differently if re-run after this one.
m = RunModel('model_v0.onnx', 16000, 6400)
# Presumably cuts the signal into windows; the meaning of the 16000 and 0.5
# arguments is defined in wecognition_pipeline — TODO confirm.
x = m.striding(sig.numpy(), 16000, 0.5)

In [None]:
x[None, 0, :].shape

In [None]:
# Single call on the first element of `x`; its result is discarded because it is
# not the last expression of the cell.
m.model(x[None,0, None, ])
# Score every element of `x` separately, adding two leading axes of size 1 to
# each before the call, and collect the outputs in order.
ys = []
for x_ in x:
    ys.append(m.model(x_[None, None, :]))

In [None]:
px.line(ys)

In [None]:
Audio(sig, rate=16000)

In [None]:
sig.shape

In [None]:
len(sig.numpy())

In [None]:
# NOTE(review): `x` was last bound to the result of m.striding(sig.numpy(), ...);
# .numpy() fails here if that result is already a NumPy array — confirm.
x.numpy().shape