tensorboard --logdir=runs

In [6]:
import os
from matplotlib import pyplot as plt
import time
import numpy as np
import pandas as pd
import math
import glob
import random
from collections import Counter

import torchaudio

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import torchaudio.functional as torchaudio_F
import torchaudio.transforms as torchaudio_T
import tqdm

from src.dataset_v2 import *
from src.model import CPC, CPC_classifier, CPC_classifier_v3

In [7]:
def get_dataloader_metadata(wav_path, dataset_path, top_labels=None): 
    path_list = glob.glob(wav_path+'/*.wav')
    dialog_speaker_id = list(map(lambda x: x.split('/')[-1].split('.')[0], path_list))
    dialog_id = list(map(lambda x: x.split('/')[-1].split('_')[0], path_list))
    df_path = pd.DataFrame({'dialog_id': dialog_id, 'dialog_speaker_id': dialog_speaker_id, 'path': path_list})

    df_metadata = pd.read_csv(dataset_path, usecols=['dialog_id', 'speaker', 'da_tag', 'start_time', 'end_time'])
    df_metadata['dialog_speaker_id'] = df_metadata['dialog_id'] + '_' + df_metadata['speaker']
    df_metadata = df_metadata.merge(df_path, how='inner')

    # Compute Top 10 tags for train
    if top_labels == None:
        cnt = Counter(df_metadata['da_tag'])
        print(cnt.most_common()[0:10])
        top_labels = list(map(lambda x: x[0], cnt.most_common()[0:10]))
    label_map = dict(zip(top_labels, range(10)))

    df_metadata = df_metadata[df_metadata['da_tag'].apply(lambda x: x in top_labels)]
    df_metadata['label'] = df_metadata['da_tag'].apply(lambda x: label_map[x])

    df_metadata = df_metadata.reset_index(drop=True)
    
    return df_metadata, top_labels

In [8]:
wav_path = os.path.join(os.path.expanduser('~'), 'Jupyter-data/Switchboard-DA')
trainset_path = './train.csv'
testset_path = './test.csv'
df_dl_train, top_labels = get_dataloader_metadata(wav_path, trainset_path)
df_dl_test, _ = get_dataloader_metadata(wav_path, testset_path, top_labels=top_labels)

[('x', 29998), ('sd', 19364), ('b', 10028), ('sv', 7428), ('+', 4919), ('%', 4294), ('aa', 3014), ('ba', 1112), ('qy', 1043), ('ny', 729)]


# Training

In [9]:
batch_size = 32
dataset_train = Switchboard_Dataset_trainer_v3(df_dl_train)
dataset_test = Switchboard_Dataset_trainer_v3(df_dl_test)
dataloader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, collate_fn=padding_tensor_trainer_v3)
dataloader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=True, collate_fn=padding_tensor_trainer_v3)

In [10]:
model_clf = CPC_classifier_v3(
    phi_dim=256,
    c_dim=128,
    rnn_num_layers=1,
)

for name, param in model_clf.encoder2.named_parameters():
    param = nn.parameter.Parameter(torch.ones(param.shape), requires_grad=False)
    
model_clf.cuda()
optimizer = optim.Adam(model_clf.parameters(), lr=0.001)

loss_vec = []

writer = SummaryWriter(flush_secs=60)
cnt_step = 0

In [None]:
for ep in range(1,100):
    model_clf.train()
    avg_loss = 0
    for x, y in tqdm.tqdm(dataloader_train):
        optimizer.zero_grad()
        y_hat = model_clf(x.to('cuda'))
        loss = F.cross_entropy(y_hat, y.to('cuda'))

        loss.backward()
        optimizer.step()
        # scheduler.step()

        avg_loss += loss.cpu().detach().numpy()/x.shape[0]
        cnt_step += 1
        writer.add_scalar('Loss/train', loss, cnt_step)
        acc = (torch.topk(y_hat,1)[1].cpu().squeeze(1) == y).numpy().mean()
        writer.add_scalar('Acc/train', acc, cnt_step)
    
    model_clf.eval()
    avg_loss = 0
    avg_acc = 0
    with torch.no_grad():
        for x, y in tqdm.tqdm(dataloader_test):
            y_hat = model_clf(x.to('cuda'))
            
            loss = F.cross_entropy(y_hat, y.to('cuda'))
            avg_loss += loss.cpu().detach().numpy()
            
            acc = (torch.topk(y_hat,1)[1].cpu().squeeze(1) == y).numpy().sum()
            avg_acc += acc

    avg_loss = avg_loss/len(dataloader_test)
    avg_acc = avg_acc/len(dataset_test)
    
    print(avg_loss)
    print(avg_acc)

In [28]:
torch.save(model_clf.state_dict(), './sw-clfv3-vF-step270k.pth')

# Evaluate

In [10]:
model_clf = CPC_classifier_v3(
    phi_dim=256,
    c_dim=128,
    rnn_num_layers=1,
)
model_clf.load_state_dict(torch.load('./sw-clfv3-vF-step160k.pth'))
model_clf.cuda()
model_clf.eval()
print('ready')

ready


In [11]:
model_clf.eval()
avg_loss = 0
avg_acc = 0
with torch.no_grad():
    for x, y in tqdm.tqdm(dataloader_test):
        y_hat = model_clf(x.to('cuda'))

        loss = F.cross_entropy(y_hat, y.to('cuda'))
        avg_loss += loss.cpu().detach().numpy()

        acc = (torch.topk(y_hat,1)[1].cpu().squeeze(1) == y).numpy().sum()
        avg_acc += acc

avg_loss = avg_loss/len(dataloader_test)
avg_acc = avg_acc/len(dataset_test)
print(avg_loss)
print(avg_acc)

100%|█████████████████████████████████████████| 474/474 [00:34<00:00, 13.84it/s]

1.7598532428218343
0.6997820199484774



