<a href="https://colab.research.google.com/github/nefario7/cmu-deeplearning/blob/working-hw1/hw1_pt2_updated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from IPython.display import clear_output 
! apt-get install -y -qq software-properties-common python-software-properties module-init-tools
! add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
! apt-get update -qq 2>&1 > /dev/null
! apt-get -y install -qq google-drive-ocamlfuse fuse

from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass

! google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
! echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}
% cd /content
! mkdir cmudrive
% cd ..
! google-drive-ocamlfuse /content/cmudrive
! pip install kaggle wandb torch-summary
! mkdir ~/.kaggle
! cp /content/cmudrive/IDL/kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! wandb login

! pip install --upgrade --force-reinstall --no-deps kaggle 
! kaggle config set -n path -v /content
! kaggle competitions download -c 11-785-s22-hw1p2
! unzip -q /content/competitions/11-785-s22-hw1p2/11-785-s22-hw1p2.zip -d /content/hw1-data

clear_output()

In [2]:
import os
import csv
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
#!pip install --upgrade --force-reinstall --no-deps kaggle

In [3]:
import wandb
import yaml
import time
import csv
import pandas as pd
from torchsummary import summary
from torch.cuda.amp import GradScaler, autocast
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

In [43]:
def model_saving(path, args, save_metadata=True, exp="Experiment"):
    save_name = ''
    if not args['CSV_PATH']:
        save_name += "full_"

    for parameter, val in args.items():
        abbr = parameter[0] if len(parameter) > 2 else parameter
        if parameter == 'lr' :
            data = abbr + str(val)
            save_name += data
            break
        else:
            data = abbr + str(val) + '_'
            save_name += data

    save_path = os.path.join(path, save_name)
    try:
        os.mkdir(save_path)
    except FileExistsError:
        d = input("Model name already exists. Delete existing model? (y/n)")
        if d == 'y':
            import shutil
            shutil.rmtree(save_path)
            os.mkdir(save_path)
        else:
            return None

    with open(os.path.join(save_path, 'model_parameters.yaml'), 'w') as metadata:
        yaml.dump({'Experiment': exp}, metadata, indent=8, default_flow_style=False)
        yaml.dump(args, metadata, indent=4, default_flow_style=False)

    return save_path

def generate_submission(save_path, labels, timestamp): 
    sub_dir = save_path + timestamp
    os.mkdir(sub_dir)
    sub_path = os.path.join(sub_dir, 'submission.csv')

    with open(sub_path, 'w') as f:
        csvwrite = csv.writer(f)
        csvwrite.writerow(['id', 'label'])
        for i in range(len(labels)):
            csvwrite.writerow([i, labels[i]])

    print(f"File saved at : {sub_path}")
    print(f"Preview of submission.csv")
    df = pd.read_csv(sub_path)
    df.head()

    return sub_path

In [44]:
class Network(torch.nn.Module):
    def __init__(self, context=0):
        super(Network, self).__init__()
        # TODO: Please try different architectures
        c = (1 + 2 * context)
        INPUT_SIZE = c * 13
        NUM_CLASSES = 40

        layers = [
            nn.Linear(INPUT_SIZE, 512),
            nn.BatchNorm1d(num_features = 512),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(512, 256),
            nn.BatchNorm1d(num_features = 256),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(256, 128),
            nn.BatchNorm1d(num_features = 128),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(128, NUM_CLASSES),
        ]
        
        self.classifier = nn.Sequential(*layers)

    def forward(self, A0):
        x = self.classifier(A0)
        return x

In [46]:
class LibriSamples(torch.utils.data.Dataset):
    def __init__(self, data_path, sample=20000, shuffle=True, partition="dev-clean", csvpath=None):
        # sample represent how many npy files will be preloaded for one __getitem__ call
        self.sample = sample 
        
        self.X_dir = data_path + "/" + partition + "/mfcc/"
        self.Y_dir = data_path + "/" + partition +"/transcript/"
        
        self.X_names = os.listdir(self.X_dir)
        self.Y_names = os.listdir(self.Y_dir)

        # using a small part of the dataset to debug
        if csvpath:
            subset = self.parse_csv(csvpath)
            self.X_names = [i for i in self.X_names if i in subset]
            self.Y_names = [i for i in self.Y_names if i in subset]
        
        if shuffle == True:
            XY_names = list(zip(self.X_names, self.Y_names))
            random.shuffle(XY_names)
            self.X_names, self.Y_names = zip(*XY_names)
        
        assert(len(self.X_names) == len(self.Y_names))
        self.length = len(self.X_names)
        
        self.PHONEMES = [
            'SIL',   'AA',    'AE',    'AH',    'AO',    'AW',    'AY',  
            'B',     'CH',    'D',     'DH',    'EH',    'ER',    'EY',
            'F',     'G',     'HH',    'IH',    'IY',    'JH',    'K',
            'L',     'M',     'N',     'NG',    'OW',    'OY',    'P',
            'R',     'S',     'SH',    'T',     'TH',    'UH',    'UW',
            'V',     'W',     'Y',     'Z',     'ZH',    '<sos>', '<eos>']
      
    @staticmethod
    def parse_csv(filepath):
        subset = []
        with open(filepath) as f:
            f_csv = csv.reader(f)
            for row in f_csv:
                subset.append(row[1])
        return subset[1:]

    def __len__(self):
        return int(np.ceil(self.length / self.sample))
        
    def __getitem__(self, i):
        sample_range = range(i*self.sample, min((i+1)*self.sample, self.length))
        
        X, Y = [], []
        for j in sample_range:
            X_path = self.X_dir + self.X_names[j]
            Y_path = self.Y_dir + self.Y_names[j]
            
            label = [self.PHONEMES.index(yy) for yy in np.load(Y_path)][1:-1]

            X_data = np.load(X_path)
            X_data = (X_data - X_data.mean(axis=0))/X_data.std(axis=0)
            X.append(X_data)
            Y.append(np.array(label))
            
        X, Y = np.concatenate(X), np.concatenate(Y)
        return X, Y
    
class LibriItems(torch.utils.data.Dataset):
    def __init__(self, X, Y, context = 0):
        assert(X.shape[0] == Y.shape[0])
        
        self.length  = X.shape[0]
        self.context = context

        if context == 0:
            self.X, self.Y = X, Y
        else:
            X = np.pad(X, ((context,context), (0,0)), 'constant', constant_values=(0,0))
            self.X, self.Y = X, Y
        
    def __len__(self):
        return self.length
        
    def __getitem__(self, i):
        if self.context == 0:
            xx = self.X[i].flatten()
            yy = self.Y[i]
        else:
            xx = self.X[i:(i + 2*self.context + 1)].flatten()
            yy = self.Y[i]
        return xx, yy

In [47]:
def train(args, model, device, train_samples, optimizer, criterion, epoch):
    model.train()
    scaler = GradScaler()
    for i in range(len(train_samples)):
        X, Y = train_samples[i]
        train_items = LibriItems(X, Y, context=args['context'])
        train_loader = torch.utils.data.DataLoader(train_items, batch_size=args['batch_size'], num_workers=2, pin_memory=True, shuffle=True)

        for batch_idx, (data, target) in enumerate(train_loader):
            data = data.float().to(device)
            target = target.long().to(device)

            optimizer.zero_grad(set_to_none=True)
            with autocast():
                output = model(data)
                loss = criterion(output, target)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            if batch_idx % args['log_interval'] == 0:
                if args['log']:
                    wandb.log({"Training Loss": loss.item()})
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))

def test(args, model, device, dev_samples):
    model.eval()
    true_y_list = []
    pred_y_list = []
    with torch.no_grad():
        for i in range(len(dev_samples)):
            X, Y = dev_samples[i]

            test_items = LibriItems(X, Y, context=args['context'])
            test_loader = torch.utils.data.DataLoader(test_items, batch_size=args['batch_size'], shuffle=False)

            for data, true_y in test_loader:
                data = data.float().to(device)
                true_y = true_y.long().to(device)                
                
                output = model(data)
                pred_y = torch.argmax(output, axis=1)

                pred_y_list.extend(pred_y.tolist())
                true_y_list.extend(true_y.tolist())

    train_accuracy =  accuracy_score(true_y_list, pred_y_list)
    return train_accuracy

In [None]:
def main(args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    model = Network(args['context']).to(device)

    optimizer = optim.Adam(model.parameters(), lr=args['lr'])

    criterion = torch.nn.CrossEntropyLoss()

    train_samples = LibriSamples(data_path = args['LIBRI_PATH'], shuffle=True, partition="train-clean-100", csvpath=args['CSV_PATH'])
    dev_samples = LibriSamples(data_path = args['LIBRI_PATH'], shuffle=True, partition="dev-clean")

    if args['log']:
        wandb.init(project="phenome-hw1", entity="nefario7", config=args)

    for epoch in range(1, args['epoch'] + 1):
        train(args, model, device, train_samples, optimizer, criterion, epoch)
        test_acc = test(args, model, device, dev_samples)

        if args['log']:
            wandb.log({"Accuracy": test_acc * 100})
        print('Validation Accuracy ', test_acc)

    if args['save']:
        model_path = model_saving(path=r'/content/cmudrive/IDL/hw1-models-other', args=args, save_metadata=True, exp="Suggestion Med")
        torch.save(model, os.path.join(model_path, "model.pt"))
        print("Model saved at : ", model_path)

    if args['log']:
        wandb.finish()

In [None]:
args = {
    '': 'Sample',
    'batch_size': 65536,
    'epoch': 10,
    'context': 16,
    'lr': 0.001,
    'LIBRI_PATH': '/content/hw1-data/hw1p2_student_data',
    'CSV_PATH': '/content/hw1-data/train_filenames_subset_8192_v2.csv',
    'log_interval': 500,
    'save' : True,
    'log' : True
}

main(args)

## Submission

In [38]:
import csv
from tqdm import tqdm
import os, datetime

class SubmissionSamples(torch.utils.data.Dataset):
    def __init__(self, data_path, csv_path, sample=20000, shuffle=False, partition="test-clean"):
        # sample represent how many npy files will be preloaded for one __getitem__ call
        self.sample = sample 
        self.X_dir = data_path + "/" + partition + "/mfcc/"
        self.X_names = os.listdir(self.X_dir)

        if csv_path:
            self.X_names = list(pd.read_csv(csv_path).file)
        
        self.length = len(self.X_names)
        self.PHONEMES = [
            'SIL',   'AA',    'AE',    'AH',    'AO',    'AW',    'AY',  
            'B',     'CH',    'D',     'DH',    'EH',    'ER',    'EY',
            'F',     'G',     'HH',    'IH',    'IY',    'JH',    'K',
            'L',     'M',     'N',     'NG',    'OW',    'OY',    'P',
            'R',     'S',     'SH',    'T',     'TH',    'UH',    'UW',
            'V',     'W',     'Y',     'Z',     'ZH',    '<sos>', '<eos>']

    def __len__(self):
        return int(np.ceil(self.length / self.sample))
        
    def __getitem__(self, i):
        sample_range = range(i*self.sample, min((i+1)*self.sample, self.length))
        
        X, Y = [], []
        for j in sample_range:
            X_path = self.X_dir + self.X_names[j]
            X_data = np.load(X_path)
            X_data = (X_data - X_data.mean(axis=0))/X_data.std(axis=0)
            X.append(X_data)

        X = np.concatenate(X)
        return X
        
class SubmissionItems(torch.utils.data.Dataset):
    def __init__(self, X, context = 0):   
        self.length  = X.shape[0]
        self.context = context

        if context != 0:
            X = np.pad(X, ((context,context), (0,0)), 'constant', constant_values=(0,0))
        self.X = X
        
    def __len__(self):
        return self.length
        
    def __getitem__(self, i):
        if self.context == 0:
            xx = self.X[i].flatten()
        else:
            xx = self.X[i:(i + 2 * self.context + 1)].flatten()
        return xx

In [39]:
# TODO: Implement ensembling
model_name = r'fullSample_b65536_e1_c16_lr0.001'
model_type = r'hw1-models-other'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_dir = r'/content/cmudrive/IDL'
model_path = os.path.join(model_dir, model_type, model_name, 'model.pt')
meta_path = os.path.join(model_dir,  model_type, model_name, 'model_parameters.yaml')

speech_model = torch.load(model_path).to(device)
test_samples = SubmissionSamples(data_path = r'/content/hw1-data/hw1p2_student_data', csv_path=r'/content/hw1-data/test_order.csv')

speech_model.eval()
labels = []

with torch.no_grad():
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    for i in range(len(test_samples)):
        X = test_samples[i]

        test_items = SubmissionItems(X, context=args['context'])
        test_loader = torch.utils.data.DataLoader(test_items, batch_size=args['batch_size'], shuffle=False)

        for data in tqdm(test_loader):
            data = data.float().to(device)              
        
            output = speech_model(data)
            y = torch.argmax(output, axis=1)

            labels.extend(y.tolist())

100%|██████████| 30/30 [00:14<00:00,  2.06it/s]


In [40]:
sub_path = generate_submission(r'/content/cmudrive/IDL/hw1-submission/', labels, timestamp)
! kaggle competitions submit -c 11-785-s22-hw1p2 -f $sub_path -m "Sample Submission"

File saved at : /content/cmudrive/IDL/hw1-submission/2022-02-03_19-36-42/submission.csv
Preview of submission.csv
100% 20.5M/20.5M [00:02<00:00, 7.57MB/s]
Successfully submitted to Frame-Level Speech Recognition