<a href="https://colab.research.google.com/github/nefario7/cmu-deeplearning/blob/working-hw1/hw1_pt2_updated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Mount drive and download dataset

In [1]:
# Colab setup cell: installs google-drive-ocamlfuse, mounts Google Drive,
# installs Python dependencies, and downloads/unzips the Kaggle competition data.
from IPython.display import clear_output 
# Install the FUSE-based Google Drive client from the alessandro-strada PPA.
# NOTE(review): `2>&1 > /dev/null` sends only stdout to /dev/null; stderr is
# still shown. Use `> /dev/null 2>&1` if both should be silenced.
! apt-get install -y -qq software-properties-common python-software-properties module-init-tools
! add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
! apt-get update -qq 2>&1 > /dev/null
! apt-get -y install -qq google-drive-ocamlfuse fuse

# Authenticate the Colab user and obtain the default application credentials,
# whose client id/secret are passed to google-drive-ocamlfuse below.
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass

# First invocation prints the authorization URL; the verification code is then
# read interactively (hidden input) and piped into the second invocation.
! google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
! echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}
# Create the mount point and attach Drive at /content/cmudrive.
# NOTE(review): `mkdir` (here and for ~/.kaggle) errors if the directory already
# exists, e.g. when this cell is re-run in the same session.
% cd /content
! mkdir cmudrive
% cd ..
! google-drive-ocamlfuse /content/cmudrive
# Python packages used later in the notebook (versions are not pinned).
! pip install kaggle wandb torch-summary
# Kaggle API token is copied from Drive; the CLI requires restrictive permissions.
! mkdir ~/.kaggle
! cp /content/cmudrive/IDL/kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! wandb login

# Reinstall kaggle, point its download path at /content, then fetch and unzip
# the competition archive into /content/hw1-data.
! pip install --upgrade --force-reinstall --no-deps kaggle 
! kaggle config set -n path -v /content
! kaggle competitions download -c 11-785-s22-hw1p2
! unzip -q /content/competitions/11-785-s22-hw1p2/11-785-s22-hw1p2.zip -d /content/hw1-data

# Hide the (long) installation output once everything has run.
clear_output()

### Dependencies

In [2]:
import os
import csv
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score

In [3]:
import wandb
import yaml
import time
import csv
import pandas as pd
from torchsummary import summary
from torch.cuda.amp import GradScaler, autocast
from torch.optim.lr_scheduler import ExponentialLR, ReduceLROnPlateau
# from pytorch_lightning.callbacks.early_stopping import EarlyStopping

### Network Architecture and Dataloaders

In [20]:
def model_saving(path, args, save_metadata=True, exp="Experiment", ensemble=False):
    """Create a directory for a trained model, named after its hyper-parameters.

    The directory name is built by walking ``args`` in insertion order and
    concatenating ``<abbr><value>_`` for every entry up to and including
    ``'lr'`` (which terminates the name).  ``abbr`` is the first letter of the
    key when the key is longer than two characters, otherwise the key itself.
    The name is prefixed with ``full_`` when no ``CSV_PATH`` subset is set.

    Args:
        path: Parent directory in which the model directory is created.
            Missing parent directories are created as needed.
        args: Hyper-parameter dict; also dumped as metadata.
        save_metadata: When True (default) write ``model_parameters.yaml``
            containing ``exp`` and ``args`` into the new directory.
        exp: Free-form experiment label stored in the metadata file.
        ensemble: When True append ``-verNN`` (random NN in [10, 100)) so that
            several runs with identical hyper-parameters can coexist.

    Returns:
        The path of the created directory, or ``None`` if the directory already
        existed and the user declined to delete it.
    """
    save_name = ''
    if not args.get('CSV_PATH'):
        save_name += "full_"

    for parameter, val in args.items():
        abbr = parameter[0] if len(parameter) > 2 else parameter
        if parameter == 'lr':
            # 'lr' is the last component of the name; later keys are metadata only.
            save_name += abbr + str(val)
            break
        save_name += abbr + str(val) + '_'

    if ensemble:
        save_name = save_name + "-ver" + str(np.random.randint(10, 100))

    print(save_name)
    save_path = os.path.join(path, save_name)
    try:
        # makedirs (unlike mkdir) also creates a missing parent directory, and
        # still raises FileExistsError when the leaf directory is already there.
        os.makedirs(save_path)
    except FileExistsError:
        d = input("Model name already exists. Delete existing model? (y/n)")
        if d != 'y':
            return None
        import shutil
        shutil.rmtree(save_path)
        os.mkdir(save_path)

    if save_metadata:
        # Both dumps go to the same stream and together form one YAML mapping.
        with open(os.path.join(save_path, 'model_parameters.yaml'), 'w') as metadata:
            yaml.dump({'Experiment': exp}, metadata, indent=8, default_flow_style=False)
            yaml.dump(args, metadata, indent=4, default_flow_style=False)

    return save_path

def initialize_weights(m):
    """Initialise one module in place; intended for ``model.apply(initialize_weights)``.

    * Batch-norm layers (1d/2d/3d): scale set to 1 and shift set to 0.
    * Linear layers: Kaiming-uniform weights and zero bias.

    Parameters that do not exist (``affine=False`` batch norm, ``bias=False``
    linear) are skipped.  Any other module type is left untouched.
    """
    # The notebook's Network uses BatchNorm1d, so match every batch-norm rank
    # rather than only BatchNorm2d.
    if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        if m.weight is not None:
            nn.init.constant_(m.weight.data, 1)
        if m.bias is not None:
            nn.init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight.data)
        if m.bias is not None:
            nn.init.constant_(m.bias.data, 0)
    

In [21]:
class Network(torch.nn.Module):
    """Frame-level MLP classifier.

    The input is a flattened window of ``2 * context + 1`` frames with 13
    features each; the output has 40 logits.  Three hidden stages of widths
    1024, 512 and 256 each apply Linear -> ReLU -> BatchNorm1d -> Dropout(0.1),
    followed by a final Linear projection to the classes.
    """

    def __init__(self, context=0):
        super().__init__()
        # TODO: Please try different architectures
        frames_per_input = 2 * context + 1
        in_features = 13 * frames_per_input
        n_classes = 40

        stack = []
        for width in (1024, 512, 256):
            stack.extend([
                nn.Linear(in_features, width),
                nn.ReLU(),
                nn.BatchNorm1d(num_features=width),
                nn.Dropout(0.1),
            ])
            in_features = width
        stack.append(nn.Linear(in_features, n_classes))

        self.classifier = nn.Sequential(*stack)

    def forward(self, A0):
        """Return the class logits for a batch of flattened frame windows."""
        return self.classifier(A0)

In [22]:
class LibriSamples(torch.utils.data.Dataset):
    """Chunked loader over ``<data_path>/<partition>/{mfcc,transcript}/*.npy``.

    Each item is one chunk of up to ``sample`` utterances, returned as a pair
    ``(X, Y)`` where ``X`` is the concatenation of the per-utterance normalised
    feature arrays and ``Y`` the concatenation of the phoneme-index labels
    (with the first and last transcript token dropped).

    Args:
        data_path: Root directory of the dataset.
        sample: Number of .npy files loaded per ``__getitem__`` call.
        shuffle: Shuffle the utterance order (features and labels together).
        partition: Sub-directory of ``data_path`` to read.
        csvpath: Optional CSV listing the file names (second column, after a
            header row) to restrict the dataset to, e.g. for debugging.
    """

    def __init__(self, data_path, sample=20000, shuffle=True, partition="dev-clean", csvpath=None):
        # sample represent how many npy files will be preloaded for one __getitem__ call
        self.sample = sample

        self.X_dir = data_path + "/" + partition + "/mfcc/"
        self.Y_dir = data_path + "/" + partition + "/transcript/"

        # os.listdir returns entries in arbitrary order.  Features and labels
        # are paired by position, so both listings are sorted to guarantee that
        # index j refers to the same utterance in each directory.
        X_names = sorted(os.listdir(self.X_dir))
        Y_names = sorted(os.listdir(self.Y_dir))

        # using a small part of the dataset to debug
        if csvpath:
            subset = set(self.parse_csv(csvpath))
            X_names = [name for name in X_names if name in subset]
            Y_names = [name for name in Y_names if name in subset]

        # Check before pairing: zip() would silently drop unmatched entries.
        assert len(X_names) == len(Y_names), "mfcc and transcript file counts differ"

        if shuffle and X_names:
            XY_names = list(zip(X_names, Y_names))
            random.shuffle(XY_names)
            X_names = [x for x, _ in XY_names]
            Y_names = [y for _, y in XY_names]

        self.X_names, self.Y_names = X_names, Y_names
        self.length = len(self.X_names)

        self.PHONEMES = [
            'SIL',   'AA',    'AE',    'AH',    'AO',    'AW',    'AY',
            'B',     'CH',    'D',     'DH',    'EH',    'ER',    'EY',
            'F',     'G',     'HH',    'IH',    'IY',    'JH',    'K',
            'L',     'M',     'N',     'NG',    'OW',    'OY',    'P',
            'R',     'S',     'SH',    'T',     'TH',    'UH',    'UW',
            'V',     'W',     'Y',     'Z',     'ZH',    '<sos>', '<eos>']
        # Constant-time phoneme -> index lookup (instead of list.index per frame).
        self._phoneme_index = {p: idx for idx, p in enumerate(self.PHONEMES)}

    @staticmethod
    def parse_csv(filepath):
        """Return the second column of ``filepath``, skipping the header row."""
        with open(filepath, newline='') as f:
            f_csv = csv.reader(f)
            next(f_csv, None)  # header
            # Blank lines come back as empty rows; ignore them.
            return [row[1] for row in f_csv if len(row) > 1]

    def __len__(self):
        """Number of chunks (not utterances or frames)."""
        return int(np.ceil(self.length / self.sample))

    def __getitem__(self, i):
        """Load chunk ``i`` and return ``(X, Y)`` as concatenated numpy arrays.

        Raises:
            IndexError: if chunk ``i`` lies past the end of the dataset.
        """
        sample_range = range(i*self.sample, min((i+1)*self.sample, self.length))
        if len(sample_range) == 0:
            # IndexError (rather than np.concatenate's ValueError on an empty
            # list) lets plain iteration over the dataset terminate cleanly.
            raise IndexError(f"chunk index {i} out of range for {len(self)} chunk(s)")

        X, Y = [], []
        for j in sample_range:
            X_path = self.X_dir + self.X_names[j]
            Y_path = self.Y_dir + self.Y_names[j]

            # Drop the first and last transcript token before mapping to indices.
            tokens = np.load(Y_path).tolist()[1:-1]
            label = [self._phoneme_index[tok] for tok in tokens]

            X_data = np.load(X_path)
            # Per-utterance normalisation of every feature column.
            # NOTE(review): a column with zero variance yields inf/nan here.
            X_data = (X_data - X_data.mean(axis=0))/X_data.std(axis=0)
            X.append(X_data)
            Y.append(np.array(label))

        X, Y = np.concatenate(X), np.concatenate(Y)
        return X, Y
    
class LibriItems(torch.utils.data.Dataset):
    """Frame-level view over one chunk of features ``X`` and labels ``Y``.

    Item ``i`` is the flattened window of ``2 * context + 1`` consecutive
    frames centred on frame ``i`` together with the label of frame ``i``.
    The feature array is zero-padded by ``context`` rows on both ends so that
    every frame has a full window.
    """

    def __init__(self, X, Y, context = 0):
        assert(X.shape[0] == Y.shape[0])

        self.length = X.shape[0]
        self.context = context
        self.Y = Y
        if context == 0:
            self.X = X
        else:
            # Zero rows before and after, so edge frames still get a full window.
            self.X = np.pad(X, ((context, context), (0, 0)), 'constant', constant_values=(0, 0))

    def __len__(self):
        return self.length

    def __getitem__(self, i):
        if self.context == 0:
            window = self.X[i]
        else:
            # In the padded array, original frame i sits at row i + context,
            # so rows [i, i + 2*context] are centred on it.
            window = self.X[i:(i + 2*self.context + 1)]
        return window.flatten(), self.Y[i]

### Training and Testing

In [23]:
def train(args, model, device, train_samples, optimizer, criterion, epoch):
    """Run one training epoch over every chunk of ``train_samples``.

    Each chunk is wrapped in a ``LibriItems`` dataset and iterated in shuffled
    mini-batches.  The forward pass runs under ``autocast`` and gradients are
    scaled with a ``GradScaler`` created for this call.  Every
    ``args['log_interval']`` batches the current loss is printed and, when
    ``args['log']`` is set, sent to Weights & Biases.
    """
    model.train()
    scaler = GradScaler()

    for chunk_idx in range(len(train_samples)):
        features, labels = train_samples[chunk_idx]
        train_loader = torch.utils.data.DataLoader(
            LibriItems(features, labels, context=args['context']),
            batch_size=args['batch_size'],
            num_workers=2,
            pin_memory=True,
            shuffle=True,
        )

        for batch_idx, (data, target) in enumerate(train_loader):
            data = data.float().to(device)
            target = target.long().to(device)

            optimizer.zero_grad(set_to_none=True)
            with autocast():
                loss = criterion(model(data), target)

            # Scaled backward pass, optimizer step, then scale-factor update.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            # Only report on every log_interval-th batch.
            if batch_idx % args['log_interval'] != 0:
                continue

            if args['log']:
                wandb.log({"Training Loss": loss.item()})

            seen = batch_idx * len(data)
            total = len(train_loader.dataset)
            percent = 100. * batch_idx / len(train_loader)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, seen, total, percent, loss.item()))

def test(args, model, device, criterion, dev_samples):
    """Evaluate ``model`` on every chunk of ``dev_samples``.

    Returns:
        ``(accuracy, val_loss)`` where ``accuracy`` is the fraction of correctly
        classified frames and ``val_loss`` is a 0-dim tensor holding the
        frame-weighted mean of the per-batch losses over the whole dev set
        (previously only the loss of the final batch was returned, which made
        any scheduler driven by it react to a single noisy batch).
        The weighting assumes ``criterion`` returns a per-batch mean, as the
        default ``CrossEntropyLoss`` does.

    Raises:
        ValueError: if ``dev_samples`` yields no frames at all.
    """
    model.eval()
    true_y_list = []
    pred_y_list = []
    loss_sum = 0.0      # sum over batches of (batch mean loss * batch size)
    frame_count = 0
    with torch.no_grad():
        for i in range(len(dev_samples)):
            X, Y = dev_samples[i]

            test_items = LibriItems(X, Y, context=args['context'])
            test_loader = torch.utils.data.DataLoader(test_items, batch_size=args['batch_size'], shuffle=False)

            for data, true_y in test_loader:
                data = data.float().to(device)
                true_y = true_y.long().to(device)

                output = model(data)
                batch_loss = criterion(output, true_y).item()
                if args['log']:
                    wandb.log({"Validation Loss": batch_loss})

                batch_frames = true_y.size(0)
                loss_sum += batch_loss * batch_frames
                frame_count += batch_frames

                pred_y = torch.argmax(output, dim=1)

                pred_y_list.extend(pred_y.tolist())
                true_y_list.extend(true_y.tolist())

    if frame_count == 0:
        raise ValueError("dev_samples produced no frames to evaluate")

    val_accuracy = accuracy_score(true_y_list, pred_y_list)
    # Kept as a tensor so callers that use `.item()` or pass it to a scheduler keep working.
    val_loss = torch.tensor(loss_sum / frame_count)
    return val_accuracy, val_loss

In [24]:
def main(args):
    """Train a ``Network`` according to ``args``, evaluate it each epoch and optionally save it.

    Keys read from ``args``: ``context``, ``optimizer`` ('adam' | 'sgdn'),
    ``scheduler`` (None | 'rlrop' | 'exp'), ``lr``, ``LIBRI_PATH``,
    ``CSV_PATH``, ``epoch``, ``log``, ``save``, ``ensemble`` (plus whatever
    ``train``/``test`` read, e.g. ``batch_size`` and ``log_interval``).

    Raises:
        ValueError: for an unknown optimizer or scheduler name.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Model
    model = Network(args['context']).to(device)

    # NOTE: args['weight'] is not used here; custom weight initialisation is
    # not applied and the layers keep PyTorch's default initialisation.

    # Optimizer
    optimizer_name = args["optimizer"]
    if optimizer_name == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=args['lr'], weight_decay=1e-5)
    elif optimizer_name == 'sgdn':
        optimizer = optim.SGD(model.parameters(), lr=args['lr'], momentum=0.9, nesterov=True)
    else:
        # Fail here instead of with a NameError once training starts.
        raise ValueError(f"Unknown optimizer {optimizer_name!r}; expected 'adam' or 'sgdn'")

    # Scheduler
    scheduler_name = args["scheduler"]
    if scheduler_name is None:
        scheduler = None
    elif scheduler_name == 'rlrop':
        scheduler = ReduceLROnPlateau(optimizer, 'min')
    elif scheduler_name == 'exp':
        scheduler = ExponentialLR(optimizer, gamma=0.9)
    else:
        raise ValueError(f"Unknown scheduler {scheduler_name!r}; expected None, 'rlrop' or 'exp'")

    # Loss
    criterion = torch.nn.CrossEntropyLoss()

    train_samples = LibriSamples(data_path = args['LIBRI_PATH'], shuffle=True, partition="train-clean-100", csvpath=args['CSV_PATH'])
    dev_samples = LibriSamples(data_path = args['LIBRI_PATH'], shuffle=True, partition="dev-clean")

    if args['log']:
        print("Initializing W&B")
        wandb.init(project="phenome-hw1", entity="nefario7", config=args)

    try:
        print("Training in progress...")
        for epoch in range(1, args['epoch'] + 1):
            start = time.time()
            train(args, model, device, train_samples, optimizer, criterion, epoch)
            test_acc, val_loss = test(args, model, device, criterion, dev_samples)

            if scheduler is not None:
                if isinstance(scheduler, ReduceLROnPlateau):
                    # Only the plateau scheduler consumes a metric.
                    scheduler.step(val_loss)
                else:
                    # Other schedulers take an (deprecated) epoch argument;
                    # passing the loss there would corrupt the schedule.
                    scheduler.step()
            end = time.time()

            if args['log']:
                wandb.log({"Accuracy": test_acc * 100})
            print(f'Validation Accuracy = {test_acc * 100}%')
            print(f'Time taken = {end - start} secs\n')

        print("Training Complete!")

        if args['save']:
            model_path = model_saving(
                path=r'/content/cmudrive/IDL/hw1-models-other',
                args=args,
                save_metadata=True,
                exp="Suggestion Med",
                ensemble=args["ensemble"]
                )
            if model_path is None:
                # model_saving returns None when the user keeps an existing directory.
                print("Model not saved: existing model directory was kept.")
            else:
                # The whole module is pickled (not just the state_dict) because
                # the inference code loads it back with a plain torch.load.
                torch.save(model, os.path.join(model_path, "model.pt"))
                print("Model saved at : ", model_path)
    finally:
        # Close the W&B run even when training or saving raised.
        if args['log']:
            wandb.finish()

In [25]:
# Experiment configuration.  Key ORDER matters: model_saving() walks this dict
# in insertion order and builds the model directory name from every entry up to
# and including 'lr'; the entries after 'lr' only end up in the YAML metadata.
args = {
    # Empty key: its value becomes the leading, un-abbreviated tag of the model name.
    '': 'NewArch',
    'batch_size': 16384,
    'epoch': 10,
    # Number of neighbouring frames on each side of the centre frame.
    'context': 16,
    # 'bn', 'weight' and 'arch' are descriptive only in this notebook: Network
    # hard-codes its layers and main() does not apply weight initialisation.
    'bn': 'after',
    'weight': None, 
    # 'adam' or 'sgdn' (see main).
    'optimizer': 'adam',
    # None, 'rlrop' or 'exp' (see main).
    'scheduler': None,
    'lr': 0.001,
    'arch': [1024, 512, 256],
    'LIBRI_PATH': '/content/hw1-data/hw1p2_student_data',
    # None -> train on the full training partition (name gets the 'full_' prefix).
    'CSV_PATH': None,
    # Print/log the training loss every this many batches.
    'log_interval': 500,
    'save' : True,
    'log' : True,
    # True -> each saved model gets a random '-verNN' suffix.
    'ensemble': True
}

# Train two models with identical settings; with 'ensemble': True each run is
# saved under its own '-verNN' directory.
for b in [16384, 16384]:
    torch.cuda.empty_cache()
    args['batch_size'] = b
    main(args)

Initializing W&B


VBox(children=(Label(value=' 0.34MB of 0.34MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Accuracy,▁▄▅▆▇▇▇███
Training Loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Validation Loss,█▇▃█▅▄▄▅▄▃▅▄▄▅▅▃▁▃▂▄▃▃▃▂▂▃▅▂▃▄▄▄▃▂▃▁▄▄▅▁

0,1
Accuracy,81.22943
Training Loss,0.56875
Validation Loss,0.66499


Training in progress...
Validation Accuracy = 78.34571013307898%
Time taken = 295.0830192565918 secs

Validation Accuracy = 79.55340294896092%
Time taken = 295.8716096878052 secs

Validation Accuracy = 80.07768790232342%
Time taken = 295.55510115623474 secs

Validation Accuracy = 80.42168861251842%
Time taken = 294.3375234603882 secs

Validation Accuracy = 80.69606337251794%
Time taken = 285.83742690086365 secs

Validation Accuracy = 80.83113461911663%
Time taken = 287.9629428386688 secs

Validation Accuracy = 80.94390904548963%
Time taken = 286.0062472820282 secs

Validation Accuracy = 81.06710929983856%
Time taken = 290.0517385005951 secs

Validation Accuracy = 81.21043862800232%
Time taken = 292.7306218147278 secs

Validation Accuracy = 81.23670964998121%
Time taken = 296.219685792923 secs

Training Complete!
full_NewArch_b16384_e10_c16_bnafter_wNone_oadam_sNone_lr0.001-ver97
Model saved at :  /content/cmudrive/IDL/hw1-models-other/full_NewArch_b16384_e10_c16_bnafter_wNone_oadam_sNo

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Accuracy,▁▄▅▆▇▇▇███
Training Loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Validation Loss,█▆▅▅▇▆▆▇▅▇▅▃▅▄▄▄▅▄▃▄▃▃▃▅▄▆▄▃▁▃▂▃▅▃▄▂▅▂▄▃

0,1
Accuracy,81.23671
Training Loss,0.57809
Validation Loss,0.52409


Initializing W&B


Training in progress...
Validation Accuracy = 78.26911642656295%
Time taken = 293.6686055660248 secs

Validation Accuracy = 79.55980296217385%
Time taken = 293.90192580223083 secs

Validation Accuracy = 80.12372670704868%
Time taken = 294.0628364086151 secs

Validation Accuracy = 80.4665919310285%
Time taken = 294.68378257751465 secs

Validation Accuracy = 80.7141795389513%
Time taken = 295.55179262161255 secs

Validation Accuracy = 80.87268309199091%
Time taken = 294.87785243988037 secs

Validation Accuracy = 81.01549629005686%
Time taken = 296.06989908218384 secs

Validation Accuracy = 81.12858039448855%
Time taken = 297.75366497039795 secs

Validation Accuracy = 81.2404257866855%
Time taken = 299.59158992767334 secs

Validation Accuracy = 81.31913562660257%
Time taken = 298.7042908668518 secs

Training Complete!
full_NewArch_b16384_e10_c16_bnafter_wNone_oadam_sNone_lr0.001-ver13
Model saved at :  /content/cmudrive/IDL/hw1-models-other/full_NewArch_b16384_e10_c16_bnafter_wNone_oadam_

VBox(children=(Label(value=' 0.10MB of 0.10MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Accuracy,▁▄▅▆▇▇▇███
Training Loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Validation Loss,▅▅█▇▅▆▅▇▅▄▅▅▇▃▃▆▃▃▅▂▆▁▄▆▃▃▃▄▃▃▂▃▂▃▂▃▁▄▄▂

0,1
Accuracy,81.31914
Training Loss,0.57793
Validation Loss,0.5621


## Submission

In [83]:
import csv, yaml
from tqdm import tqdm
import os, datetime

class SubmissionSamples(torch.utils.data.Dataset):
    """Chunked loader over the unlabeled ``<data_path>/<partition>/mfcc/*.npy`` files.

    Args:
        data_path: Root directory of the dataset.
        csv_path: Optional CSV with a ``file`` column giving the utterance
            order to use.  When falsy, the directory listing is used in sorted
            order so that the result is deterministic.
        sample: Number of .npy files loaded per ``__getitem__`` call.
        shuffle: Accepted for signature compatibility; not used (the order of
            the utterances is never shuffled).
        partition: Sub-directory of ``data_path`` to read.
    """

    def __init__(self, data_path, csv_path, sample=20000, shuffle=False, partition="test-clean"):
        # sample represent how many npy files will be preloaded for one __getitem__ call
        self.sample = sample
        self.X_dir = data_path + "/" + partition + "/mfcc/"

        if csv_path:
            self.X_names = list(pd.read_csv(csv_path)["file"])
        else:
            # os.listdir order is arbitrary; sort for a reproducible order.
            self.X_names = sorted(os.listdir(self.X_dir))

        self.length = len(self.X_names)
        self.PHONEMES = [
            'SIL',   'AA',    'AE',    'AH',    'AO',    'AW',    'AY',
            'B',     'CH',    'D',     'DH',    'EH',    'ER',    'EY',
            'F',     'G',     'HH',    'IH',    'IY',    'JH',    'K',
            'L',     'M',     'N',     'NG',    'OW',    'OY',    'P',
            'R',     'S',     'SH',    'T',     'TH',    'UH',    'UW',
            'V',     'W',     'Y',     'Z',     'ZH',    '<sos>', '<eos>']

    def __len__(self):
        """Number of chunks (not utterances or frames)."""
        return int(np.ceil(self.length / self.sample))

    def __getitem__(self, i):
        """Load chunk ``i`` and return the concatenated, normalised features.

        Raises:
            IndexError: if chunk ``i`` lies past the end of the dataset.
        """
        sample_range = range(i*self.sample, min((i+1)*self.sample, self.length))
        if len(sample_range) == 0:
            # IndexError (rather than np.concatenate's ValueError on an empty
            # list) lets plain iteration over the dataset terminate cleanly.
            raise IndexError(f"chunk index {i} out of range for {len(self)} chunk(s)")

        X = []
        for j in sample_range:
            X_path = self.X_dir + self.X_names[j]
            X_data = np.load(X_path)
            # Per-utterance normalisation of every feature column.
            # NOTE(review): a column with zero variance yields inf/nan here.
            X_data = (X_data - X_data.mean(axis=0))/X_data.std(axis=0)
            X.append(X_data)

        X = np.concatenate(X)
        return X

class SubmissionItems(torch.utils.data.Dataset):
    """Frame-level view over one chunk of unlabeled features ``X``.

    Item ``i`` is the flattened window of ``2 * context + 1`` consecutive
    frames centred on frame ``i``; the array is zero-padded by ``context``
    rows on both ends so every frame has a full window.
    """

    def __init__(self, X, context = 0):
        self.length = X.shape[0]
        self.context = context
        if context == 0:
            self.X = X
        else:
            # Zero rows before and after, so edge frames still get a full window.
            self.X = np.pad(X, ((context, context), (0, 0)), 'constant', constant_values=(0, 0))

    def __len__(self):
        return self.length

    def __getitem__(self, i):
        if self.context == 0:
            return self.X[i].flatten()
        window_len = 2 * self.context + 1
        return self.X[i:(i + window_len)].flatten()

class SubmissionInference():
    """Run a saved model (or an ensemble of saved models) on the test set and write a submission CSV.

    Models live under ``<drive_dir>/<model_type>/<model_name>[-verNN]/model.pt``
    with their hyper-parameters in ``model_parameters.yaml`` next to them.

    Args:
        model_name: Directory name of the model (without any ``-verNN`` suffix
            when ensembling).
        model_type: Sub-directory of the drive directory holding the models.
        data_path: Root directory of the dataset.
        csv_path: CSV giving the order of the test utterances.
    """

    def __init__(self, model_name, model_type, data_path, csv_path):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.drive_dir = r'/content/cmudrive/IDL'
        self.model_name = model_name
        self.model_type = model_type
        # Set by the inference methods; generate_submission falls back to "now".
        self.timestamp = None

        self.model_path = os.path.join(self.drive_dir, model_type, model_name, 'model.pt')
        self.meta_path = os.path.join(self.drive_dir,  model_type, model_name, 'model_parameters.yaml')
        with open(self.meta_path, 'r') as meta:
            self.args = yaml.safe_load(meta)

        self.data_path = data_path
        self.order_csv_path = csv_path
        self.test_samples = SubmissionSamples(data_path = self.data_path, csv_path=self.order_csv_path)

    def __load_model(self, ensemble=False):
        """Load the single model, or a ``{path: model}`` dict of all ensemble members.

        Ensemble members are the base directory ``model_name`` (if present)
        and every sibling directory named ``model_name-ver*``.

        SECURITY: ``torch.load`` unpickles the file, which can execute
        arbitrary code - only load model files you created yourself.
        NOTE(review): recent PyTorch releases default ``torch.load`` to
        ``weights_only=True``, which rejects fully pickled modules like these;
        confirm against the installed version.
        """
        if not ensemble:
            # map_location lets a model saved on GPU be loaded on a CPU-only host.
            return torch.load(self.model_path, map_location=self.device).to(self.device)

        base_path = os.path.join(self.drive_dir, self.model_type)
        paths = []
        # Base directory first, then the '-ver' variants in a stable order.
        for entry in [self.model_name] + sorted(os.listdir(base_path)):
            if entry.split('-ver')[0] != self.model_name:
                continue
            candidate = os.path.join(base_path, entry, 'model.pt')
            # Skip duplicates (the base directory also matches the filter above,
            # which used to give that model two votes) and missing files.
            if candidate not in paths and os.path.isfile(candidate):
                paths.append(candidate)

        if not paths:
            raise FileNotFoundError(
                f"No model.pt found for '{self.model_name}' (or '-ver' variants) in {base_path}")

        models = dict()
        for path in paths:
            models[path] = torch.load(path, map_location=self.device).to(self.device)
        return models

    def __get_labels(self, imodel):
        """Return the predicted label index for every test frame, in order."""
        labels = []
        print(self.args['context'], self.args['batch_size'])
        with torch.no_grad():
            for i in range(len(self.test_samples)):
                X = self.test_samples[i]
                test_items = SubmissionItems(X, context=self.args['context'])
                test_loader = torch.utils.data.DataLoader(test_items, batch_size=self.args['batch_size'], num_workers=2, pin_memory=True, shuffle=False)

                for data in tqdm(test_loader):
                    data = data.float().to(self.device)
                    output = imodel(data)
                    y = torch.argmax(output, dim=1)
                    labels.extend(y.tolist())
        return labels

    @staticmethod
    def _majority_vote(per_model_labels):
        """Combine per-model label sequences by per-frame majority vote.

        Args:
            per_model_labels: Sequence of equal-length label sequences, one per model.

        Returns:
            A list with, for every frame, the label predicted by most models.
            Ties are resolved in favour of the smallest label value.
        """
        votes = np.asarray(per_model_labels)
        if votes.ndim != 2 or votes.shape[0] == 0:
            raise ValueError("expected a non-empty list of equal-length label lists")

        n_frames = votes.shape[1]
        winner = np.zeros(n_frames, dtype=votes.dtype)
        best_count = np.zeros(n_frames, dtype=np.int64)
        # np.unique is ascending, and the strict '>' below keeps the earlier
        # (smaller) label on ties.  Memory stays O(n_frames).
        for label in np.unique(votes):
            count = (votes == label).sum(axis=0)
            better = count > best_count
            winner[better] = label
            best_count[better] = count[better]
        return winner.tolist()

    def simple_inference(self):
        """Predict labels with the single model at ``self.model_path``."""
        print("Running inference...")
        self.model = self.__load_model()
        self.timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

        self.model.eval()
        labels = self.__get_labels(self.model)

        return labels

    def ensemble_inference(self):
        """Predict labels with every ensemble member and majority-vote per frame.

        The previous implementation took the per-frame *maximum* label index
        across models, which is unrelated to which label the models agree on.
        """
        # Beta
        print("Running ensembled inference...")
        models = self.__load_model(ensemble=True)
        self.timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

        prelim_labels = []
        for path, model in models.items():
            print(path)
            model.eval()
            prelim_labels.append(self.__get_labels(model))

        ensembled_labels = self._majority_vote(prelim_labels)

        return ensembled_labels

    def generate_submission(self, save_path, labels):
        """Write ``labels`` as ``id,label`` rows to ``<drive_dir>/<save_path><timestamp>/submission.csv``.

        Returns:
            The path of the written CSV file.
        """
        if getattr(self, 'timestamp', None) is None:
            # No inference method has run on this instance yet.
            self.timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

        sub_dir = os.path.join(self.drive_dir, save_path + self.timestamp)
        # Creates missing parents too and tolerates an existing directory;
        # genuine failures (e.g. permissions) now propagate instead of being
        # swallowed and resurfacing as a confusing error from open().
        os.makedirs(sub_dir, exist_ok=True)

        sub_path = os.path.join(sub_dir, 'submission.csv')

        # newline='' is what the csv module expects of the file object.
        with open(sub_path, 'w', newline='') as f:
            csvwrite = csv.writer(f)
            csvwrite.writerow(['id', 'label'])
            for i, label in enumerate(labels):
                csvwrite.writerow([i, label])

        print(f"File saved at : {sub_path}")
        return sub_path


In [None]:
# Inference driver: load the saved model(s), predict the test labels, write the
# submission CSV and preview it.
# model_name is given WITHOUT a '-verNN' suffix: the constructor reads
# <model_name>/model_parameters.yaml, and ensemble loading also collects the
# sibling directories whose name before '-ver' equals model_name.
# NOTE(review): the training runs above saved only '-verNN' directories, so the
# un-suffixed directory must already exist from another run - confirm.
model_name = r'full_NewArch_b16384_e10_c16_bnafter_wNone_oadam_sNone_lr0.001'
model_type = r'hw1-models-other'
data_path = r'/content/hw1-data/hw1p2_student_data'
# CSV whose 'file' column fixes the order of the test utterances.
csv_path = r'/content/hw1-data/test_order.csv'
# Prefix (relative to the drive directory) of the timestamped output folder.
sub_path = r'hw1-submission/'

inference = SubmissionInference(model_name, model_type, data_path, csv_path)

# Simple 
# labels = inference.simple_inference()
# Ensemble
labels = inference.ensemble_inference()

# submission_path is reused by the Kaggle submit cell below.
submission_path = inference.generate_submission(sub_path, labels)
print(f"Preview of submission.csv")
df = pd.read_csv(submission_path)
df.head()

In [85]:
# Upload the generated CSV with the kaggle CLI; `$submission_path` is the
# Python variable set by the inference cell, expanded into the shell command.
print(submission_path)
! kaggle competitions submit -c 11-785-s22-hw1p2 -f $submission_path -m "Damn Submission"

/content/cmudrive/IDL/hw1-submission/2022-02-04_08-40-58/submission.csv
100% 20.5M/20.5M [00:02<00:00, 7.75MB/s]
Successfully submitted to Frame-Level Speech Recognition