In [None]:
!mkdir .kaggle

import os, zipfile, tarfile
os.environ['KAGGLE_USERNAME'] = "ruoxinhuang" 
os.environ['KAGGLE_KEY'] = "f9d6d5aade71452503766f80812da73d"
!kaggle competitions download -c 11-785-fall-20-homework-1-part-2

! unzip test.npy.zip -d kaggle
! unzip dev.npy.zip -d kaggle
! unzip dev_labels.npy.zip -d kaggle
! unzip train.npy.zip -d kaggle
! unzip train_labels.npy.zip -d kaggle

In [None]:
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

# Load the five arrays extracted into kaggle/: training features/labels,
# dev features/labels (named test_x / test_y here) and the unlabeled Kaggle
# test features (final_x).
# NOTE(review): allow_pickle=True unpickles arbitrary objects -- only use it on
# files from a trusted source. Presumably it is needed because each file holds
# an object array with one variable-length utterance per entry -- TODO confirm.
train_x, train_y, test_x, test_y, final_x = (
    np.load(path, allow_pickle=True)
    for path in (
        'kaggle/train.npy',
        'kaggle/train_labels.npy',
        'kaggle/dev.npy',
        'kaggle/dev_labels.npy',
        'kaggle/test.npy',
    )
)

In [None]:
# Number of neighbouring frames taken on EACH side of the centre frame; the
# model input therefore spans 2*context + 1 frames.
context = 20

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
class SpeechDataset(Dataset):
    """Frame-level dataset serving every frame together with its context window.

    Each utterance is zero-padded with ``context`` frames on both sides, so the
    sample for a frame is the flattened window of ``2*context + 1`` consecutive
    frames centred on it.

    Args:
        X: sequence of utterances, each an array of shape (num_frames, ...).
        Y: optional sequence of per-utterance label arrays (one label per
            frame). When omitted, ``__getitem__`` returns ``None`` as label.
        context: number of frames of context on each side of the centre frame.

    Note:
        Samples whose label is ``None`` cannot be batched by the default
        ``DataLoader`` collate function; index an unlabeled dataset directly.
    """

    def __init__(self, X, Y=None, context=20):
        self.curr_utt = 0  # not used by this class; kept for backward compatibility
        self.context = context

        # utt_lengths[k] = total number of frames in utterances 0..k inclusive.
        # A global frame index is mapped back to its utterance with a binary
        # search over this array instead of one dict entry per frame.
        self.utt_lengths = np.cumsum([len(utt) for utt in X], dtype=np.int64)

        self.X = []
        for utt in X:
            utt = np.asarray(utt)
            # Pad width follows the utterance's own feature shape rather than
            # a hard-coded feature count.
            pad = np.zeros((self.context,) + utt.shape[1:])
            self.X.append(torch.tensor(np.concatenate((pad, utt, pad), axis=0)))

        if Y is None:
            self.Y = None
        else:
            self.Y = [torch.tensor(utt) for utt in Y]

    def __len__(self):
        """Return the total number of frames across all utterances."""
        if len(self.utt_lengths) == 0:
            return 0
        return int(self.utt_lengths[-1])

    def __getitem__(self, idx):
        """Return ``(window, label)`` for the global frame index ``idx``.

        Raises:
            IndexError: if ``idx`` is out of range. Raising IndexError (and not
                KeyError) is what lets ``for x, y in dataset`` terminate.
        """
        total = len(self)
        idx = int(idx)
        if idx < 0:
            idx += total
        if not 0 <= idx < total:
            raise IndexError("SpeechDataset index out of range")

        # First utterance whose cumulative frame count exceeds idx.
        utt_num = int(np.searchsorted(self.utt_lengths, idx, side='right'))
        if utt_num != 0:
            # Convert the global index into an index local to the utterance.
            idx -= int(self.utt_lengths[utt_num - 1])

        # Because of the left padding, padded rows idx .. idx+2*context are
        # exactly the window centred on original frame idx.
        X = self.X[utt_num][idx:idx + 2 * self.context + 1].flatten()
        if self.Y is None:
            Y = None
        else:
            Y = self.Y[utt_num][idx]

        return (X, Y)

In [None]:
def get_dataloader(trainset, valset = None, batch_size = 384, num_workers = 4):
    """Build the training DataLoader and, optionally, the validation DataLoader.

    Args:
        trainset: dataset used for training; shuffled every epoch. The last
            incomplete batch is dropped.
        valset: optional validation dataset; iterated in order.
        batch_size: number of samples per batch for both loaders.
        num_workers: number of worker processes for both loaders.

    Returns:
        Tuple ``(train_loader, val_loader)``; ``val_loader`` is ``None`` when
        ``valset`` is ``None``.
    """
    train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers, drop_last=True)

    val_loader = None
    if valset is not None:
        # Keep the final partial batch: dropping validation samples would make
        # the reported metrics cover only part of the validation set.
        val_loader = DataLoader(valset, batch_size=batch_size, shuffle=False,
                                num_workers=num_workers, drop_last=False)

    return (train_loader, val_loader)

In [None]:
# Pass the notebook-level `context` explicitly so the dataset window always
# matches the model's input size, which is also derived from `context`.
trainset = SpeechDataset(train_x, train_y, context=context)
train_loader, _ = get_dataloader(trainset)

In [None]:
def train_one_epoch(model, train_loader, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    Uses the notebook-level globals ``device`` and ``criterion``, which must be
    defined before this function is called.

    Args:
        model (nn.Module): the model to train (put into training mode here).
        train_loader (DataLoader): iterable of ``(x, y)`` minibatches.
        optimizer: optimizer updating the model's parameters.

    Returns:
        Tuple ``(avg_train_loss, train_accuracy)``: the mean per-batch loss and
        the fraction of correctly classified samples among those actually
        processed. Both are ``nan`` if the loader yields no batches.
    """
    model.train()
    total_loss = 0
    num_batches = 0
    num_correct = 0
    num_samples = 0

    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        y_hat = model(x.float())
        loss = criterion(y_hat, y)
        loss.backward()
        optimizer.step()

        # Accumulate loss and accuracy statistics.
        num_batches += 1
        total_loss += loss.item()
        predictions = y_hat.argmax(dim=1)
        num_correct += (predictions == y).sum().item()
        # Count the samples really seen: the loader may drop the last partial
        # batch, so len(train_loader.dataset) would overstate the denominator.
        num_samples += y.size(0)

    if num_batches == 0:
        return (float('nan'), float('nan'))

    return (total_loss / num_batches, num_correct / num_samples)

In [None]:
# Fully connected classifier. The input is the flattened context window of
# (2*context + 1) frames with 13 features each; the output is 346 raw class
# scores (logits). Every hidden layer is Linear -> ReLU -> BatchNorm1d.
model = nn.Sequential(
            nn.Linear((context*2+1)*13, 1024),
            nn.ReLU(),
            nn.BatchNorm1d(1024),
            nn.Linear(1024, 2048),
            nn.ReLU(),
            nn.BatchNorm1d(2048),
            nn.Linear(2048, 4096),
            nn.ReLU(),
            nn.BatchNorm1d(4096),
            nn.Linear(4096, 5120),
            nn.ReLU(),
            nn.BatchNorm1d(5120),
            nn.Linear(5120, 4096),
            nn.ReLU(),
            nn.BatchNorm1d(4096),
            nn.Linear(4096, 2048),
            nn.ReLU(),
            nn.BatchNorm1d(2048),
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.BatchNorm1d(1024),
            nn.Linear(1024, 346)
        )
model.to(device)

optimizer = optim.Adam(model.parameters())
# Multi-class classification on raw logits with integer class labels (the
# accuracy code compares the argmax of the output with y), so the loss is
# CrossEntropyLoss. The previous `nn.BCELOSS` does not exist in torch.nn and
# raised AttributeError; binary cross-entropy would not fit this setup anyway.
criterion = nn.CrossEntropyLoss()



In [None]:
# Train for 3 epochs, printing the training accuracy after each one.
for epoch in range(3):
    avg_train_loss, train_accuracy = train_one_epoch(model, train_loader, optimizer)
    print(train_accuracy)

In [None]:
def validate_model(model, val_loader):
    """
    Validate a given model with a validation dataloader.

    Uses the notebook-level globals ``device`` and ``criterion``, which must be
    defined before this function is called.

    args:
        model (nn.Module): the trained model (put into eval mode here)
        val_loader (DataLoader): iterable for valset minibatches

    return: Tuple(avg_val_loss, val_accuracy)
        avg_val_loss (float): average validation loss across batches
        val_accuracy (float): portion of correctly classified samples among
            those yielded by val_loader
        Both are nan if the loader yields no batches.
    """

    model.eval()
    total_loss = 0
    num_batches = 0
    num_correct = 0
    num_samples = 0

    # torch.no_grad() only has an effect as a context manager (or decorator);
    # calling it as a bare statement leaves gradient tracking switched on.
    with torch.no_grad():
        for x, y in val_loader:
            x = x.to(device)
            y = y.to(device)

            y_hat = model(x.float())
            loss = criterion(y_hat, y)

            # Accumulate loss and accuracy statistics.
            num_batches += 1
            total_loss += loss.item()
            predictions = y_hat.argmax(dim=1)
            num_correct += (predictions == y).sum().item()
            # Count the samples actually evaluated so the accuracy stays
            # correct even if the loader drops a partial batch.
            num_samples += y.size(0)

    if num_batches == 0:
        return (float('nan'), float('nan'))

    return (total_loss / num_batches, num_correct / num_samples)

In [None]:
# The dev split is the validation set. `context` is passed explicitly so the
# window size always matches the model's input size.
valset = SpeechDataset(test_x, test_y, context=context)
# Only the validation loader is needed here; the training loader built
# earlier is left untouched.
_, val_loader = get_dataloader(trainset, valset)
avg_val_loss, val_accuracy = validate_model(model, val_loader)
print(val_accuracy)

In [None]:
# Unlabeled Kaggle test set; `context` is passed explicitly so the window size
# always matches the model's input size.
finalset = SpeechDataset(final_x, context=context)

In [None]:
# Predict a class index for every frame of the unlabeled test set.
model.eval()
result = []
inference_batch_size = 4096  # frames per forward pass; lower it if memory is tight

# - no_grad: inference needs no autograd graph.
# - Explicit index range: the loop does not depend on the dataset raising
#   IndexError to stop iteration.
# - Manual batching: unlabeled samples carry a None label, which the default
#   DataLoader collate function cannot batch. In eval mode BatchNorm uses its
#   running statistics, so batch size does not change the predictions.
with torch.no_grad():
    for start in range(0, len(finalset), inference_batch_size):
        stop = min(start + inference_batch_size, len(finalset))
        batch = torch.stack([finalset[i][0] for i in range(start, stop)]).to(device)
        y_hat = model(batch.float())
        indices = y_hat.argmax(dim=1)
        # Keep one single-element tensor per frame, as before.
        result.extend(indices.cpu().split(1))

In [None]:
# Convert every prediction to a NumPy array on the host. Entries that are
# already NumPy arrays are kept as they are, so re-running this cell does not
# fail on a missing `.cpu()` method.
result = [np.array(x.cpu()) if torch.is_tensor(x) else np.asarray(x) for x in result]

In [None]:
import csv

# Write the submission file: one row per frame with its index and predicted
# class. newline='' is what the csv module requires for files passed to
# csv.writer; without it, extra blank lines can appear on platforms that
# translate line endings.
with open('sample.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(('id','label'))
    for i, row in enumerate(result):
        # Each entry holds exactly one predicted class index.
        csvwriter.writerow((i,row.item()))
