In [1]:
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torch.nn.functional as F
import numpy as np

In [6]:
import torch
import torch.nn as nn
# Prefer the GPU whenever torch can see one; otherwise run everything on the CPU.
cuda = torch.cuda.is_available()
device = torch.device("cuda") if cuda else torch.device("cpu")
print(cuda)

True


In [2]:
import os

class WSJ():
    """ Load the WSJ speech dataset

        The data directory (the folder holding the .npy files provided on
        Kaggle) is resolved in this order:
            1. the ``path`` argument, when given;
            2. the ``WSJ_PATH`` environment variable, when set;
            3. ``./11-785hw1p2-f19/``, the folder the rest of this notebook reads.

        Each split is read from disk on first access and cached afterwards.

        Example usage:
            loader = WSJ()
            trainX, trainY = loader.train
            assert(trainX.shape[0] == 24500)

    """

    # Fallback data folder, matching the literal path used by the loading cells.
    DEFAULT_PATH = './11-785hw1p2-f19/'

    def __init__(self, path=None):
        """Remember where the data lives; nothing is loaded until a split is requested.

        Args:
            path: optional directory containing the .npy files. When omitted,
                ``WSJ_PATH`` from the environment is used, then DEFAULT_PATH.
        """
        if path is None:
            # The environment variable is looked up by NAME; the previous code
            # passed the directory itself as the key, which always raised KeyError.
            path = os.environ.get('WSJ_PATH', self.DEFAULT_PATH)
        self.path = path
        self.dev_set = None
        self.train_set = None
        self.test_set = None

    @property
    def dev(self):
        """(features, labels) for the dev split, loaded lazily via load_raw."""
        if self.dev_set is None:
            self.dev_set = load_raw(self.path, 'dev')
        return self.dev_set

    @property
    def train(self):
        """(features, labels) for the train split, loaded lazily via load_raw."""
        if self.train_set is None:
            self.train_set = load_raw(self.path, 'train')
        return self.train_set

    @property
    def test(self):
        """(features, None) for the test split; the test set ships without labels."""
        if self.test_set is None:
            # allow_pickle=True mirrors load_raw: the file stores an object array.
            # Pickle can execute arbitrary code, so only load trusted files.
            features = np.load(
                os.path.join(self.path, 'test.npy'),
                encoding='bytes',
                allow_pickle=True,
            )
            self.test_set = (features, None)
        return self.test_set
    
def load_raw(path, name):
    """Read one split from ``path`` and return it as a ``(features, labels)`` tuple.

    ``name`` selects the pair of files ``<name>.npy`` and ``<name>_labels.npy``.
    Both are read with ``allow_pickle=True`` and ``encoding='bytes'``.
    """
    loaded = []
    for template in ('{}.npy', '{}_labels.npy'):
        file_path = os.path.join(path, template.format(name))
        loaded.append(np.load(file_path, encoding='bytes', allow_pickle=True))
    return tuple(loaded)

In [3]:
# Read train.npy and train_labels.npy as a (features, labels) tuple, then show its shape.
train_data = load_raw('./11-785hw1p2-f19/', 'train')
np.shape(train_data)

(2, 24500)

In [4]:
# Read dev.npy and dev_labels.npy as a (features, labels) tuple, then show its shape.
valid_data = load_raw('./11-785hw1p2-f19/', 'dev')
np.shape(valid_data)

(2, 1100)

In [5]:
# The test split has no labels file, so only test.npy is read here.
test_data = np.load('./11-785hw1p2-f19/test.npy', encoding='bytes', allow_pickle=True)
np.shape(test_data)

(361,)

In [7]:
# Split each (features, labels) tuple into separate names; the test split is features only.
train, train_labels = train_data
val, val_labels = valid_data
test = test_data

In [53]:
# Concatenate the per-utterance arrays of every split end-to-end along axis 0,
# so each split becomes one long array that can be indexed frame by frame.
# NOTE: the validation features are named `concat_val`, while the labels are
# `concat_valid_labels` — the two names do not share a prefix.

concat_train = np.concatenate(train)
concat_train_labels = np.concatenate(train_labels)
concat_val = np.concatenate(val)
concat_valid_labels = np.concatenate(val_labels)
concat_test = np.concatenate(test)

In [9]:
# Sanity check: (total number of frames, features per frame).
print(concat_train.shape)

(15388713, 40)


In [10]:
# Running frame count for the train split: mapframe[i] is the index one past the
# last frame of utterance i in the concatenated array; mapframe[-1] = 0 seeds the sum.
mapframe = {-1: 0}
for i, utterance in enumerate(train):
    mapframe[i] = mapframe[i - 1] + len(utterance)

In [54]:
# Running frame count for the validation split: mapframe_valid[i] is the index one
# past the last frame of utterance i; mapframe_valid[-1] = 0 seeds the sum.
mapframe_valid = {-1: 0}
for i, utterance in enumerate(val):
    mapframe_valid[i] = mapframe_valid[i - 1] + len(utterance)

In [55]:
# Padding table for the train split.
# For each of the first k and last k frames of every utterance, store
# [number of zero frames to add, side], where side 0 = pad before, 1 = pad after.
# NOTE(review): the name `dict` shadows the Python builtin for the rest of the
# notebook; it is kept because the joinframe dataset reads this exact global.

dict = {}
len_train = len(train)
k = 12

for i in range(len_train):
    first, last = mapframe[i - 1], mapframe[i]

    # Leading frames: the frame at `first` needs k pads, the next k-1, ... down to 1.
    for offset in range(k):
        dict[first + offset] = [k - offset, 0]

    # Trailing frames: the frame at `last - k` needs 1 pad, ... the final frame needs k.
    for offset in range(k, 0, -1):
        dict[last - offset] = [1 + k - offset, 1]

In [56]:
# Padding table for the validation split, built the same way as the train table:
# frame index -> [number of zero frames to add, side], side 0 = before, 1 = after.
dict_val = {}
len_val = len(val)
k = 12

for i in range(len_val):
    first, last = mapframe_valid[i - 1], mapframe_valid[i]

    # Leading frames: k pads for the first frame, shrinking to 1.
    for offset in range(k):
        dict_val[first + offset] = [k - offset, 0]

    # Trailing frames: 1 pad for the frame k from the end, growing to k for the last.
    for offset in range(k, 0, -1):
        dict_val[last - offset] = [1 + k - offset, 1]

In [14]:
class joinframe(Dataset):
    """Dataset of context windows over a concatenated array of frames.

    Item ``index`` is frame ``index`` together with its ``k`` neighbours on each
    side, flattened into one vector of ``(2*k + 1) * n_features`` values, paired
    with the label of the centre frame.

    Args:
        x: 2-D array of frames, one row per frame.
        y: labels, one per frame (same length as ``x``).
        k: number of context frames taken on each side.
        pad_map: optional mapping ``frame index -> [n_pad, side]`` describing
            frames close to an utterance boundary; ``side`` 0 means "replace the
            n_pad frames before the window with zeros", 1 means the same after.
            When omitted, the module-level ``dict`` padding table (built for the
            train split) is used, which is what this class always did before.
            Pass the table that matches ``x`` when wrapping another split.
    """

    def __init__(self, x, y, k, pad_map=None):
        super().__init__()
        assert len(x) == len(y)
        self._x = x
        self._y = y
        self.k = k
        self._pad_map = pad_map

    def __len__(self):
        return len(self._x)

    def __getitem__(self, index):
        y_item = self._y[index]

        # Resolve the table at call time so a table rebuilt after construction
        # is still picked up; `dict` is the notebook's global padding table.
        pad_map = dict if self._pad_map is None else self._pad_map

        pad_before = 0
        pad_after = 0
        entry = pad_map.get(index)
        if entry is not None:
            n_pad, side = entry
            if side == 0:
                pad_before = n_pad
            elif side == 1:
                pad_after = n_pad

        # Real frames to read: the full window minus the part replaced by zeros.
        lo = index - self.k + pad_before
        hi = index + self.k + 1 - pad_after

        # Never read outside the array. Without this, a negative start wraps
        # around (yielding an empty slice) and an oversized stop silently
        # truncates, so items would have inconsistent lengths.
        n_frames = len(self._x)
        if lo < 0:
            pad_before -= lo
            lo = 0
        if hi > n_frames:
            pad_after += hi - n_frames
            hi = n_frames

        x_item = self._x[lo:hi]
        if pad_before or pad_after:
            # Width comes from the data instead of a hard-coded 40; np.zeros
            # keeps its float64 default, as before.
            width = x_item.shape[1]
            x_item = np.vstack((
                np.zeros((pad_before, width)),
                x_item,
                np.zeros((pad_after, width)),
            ))

        return x_item.flatten(), y_item

In [15]:
# Wrap the concatenated training frames and labels in the context-window dataset
# (12 frames on each side), then serve them in shuffled batches of 1024.
frameobj = joinframe(concat_train, concat_train_labels, k = 12) 
framedata = DataLoader(frameobj, batch_size=1024, shuffle=True, pin_memory=True, num_workers=2)

In [34]:
class ANN(nn.Module):
    """Fully connected classifier over a flattened window of ``2*k + 1`` frames.

    Six hidden stages of Linear -> BatchNorm1d -> ReLU (widths in ``self.hiddens``)
    are followed by a final Linear layer that emits ``output_size`` raw scores.
    The input width is ``40 * (2*k + 1)``.
    """

    def __init__(self, k = 12, output_size = 138):
        super().__init__()
        self.hiddens = [2048, 1024, 1024, 512, 512, 256]

        # Layer widths from input to output; consecutive pairs define the linears.
        widths = [40 * (2 * k + 1)] + self.hiddens + [output_size]

        # linear1 .. linear7, registered in order.
        for n, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, 'linear{}'.format(n), nn.Linear(fan_in, fan_out))

        # bn1 .. bn6, one per hidden layer.
        for n, width in enumerate(self.hiddens, start=1):
            setattr(self, 'bn{}'.format(n), nn.BatchNorm1d(width))

        # The dropout module is created but forward() never calls it.
        self.do = nn.Dropout(p=0.2)
        self.relu = nn.ReLU()

    def forward(self, example):
        """Run ``example`` through the six hidden stages and the output layer."""
        stages = (
            (self.linear1, self.bn1),
            (self.linear2, self.bn2),
            (self.linear3, self.bn3),
            (self.linear4, self.bn4),
            (self.linear5, self.bn5),
            (self.linear6, self.bn6),
        )
        x = example
        for dense, norm in stages:
            x = self.relu(norm(dense(x)))
        return self.linear7(x)

def init_randn(m):
    """Overwrite the weights of an ``nn.Linear`` with draws from N(0, 1), in place.

    Modules of any other exact type are left untouched; biases are never changed.
    Intended to be used with ``module.apply(init_randn)``.
    """
    if type(m) != nn.Linear:
        return
    m.weight.data.normal_(mean=0, std=1)
    
# Build the network with its default sizes and cast all parameters to float64.
# NOTE(review): init_randn is defined above but never applied to the model — confirm
# that the default PyTorch initialisation is what is intended.
model = ANN()
model.double()      

ANN(
  (linear1): Linear(in_features=1000, out_features=2048, bias=True)
  (linear2): Linear(in_features=2048, out_features=1024, bias=True)
  (linear3): Linear(in_features=1024, out_features=1024, bias=True)
  (linear4): Linear(in_features=1024, out_features=512, bias=True)
  (linear5): Linear(in_features=512, out_features=512, bias=True)
  (linear6): Linear(in_features=512, out_features=256, bias=True)
  (linear7): Linear(in_features=256, out_features=138, bias=True)
  (bn1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn5): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn6): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_run

In [35]:
# Adam over every model parameter; the SGD line below is a disabled alternative.
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
# optimizer = torch.optim.SGD(model.parameters(), 0.001, momentum = 0.1, weight_decay = 1e-8)
# Cross-entropy applied to the raw scores returned by the network's last linear layer.
loss_function = nn.CrossEntropyLoss()

In [36]:
from torch.utils.data import DataLoader, Dataset, TensorDataset

In [None]:
import time

epochs = 20
train_accuracy=[]
valid_accuracy=[]

# Move the model once; the datasets and loaders do not change between epochs,
# so they are built a single time instead of on every iteration.
model.to(device)

frameobj = joinframe(concat_train, concat_train_labels, k = 12)
framedata = DataLoader(frameobj, batch_size= 512, shuffle=True, pin_memory=True, num_workers=0)

# The validation features are stored as `concat_val` (there is no `concat_valid`).
# NOTE(review): joinframe resolves padding through the module-level `dict` table,
# which is built from the TRAIN utterance boundaries; validation windows should
# be padded with `dict_val` instead — confirm and wire that table through.
frameobj_val = joinframe(concat_val, concat_valid_labels, k = 12)
# Evaluation does not need shuffling.
framedata_val = DataLoader(frameobj_val, batch_size= 512, shuffle=False, pin_memory=True, num_workers=0)

for epoch in range(epochs):

    # ---- training pass ----
    model.train()
    print('Epoch: ',epoch)
    train_losses = []
    valid_losses = []
    total = 0
    correct = 0
    runtime = 0

    start = time.time()

    for xbatch, ybatch in framedata:
        runtime+=1
        if runtime%1000==0:
            print('---------------------------',runtime)

        # BatchNorm1d cannot compute batch statistics from a single sample
        # while in training mode, so a trailing 1-sample batch is skipped.
        if len(xbatch) < 2:
            continue

        xbatch = xbatch.to(device)
        ybatch = ybatch.to(device)
        optimizer.zero_grad()

        ycap = model(xbatch)
        loss = loss_function(ycap, ybatch)
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())

        train_predict = torch.argmax(ycap.data, 1)
        correct+=(train_predict==ybatch).sum().item()

        total+=ybatch.size(0)

    print(time.time() - start)
    accuracy = 100*correct/total
    train_accuracy.append(accuracy)
    print('train_accuracy = ', accuracy)

    # ---- validation pass ----
    model.eval()
    # Start from fresh counters so the validation accuracy is not mixed with
    # the training counts accumulated above.
    total = 0
    correct = 0

    # No gradients are needed for evaluation; this avoids building the graph.
    with torch.no_grad():
        for xbatch, ybatch in framedata_val:

            xbatch = xbatch.to(device)
            ybatch = ybatch.to(device)

            ycap = model(xbatch)
            valid_losses.append(loss_function(ycap, ybatch).item())

            # Score the predictions of THIS batch (not the last training batch).
            val_predict = torch.argmax(ycap, 1)
            correct+=(val_predict==ybatch).sum().item()

            total+=ybatch.size(0)

    print(time.time() - start)
    accuracy = 100*correct/total
    valid_accuracy.append(accuracy)
    print('valid_accuracy = ', accuracy)
    

Epoch:  0


KeyboardInterrupt: 

In [39]:
# Per-epoch training accuracy, in percent, as appended by the training loop.
# NOTE(review): the saved output below lists 25 values while the training cell sets
# epochs = 20, so it appears to come from a different run — confirm before quoting it.
train_accuracy

[56.29520805281118,
 58.547644627591666,
 59.86857380470998,
 60.815690045034955,
 61.552619767488025,
 62.13710009407544,
 62.624229849500736,
 63.04267289928664,
 63.41198253551158,
 63.72363952723012,
 64.0239895305085,
 64.27421188503548,
 64.50647302344257,
 64.73094923532592,
 64.92802224591492,
 65.12117680016516,
 65.28875416677145,
 65.45496039857264,
 65.61492179365487,
 65.75520642954352,
 65.86845176721405,
 66.01080935098341,
 66.13293782267561,
 66.25350021148617,
 66.34952513572773]

In [40]:
# Running frame count for the test split: mapframe_test[i] is the index one past
# the last frame of utterance i; mapframe_test[-1] = 0 seeds the sum.
mapframe_test = {-1: 0}
for i, utterance in enumerate(test):
    mapframe_test[i] = mapframe_test[i - 1] + len(utterance)
    
# print(mapframe_test)

{-1: 0, 0: 490, 1: 909, 2: 1441, 3: 1943, 4: 2423, 5: 2895, 6: 3387, 7: 3857, 8: 4327, 9: 4814, 10: 5259, 11: 5729, 12: 6202, 13: 6699, 14: 7123, 15: 7560, 16: 8028, 17: 8458, 18: 9003, 19: 9671, 20: 10304, 21: 10690, 22: 11017, 23: 11794, 24: 12848, 25: 13517, 26: 13839, 27: 14079, 28: 14693, 29: 14988, 30: 16069, 31: 16760, 32: 17137, 33: 17300, 34: 17429, 35: 17975, 36: 18772, 37: 19471, 38: 20178, 39: 20777, 40: 21779, 41: 22216, 42: 22988, 43: 23303, 44: 23732, 45: 24442, 46: 25152, 47: 25738, 48: 26118, 49: 26327, 50: 26666, 51: 27065, 52: 27488, 53: 28144, 54: 28732, 55: 29335, 56: 29621, 57: 30571, 58: 30838, 59: 31585, 60: 32575, 61: 33401, 62: 34386, 63: 34570, 64: 35252, 65: 35524, 66: 36607, 67: 37030, 68: 37540, 69: 38132, 70: 38839, 71: 39123, 72: 39931, 73: 41079, 74: 41726, 75: 42471, 76: 43229, 77: 43938, 78: 44718, 79: 45200, 80: 45980, 81: 46793, 82: 47260, 83: 47670, 84: 48253, 85: 49016, 86: 49673, 87: 50684, 88: 51763, 89: 52202, 90: 52472, 91: 53367, 92: 53936, 9

In [41]:
# Padding table for the test split:
# frame index -> [number of zero frames to add, side], side 0 = before, 1 = after.
dict_test = {}
len_test = len(test)
k = 12

for i in range(len_test):
    first, last = mapframe_test[i - 1], mapframe_test[i]

    # Leading frames: k pads for the first frame, shrinking to 1.
    for offset in range(k):
        dict_test[first + offset] = [k - offset, 0]

    # Trailing frames: 1 pad for the frame k from the end, growing to k for the last.
    for offset in range(k, 0, -1):
        dict_test[last - offset] = [1 + k - offset, 1]

In [47]:
class joinframe_test(Dataset):
    """Label-free dataset of context windows over a concatenated array of frames.

    Item ``index`` is frame ``index`` together with its ``k`` neighbours on each
    side, flattened into one vector of ``(2*k + 1) * n_features`` values.

    Args:
        x: 2-D array of frames, one row per frame.
        k: number of context frames taken on each side.
        pad_map: optional mapping ``frame index -> [n_pad, side]`` describing
            frames close to an utterance boundary; ``side`` 0 means "replace the
            n_pad frames before the window with zeros", 1 means the same after.
            When omitted, the module-level ``dict_test`` table is used, which is
            what this class always did before.
    """

    def __init__(self, x, k, pad_map=None):
        super().__init__()
        self._x = x
        self.k = k
        self._pad_map = pad_map

    def __len__(self):
        return len(self._x)

    def __getitem__(self, index):
        # Resolve the table at call time so a table rebuilt after construction
        # is still picked up.
        pad_map = dict_test if self._pad_map is None else self._pad_map

        pad_before = 0
        pad_after = 0
        entry = pad_map.get(index)
        if entry is not None:
            n_pad, side = entry
            if side == 0:
                pad_before = n_pad
            elif side == 1:
                pad_after = n_pad

        # Real frames to read: the full window minus the part replaced by zeros.
        lo = index - self.k + pad_before
        hi = index + self.k + 1 - pad_after

        # Never read outside the array. Without this, a negative start wraps
        # around (yielding an empty slice) and an oversized stop silently
        # truncates, so items would have inconsistent lengths.
        n_frames = len(self._x)
        if lo < 0:
            pad_before -= lo
            lo = 0
        if hi > n_frames:
            pad_after += hi - n_frames
            hi = n_frames

        x_item = self._x[lo:hi]
        if pad_before or pad_after:
            # Width comes from the data instead of a hard-coded 40; np.zeros
            # keeps its float64 default, as before.
            width = x_item.shape[1]
            x_item = np.vstack((
                np.zeros((pad_before, width)),
                x_item,
                np.zeros((pad_after, width)),
            ))

        return x_item.flatten()

In [51]:
# Predict a label for every test frame.
model.eval()
model.to(device)
y = []

frameobj = joinframe_test(concat_test, k = 12)
# Order must be preserved for the submission, hence shuffle=False. An explicit
# batch size replaces the DataLoader default of one frame per forward pass.
framedata = DataLoader(frameobj, batch_size=1024, shuffle= False, pin_memory=True, num_workers=0)

# No gradients are needed for inference; this avoids building the autograd graph.
with torch.no_grad():
    for xbatch in framedata:
        xbatch = xbatch.to(device)
        output = model(xbatch)
        # Index of the highest score in each row is the predicted class.
        _, y_cap = torch.max(output, 1)
        y.extend(y_cap.cpu().numpy())


In [52]:
from IPython.display import HTML
import pandas as pd
import numpy as np
# One predicted label per test frame, in frame order.
df = pd.DataFrame(np.array(y), columns=['label'])

# NOTE(review): this writes the row index as an unnamed first column; if the
# grader expects a named id column, pass index_label=... here — confirm the format.
df.to_csv('submission.csv')

def create_download_link(title = "Download CSV file", filename = "data.csv"):
    """Return an IPython ``HTML`` object rendering a link to ``filename``.

    Args:
        title: text shown for the link; inserted as-is, so it may contain markup.
        filename: path the link points to, relative to the notebook.

    The href is quoted and HTML-escaped so that names containing spaces, quotes
    or ampersands still produce a well-formed link.
    """
    from html import escape

    html = '<a href="{filename}">{title}</a>'
    html = html.format(title=title, filename=escape(filename, quote=True))
    return HTML(html)

# create a link to download the dataframe which was saved with .to_csv method
create_download_link(filename='submission.csv')