In [44]:
import numpy as np
from sklearn import preprocessing
import pandas as pd
import os
from tqdm import tqdm
import torch
from matplotlib import pyplot as plt
import time
import os
import torch.nn as nn
import torch.nn.utils.rnn as rnn
from torch.utils.data import Dataset, DataLoader, BatchSampler, RandomSampler

%matplotlib inline

In [56]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE

'cuda'

In [33]:
labels = pd.read_csv("trainingData.csv")
le = preprocessing.LabelEncoder()
le.fit(labels["Language"])
labels["Language"] = le.transform(labels["Language"])
labels.head()

Unnamed: 0,Sample Filename,Language
0,000kouqjfnk.mp3,56
1,000w3fewuqj.mp3,151
2,000ylhu4sxl.mp3,155
3,0014x3zvjrl.mp3,35
4,001xjmtk2wx.mp3,69


In [36]:
train_dirs = []
train_base_dir = "Training data"
for direc in os.listdir("Training data"):
  train_dirs.append(direc)

In [41]:
train_labels = []
pbar = tqdm(total=66176)
for i, direc in enumerate(train_dirs):
    label = labels[labels["Sample Filename"] == direc]["Language"].item()
    train_labels.append(label)
    pbar.update(1)
pbar.close()    
labels = np.array(train_labels)    

100%|██████████| 66176/66176 [04:58<00:00, 222.05it/s]


In [42]:
np.save("labels.npy", labels)

In [25]:
data = np.load("traindata.npy")
np.random.shuffle(data)

In [105]:
data.shape
max(labels)

175

In [49]:
test_size = 20000
train_size = data.shape[0] - test_size
print(train_size)
train = data[:train_size, :, :]
train_label = labels[:train_size]
test = data[train_size:, :, :]
test_label = labels[train_size:]

46176


In [76]:
train.shape
train_label.shape
train_label[0]

104

In [51]:
test.shape
test_label.shape

(20000,)

In [98]:
class TrainDataset(Dataset):
  
    def __init__(self, train, label):
        self.train = train
        self.label = label

    def __len__(self):
        return self.train.shape[0]

    def __getitem__(self, idx):
        train = torch.Tensor(self.train[idx])
        train = train.transpose(0,1)
#         train = train.to(DEVICE)
        label = self.label[idx]
#         label = torch.Tensor(label)
#         label = label.to(DEVICE)
        return(train, label)

In [99]:
class TestDataset(Dataset):
  
  def __init__(self, test):
    self.test = test
  
  def __len__(self):
    return self.test.shape[0]
  
  def __getitem__(self, idx):
    test = torch.Tensor(self.test[idx])
    test = test.to(DEVICE)
#     test = test.unsqueeze(0)
    return test

In [100]:
train_dataset = TrainDataset(train, train_label)
test_dataset = TestDataset(test)
train_batch_size = 100
test_batch_size = 100
train_loader = DataLoader(train_dataset,
                        batch_sampler=BatchSampler(RandomSampler(train_dataset), train_batch_size, False))
test_loader = DataLoader(test_dataset, batch_size=test_batch_size)

In [101]:
# iter(train_loader).next()
# train_dataset[0][0].shape

[tensor([[[-535.1118,    0.0000,    0.0000,  ...,    0.0000,    0.0000,
              0.0000],
          [-535.1118,    0.0000,    0.0000,  ...,    0.0000,    0.0000,
              0.0000],
          [-378.0182,  134.4436,   24.9814,  ...,   -4.6965,  -11.2445,
            -10.4372],
          ...,
          [-477.9030,   27.7392,   39.3758,  ...,   -0.7092,   -2.7948,
              6.7452],
          [-386.8672,   -9.0076,   45.1327,  ...,   -1.6875,   -7.7957,
              7.7620],
          [-241.6925,   35.2376,   39.2086,  ...,   -2.5565,   -6.4098,
             -4.5064]],
 
         [[-557.1776,    0.0000,    0.0000,  ...,    0.0000,    0.0000,
              0.0000],
          [-557.1776,    0.0000,    0.0000,  ...,    0.0000,    0.0000,
              0.0000],
          [-487.6949,   40.7268,   14.3401,  ...,    4.1408,    7.9077,
             -3.0193],
          ...,
          [-137.7886,  153.1989,  -44.0398,  ...,   19.6007,   13.9441,
             16.2345],
          [-155.7

In [165]:
class DetectionModel(nn.Module):
    
    def __init__(self,vocab_size,embed_size,hidden_size, nlayers):
        super(DetectionModel,self).__init__()
        self.vocab_size=vocab_size
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.nlayers=nlayers
        self.rnn = nn.LSTM(input_size = embed_size,hidden_size=hidden_size,num_layers=nlayers, bidirectional=True) # Recurrent network
        self.scoring = nn.Linear(hidden_size * 2,vocab_size) # Projection layer
        
    def forward(self, seq_batch):
        batch_size = seq_batch.size(1)
        embed = seq_batch #L x N x E
        hidden = None
        output_lstm,hidden = self.rnn(embed,hidden) #L x N x H
        output_lstm_flatten = output_lstm.view(-1,self.hidden_size) #(L*N) x H
        output_flatten = self.scoring(output_lstm_flatten) #(L*N) x V
        return output_flatten.view(-1,batch_size,self.vocab_size)

In [166]:
def train_epoch(model, optimizer, train_loader):
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(DEVICE)
    batch_id=0
    for inputs,targets in train_loader:
        batch_loss = []
        batch_id+=1
        inputs = inputs.to(DEVICE)
        targets = targets.to(DEVICE)
        outputs = model(inputs) # 3D
        outputs = outputs[:, -1, :] # pull out the last layer
#         print(outputs.shape)
        loss = criterion(outputs,targets) # Loss of the flattened outputs
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_loss.append(loss.item())
        if batch_id % 20 == 0:
            lpw = np.mean(batch_loss)
            print("At batch",batch_id)
            print("Training loss :",lpw)

    return model
    

In [167]:
langcount = 176
model = DetectionModel(langcount,40,256,10)
model = model.to(DEVICE)
# optimizer = torch.optim.Adam(model.parameters(),lr=0.01, weight_decay=1e-6)
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)

  "PyTorch was compiled without cuDNN support. To use cuDNN, rebuild "


In [168]:
for i in range(50):
    train_epoch(model, optimizer, train_loader)

  "PyTorch was compiled without cuDNN support. To use cuDNN, rebuild "


RuntimeError: size mismatch, m1: [86000 x 256], m2: [51200 x 176] at /jet/tmp/build/aten/src/THC/generic/THCTensorMathBlas.cu:249