# Pre-processing


In [None]:
import torch
import numpy as np
import torch.nn as nn
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# A plain Python variable is invisible to the CUDA runtime: the device mask has
# to be exported through the process environment, and it must be in place
# before the first CUDA call of the process to take effect.
import os

CUDA_VISIBLE_DEVICES = 0
os.environ["CUDA_VISIBLE_DEVICES"] = str(CUDA_VISIBLE_DEVICES)

In [None]:
# Validation split: features and labels are stored as pickled object arrays.
# NOTE: allow_pickle=True executes pickled code on load; only use trusted files.
dev = np.load('dev.npy', allow_pickle=True)
dev_labels = np.load('dev_labels.npy', allow_pickle=True)

In [None]:
# Training split: features and labels are stored as pickled object arrays.
# NOTE: allow_pickle=True executes pickled code on load; only use trusted files.
train = np.load('train.npy', allow_pickle=True)
train_labels = np.load('train_labels.npy', allow_pickle=True)

In [None]:
class Dataset_train(torch.utils.data.Dataset):
    """Map-style dataset that pairs every sample in ``X`` with its entry in ``Y``.

    Args:
        X: indexable collection of NumPy arrays (one per sample).
        Y: indexable collection of label data (one entry per sample).
    """

    def __init__(self, X, Y):
        self.X, self.Y = np.array(X), np.asarray(Y)
        self.length = len(self.X)

    def __len__(self):
        """Return the number of samples."""
        return self.length

    def __getitem__(self, index):
        """Return ``(features, labels)`` for sample ``index`` as torch tensors."""
        features = self.X[index]
        target = self.Y[index]
        # from_numpy shares memory with the stored array; torch.tensor copies.
        return torch.from_numpy(features), torch.tensor(target)


In [None]:
def pad_sequences(batch):
    """Collate ``(sequence, label)`` pairs into zero-padded batch tensors.

    The batch order is kept as given (no sorting by length).
    NOTE: this definition shadows the Keras ``pad_sequences`` imported at the
    top of the file.

    Args:
        batch: list of ``(sequence, label)`` tensor pairs.

    Returns:
        Tuple ``(padded_sequences, padded_labels, seq_lengths, label_lengths)``;
        both padded tensors are batch-first and both length tensors are
        ``LongTensor``s holding the unpadded first-dimension sizes.
    """
    rnn_utils = torch.nn.utils.rnn

    features = [sample[0] for sample in batch]
    targets = [sample[1] for sample in batch]

    padded_features = rnn_utils.pad_sequence(features, batch_first=True)
    padded_targets = rnn_utils.pad_sequence(targets, batch_first=True)

    feature_lengths = torch.LongTensor(list(map(len, features)))
    target_lengths = torch.LongTensor(list(map(len, targets)))

    return padded_features, padded_targets, feature_lengths, target_lengths



In [None]:
# Shuffled training loader. NOTE: `train` is rebound from the raw array to the
# DataLoader here, so this cell cannot be re-run without reloading the data.
dataset = Dataset_train(train, train_labels)
train_loader_args = {
    'shuffle': True,
    'batch_size': 64,
    'num_workers': 0,
    'collate_fn': pad_sequences,
    'pin_memory': True,
}
train = torch.utils.data.DataLoader(dataset, **train_loader_args)

In [None]:
# Validation loader: same batching as training but in fixed (unshuffled) order.
dataset = Dataset_train(dev, dev_labels)
dev_loader_args = {
    'shuffle': False,
    'batch_size': 64,
    'num_workers': 0,
    'collate_fn': pad_sequences,
    'pin_memory': True,
}
validation = torch.utils.data.DataLoader(dataset, **dev_loader_args)

# Model

In [None]:
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
class lstmModel(nn.Module):
    """Conv1d front-end, bidirectional LSTM and linear head emitting log-probabilities.

    The output layout is (time, batch, classes), which is the layout
    ``nn.CTCLoss`` consumes.

    Args:
        hidden_size: channel count of the conv layers and hidden size of each
            LSTM direction.
        kernel: kernel size of the first Conv1d (stride 1, no padding), so the
            time axis shrinks by ``kernel - 1`` frames.
        nlayers: number of stacked LSTM layers.
        out_size: number of output classes per frame.
        in_size: number of input features per frame.
    """

    def __init__(self, hidden_size, kernel=2, nlayers=4, out_size=42, in_size=40):
        super().__init__()
        self.nlayers = nlayers
        self.hidden_size = hidden_size
        self.in_size = in_size
        self.out_size = out_size

        self.kernel = kernel

        self.cnns = torch.nn.Sequential(
            nn.Conv1d(self.in_size, self.hidden_size, kernel_size=self.kernel,
                      stride=1, padding=0, bias=False),
            nn.BatchNorm1d(self.hidden_size),
            nn.ReLU(),
            nn.Conv1d(self.hidden_size, self.hidden_size, kernel_size=1,
                      padding=0, bias=False),
            nn.BatchNorm1d(self.hidden_size),
            nn.ReLU(),
        )

        self.lstm = nn.LSTM(input_size=self.hidden_size, hidden_size=self.hidden_size,
                            num_layers=self.nlayers, bias=True, batch_first=True,
                            dropout=0.4, bidirectional=True)

        # NOTE(review): there is no activation between these Linear layers, so
        # the stack is mathematically a single affine map. Inserting
        # activations would shift the Sequential indices and break existing
        # checkpoints, so the layout is left untouched.
        self.hidden2label = torch.nn.Sequential(
            nn.Linear(self.hidden_size * 2, self.hidden_size * 4),
            nn.Linear(self.hidden_size * 4, self.hidden_size * 2),
            nn.Linear(self.hidden_size * 2, self.out_size))

    def forward(self, x, length):
        """Compute per-frame log-probabilities for a padded batch.

        Args:
            x: padded input of shape (batch, len, in_size).
            length: 1D integer tensor with the unpadded number of frames of
                each sample (``pack_padded_sequence`` expects it on the CPU).

        Returns:
            Tuple ``(out, length)``: ``out`` holds log-probabilities of shape
            (len', batch, out_size) and ``length`` the per-sample number of
            valid frames after the convolution (input length - kernel + 1).
        """
        # Conv1d expects (batch, channels, time). The axes have to be swapped
        # with a transpose; reshape() would only reinterpret the memory and
        # interleave frames with features.
        x = x.permute(0, 2, 1)      # (batch, in_size, len)

        x = self.cnns(x)            # (batch, hidden, len')

        x = x.permute(2, 0, 1)      # (len', batch, hidden)

        # Stride 1 and no padding: the first conv drops kernel - 1 frames and
        # the second conv (kernel size 1) keeps the length unchanged.
        length = length - self.kernel + 1
        x_packed = nn.utils.rnn.pack_padded_sequence(x, length, enforce_sorted=False)

        out_packed = self.lstm(x_packed)[0]
        out = nn.utils.rnn.pad_packed_sequence(out_packed, batch_first=True)[0]

        out = self.hidden2label(out).log_softmax(2)   # (batch, len', out_size)

        out = out.permute(1, 0, 2)                    # (len', batch, out_size)

        return out, length

# Training


In [None]:
!pip install Levenshtein
import Levenshtein as lev
import numpy as np
from phoneme_list import  N_PHONEMES, PHONEME_LIST, PHONEME_MAP
!git clone --recursive https://github.com/parlance/ctcdecode.git
!cd ctcdecode && pip install .
from ctcdecode import CTCBeamDecoder
import os

def levdistance(preds, target):
    """Return the mean Levenshtein distance between paired strings.

    Args:
        preds: sequence of predicted strings.
        target: sequence of reference strings, indexed in step with ``preds``.

    Returns:
        ``np.mean`` of ``lev.distance(preds[i], target[i])`` over all ``i``.
    """
    per_pair = [lev.distance(pred, target[idx]) for idx, pred in enumerate(preds)]
    return np.mean(per_pair)

In [None]:
def _phonemes_to_string(indices):
    """Join the PHONEME_MAP symbols selected by a 1D tensor of class indices."""
    return ''.join(PHONEME_MAP[k] for k in indices.tolist())


def training(epoch, model, dataloader, val):
    """Train ``model`` with the global criterion and validate after every epoch.

    Relies on the module-level globals ``optimizer``, ``criterion``,
    ``scheduler`` and ``device``. The number of epochs comes from the ``epoch``
    argument (the global ``numEpochs`` is no longer read).

    Per epoch it prints the mean training loss, then the mean validation loss
    and the mean per-sample Levenshtein distance between beam-search decodings
    and the reference label strings, and finally steps the scheduler on the
    mean validation loss.

    Args:
        epoch: number of epochs to run.
        model: module called as ``model(x, seq_len)`` that returns
            ``(log_probs, out_lengths)``.
        dataloader: iterable of ``(x, y, seq_len, label_len)`` training batches.
        val: iterable of validation batches in the same format.
    """
    # The decoder is stateless with respect to the data, so build it once.
    decoder = CTCBeamDecoder(['$'] * len(PHONEME_LIST), beam_width=20,
                             num_processes=os.cpu_count(), log_probs_input=True)

    for epoch_idx in range(epoch):
        model.train()
        running_loss = 0.0
        n_batches = 0

        for batch_num, (x, y, seq_len, label_len) in enumerate(dataloader):
            optimizer.zero_grad()

            x, y = x.to(device), y.to(device)

            outputs, outlen = model(x.float(), seq_len)

            loss = criterion(outputs, y, outlen, label_len)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            n_batches += 1
            if batch_num % 99 == 1:
                print('train loss', running_loss / n_batches)

            # Drop references before emptying the cache so the blocks can
            # actually be returned to the allocator.
            del x, y, outputs, loss, outlen, label_len
            torch.cuda.empty_cache()

        print('train_epoch', epoch_idx, running_loss / max(n_batches, 1))

        model.eval()
        val_loss = 0.0
        n_val_batches = 0
        dist = []
        with torch.no_grad():
            for x, y, seq_len, label_len in val:
                x, y = x.to(device), y.to(device)

                outputs, outlen = model(x.float(), seq_len)

                # Accumulate into a separate float; reusing the tensor name
                # would overwrite the running total on every batch.
                val_loss += criterion(outputs, y, outlen, label_len).item()
                n_val_batches += 1

                # The decoder wants batch-first input; the model emits time-first.
                probs = outputs.transpose(0, 1)
                out, _, _, out_lens = decoder.decode(probs, outlen)

                for i in range(len(x)):
                    pred = _phonemes_to_string(out[i, 0, :out_lens[i, 0]])
                    # The reference is cut at its own length, not at the
                    # length of the prediction.
                    truth = _phonemes_to_string(y[i, :label_len[i]])
                    dist.append(lev.distance(pred, truth))

                del x, y, outputs, outlen, label_len
                torch.cuda.empty_cache()

        mean_val_loss = val_loss / max(n_val_batches, 1)
        mean_dist = np.mean(dist) if dist else float('nan')
        print('test epoch', epoch_idx, mean_val_loss, mean_dist)
        scheduler.step(mean_val_loss)


In [None]:
import tensorflow as tf
# Train one model per LSTM depth listed in `layer`. The names assigned below
# (device, criterion, optimizer, scheduler, numEpochs) are read as globals by
# training().
layer = [4]
for n_layers in layer:
    hidden = 256
    learningRate = 2e-3
    numEpochs = 30
    weightDecay = 5e-5

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    #model = torch.load('dist_12')
    # Pass the depth being swept to the model, and move it to the selected
    # device so that CPU-only hosts do not crash on .cuda().
    model = lstmModel(hidden, nlayers=n_layers)
    model = model.to(device)

    criterion = nn.CTCLoss()

    optimizer = torch.optim.Adam(model.parameters(), lr=learningRate,
                                 weight_decay=weightDecay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=3)

    training(numEpochs, model, train, validation)

# Test


In [None]:
# Drop the training and validation loaders before the test data is loaded.
del train, validation
!unzip test.npy.zip

Archive:  test.npy.zip
replace test.npy? [y]es, [n]o, [A]ll, [N]one, [r]ename: n


In [None]:
# Test split features, stored as a pickled object array.
# NOTE: allow_pickle=True executes pickled code on load; only use trusted files.
test = np.load('test.npy', allow_pickle=True)

In [None]:
class Dataset_test(torch.utils.data.Dataset):
    """Map-style dataset over unlabeled samples.

    ``__getitem__`` returns the features twice so that items have the same
    ``(x, y)`` layout as the training dataset; the second element is only a
    placeholder. ``self.Y`` is a zero array that ``__getitem__`` does not read.

    Args:
        X: indexable collection of NumPy arrays (one per sample).
    """

    def __init__(self, X):
        self.X = np.array(X)
        self.length = len(self.X)
        self.Y = np.zeros(self.length)

    def __len__(self):
        """Return the number of samples."""
        return self.length

    def __getitem__(self, index):
        """Return ``(features, features)`` for sample ``index`` as torch tensors."""
        sample = self.X[index]
        return torch.from_numpy(sample), torch.from_numpy(sample)


In [None]:
def pad_sequences(batch):
    """Collate ``(sequence, label)`` pairs into zero-padded batch tensors.

    Samples keep the order in which the loader supplies them. Sorting by
    length here would permute the samples inside every batch, so predictions
    collected batch by batch would no longer line up with sequential sample
    ids. The model packs with ``enforce_sorted=False``, so no sort is needed.

    Args:
        batch: list of ``(sequence, label)`` tensor pairs.

    Returns:
        Tuple ``(padded_sequences, padded_labels, seq_lengths, label_lengths)``;
        both padded tensors are batch-first and both length tensors are
        ``LongTensor``s holding the unpadded first-dimension sizes.
    """
    sequences = [item[0] for item in batch]
    labels = [item[1] for item in batch]

    sequences_padded = torch.nn.utils.rnn.pad_sequence(sequences, batch_first=True)
    labels_padded = torch.nn.utils.rnn.pad_sequence(labels, batch_first=True)

    seq_lengths = torch.LongTensor([len(seq) for seq in sequences])
    label_lengths = torch.LongTensor([len(lab) for lab in labels])

    return sequences_padded, labels_padded, seq_lengths, label_lengths


In [None]:
# Test loader in fixed (unshuffled) order. NOTE: `test` is rebound from the raw
# array to the DataLoader here.
dataset = Dataset_test(test)
test_loader_args = {
    'shuffle': False,
    'batch_size': 64,
    'num_workers': 0,
    'collate_fn': pad_sequences,
}
test = torch.utils.data.DataLoader(dataset, **test_loader_args)

In [None]:

# Decode the test set with beam search and collect one phoneme string per sample.
# NOTE: predictions are appended in the order the loader yields samples, so the
# ids written to the submission only match if the collate function preserves
# the dataset order.
preds = []

# Build the decoder once instead of once per batch.
decoder = CTCBeamDecoder(['$'] * len(PHONEME_LIST), beam_width=20,
                         num_processes=os.cpu_count(), log_probs_input=True)

# Inference must run with dropout disabled and batch-norm using its running
# statistics; no_grad avoids building an autograd graph for every batch.
model.eval()
with torch.no_grad():
    for batch_num, (x, y, seq_len, label_len) in enumerate(test):
        x = x.to(device)

        outputs, outlen = model(x.float(), seq_len)

        # The decoder wants batch-first input; the model emits time-first.
        probs = outputs.transpose(0, 1)

        out, _, _, out_lens = decoder.decode(probs, outlen)

        for i in range(len(x)):
            # Beam 0 is taken as the best hypothesis, cut at its decoded length.
            best_seq = out[i, 0, :out_lens[i, 0]]
            preds.append(''.join(PHONEME_MAP[k] for k in best_seq.tolist()))

# Print once at the end; printing inside the loop re-emitted the whole growing
# list after every batch.
print(preds)

In [None]:
import pandas as pd
# Write the submission file: sequential ids paired with the decoded strings.
df = pd.DataFrame(
    {
        'id': np.arange(len(preds)),
        'label': preds,
    }
)
df.to_csv('submission.csv', index=False)

In [None]:
!kaggle competitions submit -c 11785-spring2021-hw3p2-slacklate -f submission.csv -m "Message"

100% 203k/203k [00:02<00:00, 79.4kB/s]
Successfully submitted to 11785 Homework 3 Part 2: Seq to Seq