In [None]:
# Mount Google Drive; the dataset and checkpoint paths used later live under /content/drive.
from google.colab import drive

# force_remount=True asks Colab to mount again even when the drive is already mounted.
drive.mount("/content/drive", force_remount=True)

In [None]:
# Install the third-party packages that the import cell further down expects.
!pip install python-Levenshtein
# ctcdecode is installed from a source checkout: clone it with its submodules, then pip-install the checkout.
!git clone --recursive https://github.com/parlance/ctcdecode.git
!pip install wget
%cd ctcdecode
!pip install .
%cd ..

!pip install torchsummaryX # We also install a summary package to check our model's forward before training

# DataLoader

In [None]:
def get_file_paths(data_dirs, reskin_file = "reskin_data.csv" ):
    """
    Collect the sensor CSV path of every trial under the dataset root(s), grouped by class.

    The layout walked is <data_dir>/<class_dir>/<trial_dir>/<reskin_file>. A class directory is
    assigned to the first of "0cloth", "1cloth", "2cloth", "3cloth" that occurs in its name;
    class directories matching none of them contribute nothing.

    Input: a single dataset root (str) or a list of roots, plus the CSV file name expected
    inside every trial directory.
    Output: dict with the keys "-1cloth", "0cloth", "1cloth", "2cloth", "3cloth", each mapping
    to a list of [csv_path, label] pairs (label is the int 0-3). Existence of the CSV file
    itself is not checked here.

    Directory listings are sorted so that the result, and any train/val/test split sliced
    from it, is the same on every run (os.listdir returns entries in arbitrary order).
    Entries that are not directories (stray files in the root or in a class directory) are
    skipped instead of crashing the walk or producing a bogus path.
    """
    # Local import: this function is called in a cell that runs before the notebook's
    # main import cell, so a module-level `os` is not guaranteed to exist yet.
    import os

    if isinstance(data_dirs, str):
        data_dirs = [data_dirs]

    # Checked in this order; the first key contained in the directory name wins.
    # NOTE(review): a directory named "-1cloth..." also contains "1cloth", so it is filed
    # under "1cloth" with label 1 and the "-1cloth" bucket is never filled - confirm intent.
    class_labels = (("0cloth", 0), ("1cloth", 1), ("2cloth", 2), ("3cloth", 3))

    reskin_paths = {"-1cloth":[],"0cloth":[], "1cloth":[], "2cloth": [], "3cloth": []}
    for data_dir in data_dirs:
        for class_dir in sorted(os.listdir(data_dir)):
            class_path = data_dir + "/" + class_dir
            if not os.path.isdir(class_path):
                continue
            matched = next(((key, label) for key, label in class_labels if key in class_dir), None)
            if matched is None:
                continue
            key, label = matched
            for path_dir in sorted(os.listdir(class_path)):
                trial_path = class_path + "/" + path_dir
                if not os.path.isdir(trial_path):
                    continue
                reskin_paths[key].append([trial_path + "/" + reskin_file, label])
    return reskin_paths


def setup_paths(data_dirs, train_val_test_split=[0.7, 0.2, 0.1]):
    """
    Split the per-class sample lists from get_file_paths into train / validation / test lists.

    Each class is cut independently: the first int(split[0] * n) samples go to training, the
    next int(split[1] * n) to validation and whatever remains to test (the third fraction is
    never read). The three returned lists are the per-class slices concatenated in the key
    order of the dict returned by get_file_paths; no shuffling is performed.

    Returns: (train_paths, val_paths, test_paths), each a list of [csv_path, label] pairs.
    """
    grouped = get_file_paths(data_dirs)
    train_paths, val_paths, test_paths = [], [], []
    for class_samples in grouped.values():
        n_samples = len(class_samples)
        val_start = int(train_val_test_split[0] * n_samples)
        test_start = val_start + int(train_val_test_split[1] * n_samples)
        train_paths.extend(class_samples[:val_start])
        val_paths.extend(class_samples[val_start:test_start])
        test_paths.extend(class_samples[test_start:])

    return train_paths, val_paths, test_paths

## TestData loader

In [None]:

dirn = "/content/drive/MyDrive/idl_project/Basic_Dataset_RealTrials_NoRub"
train_paths, val_paths, test_paths = setup_paths(dirn)

# Print the three split lists, one per line, to eyeball the split.
print(train_paths, val_paths, test_paths, sep="\n")


# LSTM model

In [None]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torchsummaryX import summary
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

from sklearn.metrics import accuracy_score
import gc
import zipfile
import pandas as pd
from tqdm import tqdm
import os
import datetime
import time
# imports for decoding and distance calculation
import ctcdecode
import Levenshtein
from ctcdecode import CTCBeamDecoder

import warnings
# NOTE: this silences every Python warning for the rest of the session, deprecation warnings included.
warnings.filterwarnings('ignore')

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device: ", device)

In [None]:
class LibriSamples(torch.utils.data.Dataset):
    """
    In-memory dataset of (feature sequence, label sequence) pairs read from CSV files.

    data_path is an iterable of [csv_path, label] pairs. Every CSV is loaded eagerly in the
    constructor; only its first 15 columns are kept (as float32). The label sequence of a
    sample is the sample's label repeated 5 times, stored as float64.
    """

    def __init__(self, data_path):
        self.X = []
        self.Y = []

        for csv_path, label in data_path:
            table = np.loadtxt(csv_path, delimiter=",")
            # Keep the first 15 columns only.
            self.X.append(np.float32(table[:, :15]))
            # Constant target sequence of length 5 holding the sample's label.
            self.Y.append(np.full(5, label, dtype=np.float64))

        assert (len(self.X) == len(self.Y))

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return sample idx as a (features, labels) pair of freshly created tensors."""
        return torch.tensor(self.X[idx]), torch.tensor(self.Y[idx])

    def collate_fn(self, batch):
        """
        Merge a list of (features, labels) samples into one zero-padded, batch-first batch.

        Returns (padded_features, padded_labels, feature_lengths, label_lengths); the two
        length tensors hold each sample's size along its first dimension before padding.
        """
        features = [sample_x for sample_x, _ in batch]
        targets = [sample_y for _, sample_y in batch]

        feature_lengths = torch.tensor([seq.shape[0] for seq in features])
        target_lengths = torch.tensor([seq.shape[0] for seq in targets])

        padded_features = pad_sequence(features, batch_first=True)
        padded_targets = pad_sequence(targets, batch_first=True)

        return padded_features, padded_targets, feature_lengths, target_lengths


In [None]:
batch_size = 10

root = "/content/drive/MyDrive/idl_project/Basic_Dataset_RealTrials_NoRub"
train_paths, val_paths, test_paths = setup_paths(root)

# Each dataset loads all of its CSV files into memory when it is constructed.
train_data = LibriSamples(train_paths)
val_data = LibriSamples(val_paths)
test_data = LibriSamples(test_paths)

# setup_paths concatenates the samples class by class, so without shuffling every training
# mini-batch would hold a single class; the training loader therefore reshuffles each epoch.
# Validation and test keep a fixed order. Every loader uses the dataset's own collate_fn,
# which pads the variable-length sequences and also returns their original lengths.
train_loader = DataLoader(train_data, batch_size=batch_size, collate_fn=train_data.collate_fn, shuffle=True, drop_last=False, num_workers=2)
val_loader = DataLoader(val_data, batch_size=batch_size, collate_fn=val_data.collate_fn, shuffle=False, drop_last=False, num_workers=1)
test_loader = DataLoader(test_data, batch_size=batch_size, collate_fn=test_data.collate_fn, shuffle=False, drop_last=False, num_workers=2)

print("Batch size: ", batch_size)
print("Train dataset samples = {}, batches = {}".format(len(train_data), len(train_loader)))
print("Val dataset samples = {}, batches = {}".format(len(val_data), len(val_loader)))
print("Test dataset samples = {}, batches = {}".format(len(test_data), len(test_loader)))

In [None]:
# Optional sanity check: pull a single batch from the test loader and print the tensor shapes.
# NOTE: x and lx stay in the notebook namespace and are reused by the model summary call further down.
for data in test_loader: # data is (padded x, padded y, x lengths, y lengths); collate_fn pads with batch_first=True, so x is (Batch, Time, feature)
    x, y, lx, ly = data # if you face an error saying "Cannot unpack", then you are not passing the collate_fn argument
    print(x.shape, y.shape, lx.shape, ly.shape)
    # Only the first batch is needed.
    break

In [None]:
class Network(nn.Module):
    """
    Single-layer, unidirectional LSTM followed by a per-time-step linear classifier.

    Args:
        input_size: number of features per time step (default 15, the number of CSV
            columns the dataset in this notebook keeps).
        hidden_size: width of the LSTM hidden state (default 256).
        num_classes: size of the output distribution at every time step (default 3).

    The defaults reproduce the sizes that used to be hard-coded, so ``Network()`` builds
    the same layers as before.

    NOTE(review): the labels produced by get_file_paths range over 0-3 and nn.CTCLoss uses
    index 0 as the blank symbol by default, so 3 outputs cannot represent every label plus
    a blank - confirm the intended class layout and pass num_classes accordingly.
    """

    def __init__(self, input_size=15, hidden_size=256, num_classes=3):

        super(Network, self).__init__()

        # No embedding layer: the raw features are fed straight into the LSTM.
        self.lstm = nn.LSTM(input_size, hidden_size, 1, batch_first=True)
        self.classification = nn.Linear(hidden_size, num_classes)

    def forward(self, x, x_origin_len):
        """
        Run the padded batch through the LSTM and classifier.

        Args:
            x: zero-padded, batch-first input of shape (B, T, input_size).
            x_origin_len: length of every sequence in x before padding (tensor or list).

        Returns:
            (out_prob, lengths): log-probabilities of shape (B, T_max, num_classes), where
            T_max is the longest length in the batch, and the per-sequence lengths as
            returned by pad_packed_sequence.
        """
        # pack_padded_sequence requires the lengths to live on the CPU.
        if torch.is_tensor(x_origin_len):
            x_origin_len = x_origin_len.cpu()

        # Packing lets the LSTM skip the padded time steps; enforce_sorted=False because
        # the batch is not sorted by length.
        packed_input = pack_padded_sequence(x, x_origin_len, enforce_sorted=False, batch_first=True)

        # Only the per-step outputs are needed; the final hidden and cell states are discarded.
        packed_output, _ = self.lstm(packed_input)
        out, lengths = pad_packed_sequence(packed_output, batch_first=True)

        out = self.classification(out)
        # Normalise over the class dimension (dim 2 of the batch-first output).
        out_prob = out.log_softmax(dim=2)
        return out_prob, lengths

# Build the network on the selected device and print its layer structure.
model = Network().to(device)
print(model)
# Run one forward pass through torchsummaryX; x and lx must already exist from the loader sanity-check cell.
summary(model, x.to(device), lx) # x and lx are from the previous cell

# Training Configuration

In [None]:
# Keep the model on the device selected earlier in the notebook. Using .to(device) instead
# of .cuda() keeps this cell runnable on a CPU-only runtime.
model.to(device)

epochs = 20

# CTC loss for the sequence-to-sequence setup. It expects log-probabilities shaped (T, B, C),
# so the batch-first model output is permuted before it is passed in.
criterion = nn.CTCLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=2e-3)
# Halve the learning rate when the value passed to scheduler.step() has stopped decreasing.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=1, verbose=True)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(train_loader) * epochs))
# Beam-search decoding is currently disabled (PHONEME_MAP is not defined in this notebook):
# decoder = CTCBeamDecoder(
#     labels=PHONEME_MAP,
#     model_path=None,
#     alpha=0,
#     beta=0,
#     cutoff_top_n=40,
#     cutoff_prob=1.0,
#     beam_width=10,
#     num_processes=4,
#     blank_id=0,
#     log_probs_input=True
# )
# Gradient scaler used by the mixed-precision training loop.
scaler = torch.cuda.amp.GradScaler()

# Train and evaluate

In [None]:
def model_evaluate(model, device, dev_samples):
    """
    Compute the mean per-batch loss of `model` over a validation loader.

    Args:
        model: network called as model(x, lx) and returning (log-probabilities, output
            lengths); the log-probabilities are permuted with (1, 0, 2) before the loss.
        device: device the input and target batches are moved to; it should be the device
            the model lives on.
        dev_samples: sized iterable yielding (x, y, lx, ly) batches.

    Returns:
        (avg_loss, avg_dist): the loss averaged over the number of batches, and a placeholder
        distance that is always 0 because no decoding / distance computation is done.

    Uses the notebook-level `criterion` as the loss function. The model is left in eval mode.
    """
    model.eval()
    batch_bar = tqdm(total=len(dev_samples), dynamic_ncols=True, position=0, leave=False, desc='Val')
    avg_dist = 0
    total_loss = 0.0
    for x, y, lx, ly in dev_samples:
        # Honour the `device` argument instead of assuming CUDA.
        x = x.to(device)
        y = y.to(device)

        with torch.no_grad():
            output, length = model(x, lx)
            loss_input = output.permute(1, 0, 2)  # batch-first -> time-first for the loss
            loss = criterion(loss_input, y, length, ly)
            total_loss += loss.item()

        # Placeholder: no distance metric is computed yet, so nothing is accumulated.
        avg_dist += 0
        batch_bar.set_postfix(distance="{:.04f}".format(avg_dist))
        batch_bar.update()  # advance the bar; it was previously created but never moved

    avg_dist = avg_dist / len(dev_samples)
    avg_loss = total_loss / len(dev_samples)
    batch_bar.close()

    return avg_loss, avg_dist


In [None]:
torch.cuda.empty_cache()

# Mixed-precision training loop.
# (1) The dataloader returns 4 items: padded inputs, padded targets and both length tensors.
# (2) The model forward returns 2 outputs: log-probabilities and output lengths.
# (3) The loss takes time-first input, so the batch-first model output is permuted.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
distance = 0
for epoch in range(epochs):
    # Quality of life tip: leave=False and position=0 are needed to make tqdm usable in jupyter
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train')

    total_loss = 0

    model.train()
    for i, (x, y, lx, ly) in enumerate(train_loader):
        optimizer.zero_grad()

        # Move the batch to the device selected above rather than assuming CUDA.
        x = x.to(device)
        y = y.to(device)

        # Forward pass and loss run under autocast for FP16 training.
        with torch.cuda.amp.autocast():
            output, length = model(x, lx)
            lost_input = output.permute(1, 0, 2) # (B, T, C) -> (T, B, C)
            loss = criterion(lost_input, y, length, ly)

        # Running sum of the batch losses for this epoch.
        total_loss += float(loss)

        # tqdm lets you add some details so you can monitor training as you train.
        # `distance` is the value returned by the previous epoch's validation pass.
        batch_bar.set_postfix(
            epoch="{}".format(epoch),
            distance="{:.04f}%".format(distance),
            loss="{:.04f}".format(float(total_loss / (i + 1))),
            lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))

        # Gradient-scaled replacements for loss.backward() / optimizer.step().
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        batch_bar.update() # Update tqdm bar

    batch_bar.close() # You need this to close the tqdm bar

    # Validate once per epoch; the validation loss drives the ReduceLROnPlateau scheduler.
    avg_loss, distance = model_evaluate(model, device, val_loader)
    scheduler.step(avg_loss)

    print("Epoch {}/{}: Distance {}, Train Loss {:.04f}, Learning Rate {:.04f}".format(
        epoch + 1,
        epochs,
        distance,
        float(total_loss / len(train_loader)),
        float(optimizer.param_groups[0]['lr'])))

# Save the trained weights as <model_path>/model_LSTM_<timestamp>.
# NOTE(review): assumes model_path is meant to be a directory - confirm.
model_path = "/content/drive/MyDrive/idl_project/trained_model"
model_name = "model_LSTM_" + time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
# Plain string concatenation glued the two parts together ("...trained_modelmodel_LSTM_..."),
# so join them as directory + file name and make sure the directory exists first.
os.makedirs(model_path, exist_ok=True)
torch.save(model.state_dict(), os.path.join(model_path, model_name))