# Installing Modules

In [1]:
!pip install wandb -qU
!pip install pytorch_lightning


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Drive Mount

In [2]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Importing Modules


In [1]:
import wandb
import torch 
import pytorch_lightning as pl
import torch.nn as nn
from torch.nn  import functional
from pytorch_lightning.loggers import WandbLogger
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.utils.data as data
import numpy as np
import random
import csv
import pandas as pd
from torch.utils.data import Dataset, DataLoader


# Unzip data

In [4]:
!unzip /content/drive/MyDrive/dl/aksharantar_sampled.zip

Archive:  /content/drive/MyDrive/dl/aksharantar_sampled.zip
replace aksharantar_sampled/asm/asm_test.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


# Connecting Wandb


In [5]:

# wandb.login(key="8d6c17aa48af2229c26cbc16513ef266358c0b96")
# wandb.init(project="Assignment-02")

# Data Loading

In [33]:
base_dir = "aksharantar_sampled/mal/"

train_file = base_dir+"mal_train.csv"
val_file = base_dir+"mal_train.csv"
test_file = base_dir+"mal_train.csv"

train_data = pd.read_csv(train_file,header=None)
val_data = pd.read_csv(val_file,header=None)
test_data = pd.read_csv(test_file,header=None)



latin_chars = {'<PAD>': 0, '<UNK>': 1,'<start>':2,'<end>':3}
lang_chars = {'<PAD>': 0, '<UNK>': 1,'<start>':2,'<end>':3}
for word in train_data[0]:
  for char in word :
    if char not in latin_chars:
      latin_chars[char] = len(latin_chars)

for word in train_data[1]:
  for char in word :
    if char not in lang_chars:
      lang_chars[char] = len(lang_chars)


latin_max_length = len(max(train_data[0],key = len))
lang_max_length = len(max(train_data[1],key = len))

In [34]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def word_to_vec(data):
  data1= data.T
  data_pairs = []
  for i in range(0,len(data)):
    word =  [2]+[latin_chars.get(char,latin_chars['<UNK>']) for char in data1[i][0]] + [0]*(latin_max_length - len(data1[i][0]))+[3]
    latin_tensor = torch.tensor(word).to(device)
    word =  [2]+[lang_chars.get(char,lang_chars['<UNK>']) for char in data1[i][1]] + [0]*(lang_max_length - len(data1[i][1]))+[3]
    lang_tensor = torch.tensor(word).to(device)
    data_pairs.append([latin_tensor,lang_tensor])
  return data_pairs

# DataLoader

In [35]:
train_data_pairs = word_to_vec(train_data)
val_data_pairs = word_to_vec(val_data)
test_data_pairs = word_to_vec(test_data)

train_dataloader = DataLoader(train_data_pairs, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_data_pairs, batch_size=32, shuffle=False)
test_dataloader = DataLoader(test_data_pairs, batch_size=32, shuffle=False)


# Encoder

In [36]:
class Encoder(nn.Module):
  def __init__(self,input_size,embedding_size,hidden_size,layers,cell_type,bidirectional):
    super(Encoder,self).__init__()
    self.cell_type = cell_type
    self.embedding = nn.Embedding(input_size,embedding_size)
    self.rnn = cell_type(embedding_size,hidden_size,layers,bidirectional = bidirectional)

  def forward(self,x):
    embedding = self.embedding(x)
    output,hidden = self.rnn(embedding)

    return hidden


# Decoder

In [37]:
class Decoder(nn.Module):
  def __init__(self,output_size,embedding_size,hidden_size,layers,cell_type,bidirectional):
    super(Decoder,self).__init__()
    self.cell_type = cell_type
    self.embedding = nn.Embedding(output_size,embedding_size) 
    self.rnn = cell_type(embedding_size,hidden_size,layers,bidirectional = bidirectional)
    if bidirectional:
      self.out = nn.Linear(hidden_size*2,output_size) 
    else :
      self.out = nn.Linear(hidden_size,output_size) 

    
  def forward(self,x,hidden):
    x = x.unsqueeze(1).transpose(0,1)
    embedding = self.embedding(x)
    output,hidden = self.rnn(embedding,hidden)
    output = self.out(output.squeeze(0))

    return output,hidden




# Model

In [41]:
class seq2seq(pl.LightningModule):
  def __init__(self,input_size,output_size,embedding_size,hidden_size,encoder_layer_size,decoder_layer_size,cell_type,bidirectional,learning_rate=0.0001):
    super(seq2seq,self).__init__()
    self.output_size = output_size
    self.cell_type = cell_type
    self.train_step_acc = []
    self.train_step_loss = []
    self.val_step_acc = []
    self.val_step_loss = []
    self.decoder_layer_size = decoder_layer_size #*  2 if bidirectional else 1
    self.bidirectional = bidirectional
    self.encoder_layer_size = encoder_layer_size 
    self.encoder = Encoder(input_size,embedding_size,hidden_size,encoder_layer_size,cell_type,bidirectional)
    self.decoder = Decoder(output_size,embedding_size,hidden_size,decoder_layer_size,cell_type,bidirectional)
    self.learning_rate = learning_rate

  def forward(self,input,output,tf = 1) :
    output_len = output.shape[1]
    batch_size = input.shape[0]
    output_size = self.output_size
    hidden = self.encoder(input.transpose(0,1))
    if self.encoder_layer_size >= self.decoder_layer_size :
      hidden = hidden[-1*self.decoder_layer_size*  2 if self.bidirectional else 1:]
    else :

      for i in range(self.decoder_layer_size - self.encoder_layer_size):
        temp_hidden = hidden[-1 *2 if self.bidirectional else 1:]
        if(temp_hidden.shape[0] == 0) :
          temp_hidden = hidden

        hidden = torch.cat((hidden,temp_hidden) )
    # print("hidden encoder",hidden.shape)
    # hidden = torch.sum(hidden,dim=0).reshape(1,64,256)
    # print("hidden encoder",hidden.shape)

    output_seq = torch.zeros(output.shape[0],batch_size,output_size).to(device)


    output = output.transpose(0,1)
    next_input = output[:,0]
    for t in range(output.shape[1]):
      output_rnn,hidden = self.decoder( next_input ,hidden)
      # print("hidden decoder",hidden.shape)
      output_seq[t] = output_rnn.squeeze(1)
      next_input =  output_seq[t].argmax(1) if random.random() < tf else output[:,t]
    # else :
    #   for t in range(output.shape[1]):
    #     output_rnn,hidden = self.decoder(output[:,t],hidden)

    #     output_seq[t] = output_rnn.squeeze(1)
    return output_seq

  def training_step(self,batch):
    input,output = batch

    output = output.permute(1,0)
    output_seq = self(input,output)
    output = output.permute(1,0)

    output_seq_2 = torch.zeros(output_seq.shape).to(device)
    batch_n = np.arange(len(output_seq))

    for f in range(len(output)):
      col = output[f]
      output_seq_2[batch_n,f,np.array(col.cpu())] = 1
    output_dim = output_seq.shape[-1]
    output_seq_t1 = output_seq[1:].view(-1,output_dim)
    output_seq_t2 = output_seq_2[1:].view(-1,output_dim)
    loss = nn.CrossEntropyLoss()
    loss = loss(output_seq_t1,output_seq_t2).mean()
    
    output = output.permute(1,0)
    output_ = torch.argmax(output_seq,2)
    acc_1 = torch.all(output_[1:-1,:] == output[1:-1,:],dim=0)
    acc = torch.sum(acc_1 == True)/len(acc_1)

    self.log('train_loss', loss,on_epoch = True,on_step = False,prog_bar=True)
    self.train_step_loss.append(loss)
    self.log('train_acc', acc,on_epoch = True,on_step = False,prog_bar=True)
    self.train_step_acc.append(acc)

    return loss



  def on_train_epoch_end(self):
    
    train_acc =  torch.stack(self.train_step_acc).mean()
    train_loss =  torch.stack(self.train_step_loss).mean()
    val_acc =  torch.stack(self.val_step_acc).mean()
    val_loss =  torch.stack(self.val_step_loss).mean()
    print("train_loss:",train_loss.item(),"train_acc",train_acc.item(),"val_loss:",val_loss.item(),"val_acc",val_acc.item())
    # wandb.log({"train_loss":train_loss.item(),"train_acc":train_acc.item(),"val_loss":val_loss.item(),"val_acc":val_acc.item()})
    self.train_step_acc.clear() 
    self.train_step_loss.clear() 
    self.val_step_acc.clear() 
    self.val_step_loss.clear() 


  def validation_step(self, batch,batch_idx):
    input,output = batch

    output = output.permute(1,0)
    output_seq = self(input,output,0)
    output = output.permute(1,0)

    output_seq_2 = torch.zeros(output_seq.shape).to(device)
    batch_n = np.arange(len(output_seq))

    for f in range(len(output)):
      col = output[f]
      output_seq_2[batch_n,f,np.array(col.cpu())] = 1

    output_dim = output_seq.shape[-1]
  
    output_seq_t1 = output_seq[1:].view(-1,output_dim)
    output_seq_t2 = output_seq_2[1:].view(-1,output_dim)

    loss = nn.CrossEntropyLoss()
    loss = loss(output_seq_t1,output_seq_t2).mean()
    output = output.permute(1,0)
    output_ = torch.argmax(output_seq,2)
    acc_1 = torch.all(output_[1:-1,:] == output[1:-1,:],dim=0)
    acc = torch.sum(acc_1 == True)/len(acc_1)

    self.log('val_loss', loss,on_epoch = True,on_step = False,prog_bar=True)
    self.val_step_loss.append(loss)
    self.log('val_acc', acc,on_epoch = True,on_step = False,prog_bar=True)
    self.val_step_acc.append(acc)

    return loss

  def configure_optimizers(self):
    return torch.optim.Adam(self.parameters(),lr= self.learning_rate)



# Model Training

In [42]:
model = seq2seq(input_size = len(latin_chars), output_size = len(lang_chars),
                embedding_size = 16, hidden_size = 256,encoder_layer_size = 3,
                decoder_layer_size = 2,cell_type = nn.GRU,bidirectional =True ,
                learning_rate = 0.001)
model.to(device)


seq2seq(
  (encoder): Encoder(
    (embedding): Embedding(30, 16)
    (rnn): GRU(16, 256, num_layers=3, bidirectional=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(73, 16)
    (rnn): GRU(16, 256, num_layers=2, bidirectional=True)
    (out): Linear(in_features=512, out_features=73, bias=True)
  )
)

In [None]:



trainer = pl.Trainer(max_epochs = 30)
trainer.fit(model, train_dataloader,val_dataloader)


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type    | Params
------------------------------------
0 | encoder | Encoder | 2.8 M 
1 | decoder | Decoder | 1.6 M 
------------------------------------
4.4 M     Trainable params
0         Non-trainable params
4.4 M     Total params
17.716    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

train_loss: 1.0733054876327515 train_acc 0.02548827975988388 val_loss: 0.5199981927871704 val_acc 0.09332084655761719


Validation: 0it [00:00, ?it/s]

train_loss: 0.4818159341812134 train_acc 0.19626952707767487 val_loss: 0.2881896495819092 val_acc 0.29488280415534973


Validation: 0it [00:00, ?it/s]

train_loss: 0.3583606779575348 train_acc 0.3189648389816284 val_loss: 0.23412196338176727 val_acc 0.34773436188697815


Validation: 0it [00:00, ?it/s]

train_loss: 0.2899172008037567 train_acc 0.39894530177116394 val_loss: 0.22684291005134583 val_acc 0.34666013717651367


Validation: 0it [00:00, ?it/s]

train_loss: 0.24611781537532806 train_acc 0.45048826932907104 val_loss: 0.17586714029312134 val_acc 0.4407617151737213


Validation: 0it [00:00, ?it/s]

train_loss: 0.21700161695480347 train_acc 0.48775389790534973 val_loss: 0.14714618027210236 val_acc 0.4856835901737213


Validation: 0it [00:00, ?it/s]

train_loss: 0.19074603915214539 train_acc 0.5221288800239563 val_loss: 0.14484718441963196 val_acc 0.47929686307907104


Validation: 0it [00:00, ?it/s]

train_loss: 0.17132574319839478 train_acc 0.549121081829071 val_loss: 0.17566902935504913 val_acc 0.3988671898841858


Validation: 0it [00:00, ?it/s]

train_loss: 0.15811537206172943 train_acc 0.5727733969688416 val_loss: 0.12517981231212616 val_acc 0.5241601467132568


Validation: 0it [00:00, ?it/s]

train_loss: 0.1457425206899643 train_acc 0.587890625 val_loss: 0.14781510829925537 val_acc 0.4722265601158142


Validation: 0it [00:00, ?it/s]

train_loss: 0.13507361710071564 train_acc 0.611132800579071 val_loss: 0.13413302600383759 val_acc 0.5027148127555847


Validation: 0it [00:00, ?it/s]

train_loss: 0.12873896956443787 train_acc 0.6190234422683716 val_loss: 0.2159806340932846 val_acc 0.381660133600235


Validation: 0it [00:00, ?it/s]

train_loss: 0.11733928322792053 train_acc 0.6393749713897705 val_loss: 0.12259569019079208 val_acc 0.5173632502555847


Validation: 0it [00:00, ?it/s]

train_loss: 0.11701159179210663 train_acc 0.6394921541213989 val_loss: 0.13649357855319977 val_acc 0.49482420086860657


Validation: 0it [00:00, ?it/s]

train_loss: 0.1140698790550232 train_acc 0.6469531059265137 val_loss: 0.17320503294467926 val_acc 0.4299413859844208


Validation: 0it [00:00, ?it/s]

train_loss: 0.11070937663316727 train_acc 0.6551562547683716 val_loss: 0.2197176069021225 val_acc 0.3641015589237213


Validation: 0it [00:00, ?it/s]

train_loss: 0.11072558909654617 train_acc 0.6560937166213989 val_loss: 0.27965247631073 val_acc 0.3106640577316284


Validation: 0it [00:00, ?it/s]

train_loss: 0.1034790575504303 train_acc 0.6708984375 val_loss: 0.31348201632499695 val_acc 0.23636718094348907


Validation: 0it [00:00, ?it/s]


# Sweep Config

In [None]:


config= {
    'method': 'bayes',
    'name': 'sweep',
    'metric': {
        'goal': 'maximize', 
        'name': 'val_acc'
      },
    "parameters":
    {
      
    "bidirectional" :{
        "values" : [True,False]
    },
    "dropout" :{
        "values" : [0.2,0.3,0.4]
    },
    "cell_type" :{
          "values" : ["RNN", "GRU", "LSTM"]
    },
      "epochs" :{
          "values" : [5,  10]
    },
     
      "encoder_layers" :{
          "values" : [1,  2]
    },
      "decoder_layers" :{
          "values" : [1,  2]
    },
      "embedding_size" :{
          "values" : [16,32,64,256]
    },
      "hidden_layer_size" :{
          "values" : [16,32,64,256]
    },

      "learning_rate" :{
          "values" : [1e-3,1e-4]
    }

    }

}


# Sweep Function

In [None]:

# cell_map = {"RNN":nn.RNN, "GRU":nn.GRU, "LSTM":nn.LSTM()}
# def sweeprun():

#   wandb.init()
#   bidirectional = wandb.config.bidirectional
#   dropout = wandb.config.dropout
#   cell_type = wandb.config.cell_type

#   encoder_layers = wandb.config.encoder_layers
#   decoder_layers = wandb.config.decoder_layers
#   epochs = wandb.config.epochs
#   learning_rate = wandb.config.learning_rate

#   embedding_size = wandb.config.embedding_size
#   hidden_layer_size = wandb.config.hidden_layer_size

#   run_name = "lr_{}_rnn_{}_dp_{}_bd_{}_el_{}_dl_{}_ep_{}_es_{}_hs_{}".format(learning_rate,cell_type, dropout, bidirectional,encoder_layers, decoder_layers,epochs,embedding_size,hidden_layer_size)

#   cell_type = cell_map[cell_type]
#   model = seq2seq(len(latin_chars), len(lang_chars),embedding_size, hidden_layer_size,cell_type,bidirectional,learning_rate)
#   trainer = pl.Trainer(max_epochs=epochs) 
#   trainer.fit(model, train_dataloader,val_dataloader)

#   wandb.run.name = run_name
#   wandb.finish()



# sweep_id = wandb.sweep(config,project="Assignment-03", entity = "saisreeram")
# wandb.agent(sweep_id, sweeprun)

In [None]:
a = [1]

In [None]:
a[0]


In [None]:
train_data[0]