# Installing Modules

In [1]:
!pip install wandb -qU
!pip install pytorch_lightning


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m201.7/201.7 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch_lightning
  Downloading pytorch_lightning-2.0.2-py3-none-any.whl (719 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m719.0/719.0 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchmetrics>=0.7.0
  Downloading torchmetrics-0.11.4-py3-non

# Drive Mount

In [2]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


# Importing Modules


In [3]:
import wandb
import torch 
import pytorch_lightning as pl
import torch.nn as nn
from torch.nn  import functional
from pytorch_lightning.loggers import WandbLogger
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.utils.data as data
import numpy as np
import random
import csv
import pandas as pd
from torch.utils.data import Dataset, DataLoader


# Unzip data

In [4]:
!unzip /content/drive/MyDrive/dl/aksharantar_sampled.zip

Archive:  /content/drive/MyDrive/dl/aksharantar_sampled.zip
   creating: aksharantar_sampled/
   creating: aksharantar_sampled/asm/
  inflating: aksharantar_sampled/asm/asm_test.csv  
  inflating: aksharantar_sampled/asm/asm_train.csv  
  inflating: aksharantar_sampled/asm/asm_valid.csv  
   creating: aksharantar_sampled/ben/
  inflating: aksharantar_sampled/ben/ben_test.csv  
  inflating: aksharantar_sampled/ben/ben_train.csv  
  inflating: aksharantar_sampled/ben/ben_valid.csv  
   creating: aksharantar_sampled/brx/
  inflating: aksharantar_sampled/brx/brx_test.csv  
  inflating: aksharantar_sampled/brx/brx_train.csv  
  inflating: aksharantar_sampled/brx/brx_valid.csv  
   creating: aksharantar_sampled/guj/
  inflating: aksharantar_sampled/guj/guj_test.csv  
  inflating: aksharantar_sampled/guj/guj_train.csv  
  inflating: aksharantar_sampled/guj/guj_valid.csv  
   creating: aksharantar_sampled/hin/
  inflating: aksharantar_sampled/hin/hin_test.csv  
  inflating: aksharantar_sampled

# Connecting Wandb


In [5]:

# wandb.login(key="8d6c17aa48af2229c26cbc16513ef266358c0b96")
# wandb.init(project="Assignment-02")

# Data Loading

In [6]:
base_dir = "aksharantar_sampled/mal/"

train_file = base_dir+"mal_train.csv"
val_file = base_dir+"mal_train.csv"
test_file = base_dir+"mal_train.csv"

train_data = pd.read_csv(train_file,header=None)
val_data = pd.read_csv(val_file,header=None)
test_data = pd.read_csv(test_file,header=None)



latin_chars = {'<PAD>': 0, '<UNK>': 1,'<start>':2,'<end>':3}
lang_chars = {'<PAD>': 0, '<UNK>': 1,'<start>':2,'<end>':3}
for word in train_data[0]:
  for char in word :
    if char not in latin_chars:
      latin_chars[char] = len(latin_chars)

for word in train_data[1]:
  for char in word :
    if char not in lang_chars:
      lang_chars[char] = len(lang_chars)


latin_max_length = len(max(train_data[0],key = len))
lang_max_length = len(max(train_data[1],key = len))

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def word_to_vec(data):
  data1= data.T
  data_pairs = []
  for i in range(0,len(data)):
    word =  [2]+[latin_chars.get(char,latin_chars['<UNK>']) for char in data1[i][0]] + [0]*(latin_max_length - len(data1[i][0]))+[3]
    latin_tensor = torch.tensor(word).to(device)
    word =  [2]+[lang_chars.get(char,lang_chars['<UNK>']) for char in data1[i][1]] + [0]*(lang_max_length - len(data1[i][1]))+[3]
    lang_tensor = torch.tensor(word).to(device)
    data_pairs.append([latin_tensor,lang_tensor])
  return data_pairs

# DataLoader

In [8]:
train_data_pairs = word_to_vec(train_data)
val_data_pairs = word_to_vec(val_data)
test_data_pairs = word_to_vec(test_data)

train_dataloader = DataLoader(train_data_pairs, batch_size=128, shuffle=True)
val_dataloader = DataLoader(val_data_pairs, batch_size=128, shuffle=False)
test_dataloader = DataLoader(test_data_pairs, batch_size=32, shuffle=False)


# Encoder

In [9]:
class Encoder(nn.Module):
  def __init__(self,input_size,embedding_size,hidden_size,layers,cell_type,bidirectional,dropout):
    super(Encoder,self).__init__()
    self.cell_type = cell_type
    self.embedding = nn.Embedding(input_size,embedding_size)
    self.rnn = cell_type(embedding_size,hidden_size,layers,bidirectional = bidirectional,dropout=dropout)

  def forward(self,x):
    embedding = self.embedding(x)
    output,hidden = self.rnn(embedding)

    return hidden


# Decoder

In [10]:
class Decoder(nn.Module):
  def __init__(self,output_size,embedding_size,hidden_size,layers,cell_type,bidirectional,dropout):
    super(Decoder,self).__init__()
    self.cell_type = cell_type
    self.embedding = nn.Embedding(output_size,embedding_size) 
    self.rnn = cell_type(embedding_size,hidden_size,layers,bidirectional = bidirectional,dropout=dropout)
    if bidirectional:
      self.out = nn.Linear(hidden_size*2,output_size) 
    else :
      self.out = nn.Linear(hidden_size,output_size) 

    
  def forward(self,x,hidden):
    x = x.unsqueeze(1).transpose(0,1)
    embedding = self.embedding(x)
    output,hidden = self.rnn(embedding,hidden)
    output = self.out(output.squeeze(0))

    return output,hidden




# Model

In [17]:

class seq2seq(pl.LightningModule):
  def __init__(self,input_size,output_size,embedding_size,hidden_size,encoder_layer_size,decoder_layer_size,cell_type,beam_width,dropout,bidirectional,learning_rate=0.0001):
    super(seq2seq,self).__init__()
    self.output_size = output_size
    self.cell_type = cell_type
    self.train_step_acc = []
    self.train_step_loss = []
    self.val_step_acc = []
    self.val_step_loss = []
    self.decoder_layer_size = decoder_layer_size #*  2 if bidirectional else 1
    self.bidirectional = bidirectional
    self.encoder_layer_size = encoder_layer_size 
    self.beam_width = beam_width
    self.encoder = Encoder(input_size,embedding_size,hidden_size,encoder_layer_size,cell_type,bidirectional,dropout)
    self.decoder = Decoder(output_size,embedding_size,hidden_size,decoder_layer_size,cell_type,bidirectional,dropout)
    self.learning_rate = learning_rate

  def beam_search(self,hidden,input,beam_width,output_len,output_seq):
    queue = []
    queue.append((input,1,hidden))
    for t in range(output_len):
      queue_temp = []
      for i in range(len(queue)):
        (input,prob_parent,hidden) = queue[i]
        output_rnn,hidden = self.decoder( input ,hidden)
        prob , index = torch.topk(output_rnn,beam_width)
        output_rnn = output_rnn.squeeze(1)
        for j in range(beam_width):
          prob_score = prob_parent*prob[0][j]
          if (prob_score > 1e-5):
            queue_temp.append((torch.tensor([index[0][j]]).to(device),prob_score,hidden))
      output_seq[t] = output_rnn
      queue = sorted(queue_temp,key = lambda x:x[1] , reverse = True)[:beam_width]
    return  output_seq

  def forward(self,input,output,tf = 0.5) :
    output_len = output.shape[1]
    batch_size = input.shape[0]
    output_size = self.output_size

    bidir = 2 if self.bidirectional else 1
    hidden = self.encoder(input.transpose(0,1))
    if self.encoder_layer_size > self.decoder_layer_size :
      if (self.cell_type == nn.LSTM):
        (hidden,cell) = hidden
        hidden = hidden[-1*self.decoder_layer_size* bidir :]
        cell = cell[-1*self.decoder_layer_size*  bidir:]
        hidden = (hidden,cell) 
      else:
        hidden = hidden[-1*self.decoder_layer_size* bidir:]
        
    elif self.encoder_layer_size < self.decoder_layer_size :
      cell = []
      if (self.cell_type == nn.LSTM):
        (hidden,cell) = hidden
        temp_hidden = hidden[-1 *bidir:]
        temp_cell = cell[-1 *bidir:]
        if(temp_hidden.shape[0] == 0) :
            temp_hidden = hidden
            temp_cell = cell
        for i in range(self.decoder_layer_size - self.encoder_layer_size):   
          hidden = torch.cat((hidden,temp_hidden) )
          cell = torch.cat((cell,temp_cell))
        hidden = (hidden,cell) 
      else :
        temp_hidden = hidden[-1 *bidir:]
        if(temp_hidden.shape[0] == 0) :
            temp_hidden = hidden
        for i in range(self.decoder_layer_size - self.encoder_layer_size):   
          hidden = torch.cat((hidden,temp_hidden),dim=0 )
    
    output_seq = torch.zeros(output.shape[0],batch_size,output_size).to(device)

    # print(hidden.shape)
    output = output.transpose(0,1)
    next_input = output[:,0]
    if self.beam_width == 1:
      for t in range(output.shape[1]):
        output_rnn,hidden = self.decoder( next_input ,hidden)
        output_seq[t] = output_rnn.squeeze(1)
        next_input =  output_seq[t].argmax(1) if random.random() > tf else output[:,t]
      return output_seq
    else :
      # print(output.shape[0])
      # output_seq_temp = torch.zeros(output.shape[0],1,output_size).to(device)
      # output_seq_temp = self.beam_search(hidden,next_input,self.beam_width,output.shape[1],output_seq)
      # output_seq = output_seq_temp

      for i in range(batch_size):
        output_seq_temp = torch.zeros(output.shape[1],1,output_size).to(device)
        # print(hidden.shape,next_input.shape,batch_size)
        # print(hidden[:,i,:]..shape)
        output_seq_temp = self.beam_search(hidden[:,i:i+1,:],next_input[i:i+1],self.beam_width,output.shape[1],output_seq_temp)
        output_seq[:,i:i+1,:] = output_seq_temp
      return output_seq


  def training_step(self,batch):
    input,output = batch

    output = output.permute(1,0)
    output_seq = self(input,output)
    output = output.permute(1,0)

    output_seq_2 = torch.zeros(output_seq.shape).to(device)
    batch_n = np.arange(len(output_seq))

    for f in range(len(output)):
      col = output[f]
      output_seq_2[batch_n,f,np.array(col.cpu())] = 1
    output_dim = output_seq.shape[-1]
    output_seq_t1 = output_seq[1:].view(-1,output_dim)
    output_seq_t2 = output_seq_2[1:].view(-1,output_dim)
    loss = nn.CrossEntropyLoss()
    loss = loss(output_seq_t1,output_seq_t2).mean()
    
    output = output.permute(1,0)
    output_ = torch.argmax(output_seq,2)
    acc_1 = torch.all(output_[1:-1,:] == output[1:-1,:],dim=0)
    acc = torch.sum(acc_1 == True)/len(acc_1)

    self.log('train_loss', loss,on_epoch = True,on_step = False,prog_bar=True)
    self.train_step_loss.append(loss)
    self.log('train_acc', acc,on_epoch = True,on_step = False,prog_bar=True)
    self.train_step_acc.append(acc)

    return loss



  def on_train_epoch_end(self):
    
    train_acc =  torch.stack(self.train_step_acc).mean()
    train_loss =  torch.stack(self.train_step_loss).mean()
    val_acc =  torch.stack(self.val_step_acc).mean()
    val_loss =  torch.stack(self.val_step_loss).mean()
    print("train_loss:",train_loss.item(),"train_acc",train_acc.item(),"val_loss:",val_loss.item(),"val_acc",val_acc.item())
    # wandb.log({"train_loss":train_loss.item(),"train_acc":train_acc.item(),"val_loss":val_loss.item(),"val_acc":val_acc.item()})
    self.train_step_acc.clear() 
    self.train_step_loss.clear() 
    self.val_step_acc.clear() 
    self.val_step_loss.clear() 


  def validation_step(self, batch,batch_idx):
    input,output = batch

    output = output.permute(1,0)
    output_seq = self(input,output,0)
    output = output.permute(1,0)

    output_seq_2 = torch.zeros(output_seq.shape).to(device)
    batch_n = np.arange(len(output_seq))

    for f in range(len(output)):
      col = output[f]
      output_seq_2[batch_n,f,np.array(col.cpu())] = 1

    output_dim = output_seq.shape[-1]
  
    output_seq_t1 = output_seq[1:].view(-1,output_dim)
    output_seq_t2 = output_seq_2[1:].view(-1,output_dim)

    loss = nn.CrossEntropyLoss()
    loss = loss(output_seq_t1,output_seq_t2).mean()
    output = output.permute(1,0)
    output_ = torch.argmax(output_seq,2)
    acc_1 = torch.all(output_[1:-1,:] == output[1:-1,:],dim=0)
    acc = torch.sum(acc_1 == True)/len(acc_1)

    self.log('val_loss', loss,on_epoch = True,on_step = False,prog_bar=True)
    self.val_step_loss.append(loss)
    self.log('val_acc', acc,on_epoch = True,on_step = False,prog_bar=True)
    self.val_step_acc.append(acc)

    return loss

  def configure_optimizers(self):
    return torch.optim.Adam(self.parameters(),lr= self.learning_rate)



# Model Training

In [18]:
model = seq2seq(input_size = len(latin_chars), output_size = len(lang_chars),
                embedding_size = 64, hidden_size = 256,encoder_layer_size = 2,
                decoder_layer_size = 3,cell_type = nn.LSTM,

                beam_width = 1,dropout= 0.2,
                bidirectional =True ,learning_rate = 0.001)
model.to(device)



trainer = pl.Trainer(max_epochs = 20)
trainer.fit(model, train_dataloader,val_dataloader)


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type    | Params
------------------------------------
0 | encoder | Encoder | 2.2 M 
1 | decoder | Decoder | 3.9 M 
------------------------------------
6.1 M     Trainable params
0         Non-trainable params
6.1 M     Total params
24.375    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

train_loss: 1.440787672996521 train_acc 0.0036328125279396772 val_loss: 1.1049903631210327 val_acc 0.03144434094429016


Validation: 0it [00:00, ?it/s]

train_loss: 0.5380157828330994 train_acc 0.12460937350988388 val_loss: 0.6353055834770203 val_acc 0.26322266459465027


Validation: 0it [00:00, ?it/s]

train_loss: 0.31647342443466187 train_acc 0.2957226634025574 val_loss: 0.4774453639984131 val_acc 0.4071289002895355


Validation: 0it [00:00, ?it/s]

train_loss: 0.26412585377693176 train_acc 0.3885156214237213 val_loss: 0.49360623955726624 val_acc 0.4592578113079071


Validation: 0it [00:00, ?it/s]

train_loss: 0.23925736546516418 train_acc 0.41748046875 val_loss: 0.3537483811378479 val_acc 0.5469921827316284


Validation: 0it [00:00, ?it/s]

train_loss: 0.19212715327739716 train_acc 0.4940820336341858 val_loss: 0.3130972683429718 val_acc 0.5879882574081421


Validation: 0it [00:00, ?it/s]

train_loss: 0.15578433871269226 train_acc 0.5297265648841858 val_loss: 0.2857522964477539 val_acc 0.6281836032867432


Validation: 0it [00:00, ?it/s]

train_loss: 0.14290009438991547 train_acc 0.5622460842132568 val_loss: 0.2455490082502365 val_acc 0.6536328196525574


Validation: 0it [00:00, ?it/s]

train_loss: 0.12984396517276764 train_acc 0.5923437476158142 val_loss: 0.2183215469121933 val_acc 0.6888867020606995


Validation: 0it [00:00, ?it/s]

train_loss: 0.108034648001194 train_acc 0.6310741901397705 val_loss: 0.2047000527381897 val_acc 0.71875


Validation: 0it [00:00, ?it/s]

train_loss: 0.09893524646759033 train_acc 0.6508398056030273 val_loss: 0.18195630609989166 val_acc 0.732714831829071


Validation: 0it [00:00, ?it/s]

train_loss: 0.1006832867860794 train_acc 0.6512694954872131 val_loss: 0.14882135391235352 val_acc 0.7626171708106995


Validation: 0it [00:00, ?it/s]

train_loss: 0.08408808708190918 train_acc 0.6927734017372131 val_loss: 0.16129086911678314 val_acc 0.7767187356948853


Validation: 0it [00:00, ?it/s]

train_loss: 0.07629336416721344 train_acc 0.7141405940055847 val_loss: 0.13105881214141846 val_acc 0.8014257550239563


Validation: 0it [00:00, ?it/s]

train_loss: 0.07279334217309952 train_acc 0.7215039134025574 val_loss: 0.1214744821190834 val_acc 0.8132030963897705


  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")



# Sweep Config

In [None]:


config= {
    'method': 'bayes',
    'name': 'sweep',
    'metric': {
        'goal': 'maximize', 
        'name': 'val_acc'
      },
    "parameters":
    {
      
    "bidirectional" :{
        "values" : [True,False]
    },
    "dropout" :{
        "values" : [0,0.2,0.3]
    },
    "cell_type" :{
          "values" : ["RNN", "GRU", "LSTM"]
    },
      "epochs" :{
          "values" : [10,  15, 20]
    },
     
      "encoder_layers" :{
          "values" : [1,  2, 3]
    },
      "decoder_layers" :{
          "values" : [1,  2, 3]
    },
      "embedding_size" :{
          "values" : [16,32,64,256]
    },
      "hidden_layer_size" :{
          "values" : [16,32,64,256]
    },

      "learning_rate" :{
          "values" : [1e-3,1e-4]
    }

    }

}


# Sweep Function

In [None]:

# cell_map = {"RNN":nn.RNN, "GRU":nn.GRU, "LSTM":nn.LSTM}
# def sweeprun():

#   wandb.init()
#   bidirectional = wandb.config.bidirectional
#   dropout = wandb.config.dropout
#   cell_type = wandb.config.cell_type

#   encoder_layers = wandb.config.encoder_layers
#   decoder_layers = wandb.config.decoder_layers
#   epochs = wandb.config.epochs
#   learning_rate = wandb.config.learning_rate

#   embedding_size = wandb.config.embedding_size
#   hidden_layer_size = wandb.config.hidden_layer_size

#   run_name = "lr_{}_rnn_{}_dp_{}_bd_{}_el_{}_dl_{}_ep_{}_es_{}_hs_{}".format(learning_rate,cell_type, dropout, bidirectional,encoder_layers, decoder_layers,epochs,embedding_size,hidden_layer_size)

#   cell_type = cell_map[cell_type]
#   model = seq2seq(input_size = len(latin_chars), output_size = len(lang_chars),
#                 embedding_size = embedding_size, hidden_size = hidden_layer_size,encoder_layer_size = encoder_layers,
#                 decoder_layer_size = decoder_layers,cell_type = cell_type,
#                 beam_width = 1,dropout= dropout,
#                 bidirectional =bidirectional ,learning_rate = learning_rate)
#   model.to(device)

#   trainer = pl.Trainer(max_epochs=1,accelerator = 'gpu') 
#   trainer.fit(model, train_dataloader,val_dataloader)

#   wandb.run.name = run_name
#   wandb.finish()



# sweep_id = wandb.sweep(config,project="Assignment-03", entity = "saisreeram")
# wandb.agent(sweep_id, sweeprun)

In [None]:
a = [1]

In [None]:
a[0]


In [None]:
train_data[0]