# Installing Modules

In [None]:
!pip install wandb -qU
!pip install pytorch_lightning


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m35.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.5/199.5 kB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


# Drive Mount

In [None]:
# Colab-only: mount Google Drive at /content/drive so files stored there are
# reachable from the notebook's filesystem.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


# Importing Modules


In [None]:
import wandb
import torch 
import pytorch_lightning as pl
import torch.nn as nn
from torch.nn  import functional
from pytorch_lightning.loggers import WandbLogger
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.utils.data as data
import numpy as np
import random
import csv
import pandas as pd
from torch.utils.data import Dataset, DataLoader


# Unzip data

In [None]:
!unzip /content/drive/MyDrive/dl/aksharantar_sampled.zip

# Connecting Wandb


In [None]:

# wandb.login()  # key redacted: supply it via the WANDB_API_KEY environment variable or the interactive prompt, never in the notebook
# wandb.init(project="Assignment-02")

# Data Loading

In [None]:
base_dir = "aksharantar_sampled/hin/"

# Each split is read from its own file; pointing validation/test at the
# training file would make every held-out metric a training metric.
# NOTE(review): file names assume the dataset's <lang>_{train,valid,test}.csv
# layout - confirm against the unzipped folder.
train_file = base_dir+"hin_train.csv"
val_file = base_dir+"hin_valid.csv"
test_file = base_dir+"hin_test.csv"

# The files have no header row: column 0 is the Latin-script word, column 1 the
# native-script word. dtype=str with keep_default_na=False stops pandas from
# turning words such as "null" or "nan" into float NaN.
train_data = pd.read_csv(train_file,header=None,dtype=str,keep_default_na=False)
val_data = pd.read_csv(val_file,header=None,dtype=str,keep_default_na=False)
test_data = pd.read_csv(test_file,header=None,dtype=str,keep_default_na=False)


# Character -> id vocabularies. <start>=3 and <end>=4 are kept because
# word_to_vec emits those ids; id 2 is reserved explicitly so that
# len(vocab) is always the next free id. Without it the first real character
# was assigned id 4 and became indistinguishable from <end>.
latin_chars = {'<PAD>': 0, '<UNK>': 1, '<RESERVED>': 2, '<start>': 3, '<end>': 4}
lang_chars = {'<PAD>': 0, '<UNK>': 1, '<RESERVED>': 2, '<start>': 3, '<end>': 4}

# Vocabularies are built from the training split only; characters that appear
# solely in validation/test words fall back to <UNK> at encoding time.
for word in train_data[0]:
  for char in word :
    if char not in latin_chars:
      latin_chars[char] = len(latin_chars)

for word in train_data[1]:
  for char in word :
    if char not in lang_chars:
      lang_chars[char] = len(lang_chars)


# Padding lengths cover all three splits so that every encoded tensor has the
# same length even when a held-out word is longer than any training word.
latin_max_length = max(len(word) for split in (train_data,val_data,test_data) for word in split[0])
lang_max_length = max(len(word) for split in (train_data,val_data,test_data) for word in split[1])

In [None]:
def word_to_vec(data, src_vocab=None, tgt_vocab=None, src_max_length=None, tgt_max_length=None):
  """Encode a frame of (source word, target word) rows as pairs of id tensors.

  Each word becomes ``<start>, char ids..., <end>, <PAD>...``. The end marker
  directly follows the last character and the padding comes after it, so the
  marker's position actually tells where the word stops.

  Args:
    data: DataFrame whose column 0 holds source words and column 1 target words.
    src_vocab: char -> id mapping for column 0; defaults to ``latin_chars``.
    tgt_vocab: char -> id mapping for column 1; defaults to ``lang_chars``.
    src_max_length: number of characters source words are padded to;
      defaults to ``latin_max_length``.
    tgt_max_length: number of characters target words are padded to;
      defaults to ``lang_max_length``.

  Returns:
    A list with one ``[source_tensor, target_tensor]`` entry per row. Tensors
    have length ``max_length + 2``; a word longer than ``max_length`` is not
    truncated and yields a correspondingly longer tensor.
  """
  # Fall back to the notebook-level vocabularies and lengths when not given.
  src_vocab = latin_chars if src_vocab is None else src_vocab
  tgt_vocab = lang_chars if tgt_vocab is None else tgt_vocab
  src_max_length = latin_max_length if src_max_length is None else src_max_length
  tgt_max_length = lang_max_length if tgt_max_length is None else tgt_max_length

  def encode(word, vocab, max_length):
    # Characters missing from the vocabulary map to <UNK>.
    ids = [vocab.get(char, vocab['<UNK>']) for char in word]
    # A negative count multiplies to an empty list, i.e. no padding.
    padding = [vocab['<PAD>']] * (max_length - len(ids))
    return torch.tensor([vocab['<start>']] + ids + [vocab['<end>']] + padding)

  # Iterate the two columns positionally instead of transposing the frame.
  return [[encode(src, src_vocab, src_max_length), encode(tgt, tgt_vocab, tgt_max_length)]
          for src, tgt in zip(data[0], data[1])]

# DataLoader

In [None]:
# Encode each split into a list of [source_tensor, target_tensor] pairs.
train_data_pairs, val_data_pairs, test_data_pairs = map(
    word_to_vec, (train_data, val_data, test_data)
)

# Batches of 64; only the training loader shuffles, the evaluation loaders keep
# the original row order.
train_dataloader = DataLoader(dataset=train_data_pairs, shuffle=True, batch_size=64)
val_dataloader = DataLoader(dataset=val_data_pairs, shuffle=False, batch_size=64)
test_dataloader = DataLoader(dataset=test_data_pairs, shuffle=False, batch_size=64)


# Encoder

In [None]:
class Encoder(nn.Module):
  """Embeds source token ids and runs them through a recurrent network.

  ``cell_type`` is the recurrent layer class to instantiate (it is called as
  ``cell_type(embedding_size, hidden_size, layers, bidirectional=...)``).
  Only the network's final state is exposed; the per-step outputs are dropped.
  """

  def __init__(self,input_size,embedding_size,hidden_size,layers,cell_type,bidirectional):
    super().__init__()
    self.cell_type = cell_type
    self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=embedding_size)
    self.rnn = cell_type(embedding_size, hidden_size, layers, bidirectional=bidirectional)

  def forward(self,x):
    """Return the recurrent network's final state for the token id tensor ``x``."""
    _, final_state = self.rnn(self.embedding(x))
    return final_state


# Decoder

In [None]:
class Decoder(nn.Module):
  """Single-step recurrent decoder that emits log-probabilities over the output vocabulary.

  Args:
    output_size: size of the target vocabulary (embedding rows and output classes).
    embedding_size: dimension of the token embeddings.
    hidden_size: hidden size of the recurrent layer.
    layers: number of stacked recurrent layers.
    cell_type: recurrent layer class, called as
      ``cell_type(embedding_size, hidden_size, layers, bidirectional=...)``.
    bidirectional: whether the recurrent layer runs in both directions.
  """

  def __init__(self,output_size,embedding_size,hidden_size,layers,cell_type,bidirectional):
    super(Decoder,self).__init__()
    self.cell_type = cell_type
    self.embedding = nn.Embedding(output_size,embedding_size)
    self.rnn = cell_type(embedding_size,hidden_size,layers,bidirectional = bidirectional)
    # A bidirectional RNN concatenates both directions, so its output features
    # are twice hidden_size; sizing the projection for hidden_size alone made
    # every bidirectional configuration fail with a shape mismatch.
    num_directions = 2 if bidirectional else 1
    self.out = nn.Linear(hidden_size * num_directions,output_size)
    # Normalise over the vocabulary axis of the (1, batch, vocab) output.
    self.softmax = nn.LogSoftmax(dim=2)


  def forward(self,x,hidden):
    """Decode one time step.

    Args:
      x: token ids for the current step, one per batch element.
      hidden: recurrent state passed to the recurrent layer.

    Returns:
      ``(output, hidden)``: log-probabilities with a leading length-1 step
      axis, and the updated recurrent state.
    """
    # Add the leading sequence axis of length 1 that the recurrent layer expects.
    x = x.unsqueeze(0)
    embedding = self.embedding(x)
    output,hidden = self.rnn(embedding,hidden)
    output = self.softmax(self.out(output))
    return output,hidden




# Model

In [None]:
class seq2seq(pl.LightningModule):
  """Encoder-decoder transliteration model wrapped as a LightningModule.

  The encoder's final state initialises the decoder, which then predicts the
  target sequence one token at a time. Token id 0 is treated as padding and is
  ignored by both the loss and the accuracy.

  Args:
    input_size: source vocabulary size.
    output_size: target vocabulary size.
    embedding_size: embedding dimension used by encoder and decoder.
    hidden_size: hidden size of both recurrent networks.
    encoder_layer_size: number of encoder layers.
    decoder_layer_size: number of decoder layers.
    cell_type: recurrent layer class handed to Encoder and Decoder.
    bidirectional: whether both recurrent networks are bidirectional.
    learning_rate: Adam learning rate.
  """

  def __init__(self,input_size,output_size,embedding_size,hidden_size,encoder_layer_size,decoder_layer_size,cell_type,bidirectional,learning_rate=0.0001):
    super(seq2seq,self).__init__()
    self.output_size = output_size
    self.cell_type = cell_type
    # Needed to adapt the encoder state when the two layer counts differ.
    self.decoder_layer_size = decoder_layer_size
    self.num_directions = 2 if bidirectional else 1
    # Per-batch metrics, averaged and cleared once per epoch.
    self.train_step_acc = []
    self.train_step_loss = []
    self.val_step_acc = []
    self.val_step_loss = []

    self.encoder = Encoder(input_size,embedding_size,hidden_size,encoder_layer_size,cell_type,bidirectional)
    self.decoder = Decoder(output_size,embedding_size,hidden_size,decoder_layer_size,cell_type,bidirectional)
    self.learning_rate = learning_rate

  def _bridge_hidden(self,hidden):
    """Adapt the encoder's final state to the number of layers the decoder has.

    With equal layer counts the state is returned unchanged. If the encoder
    has more layers, its top-most layers are kept; if it has fewer, the top
    layer's state is repeated to fill the missing ones.
    """
    if isinstance(hidden,tuple):
      # LSTM state is a (h_n, c_n) tuple; adapt both members the same way.
      return tuple(self._bridge_hidden(state) for state in hidden)
    needed = self.decoder_layer_size * self.num_directions
    available = hidden.shape[0]
    if available > needed:
      return hidden[-needed:].contiguous()
    if available < needed:
      top = hidden[-self.num_directions:]
      extra = top.repeat((needed - available) // self.num_directions,1,1)
      return torch.cat([hidden,extra],dim=0)
    return hidden

  def forward(self,input,output,tf = 0.5) :
    """Run the encoder once and decode the target sequence step by step.

    Args:
      input: source token ids, shape (batch, source_len).
      output: target token ids, shape (target_len, batch); row 0 is the
        start token that seeds the decoder.
      tf: probability that this whole batch is decoded with teacher forcing
        (ground-truth previous token) rather than with the model's own
        previous prediction.

    Returns:
      Tensor of shape (target_len, batch, output_size) where row t holds the
      decoder's scores for target position t. Row 0 stays zero because the
      start token is never predicted.
    """
    target_len,batch_size = output.shape
    hidden = self._bridge_hidden(self.encoder(input.transpose(0,1)))
    # Allocate on the input's device so the model also runs on an accelerator.
    output_seq = torch.zeros(target_len,batch_size,self.output_size,device=input.device)
    use_tf = random.random() < tf
    decoder_input = output[0]
    for t in range(1,target_len):
      # The decoder sees the token at t-1 and is scored on the token at t;
      # feeding it token t itself would let it simply copy the answer.
      step_scores,hidden = self.decoder(decoder_input,hidden)
      step_scores = step_scores.squeeze(0)
      output_seq[t] = step_scores
      decoder_input = output[t] if use_tf else step_scores.argmax(dim=1)
    return output_seq

  def _loss_and_accuracy(self,batch,tf):
    """Return (loss, word-level accuracy) for one batch of (source, target) tensors."""
    input,output = batch
    output = output.permute(1,0)
    output_seq = self(input,output,tf)
    # Drop position 0 (the start token) and keep scores and targets in the
    # same (time, batch) order so the flattened rows line up one-to-one.
    scores = output_seq[1:]
    targets = output[1:]
    loss = functional.cross_entropy(scores.reshape(-1,scores.shape[2]),targets.reshape(-1),ignore_index=0)
    # A word counts as correct only if every non-padding position matches.
    predictions = scores.argmax(dim=2)
    is_pad = targets == 0
    acc = ((predictions == targets) | is_pad).all(dim=0).float().mean()
    return loss,acc

  @staticmethod
  def _epoch_mean(values):
    """Mean of a list of scalar tensors as a float; NaN when the list is empty."""
    return torch.stack(values).mean().item() if values else float('nan')

  def training_step(self,batch,batch_idx=None):
    loss,acc = self._loss_and_accuracy(batch,tf = 0.5)
    self.train_step_loss.append(loss.detach())
    self.train_step_acc.append(acc.detach())
    self.log('train_loss', loss,on_epoch = True,on_step = False,prog_bar=True)
    self.log('train_acc', acc,on_epoch = True,on_step = False,prog_bar=True)

    return loss

  def on_validation_epoch_start(self):
    # Start every validation pass from empty lists so batches from the
    # trainer's sanity check are not mixed into the first epoch's averages.
    self.val_step_acc.clear()
    self.val_step_loss.clear()

  def on_train_epoch_end(self):
    # Empty lists (e.g. no validation pass ran) yield NaN instead of making
    # torch.stack raise on an empty list.
    train_acc = self._epoch_mean(self.train_step_acc)
    train_loss = self._epoch_mean(self.train_step_loss)
    val_acc = self._epoch_mean(self.val_step_acc)
    val_loss = self._epoch_mean(self.val_step_loss)
    print("train_loss:",train_loss,"train_acc:",train_acc,"val_loss:",val_loss,"val_acc:",val_acc)
    self.train_step_acc.clear()
    self.train_step_loss.clear()
    self.val_step_acc.clear()
    self.val_step_loss.clear()


  def validation_step(self, batch,batch_idx):
    # tf=0.0: always decode from the model's own predictions, so validation
    # metrics are deterministic and reflect inference-time behaviour.
    loss,acc = self._loss_and_accuracy(batch,tf = 0.0)
    self.val_step_loss.append(loss.detach())
    self.val_step_acc.append(acc.detach())
    self.log('val_loss', loss,on_epoch = True,on_step = False,prog_bar=True,sync_dist=True)
    self.log('val_acc', acc,on_epoch = True,on_step = False,prog_bar=True,sync_dist=True)
    return loss

  def configure_optimizers(self):
    return torch.optim.Adam(self.parameters(),lr= self.learning_rate)



# Model Training

In [None]:
# Baseline run: one-layer, unidirectional GRU encoder and decoder with
# 256-dimensional embeddings and hidden state, trained for 5 epochs.
model = seq2seq(
    input_size=len(latin_chars),
    output_size=len(lang_chars),
    embedding_size=256,
    hidden_size=256,
    encoder_layer_size=1,
    decoder_layer_size=1,
    cell_type=nn.GRU,
    bidirectional=False,
    learning_rate=0.0001,
)
trainer = pl.Trainer(max_epochs=5)
trainer.fit(model, train_dataloader, val_dataloader)


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type    | Params
------------------------------------
0 | encoder | Encoder | 402 K 
1 | decoder | Decoder | 429 K 
------------------------------------
832 K     Trainable params
0         Non-trainable params
832 K     Total params
3.328     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


# Sweep Config

In [None]:


# wandb sweep definition: Bayesian search that maximises `val_acc`.
# Every hyperparameter is a discrete choice, so the `parameters` section is
# generated from (name, choices) pairs in the order they should be listed.
config = dict(
    method='bayes',
    name='sweep',
    metric=dict(goal='maximize', name='val_acc'),
    parameters={
        option: {"values": choices}
        for option, choices in [
            ("bidirectional", [True, False]),
            ("dropout", [0.2, 0.3, 0.4]),
            ("cell_type", ["RNN", "GRU", "LSTM"]),
            ("epochs", [5, 10]),
            ("encoder_layers", [1, 2]),
            ("decoder_layers", [1, 2]),
            ("embedding_size", [16, 32, 64, 256]),
            ("hidden_layer_size", [16, 32, 64, 256]),
            ("learning_rate", [1e-3, 1e-4]),
        ]
    },
)


# Sweep Function

In [None]:

# Maps the sweep's string choice to the recurrent layer *class*. The model
# instantiates it itself, so no entry may be an instance: `nn.LSTM()` with no
# arguments raised a TypeError as soon as this cell ran.
cell_map = {"RNN":nn.RNN, "GRU":nn.GRU, "LSTM":nn.LSTM}
def sweeprun():
  """Train one model with the hyperparameters wandb assigned to this sweep run.

  Reads the run's configuration, names the run after it, trains a seq2seq
  model with metrics forwarded to wandb through a WandbLogger, and always
  closes the run, even when training raises.
  """
  run = wandb.init()
  try:
    bidirectional = wandb.config.bidirectional
    # NOTE: seq2seq takes no dropout argument, so this value currently only
    # labels the run.
    dropout = wandb.config.dropout
    cell_type = wandb.config.cell_type

    encoder_layers = wandb.config.encoder_layers
    decoder_layers = wandb.config.decoder_layers
    epochs = wandb.config.epochs
    learning_rate = wandb.config.learning_rate

    embedding_size = wandb.config.embedding_size
    hidden_layer_size = wandb.config.hidden_layer_size

    run_name = "lr_{}_rnn_{}_dp_{}_bd_{}_el_{}_dl_{}_ep_{}_es_{}_hs_{}".format(learning_rate,cell_type, dropout, bidirectional,encoder_layers, decoder_layers,epochs,embedding_size,hidden_layer_size)
    # Name the run before training so it is identifiable while it is running.
    wandb.run.name = run_name

    # Keyword arguments keep each value bound to the intended parameter; the
    # positional call skipped the two layer counts and shifted every later
    # argument by two.
    model = seq2seq(input_size = len(latin_chars), output_size = len(lang_chars),
                    embedding_size = embedding_size, hidden_size = hidden_layer_size,
                    encoder_layer_size = encoder_layers, decoder_layer_size = decoder_layers,
                    cell_type = cell_map[cell_type], bidirectional = bidirectional,
                    learning_rate = learning_rate)
    # Attach the active run as the Lightning logger so values logged with
    # self.log reach wandb, where the sweep looks for its metric.
    trainer = pl.Trainer(max_epochs=epochs, logger=WandbLogger(experiment=run))
    trainer.fit(model, train_dataloader,val_dataloader)
  finally:
    wandb.finish()



sweep_id = wandb.sweep(config,project="Assignment-03", entity = "saisreeram")
wandb.agent(sweep_id, sweeprun)

tensor([39, 16, 24, 29,  5, 25, 16,  8, 29,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0])

In [None]:
# Show only the first few encoded pairs; displaying the whole list floods the
# notebook with one entry per training row.
train_data_pairs[:3]


[[tensor([2, 3, 4, 2, 5, 6, 4, 7, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0]),
  tensor([2, 3, 4, 5, 4, 6, 7, 8, 7, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0])]]

In [None]:
# Show only the first few encoded pairs; displaying the whole list floods the
# notebook with one entry per training row.
train_data_pairs[:3]

[[tensor([3, 4, 5, 6, 4, 7, 8, 6, 9, 6, 6, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 4]),
  tensor([ 3,  4,  5,  6,  7,  6,  8,  9, 10,  9,  8,  0,  0,  0,  0,  0,  0,  0,
           0,  0,  0,  4])],
 [tensor([ 3, 10, 11, 12, 13,  5, 14,  6,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
           0,  0,  0,  0,  0,  0,  0,  4]),
  tensor([ 3, 11, 12, 13,  6, 14,  6, 15,  9,  0,  0,  0,  0,  0,  0,  0,  0,  0,
           0,  0,  0,  4])],
 [tensor([ 3, 15, 11,  8,  6, 12, 15,  6, 12,  7,  0,  0,  0,  0,  0,  0,  0,  0,
           0,  0,  0,  0,  0,  0,  0,  4]),
  tensor([ 3, 16, 12,  8, 17, 16,  9, 18,  7,  0,  0,  0,  0,  0,  0,  0,  0,  0,
           0,  0,  0,  4])],
 [tensor([ 3, 14,  6,  9, 14, 16, 17,  6, 18, 19, 19,  7,  0,  0,  0,  0,  0,  0,
           0,  0,  0,  0,  0,  0,  0,  4]),
  tensor([ 3, 15, 19,  6, 20, 21, 22, 23, 24,  7,  0,  0,  0,  0,  0,  0,  0,  0,
           0,  0,  0,  4])],
 [tensor([ 3,  8,  6,  7,  6, 12, 11,  6,  0,  0,  0,  0,  0,  0,  0,  0,  0, 

In [None]:
# Preview column 0 of the training frame (the Latin-script words).
train_data[0]

0        shastragaar
1            bindhya
2          kirankant
3        yagyopaveet
4            ratania
            ...     
51195          toned
51196     mutanaazaa
51197      asahmaton
51198      sulgaayin
51199    anchuthengu
Name: 0, Length: 51200, dtype: object