# Installing Modules

In [55]:
!pip install wandb -qU
!pip install pytorch_lightning


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Drive Mount

In [56]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Importing Modules


In [57]:
import wandb
import torch 
import pytorch_lightning as pl
import torch.nn as nn
from torch.nn  import functional
from pytorch_lightning.loggers import WandbLogger
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.utils.data as data
import numpy as np
import random
import csv
import pandas as pd
from torch.utils.data import Dataset, DataLoader


# Unzip data

In [58]:
!unzip /content/drive/MyDrive/dl/aksharantar_sampled.zip

Archive:  /content/drive/MyDrive/dl/aksharantar_sampled.zip
replace aksharantar_sampled/asm/asm_test.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


# Connecting Wandb


In [59]:

# wandb.login(key="8d6c17aa48af2229c26cbc16513ef266358c0b96")
# wandb.init(project="Assignment-02")

# Data Loading

In [60]:
base_dir = "aksharantar_sampled/mal/"

train_file = base_dir+"mal_train.csv"
val_file = base_dir+"mal_train.csv"
test_file = base_dir+"mal_train.csv"

train_data = pd.read_csv(train_file,header=None)
val_data = pd.read_csv(val_file,header=None)
test_data = pd.read_csv(test_file,header=None)



latin_chars = {'<PAD>': 0, '<UNK>': 1,'<start>':2,'<end>':3}
lang_chars = {'<PAD>': 0, '<UNK>': 1,'<start>':2,'<end>':3}
for word in train_data[0]:
  for char in word :
    if char not in latin_chars:
      latin_chars[char] = len(latin_chars)

for word in train_data[1]:
  for char in word :
    if char not in lang_chars:
      lang_chars[char] = len(lang_chars)


latin_max_length = len(max(train_data[0],key = len))
lang_max_length = len(max(train_data[1],key = len))

In [61]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def word_to_vec(data):
  data1= data.T
  data_pairs = []
  for i in range(0,len(data)):
    word =  [2]+[latin_chars.get(char,latin_chars['<UNK>']) for char in data1[i][0]] + [0]*(latin_max_length - len(data1[i][0]))+[3]
    latin_tensor = torch.tensor(word).to(device)
    word =  [2]+[lang_chars.get(char,lang_chars['<UNK>']) for char in data1[i][1]] + [0]*(lang_max_length - len(data1[i][1]))+[3]
    lang_tensor = torch.tensor(word).to(device)
    data_pairs.append([latin_tensor,lang_tensor])
  return data_pairs

# DataLoader

In [62]:
train_data_pairs = word_to_vec(train_data)
val_data_pairs = word_to_vec(val_data)
test_data_pairs = word_to_vec(test_data)

train_dataloader = DataLoader(train_data_pairs, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_data_pairs, batch_size=32, shuffle=False)
test_dataloader = DataLoader(test_data_pairs, batch_size=32, shuffle=False)


# Encoder

In [63]:
class Encoder(nn.Module):
  def __init__(self,input_size,embedding_size,hidden_size,layers,cell_type,bidirectional,dropout):
    super(Encoder,self).__init__()
    self.cell_type = cell_type
    self.embedding = nn.Embedding(input_size,embedding_size)
    self.rnn = cell_type(embedding_size,hidden_size,layers,bidirectional = bidirectional,dropout=dropout)

  def forward(self,x):
    embedding = self.embedding(x)
    output,hidden = self.rnn(embedding)

    return hidden


# Decoder

In [64]:
class Decoder(nn.Module):
  def __init__(self,output_size,embedding_size,hidden_size,layers,cell_type,bidirectional,dropout):
    super(Decoder,self).__init__()
    self.cell_type = cell_type
    self.embedding = nn.Embedding(output_size,embedding_size) 
    self.rnn = cell_type(embedding_size,hidden_size,layers,bidirectional = bidirectional,dropout=dropout)
    if bidirectional:
      self.out = nn.Linear(hidden_size*2,output_size) 
    else :
      self.out = nn.Linear(hidden_size,output_size) 

    
  def forward(self,x,hidden):
    x = x.unsqueeze(1).transpose(0,1)
    embedding = self.embedding(x)
    output,hidden = self.rnn(embedding,hidden)
    output = self.out(output.squeeze(0))

    return output,hidden




# Model

In [73]:
class seq2seq(pl.LightningModule):
  def __init__(self,input_size,output_size,embedding_size,hidden_size,encoder_layer_size,decoder_layer_size,cell_type,dropout,bidirectional,learning_rate=0.0001):
    super(seq2seq,self).__init__()
    self.output_size = output_size
    self.cell_type = cell_type
    self.train_step_acc = []
    self.train_step_loss = []
    self.val_step_acc = []
    self.val_step_loss = []
    self.decoder_layer_size = decoder_layer_size #*  2 if bidirectional else 1
    self.bidirectional = bidirectional
    self.encoder_layer_size = encoder_layer_size 
    self.encoder = Encoder(input_size,embedding_size,hidden_size,encoder_layer_size,cell_type,bidirectional,dropout)
    self.decoder = Decoder(output_size,embedding_size,hidden_size,decoder_layer_size,cell_type,bidirectional,dropout)
    self.learning_rate = learning_rate

  def forward(self,input,output,tf = 1) :
    output_len = output.shape[1]
    batch_size = input.shape[0]
    output_size = self.output_size
    hidden = self.encoder(input.transpose(0,1))
    if self.encoder_layer_size > self.decoder_layer_size :
      hidden = hidden[-1*self.decoder_layer_size*  2 if self.bidirectional else 1:]
    elif self.encoder_layer_size < self.decoder_layer_size :
      for i in range(self.decoder_layer_size - self.encoder_layer_size):
        temp_hidden = hidden[-1 *2 if self.bidirectional else 1:]
        if(temp_hidden.shape[0] == 0) :
          temp_hidden = hidden

        hidden = torch.cat((hidden,temp_hidden) )
    # print("hidden encoder",hidden.shape)
    # hidden = torch.sum(hidden,dim=0).reshape(1,64,256)
    # print("hidden encoder",hidden.shape)

    output_seq = torch.zeros(output.shape[0],batch_size,output_size).to(device)


    output = output.transpose(0,1)
    next_input = output[:,0]

    for t in range(output.shape[1]):
      output_rnn,hidden = self.decoder( next_input ,hidden)
      # print("hidden decoder",hidden.shape)
      output_seq[t] = output_rnn.squeeze(1)
      next_input =  output_seq[t].argmax(1) if random.random() < tf else output[:,t]
    # else :
    #   for t in range(output.shape[1]):
    #     output_rnn,hidden = self.decoder(output[:,t],hidden)

    #     output_seq[t] = output_rnn.squeeze(1)
    return output_seq

  def training_step(self,batch):
    input,output = batch

    output = output.permute(1,0)
    output_seq = self(input,output)
    output = output.permute(1,0)

    output_seq_2 = torch.zeros(output_seq.shape).to(device)
    batch_n = np.arange(len(output_seq))

    for f in range(len(output)):
      col = output[f]
      output_seq_2[batch_n,f,np.array(col.cpu())] = 1
    output_dim = output_seq.shape[-1]
    output_seq_t1 = output_seq[1:].view(-1,output_dim)
    output_seq_t2 = output_seq_2[1:].view(-1,output_dim)
    loss = nn.CrossEntropyLoss()
    loss = loss(output_seq_t1,output_seq_t2).mean()
    
    output = output.permute(1,0)
    output_ = torch.argmax(output_seq,2)
    acc_1 = torch.all(output_[1:-1,:] == output[1:-1,:],dim=0)
    acc = torch.sum(acc_1 == True)/len(acc_1)

    self.log('train_loss', loss,on_epoch = True,on_step = False,prog_bar=True)
    self.train_step_loss.append(loss)
    self.log('train_acc', acc,on_epoch = True,on_step = False,prog_bar=True)
    self.train_step_acc.append(acc)

    return loss



  def on_train_epoch_end(self):
    
    train_acc =  torch.stack(self.train_step_acc).mean()
    train_loss =  torch.stack(self.train_step_loss).mean()
    val_acc =  torch.stack(self.val_step_acc).mean()
    val_loss =  torch.stack(self.val_step_loss).mean()
    # print("train_loss:",train_loss.item(),"train_acc",train_acc.item(),"val_loss:",val_loss.item(),"val_acc",val_acc.item())
    wandb.log({"train_loss":train_loss.item(),"train_acc":train_acc.item(),"val_loss":val_loss.item(),"val_acc":val_acc.item()})
    self.train_step_acc.clear() 
    self.train_step_loss.clear() 
    self.val_step_acc.clear() 
    self.val_step_loss.clear() 


  def validation_step(self, batch,batch_idx):
    input,output = batch

    output = output.permute(1,0)
    output_seq = self(input,output,0)
    output = output.permute(1,0)

    output_seq_2 = torch.zeros(output_seq.shape).to(device)
    batch_n = np.arange(len(output_seq))

    for f in range(len(output)):
      col = output[f]
      output_seq_2[batch_n,f,np.array(col.cpu())] = 1

    output_dim = output_seq.shape[-1]
  
    output_seq_t1 = output_seq[1:].view(-1,output_dim)
    output_seq_t2 = output_seq_2[1:].view(-1,output_dim)

    loss = nn.CrossEntropyLoss()
    loss = loss(output_seq_t1,output_seq_t2).mean()
    output = output.permute(1,0)
    output_ = torch.argmax(output_seq,2)
    acc_1 = torch.all(output_[1:-1,:] == output[1:-1,:],dim=0)
    acc = torch.sum(acc_1 == True)/len(acc_1)

    self.log('val_loss', loss,on_epoch = True,on_step = False,prog_bar=True)
    self.val_step_loss.append(loss)
    self.log('val_acc', acc,on_epoch = True,on_step = False,prog_bar=True)
    self.val_step_acc.append(acc)

    return loss

  def configure_optimizers(self):
    return torch.optim.Adam(self.parameters(),lr= self.learning_rate)



# Model Training

In [74]:
model = seq2seq(input_size = len(latin_chars), output_size = len(lang_chars),
                embedding_size = 32, hidden_size = 16,encoder_layer_size = 1,
                decoder_layer_size = 1,cell_type = nn.GRU,dropout= 0.2,
                bidirectional =False ,learning_rate = 0.001)
model.to(device)



trainer = pl.Trainer(max_epochs = 30,accelerator = 'gpu')
trainer.fit(model, train_dataloader,val_dataloader)


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type    | Params
------------------------------------
0 | encoder | Encoder | 3.4 K 
1 | decoder | Decoder | 6.0 K 
------------------------------------
9.3 K     Trainable params
0         Non-trainable params
9.3 K     Total params
0.037     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")



# Sweep Config

In [75]:


config= {
    'method': 'bayes',
    'name': 'sweep',
    'metric': {
        'goal': 'maximize', 
        'name': 'val_acc'
      },
    "parameters":
    {
      
    "bidirectional" :{
        "values" : [True,False]
    },
    "dropout" :{
        "values" : [0,0.2,0.3]
    },
    "cell_type" :{
          "values" : ["RNN", "GRU", "LSTM"]
    },
      "epochs" :{
          "values" : [10,  15, 20]
    },
     
      "encoder_layers" :{
          "values" : [1,  2, 3]
    },
      "decoder_layers" :{
          "values" : [1,  2, 3]
    },
      "embedding_size" :{
          "values" : [16,32,64,256]
    },
      "hidden_layer_size" :{
          "values" : [16,32,64,256]
    },

      "learning_rate" :{
          "values" : [1e-3,1e-4]
    }

    }

}


# Sweep Function

In [76]:

cell_map = {"RNN":nn.RNN, "GRU":nn.GRU, "LSTM":nn.LSTM}
def sweeprun():

  wandb.init()
  bidirectional = wandb.config.bidirectional
  dropout = wandb.config.dropout
  cell_type = wandb.config.cell_type

  encoder_layers = wandb.config.encoder_layers
  decoder_layers = wandb.config.decoder_layers
  epochs = wandb.config.epochs
  learning_rate = wandb.config.learning_rate

  embedding_size = wandb.config.embedding_size
  hidden_layer_size = wandb.config.hidden_layer_size

  run_name = "lr_{}_rnn_{}_dp_{}_bd_{}_el_{}_dl_{}_ep_{}_es_{}_hs_{}".format(learning_rate,cell_type, dropout, bidirectional,encoder_layers, decoder_layers,epochs,embedding_size,hidden_layer_size)

  cell_type = cell_map[cell_type]
  model = seq2seq(input_size = len(latin_chars), output_size = len(lang_chars),
                embedding_size = embedding_size, hidden_size = hidden_layer_size,encoder_layer_size = encoder_layers,
                decoder_layer_size = decoder_layers,cell_type = nn.GRU,dropout= dropout,
                bidirectional =bidirectional ,learning_rate = learning_rate)
  model.to(device)

  trainer = pl.Trainer(max_epochs=epochs,accelerator = 'gpu') 
  trainer.fit(model, train_dataloader,val_dataloader)

  wandb.run.name = run_name
  wandb.finish()



sweep_id = wandb.sweep(config,project="Assignment-03", entity = "saisreeram")
wandb.agent(sweep_id, sweeprun)



Create sweep with ID: mj12bp8f
Sweep URL: https://wandb.ai/saisreeram/Assignment-03/sweeps/mj12bp8f
train_loss: 1.3767040967941284 train_acc 0.00011718749738065526 val_loss: 1.7126067876815796 val_acc 0.0


[34m[1mwandb[0m: Agent Starting Run: p01gfyn7 with config:
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.0001


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type    | Params
------------------------------------
0 | encoder | Encoder | 9.8 K 
1 | decoder | Decoder | 19.8 K
------------------------------------
29.5 K    Trainable params
0         Non-trainable params
29.5 K    Total params
0.118     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

train_loss: 1.296021580696106 train_acc 0.0002734375011641532 val_loss: 1.729116678237915 val_acc 0.0


Validation: 0it [00:00, ?it/s]

train_loss: 2.6413378715515137 train_acc 0.0 val_loss: 2.319828510284424 val_acc 0.0


Validation: 0it [00:00, ?it/s]

train_loss: 1.2359297275543213 train_acc 0.0006054687546566129 val_loss: 1.66923189163208 val_acc 0.0


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

train_loss: 2.1965718269348145 train_acc 0.0 val_loss: 2.140056848526001 val_acc 0.0
train_loss: 1.1798286437988281 train_acc 0.0011914062779396772 val_loss: 1.6194814443588257 val_acc 0.0


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

train_loss: 1.1375008821487427 train_acc 0.0020117186941206455 val_loss: 1.7031234502792358 val_acc 0.0
train_loss: 2.0038371086120605 train_acc 0.0 val_loss: 2.1558830738067627 val_acc 0.0


Validation: 0it [00:00, ?it/s]

train_loss: 1.100900650024414 train_acc 0.0031835937406867743 val_loss: 1.7733739614486694 val_acc 0.0


Validation: 0it [00:00, ?it/s]

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


Error in callback <function _WandbInit._pause_backend at 0x7ff61d115ab0> (for post_run_cell):


BrokenPipeError: ignored

In [None]:
a = [1]

In [None]:
a[0]


In [None]:
train_data[0]