In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import random
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
class MyDataset(Dataset):
    def __init__(self, csv_file):
        self.data = pd.read_csv(csv_file,names=["English","Hindi"],header=None)
        
    def __getitem__(self, index):
        x = self.data.iloc[index]["English"]
        y = self.data.iloc[index]["Hindi"]
        return x, y
    
    def __len__(self):
        return len(self.data)


In [None]:
train_data = MyDataset('/kaggle/input/transliteration/hin_train.csv')
train_dataloader = DataLoader(train_data, batch_size=16, shuffle=True)
test_data = MyDataset('/kaggle/input/transliteration/hin_test.csv')
test_dataloader = DataLoader(test_data, batch_size=16, shuffle=True)
val_data = MyDataset('/kaggle/input/transliteration/hin_valid.csv')
val_dataloader = DataLoader(val_data, batch_size=16, shuffle=True)

In [None]:
print(len(train_data))
ENGLEN=32
HINDILEN=32
BATCH_SIZE=128
englishwords=torch.full((len(train_data), ENGLEN), 2).to(device)
hindiwords=torch.full((len(train_data), HINDILEN), 2).to(device)
# hindivocab=[chr(i) for i in range(2304, 2432)]
# print(hindivocab.sort())
# print(hindivocab)

51200


In [None]:
hindivocab=set()
englishvocab=set()
for x,y in train_data:
    for letter in x:
        englishvocab.add(letter)
    for letter in y:
        hindivocab.add(letter)  
for x,y in test_data:
    for letter in x:
        englishvocab.add(letter)
    for letter in y:
        hindivocab.add(letter)
for x,y in test_data:
    for letter in x:
        englishvocab.add(letter)
    for letter in y:
        hindivocab.add(letter)
hindivocab=list(hindivocab)
hindivocab.sort()
englishvocab=list(englishvocab)
englishvocab.sort()
hindivocab.insert(0,'0')#start
hindivocab.insert(1,'1') #end
hindivocab.insert(2,'2') #pad
englishvocab.insert(0,'0')#start
englishvocab.insert(1,'1') #end
englishvocab.insert(2,'2') #pad
print(englishvocab)
hindidictc={}
englishdictc={}
hindidicti={}
englishdicti={}
for i in range(len(hindivocab)):
    hindidicti[i]=hindivocab[i]
    hindidictc[hindivocab[i]]=i
for i in range(len(englishvocab)):
    englishdicti[i]=englishvocab[i]
    englishdictc[englishvocab[i]]=i

c=0
for x,y in train_data:
    for i in range(len(x)):
        englishwords[c][i]=englishdictc[x[i]]
    for i in range(len(y)):
        hindiwords[c][i]=hindidictc[y[i]]
    hindiwords[c][i+1]=1
    c+=1

englishwordsval=torch.full((len(val_data), ENGLEN), 2).to(device)
hindiwordsval=torch.full((len(val_data), HINDILEN), 2).to(device)
c=0
for x,y in test_data:
    for i in range(len(x)):
        englishwordsval[c][i]=englishdictc[x[i]]
    for i in range(len(y)):
        hindiwordsval[c][i]=hindidictc[y[i]]
    hindiwordsval[c][i+1]=1
    c+=1

['0', '1', '2', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [None]:
print(len(hindivocab))

68


In [None]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size,hidden_size,embedding_size,num_layers):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.dropout = nn.Dropout(0.2)
        self.num_layers=num_layers
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.gru = nn.GRU(embedding_size, hidden_size,num_layers,dropout=0.2,bidirectional=True)

    def forward(self, inp, hidden):
        embedded = self.dropout(self.embedding(inp))
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self):
        #for bidirection
        return torch.zeros(2*self.num_layers,BATCH_SIZE,self.hidden_size, device=device)

    
class DecoderRNN(nn.Module):
    def __init__(self,input_size,hidden_size,embedding_size,num_layers,output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.dropout = nn.Dropout(0.2)
        self.num_layers=num_layers
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.gru = nn.GRU(embedding_size,hidden_size,num_layers,dropout=0.2)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=2)

    def forward(self, inp, hidden):
        embedded = self.dropout(self.embedding(inp))
        output, hidden = self.gru(embedded, hidden)
        output1=self.out(output)
        return output1, hidden

    def initHidden(self):
        return torch.zeros(self.num_layers,BATCH_SIZE,self.hidden_size, device=device)
            
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder,hencoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.hencoder=hencoder
    def forward(self, inp, target,teacher_force_ratio):
        outputs = torch.zeros(HINDILEN,BATCH_SIZE ,len(hindivocab)).to(device)
        p,hencoder3d=self.encoder.forward(inp.to(device),self.hencoder)   
        tempdecoder=torch.zeros(2,BATCH_SIZE,hencoder3d.size()[2]).to(device)
        tempdecoder[0]=hencoder3d[hencoder3d.size()[0]//2-1]
        tempdecoder[1]=hencoder3d[(hencoder3d.size()[0]//2)*2-1]
        hdecoder=tempdecoder.mean(dim=0)
        hdecoder=hdecoder.repeat(self.decoder.num_layers,1,1)
#         print(p.size())
#         hencoder4d=hencoder3d.view(hencoder3d.size()[0]//2,hencoder3d.size()[0]//2,hencoder3d.size()[1],hencoder3d.size()[2])
#         hdecoder=self.decoder.initHidden()
#         for it in range(hencoder3d.size()//2):
#             hdecoder[it]=hencoder4d[it].mean(dim=0)
        x=torch.full((1,BATCH_SIZE),hindidictc['0'])
        output,hdecoder=self.decoder.forward(x.to(device),hdecoder)
        outputs[0]=output
        t=1
        if random.random() > teacher_force_ratio:
            for i in range(1,HINDILEN):
                output=self.decoder.softmax(output)
                nextinp=torch.argmax(output, dim=2)
                output,hdecoder=self.decoder.forward(nextinp.to(device),hdecoder)
                outputs[t]=output
                t+=1
        else:            
            for i in range(1,HINDILEN):
                nextinp=target[i-1,:].unsqueeze(0)
                output,hdecoder=self.decoder.forward(nextinp.to(device),hdecoder)
                outputs[t]=output
                t+=1
        return outputs
        
    

def train(encoder,decoder,seq2seq):
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=0.001)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(reduction="sum")
    loss=0
    count=0
    numbatches=englishwords.shape[0]//BATCH_SIZE
    for ep in range(5):
        trainloss=0
        for i in range(numbatches):
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            temp=englishwords[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
            temph=hindiwords[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
            temp=temp.t()
            temph=temph.t()
            output=seq2seq.forward(temp,temph,0.5)
            output = output[:].reshape(-1, output.shape[2])
            tem = temph[:].reshape(-1)
            loss=criterion(output,tem)
            loss.backward()
            trainloss+=loss.item()
            torch.nn.utils.clip_grad_norm_(decoder.parameters(),max_norm = 1)
            torch.nn.utils.clip_grad_norm_(encoder.parameters(),max_norm = 1)
            encoder_optimizer.step()
            decoder_optimizer.step()
        train_correct=accuracy(seq2seq,englishwords,hindiwords)
        val_correct=accuracy(seq2seq,englishwordsval,hindiwordsval)
        print(ep,trainloss/(51200*HINDILEN),train_correct,val_correct)
    
def accuracy(seq2seq,english,hindi):
    numbatches=english.shape[0]//BATCH_SIZE
    correct=0
    for i in range(numbatches):
        temp=english[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
        temph=hindi[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
        temp=temp.t()
        temph=temph.t()
        output=seq2seq.forward(temp,temph,0)
        output=nn.Softmax(dim=2)(output)
        output=torch.argmax(output,dim=2)
        temph=temph.t()
        output=output.t()
        for i in range(BATCH_SIZE):
            if(torch.equal(output[i],temph[i])):
                correct+=1
    return correct


In [None]:
encoder=EncoderRNN(len(englishvocab),256,256,2).to(device)
decoder=DecoderRNN(len(hindivocab),256,256,2,len(hindivocab)).to(device)
hencoder=encoder.initHidden()
seq2seq=Seq2Seq(encoder,decoder,hencoder)
train(encoder,decoder,seq2seq)



0 0.5310823743976653 5969 608
1 0.3353901833668351 10580 927
2 0.29768058070912956 11915 968
3 0.274017201801762 14529 1121
4 0.25678213089704516 14455 1067


In [None]:
def forward( inp, target,teacher_force_ratio=0.5):
        outputs = torch.zeros(HINDILEN,BATCH_SIZE ,len(hindivocab)).to(device)
        _,hdecoder=encoder.forward(inp.to(device),hencoder)        
        x=torch.full((1,BATCH_SIZE),hindidictc['0'])
        output,hdecoder=decoder.forward(x.to(device),hdecoder)
        outputs[0]=output
        t=1
        for i in range(1,HINDILEN):
                output=decoder.softmax(output)
                nextinp=torch.argmax(output, dim=2)
                output,hdecoder=decoder.forward(nextinp.to(device),hdecoder)
                outputs[t]=output
                t+=1
        return outputs

In [None]:
def forwardbi( inp, target,teacher_force_ratio=0.5):
        outputs = torch.zeros(HINDILEN,BATCH_SIZE ,len(hindivocab)).to(device)
        _,hencoder3d=encoder.forward(inp.to(device),hencoder)   
        tempdecoder=torch.zeros(2,BATCH_SIZE,hencoder3d.size()[2]).to(device)
        tempdecoder[0]=hencoder3d[hencoder3d.size()[0]//2-1]
        tempdecoder[1]=hencoder3d[(hencoder3d.size()[0]//2)*2-1]
        hdecoder=tempdecoder.mean(dim=0)
        hdecoder=hdecoder.repeat(decoder.num_layers,1,1)       
        x=torch.full((1,BATCH_SIZE),hindidictc['0'])
        output,hdecoder=decoder.forward(x.to(device),hdecoder)
        outputs[0]=output
        t=1
        for i in range(1,HINDILEN):
                output=decoder.softmax(output)
                nextinp=torch.argmax(output, dim=2)
                output,hdecoder=decoder.forward(nextinp.to(device),hdecoder)
                outputs[t]=output
                t+=1
        return outputs

In [None]:
def accuracy(englishwords,hindiwords):
    numbatches=englishwords.shape[0]//BATCH_SIZE
    correct=0
    for i in range(numbatches):
        temp=englishwords[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
        temph=hindiwords[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
        temp=temp.t()
        temph=temph.t()
        output=forwardbi(temp,temph)
        output=nn.Softmax(dim=2)(output)
        output=torch.argmax(output,dim=2)
        temph=temph.t()
        output=output.t()
        for i in range(BATCH_SIZE):
            if(torch.equal(output[i],temph[i])):
                correct+=1
        return correct

print(correct)
    
    

17597


In [None]:
englishwordsval=torch.full((len(val_data), ENGLEN), 2).to(device)
hindiwordsval=torch.full((len(val_data), HINDILEN), 2).to(device)

In [None]:
c=0
for x,y in test_data:
    for i in range(len(x)):
        englishwordsval[c][i]=englishdictc[x[i]]
    for i in range(len(y)):
        hindiwordsval[c][i]=hindidictc[y[i]]
    hindiwordsval[c][i+1]=1
    c+=1

In [None]:
numbatches=englishwordsval.shape[0]//BATCH_SIZE
correct=0
for i in range(numbatches):
    temp=englishwordsval[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
    temph=hindiwordsval[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
    temp=temp.t()
    temph=temph.t()
    output=forwardbi(temp,temph)
    output=nn.Softmax(dim=2)(output)
    output=torch.argmax(output,dim=2)
    temph=temph.t()
    output=output.t()
    for i in range(BATCH_SIZE):
        if(torch.equal(output[i],temph[i])):
            correct+=1


print(correct)
    
    

1291


In [None]:
# 32,128,512
# max size,bs,2*layer(1)*hidden

4095


In [None]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder,hencoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.hencoder=hencoder
    def forward(self, inp, target,teacher_force_ratio=0.5):
        outputs = torch.zeros(HINDILEN,BATCH_SIZE ,len(hindivocab)).to(device)
        p,hencoder3d=self.encoder.forward(inp.to(device),self.hencoder)   
        tempdecoder=torch.zeros(2,BATCH_SIZE,hencoder3d.size()[2]).to(device)
        tempdecoder[0]=hencoder3d[hencoder3d.size()[0]//2-1]
        tempdecoder[1]=hencoder3d[(hencoder3d.size()[0]//2)*2-1]
        hdecoder=tempdecoder.mean(dim=0)
        hdecoder=hdecoder.repeat(self.decoder.num_layers,1,1)
        x=torch.full((1,BATCH_SIZE),hindidictc['0'])
        output,hdecoder=self.decoder.forward(x.to(device),hdecoder,p.to(device))
#         print(hdecoder.size())
        outputs[0]=output
        t=1
        if random.random() > teacher_force_ratio:
            for i in range(1,HINDILEN):
                output=self.decoder.softmax(output)
                nextinp=torch.argmax(output, dim=2)
                output,hdecoder=self.decoder.forward(nextinp.to(device),hdecoder,p.to(device))
                outputs[t]=output
                t+=1
        else:            
            for i in range(1,HINDILEN):
                nextinp=target[i-1,:].unsqueeze(0)
                output,hdecoder=self.decoder.forward(nextinp.to(device),hdecoder,p.to(device))
                outputs[t]=output
                t+=1
        return outputs
    
class Attention(nn.Module):
    def __init__(self,input_size,hidden_size,embedding_size,num_layers,output_size):
        super(Attention, self).__init__()
        self.hidden_size = hidden_size
        self.dropout = nn.Dropout(0.2)
        self.num_layers=num_layers
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.u=nn.Linear(hidden_size*2,hidden_size)
        self.w=nn.Linear(hidden_size,hidden_size)
        self.v=nn.Linear(hidden_size,1)
        self.gru = nn.GRU(2*hidden_size+embedding_size,hidden_size,num_layers,dropout=0.2)
        #first param will change if bidir is false
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=2)
    def forward(self, inp, hidden,encoder_output):
        hidden_r=hidden.mean(dim=0)
        hidden_r=hidden_r.repeat(HINDILEN,1,1)
        embedded = self.dropout(self.embedding(inp))
#         print(encoder_output.size())
        u1=self.u(encoder_output)
        w1=self.w(hidden_r)
        z=nn.Tanh()(u1+w1)
        et=self.v(z)
#         print(et)
        alpha=nn.Softmax(dim=0)(et)
#         print(alpha)
        ct = torch.sum(alpha * encoder_output, dim=0, keepdim=True)
        ctet=torch.cat((ct,embedded),dim=2)
        output,hidden=self.gru(ctet,hidden)
        output1=self.out(output)
#         print(output1.size(),hidden.size())
        return output1, hidden
    def initHidden(self):
        return torch.zeros(self.num_layers,BATCH_SIZE,self.hidden_size, device=device)

        
        
        

In [None]:
inp=torch.full((1,BATCH_SIZE),hindidictc['0']).to(torch.int32)
hidden=torch.full((1,BATCH_SIZE,256),hindidictc['0']).to(torch.float32)#to be repeated bs times in code
encoder_outputs=torch.full((32,BATCH_SIZE,256*2),hindidictc['0']).to(torch.float32)
attention=Attention(len(hindivocab),256,256,2,len(hindivocab))
attention.forward(inp,hidden,encoder_outputs)

  "num_layers={}".format(dropout, num_layers))


(tensor([[[ 0.0354,  0.3127,  0.2832,  ...,  0.0775,  0.0958,  0.1181],
          [-0.0638,  0.3168,  0.3439,  ..., -0.0098,  0.2600,  0.1728],
          [ 0.0404,  0.2177,  0.2104,  ...,  0.1613,  0.0616,  0.2749],
          ...,
          [ 0.0877,  0.2104,  0.3672,  ...,  0.2486, -0.0261,  0.2160],
          [-0.0459,  0.2812,  0.1697,  ...,  0.1973,  0.1747,  0.3285],
          [ 0.0590,  0.2805,  0.1684,  ...,  0.1164,  0.0374,  0.1961]]],
        grad_fn=<ViewBackward0>),
 tensor([[[ 0.2258, -0.2521, -0.4574,  ...,  0.1004,  0.2347,  0.2183],
          [-0.1059, -0.5455, -0.0390,  ...,  0.0606,  0.2736,  0.3108],
          [ 0.2226, -0.4079, -0.3475,  ...,  0.1928,  0.1755,  0.3311],
          ...,
          [ 0.1606, -0.2324, -0.2096,  ...,  0.0621,  0.2954,  0.0214],
          [-0.1228, -0.1411, -0.2231,  ...,  0.2590,  0.2142, -0.1669],
          [-0.1208,  0.1471, -0.3834,  ...,  0.0186,  0.2418,  0.1455]]],
        grad_fn=<StackBackward0>))

In [None]:
encoder=EncoderRNN(len(englishvocab),128,128,2).to(device)
decoder=Attention(len(hindivocab),128,128,2,len(hindivocab)).to(device)
hencoder=encoder.initHidden()
seq2seq=Seq2Seq(encoder,decoder,hencoder)
def train():
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=0.001)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(reduction="sum")
    loss=0
    count=0
    numbatches=englishwords.shape[0]//BATCH_SIZE
    for ep in range(15):
        trainloss=0
        for i in range(numbatches):
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            temp=englishwords[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
            temph=hindiwords[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
            temp=temp.t()
            temph=temph.t()
            output=seq2seq.forward(temp,temph,0.5)
            output = output[:].reshape(-1, output.shape[2])
            tem = temph[:].reshape(-1)
            loss=criterion(output,tem)
            loss.backward()
            trainloss+=loss.item()
            torch.nn.utils.clip_grad_norm_(decoder.parameters(),max_norm = 1)
            torch.nn.utils.clip_grad_norm_(encoder.parameters(),max_norm = 1)
            encoder_optimizer.step()
            decoder_optimizer.step()
        print(ep,trainloss/(51200*HINDILEN))
    

train()

0 0.6984311667829752
1 0.431121762804687
2 0.3665090965479612
3 0.33561007618904115
4 0.31535186517983677
5 0.295670186560601
6 0.2784927454032004
7 0.2676304139196873
8 0.25241304429247974
9 0.2399233009573072
10 0.2382461895979941
11 0.22860895477235318
12 0.22379849020391704
13 0.21717242494225503
14 0.2138343177642673


In [None]:
def forwardbi( inp, target,teacher_force_ratio=0.5):
        outputs = torch.zeros(HINDILEN,BATCH_SIZE ,len(hindivocab)).to(device)
        p,hencoder3d=encoder.forward(inp.to(device),hencoder)   
        tempdecoder=torch.zeros(2,BATCH_SIZE,hencoder3d.size()[2]).to(device)
        tempdecoder[0]=hencoder3d[hencoder3d.size()[0]//2-1]
        tempdecoder[1]=hencoder3d[(hencoder3d.size()[0]//2)*2-1]
        hdecoder=tempdecoder.mean(dim=0)
        hdecoder=hdecoder.repeat(decoder.num_layers,1,1)       
        x=torch.full((1,BATCH_SIZE),hindidictc['0'])
        output,hdecoder=decoder.forward(x.to(device),hdecoder,p)
        outputs[0]=output
        t=1
        for i in range(1,HINDILEN):
                output=decoder.softmax(output)
                nextinp=torch.argmax(output, dim=2)
                output,hdecoder=decoder.forward(nextinp.to(device),hdecoder,p)
                outputs[t]=output
                t+=1
        return outputs