In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import random
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
class MyDataset(Dataset):
    def __init__(self, csv_file):
        self.data = pd.read_csv(csv_file)
        
    def __getitem__(self, index):
        x = self.data.iloc[index,0]
        y = self.data.iloc[index,1]
        return x, y
    
    def __len__(self):
        return len(self.data)


In [3]:
train_data = MyDataset('/kaggle/input/transliteration/hin_train.csv')
train_dataloader = DataLoader(train_data, batch_size=16, shuffle=True)
test_data = MyDataset('/kaggle/input/transliteration/hin_test.csv')
test_dataloader = DataLoader(test_data, batch_size=16, shuffle=True)
val_data = MyDataset('/kaggle/input/transliteration/hin_valid.csv')
val_dataloader = DataLoader(val_data, batch_size=16, shuffle=True)

In [4]:
print(len(train_data))
ENGLEN=32
HINDILEN=32
BATCH_SIZE=1024
englishwords=torch.full((len(train_data), ENGLEN), 2).to(device)
hindiwords=torch.full((len(train_data), HINDILEN), 2).to(device)


51199


In [5]:
hindivocab=set()
englishvocab=set()
for x,y in train_data:
    for letter in x:
        englishvocab.add(letter)
    for letter in y:
        hindivocab.add(letter)  
        
hindivocab=list(hindivocab)
hindivocab.sort()
englishvocab=list(englishvocab)
englishvocab.sort()
hindivocab.insert(0,'0')#start
hindivocab.insert(1,'1') #end
hindivocab.insert(2,'2') #pad
englishvocab.insert(0,'0')#start
englishvocab.insert(1,'1') #end
englishvocab.insert(2,'2') #pad
hindidictc={}
englishdictc={}
hindidicti={}
englishdicti={}
for i in range(len(hindivocab)):
    hindidicti[i]=hindivocab[i]
    hindidictc[hindivocab[i]]=i
for i in range(len(englishvocab)):
    englishdicti[i]=englishvocab[i]
    englishdictc[englishvocab[i]]=i

c=0
for x,y in train_data:
    for i in range(len(x)):
        englishwords[c][i]=englishdictc[x[i]]
    for i in range(len(y)):
        hindiwords[c][i]=hindidictc[y[i]]
    hindiwords[c][i+1]=1
    c+=1

In [6]:
# temp=torch.full((32,16), 2).to(device)
# temph=torch.full((64, 16), 2).to(device)
class EncoderRNN(nn.Module):
    def __init__(self, input_size,hidden_size,embedding_size,num_layers):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.dropout = nn.Dropout(0.8)
        self.num_layers=num_layers
        #input size is eng vocab size
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.gru = nn.GRU(embedding_size, hidden_size,num_layers,dropout=0.8)

    def forward(self, inp, hidden):
        embedded = self.dropout(self.embedding(inp))
        output=embedded
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(self.num_layers,BATCH_SIZE,self.hidden_size, device=device)

    
class DecoderRNN(nn.Module):
    def __init__(self,input_size,hidden_size,embedding_size,num_layers,output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.dropout = nn.Dropout(0.8)
        #input size is hindi vocab size
        self.num_layers=num_layers
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.gru = nn.GRU(embedding_size,hidden_size,num_layers,dropout=0.8)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, inp, hidden):
#         inp = inp.unsqueeze(0)
        embedded = self.dropout(self.embedding(inp))
        output, hidden = self.gru(embedded, hidden)
        output1=self.out(output)
        output2 =self.softmax(output1)
#         output=output.squeeze(0)
        return output2, hidden

    def initHidden(self):
        return torch.zeros(self.num_layers,BATCH_SIZE,self.hidden_size, device=device)
            
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
    def forward(self, inp, target,teacher_force_ratio=0.5):
        outputs = torch.zeros(HINDILEN,BATCH_SIZE ,len(hindivocab)).to(device)
        hencoder=self.encoder.initHidden()
        _,hencoder=self.encoder.forward(inp.to(device),hencoder)
#         for i in range(32):
#             v=inp[:,i]
#             v=v.unsqueeze(0)
#             _,hencoder=self.encoder.forward(v.to(device),hencoder)
            
        x=torch.full((1,BATCH_SIZE),hindidictc['0'])
        output,hdecoder=self.decoder.forward(x.to(device),hencoder)
        outputs[0]=output
#         targetind=[hindidictc[target[0]]]
        t=1
        teacher_forcing_ratio=0.5
        if random.random() < teacher_forcing_ratio:
            for i in range(1,HINDILEN):
                nextinp=torch.argmax(output, dim=2)
    #             nextinp=target[i,:].unsqueeze(0)
                output,hdecoder=self.decoder.forward(nextinp.to(device),hdecoder)
                outputs[t]=output
                t+=1
        else:            
            for i in range(1,HINDILEN):
#                 nextinp=torch.argmax(output, dim=2)
                nextinp=target[i,:].unsqueeze(0)
                output,hdecoder=self.decoder.forward(nextinp.to(device),hdecoder)
                outputs[t]=output
                t+=1

        return outputs
        
        
def train():
    encoder=EncoderRNN(len(englishvocab),256,128,2).to(device)
    decoder=DecoderRNN(len(hindivocab),256,128,2,len(hindivocab)).to(device)
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=0.01)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=0.01)
    seq2seq=Seq2Seq(encoder,decoder)
    criterion = nn.CrossEntropyLoss()
    loss=0
    count=0
    numbatches=englishwords.shape[0]//BATCH_SIZE
    for ep in range(100):
        trainloss=0
        for i in range(numbatches):
            temp=englishwords[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
            temph=hindiwords[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
            temp=temp.t()
            temph=temph.t()
            output=seq2seq.forward(temp,temph)
            output = output[:].reshape(-1, output.shape[2])
            tem = temph[:].reshape(-1)
            loss=criterion(output,tem)
            trainloss+=loss.item()/(BATCH_SIZE*HINDILEN)
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            loss.backward(retain_graph=True)
            encoder_optimizer.step()
            decoder_optimizer.step()
        print(trainloss)

#     for x,y in train_dataloader:
#         x=[torch.tensor([englishdictc[c] for c in j]) for j in x]
#         y=[torch.tensor([englishdictc[c] for c in j]) for j in x]
#         x=torch.nn.utils.rnn.pad_sequence(x, batch_first=True, padding_value=2)
#         print(x)
#         count+=1
#         outputs=seq2seq.forward(x[0],y[0])
#         loss=criterion(outputs,target.to(device))
#         if(count%1000==0):
#             print(loss)
#         encoder_optimizer.zero_grad()
#         decoder_optimizer.zero_grad()
#         loss.backward(retain_graph=True)
#         encoder_optimizer.step()
#         decoder_optimizer.step()

       
        

train()

0.006287534968578257
0.006287535026785918
0.006287534823059104
0.006287534968578257
0.0062875347066437826
0.006287534590228461
0.0062875348958186805
0.006287534473813139
0.006287534983130172


KeyboardInterrupt: 

TypeError: embedding(): argument 'indices' (position 2) must be Tensor, not str