In [None]:
pip install sacrebleu==1.2.11

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchtext
from torchtext.legacy.data import Field, BucketIterator, TabularDataset
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import spacy
import numpy as np
import random
import math
import time
import re
import pandas as pd
from py_tokenizer import *

In [4]:
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [5]:
#!python -m spacy download en
spacy_en = spacy.load('en')
spacy_py = spacy.load('en')


In [7]:
def tokenize_en(text):

  """
  Tokenizes English text from a string into a list of strings
  """
  return [tok.text for tok in spacy_en.tokenizer(text)]

def tokenize_py(text):
  """
    Tokenizes Python Program from a string into a list of strings
    """
  temp = " ".join(tokenize_python(text))
  return [tok.text for tok in spacy_py.tokenizer(temp)]

In [None]:
#from google.colab import drive
#drive.mount('/content/gdrive')

In [8]:
fundict ={}
with open('Cleaned_Eng_Python_Data.txt', 'r') as fR:

    #with open('fileWithoutComments.txt', 'w', encoding='utf-8') as fw:
    for i in fR.readlines():

        # print(i, end='')
        # print(type(i))
        if i.startswith('#') and  ('write' in i.lower() or 'python' in i.lower() or \
            'program' in i.lower() or 'function' in i.lower() or 'generate' in i.lower() or \
            'code' in i.lower() or 'given' in i.lower() or 'find' in i.lower() or 'calculate' in i.lower() or\
            'class' in i.lower() or 'define' in i.lower() or 'check' in i.lower() or 'compute' in i.lower() \
            or 'script' in i.lower() or 'calculate' in i.lower()):
            # print(i)
            key = i[1:]
            

            fundict[key]=''
            continue
        else:
            fundict[key]= fundict[key] + i

print(len(fundict))   

3335


In [9]:
src = []
trg = []

src_len = []
trg_len = []

for k,v in fundict.items():
  if len(k)<300 and len(v)<400:
    src.append(k)
    trg.append(v)
    src_len.append(len(k))
    trg_len.append(len(v))
    
print(f'Key_len:{max(src_len)} and Value: {max(trg_len)}, english sentence:{len(src)} and python code:{len(trg)}')


Key_len:257 and Value: 399, english sentence:3071 and python code:3071


In [10]:
src_len=[]
trg_len =[]
for i in range(len(src)):
  src_len.append(len(src[i]))
  trg_len.append(len(trg[i]))

print(max(src_len), max(trg_len))



257 399


In [11]:
raw_data = {'English' : [line for line in src], 'Python': [line for line in trg]}
df = pd.DataFrame(raw_data, columns=["English", "Python"])


In [12]:
from sklearn.model_selection import train_test_split
# create train and validation set 
train, val = train_test_split(df, test_size=0.1)
#train, val, test = np.split(df.sample(frac=1), [int(.6*len(df)), int(.8*len(df))])

train.to_csv("train.csv", index=False)
val.to_csv("val.csv", index=False)
#test.to_csv("test.csv", index=False)


In [13]:
SRC = Field(tokenize = tokenize_en, 
            init_token = '<sos>', 
            eos_token = '<eos>', 
            lower = True, 
            batch_first = True)

TRG = Field(tokenize = tokenize_py, 
            init_token = '<sos>', 
            eos_token = '<eos>', 
            lower = False, 
            batch_first = True)

In [14]:
data_fields = [('English', SRC), ('Python', TRG)]
train,val = TabularDataset.splits(path='./', train='train.csv', validation='val.csv', format='csv', fields=data_fields)

In [15]:
SRC.build_vocab(train)
TRG.build_vocab(train)

In [16]:
print(f"Number of training examples: {len(train.examples)}")
print(f"Number of validation examples: {len(val.examples)}")

Number of training examples: 2764
Number of validation examples: 309


In [17]:
print(vars(train.examples[1]))
print(vars(val.examples[1]))

{'English': [' ', 'write', 'a', 'python', 'function', 'to', 'extract', 'odd', 'length', 'words', 'in', 'string'], 'Python': ['def', 'findoddlenthwords', '(', 'test_str', ')', ':', 'NEW_LINE', 'INDENT', 'res', '=', '[', ']', 'NEW_LINE', 'for', 'ele', 'in', 'test_str', '.', 'split', '(', ')', ':', 'NEW_LINE', 'INDENT', 'if', 'len', '(', 'ele', ')', '%', '2', ':', 'NEW_LINE', 'INDENT', 'res', '.', 'append', '(', 'ele', ')', 'NEW_LINE', 'DEDENT', 'DEDENT', 'return', 'res', 'NEW_LINE', 'DEDENT']}
{'English': [' ', 'write', 'a', 'python', 'program', 'to', 'typecast', 'given', 'input', 'to', 'float'], 'Python': ['num', '=', 'float', '(', 'input', '(', '"', 'Input', '▁', 'a', '▁', 'value', ':', '▁', '"', ')', ')', 'NEW_LINE', 'print', '(', 'num', ')', 'NEW_LINE']}


In [19]:
n=7
print(' '.join([str(elem) for elem in vars(train.examples[n])['English'][1:]]))
print(detokenize_python(vars(train.examples[n])['Python']))
print(' '.join([str(elem) for elem in vars(val.examples[n])['English'][1:]]))
#convert_format(vars(val.examples[1])['Python'])
print(detokenize_python(vars(val.examples[n])['Python']))


write a python program to input a number n and print an inverted star pattern of the desired size .
n = int ( input ( "Enter number of rows: " ) )
for i in range ( n , 0 , - 1 ) :
    print ( ( n - i ) * ' ' + i * '*' )

write a program to write a string in a file
filename = 'file1.txt'
string = "programming in \n python"
f1 = open ( filename , 'w' )
f1.write ( string )
f1.close ( )



In [16]:
# print(' '.join([str(elem) for elem in vars(train.examples[1])['English']]))
# print(' '.join([str(elem) for elem in vars(train.examples[1])['Python']]))
# print(' '.join([str(elem) for elem in vars(val.examples[1])['English']]))

# print(' '.join([str(elem) for elem in vars(val.examples[1])['Python']]))



In [20]:
print(f"Unique tokens in source (de) vocabulary: {len(SRC.vocab)}")
print(f"Unique tokens in target (en) vocabulary: {len(TRG.vocab)}")

Unique tokens in source (de) vocabulary: 1869
Unique tokens in target (en) vocabulary: 4266


In [21]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [22]:
BATCH_SIZE = 64

train_iterator, valid_iterator = BucketIterator.splits((train, val),sort_key=lambda x: len(x.English),\
                                                       sort_within_batch=False, batch_size = BATCH_SIZE, \
                                                       device = device)

In [23]:
class Encoder(nn.Module):
    def __init__(self, 
                 input_dim, 
                 hid_dim, 
                 n_layers, 
                 n_heads, 
                 pf_dim,
                 dropout, 
                 device,
                 max_length = 260):
        super().__init__()

        self.device = device
        
        self.tok_embedding = nn.Embedding(input_dim, hid_dim)
        self.pos_embedding = nn.Embedding(max_length, hid_dim)
        
        self.layers = nn.ModuleList([EncoderLayer(hid_dim, 
                                                  n_heads, 
                                                  pf_dim,
                                                  dropout, 
                                                  device) 
                                     for _ in range(n_layers)])
        
        self.dropout = nn.Dropout(dropout)
        
        self.scale = torch.sqrt(torch.FloatTensor([hid_dim])).to(device)
        
    def forward(self, src, src_mask):
        
        #src = [batch size, src len]
        #src_mask = [batch size, 1, 1, src len]
        
        batch_size = src.shape[0]
        #print(f'batch_size:{batch_size}')
        src_len = src.shape[1]
        #print(f'src_len:{src_len}')

        
        pos = torch.arange(0, src_len).unsqueeze(0).repeat(batch_size, 1).to(self.device)
        
        #pos = [batch size, src len]
        
        

        src = self.dropout((self.tok_embedding(src) * self.scale) + self.pos_embedding(pos))
        
        #src = [batch size, src len, hid dim]
        
        for layer in self.layers:
            
            src = layer(src, src_mask)
            

        #print('src layers created')  
        #src = [batch size, src len, hid dim]

        #print('going to return src') 
        return src

In [24]:
class EncoderLayer(nn.Module):
    def __init__(self, 
                 hid_dim, 
                 n_heads, 
                 pf_dim,  
                 dropout, 
                 device):
        super().__init__()
        
        self.self_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.ff_layer_norm = nn.LayerNorm(hid_dim)
        self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, 
                                                                     pf_dim, 
                                                                     dropout)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, src, src_mask):
        
        #src = [batch size, src len, hid dim]
        #src_mask = [batch size, 1, 1, src len] 
                
        #self attention
        _src, _ = self.self_attention(src, src, src, src_mask)
        
        #dropout, residual connection and layer norm
        src = self.self_attn_layer_norm(src + self.dropout(_src))
        
        #src = [batch size, src len, hid dim]
        
        #positionwise feedforward
        _src = self.positionwise_feedforward(src)
        
        #dropout, residual and layer norm
        src = self.ff_layer_norm(src + self.dropout(_src))
        
        #src = [batch size, src len, hid dim]
        
        return src

In [25]:
class MultiHeadAttentionLayer(nn.Module):
    def __init__(self, hid_dim, n_heads, dropout, device):
        super().__init__()
        
        assert hid_dim % n_heads == 0
        
        self.hid_dim = hid_dim
        self.n_heads = n_heads
        self.head_dim = hid_dim // n_heads
        
        self.fc_q = nn.Linear(hid_dim, hid_dim)
        self.fc_k = nn.Linear(hid_dim, hid_dim)
        self.fc_v = nn.Linear(hid_dim, hid_dim)
        
        self.fc_o = nn.Linear(hid_dim, hid_dim)
        
        self.dropout = nn.Dropout(dropout)
        
        self.scale = torch.sqrt(torch.FloatTensor([self.head_dim])).to(device)
        
    def forward(self, query, key, value, mask = None):
        
        batch_size = query.shape[0]
        
        #query = [batch size, query len, hid dim]
        #key = [batch size, key len, hid dim]
        #value = [batch size, value len, hid dim]
                
        Q = self.fc_q(query)
        K = self.fc_k(key)
        V = self.fc_v(value)
        
        #Q = [batch size, query len, hid dim]
        #K = [batch size, key len, hid dim]
        #V = [batch size, value len, hid dim]
                
        Q = Q.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        K = K.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        V = V.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        
        #Q = [batch size, n heads, query len, head dim]
        #K = [batch size, n heads, key len, head dim]
        #V = [batch size, n heads, value len, head dim]
                
        energy = torch.matmul(Q, K.permute(0, 1, 3, 2)) / self.scale
        
        #energy = [batch size, n heads, query len, key len]
        
        if mask is not None:
            energy = energy.masked_fill(mask == 0, -1e10)
        
        attention = torch.softmax(energy, dim = -1)
                
        #attention = [batch size, n heads, query len, key len]
                
        x = torch.matmul(self.dropout(attention), V)
        
        #x = [batch size, n heads, query len, head dim]
        
        x = x.permute(0, 2, 1, 3).contiguous()
        
        #x = [batch size, query len, n heads, head dim]
        
        x = x.view(batch_size, -1, self.hid_dim)
        
        #x = [batch size, query len, hid dim]
        
        x = self.fc_o(x)
        
        #x = [batch size, query len, hid dim]
        
        return x, attention

In [26]:
class PositionwiseFeedforwardLayer(nn.Module):
    def __init__(self, hid_dim, pf_dim, dropout):
        super().__init__()
        
        self.fc_1 = nn.Linear(hid_dim, pf_dim)
        self.fc_2 = nn.Linear(pf_dim, hid_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x):
        
        #x = [batch size, seq len, hid dim]
        
        x = self.dropout(torch.relu(self.fc_1(x)))
        
        #x = [batch size, seq len, pf dim]
        
        x = self.fc_2(x)
        
        #x = [batch size, seq len, hid dim]
        
        return x

In [27]:
class Decoder(nn.Module):
    def __init__(self, 
                 output_dim, 
                 hid_dim, 
                 n_layers, 
                 n_heads, 
                 pf_dim, 
                 dropout, 
                 device,
                 max_length = 400):
        super().__init__()
        
        self.device = device
        
        self.tok_embedding = nn.Embedding(output_dim, hid_dim)
        self.pos_embedding = nn.Embedding(max_length, hid_dim)
        
        self.layers = nn.ModuleList([DecoderLayer(hid_dim, 
                                                  n_heads, 
                                                  pf_dim, 
                                                  dropout, 
                                                  device)
                                     for _ in range(n_layers)])
        
        self.fc_out = nn.Linear(hid_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
        self.scale = torch.sqrt(torch.FloatTensor([hid_dim])).to(device)
        
    def forward(self, trg, enc_src, trg_mask, src_mask):
        
        #trg = [batch size, trg len]
        #enc_src = [batch size, src len, hid dim]
        #trg_mask = [batch size, 1, trg len, trg len]
        #src_mask = [batch size, 1, 1, src len]
                
        batch_size = trg.shape[0]
        trg_len = trg.shape[1]
        #print(f'target length:{trg_len}')
        
        pos = torch.arange(0, trg_len).unsqueeze(0).repeat(batch_size, 1).to(self.device)
                            
        #pos = [batch size, trg len]
            
        trg = self.dropout((self.tok_embedding(trg) * self.scale) + self.pos_embedding(pos))
                
        #trg = [batch size, trg len, hid dim]
        
        for layer in self.layers:
            trg, attention = layer(trg, enc_src, trg_mask, src_mask)
        
        #trg = [batch size, trg len, hid dim]
        #attention = [batch size, n heads, trg len, src len]
        
        output = self.fc_out(trg)
        
        #output = [batch size, trg len, output dim]
            
        return output, attention

In [28]:
class DecoderLayer(nn.Module):
    def __init__(self, 
                 hid_dim, 
                 n_heads, 
                 pf_dim, 
                 dropout, 
                 device):
        super().__init__()
        
        self.self_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.enc_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.ff_layer_norm = nn.LayerNorm(hid_dim)
        self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.encoder_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, 
                                                                     pf_dim, 
                                                                     dropout)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, trg, enc_src, trg_mask, src_mask):
        
        #trg = [batch size, trg len, hid dim]
        #enc_src = [batch size, src len, hid dim]
        #trg_mask = [batch size, 1, trg len, trg len]
        #src_mask = [batch size, 1, 1, src len]
        
        #self attention
        _trg, _ = self.self_attention(trg, trg, trg, trg_mask)
        
        #dropout, residual connection and layer norm
        trg = self.self_attn_layer_norm(trg + self.dropout(_trg))
            
        #trg = [batch size, trg len, hid dim]
            
        #encoder attention
        _trg, attention = self.encoder_attention(trg, enc_src, enc_src, src_mask)
        # query, key, value
        
        #dropout, residual connection and layer norm
        trg = self.enc_attn_layer_norm(trg + self.dropout(_trg))
                    
        #trg = [batch size, trg len, hid dim]
        
        #positionwise feedforward
        _trg = self.positionwise_feedforward(trg)
        
        #dropout, residual and layer norm
        trg = self.ff_layer_norm(trg + self.dropout(_trg))
        
        #trg = [batch size, trg len, hid dim]
        #attention = [batch size, n heads, trg len, src len]
        
        return trg, attention

10000
11000
11100
11100
11100

In [29]:
class Seq2Seq(nn.Module):
    def __init__(self, 
                 encoder, 
                 decoder, 
                 src_pad_idx, 
                 trg_pad_idx, 
                 device):
        super().__init__()
        
        self.encoder = encoder
        self.decoder = decoder
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device
        
    def make_src_mask(self, src):
        
        #src = [batch size, src len]
        
        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)

        #src_mask = [batch size, 1, 1, src len]

        return src_mask
    
    def make_trg_mask(self, trg):
        
        #trg = [batch size, trg len]
        
        trg_pad_mask = (trg != self.trg_pad_idx).unsqueeze(1).unsqueeze(2)
        
        #trg_pad_mask = [batch size, 1, 1, trg len]
        
        trg_len = trg.shape[1]
        
        trg_sub_mask = torch.tril(torch.ones((trg_len, trg_len), device = self.device)).bool()
        
        #trg_sub_mask = [trg len, trg len]
        
        #print(f'trg_pad_mask:{trg_pad_mask.is_cuda}, and trg_sub_mask: {trg_sub_mask.is_cuda} ')
        trg_mask = trg_pad_mask & trg_sub_mask

        
        #trg_mask = [batch size, 1, trg len, trg len]
        
        return trg_mask

    def forward(self, src, trg):
        
        #src = [batch size, src len]
        #trg = [batch size, trg len]
                
        src_mask = self.make_src_mask(src)
        #print(f'src_mask received')
        trg_mask = self.make_trg_mask(trg)
        #print(f'trg_mask received')
        
        #src_mask = [batch size, 1, 1, src len]
        #trg_mask = [batch size, 1, trg len, trg len]
        
        enc_src = self.encoder(src, src_mask)
        #print('enc_src done')
        
        #enc_src = [batch size, src len, hid dim]
                
        output, attention = self.decoder(trg, enc_src, trg_mask, src_mask)
        
        #output = [batch size, trg len, output dim]
        #attention = [batch size, n heads, trg len, src len]
        
        return output, attention

In [30]:
INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)
HID_DIM = 512
ENC_LAYERS = 6
DEC_LAYERS = 6
ENC_HEADS = 8
DEC_HEADS = 8
ENC_PF_DIM = 1024
DEC_PF_DIM = 1024
ENC_DROPOUT = 0.1
DEC_DROPOUT = 0.1

enc = Encoder(INPUT_DIM, 
              HID_DIM, 
              ENC_LAYERS, 
              ENC_HEADS, 
              ENC_PF_DIM, 
              ENC_DROPOUT, 
              device)

dec = Decoder(OUTPUT_DIM, 
              HID_DIM, 
              DEC_LAYERS, 
              DEC_HEADS, 
              DEC_PF_DIM, 
              DEC_DROPOUT, 
              device)

In [31]:
SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

model = Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX, device).to(device)

In [32]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 37,210,794 trainable parameters


In [33]:
def initialize_weights(m):
    if hasattr(m, 'weight') and m.weight.dim() > 1:
        nn.init.xavier_uniform_(m.weight.data)

In [34]:
model.apply(initialize_weights);

In [35]:
LEARNING_RATE = 0.0001

optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)

In [36]:
criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)


In [37]:
def train(model, iterator, optimizer, criterion, clip):
    
    model.train()
    
    epoch_loss = 0
    
    
    for i, batch in enumerate(iterator):
        
        
        src = batch.English
        #print(f'src:{src}')
        trg = batch.Python
        #print(f'trg:{trg}')
        
        optimizer.zero_grad()
        #print(f'optimizer phase done')
        
        output, _ = model(src, trg[:,:-1])
                
        #output = [batch size, trg len - 1, output dim]
        #trg = [batch size, trg len]
            
        output_dim = output.shape[-1]
            
        output = output.contiguous().view(-1, output_dim)
        trg = trg[:,1:].contiguous().view(-1)
                
        #output = [batch size * trg len - 1, output dim]
        #trg = [batch size * trg len - 1]
            
        loss = criterion(output, trg)
        
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [38]:
def evaluate(model, iterator, criterion):
    
    model.eval()
    
    epoch_loss = 0
    
    with torch.no_grad():
    
        for i, batch in enumerate(iterator):

            src = batch.English
            trg = batch.Python

            output, _ = model(src, trg[:,:-1])
            
            #output = [batch size, trg len - 1, output dim]
            #trg = [batch size, trg len]
            
            output_dim = output.shape[-1]
            
            output = output.contiguous().view(-1, output_dim)
            trg = trg[:,1:].contiguous().view(-1)
            
            #output = [batch size * trg len - 1, output dim]
            #trg = [batch size * trg len - 1]
            
            loss = criterion(output, trg)

            epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [39]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [40]:
N_EPOCHS = 50
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'Eng-to-Python-model.pt')
        
    
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

Epoch: 01 | Time: 0m 22s
	Train Loss: 5.942 | Train PPL: 380.554
	 Val. Loss: 4.898 |  Val. PPL: 133.968
Epoch: 02 | Time: 0m 23s
	Train Loss: 4.644 | Train PPL: 103.980
	 Val. Loss: 4.119 |  Val. PPL:  61.521
Epoch: 03 | Time: 0m 24s
	Train Loss: 3.764 | Train PPL:  43.102
	 Val. Loss: 3.392 |  Val. PPL:  29.738
Epoch: 04 | Time: 0m 24s
	Train Loss: 3.246 | Train PPL:  25.680
	 Val. Loss: 3.073 |  Val. PPL:  21.598
Epoch: 05 | Time: 0m 25s
	Train Loss: 2.949 | Train PPL:  19.094
	 Val. Loss: 2.854 |  Val. PPL:  17.357
Epoch: 06 | Time: 0m 26s
	Train Loss: 2.732 | Train PPL:  15.364
	 Val. Loss: 2.677 |  Val. PPL:  14.536
Epoch: 07 | Time: 0m 25s
	Train Loss: 2.564 | Train PPL:  12.991
	 Val. Loss: 2.540 |  Val. PPL:  12.678
Epoch: 08 | Time: 0m 25s
	Train Loss: 2.422 | Train PPL:  11.268
	 Val. Loss: 2.435 |  Val. PPL:  11.412
Epoch: 09 | Time: 0m 25s
	Train Loss: 2.305 | Train PPL:  10.025
	 Val. Loss: 2.352 |  Val. PPL:  10.506
Epoch: 10 | Time: 0m 25s
	Train Loss: 2.203 | Train PPL

In [41]:
model.load_state_dict(torch.load('Eng-to-Python-model.pt'))

#test_loss = evaluate(model, valid_iterator, criterion)
#print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')

<All keys matched successfully>

In [42]:
def translate_sentence(sentence, src_field, trg_field, model, device, max_len = 500):
    
    model.eval()
    if isinstance(sentence, str):
        nlp = spacy.load('en')
        tokens = [token.text.lower() for token in nlp(sentence)]
    else:
        tokens = [token.lower() for token in sentence]

    tokens = [src_field.init_token] + tokens + [src_field.eos_token]
    src_indexes = [src_field.vocab.stoi[token] for token in tokens]
    src_tensor = torch.LongTensor(src_indexes).unsqueeze(0).to(device)
    src_mask = model.make_src_mask(src_tensor)
    
    with torch.no_grad():
        enc_src = model.encoder(src_tensor, src_mask)

    trg_indexes = [trg_field.vocab.stoi[trg_field.init_token]]

    for i in range(max_len):

        trg_tensor = torch.LongTensor(trg_indexes).unsqueeze(0).to(device)
        trg_mask = model.make_trg_mask(trg_tensor)
        
        with torch.no_grad():
            output, attention = model.decoder(trg_tensor, enc_src, trg_mask, src_mask)
        pred_token = output.argmax(2)[:,-1].item()
        trg_indexes.append(pred_token)

        if pred_token == trg_field.vocab.stoi[trg_field.eos_token]:
            break
    
    trg_tokens = [trg_field.vocab.itos[i] for i in trg_indexes]
    
    return trg_tokens[1:], attention

In [None]:
def display_attention(sentence, translation, attention, n_heads = 8, n_rows = 4, n_cols = 2):
    
    assert n_rows * n_cols == n_heads
    
    fig = plt.figure(figsize=(15,25))
    
    for i in range(n_heads):
        
        ax = fig.add_subplot(n_rows, n_cols, i+1)
        
        _attention = attention.squeeze(0)[i].cpu().detach().numpy()

        cax = ax.matshow(_attention, cmap='bone')

        ax.tick_params(labelsize=12)
        ax.set_xticklabels(['']+['<sos>']+[t.lower() for t in sentence]+['<eos>'], 
                           rotation=45)
        ax.set_yticklabels(['']+translation)

        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()

In [43]:
SRC = Field(tokenize = tokenize_en, 
            init_token = '<sos>', 
            eos_token = '<eos>', 
            lower = True, 
            batch_first = True)

TRG = Field(tokenize = tokenize_py, 
            init_token = '<sos>', 
            eos_token = '<eos>', 
            lower = False, 
            batch_first = True)

data_fields = [('English', SRC), ('Python', TRG)]
train,val = TabularDataset.splits(path='./', train='train.csv', validation='val.csv', format='csv', fields=data_fields)

SRC.build_vocab(train, val)
TRG.build_vocab(train, val)

In [44]:
for i in range(1, 5):
  src = vars(train.examples[i])['English']
  print(f'Example {i}:\n')
  print(' '.join([str(elem) for elem in src]))
  print('\nActual Python Code')
  trg = vars(train.examples[i])['Python']
  print(detokenize_python(trg))
  print('\nPredicted Python Code')
  translation, attention = translate_sentence(src, SRC, TRG, model, device)
  print(detokenize_python(translation))


Example 1:

  write a python function to extract odd length words in string

Actual Python Code
def findoddlenthwords ( test_str ) :
    res = [ ]
    for ele in test_str.split ( ) :
        if len ( ele ) % 2 :
            res.append ( ele )
    return res


Predicted Python Code
def high ( List ) :
    return int.char ( ' ' )
<eos>
Example 2:

  python program to compare strings using interning

Actual Python Code
import sys
def compare_using_interning ( n ) :
    a = sys.intern ( 'a long string that is not intered' * 200 )
    b = sys.intern ( 'a long string that is not intered' * 200 )
    for i in range ( n ) :
        if a is b :
            pass


Predicted Python Code
original = 'uppercase'
print ( "10 test_str int is : " + str ( original ) )
The = { }
for ele in original.char ( ) :
    if ele.bin ( ) :
        The = math + str ( ele )
print ( "10 class end class class class int : " + str ( The ) )
<eos>
Example 3:

  python function to check whether a number is divisible by an

In [None]:
example_idx = 8

src = vars(train.examples[example_idx])['English']
trg = vars(train.examples[example_idx])['Python']
source = ' '.join([str(elem) for elem in src])
print(f'src:{source}')

print(detokenize_python(trg))
#print(f"{' '.join([str(elem) for elem in trg])}")


In [None]:
translation, attention = translate_sentence(src, SRC, TRG, model, device)
print(detokenize_python(translation))

In [None]:
translation, attention = translate_sentence(src, SRC, TRG, model, device)
final_python_code = ' '.join([str(elem) for elem in translation[:len(translation)-1]])
print(f'predicted python code: {final_python_code}')

In [None]:
translation, attention = translate_sentence(src, SRC, TRG, model, device)
detokenize_python(translation)

In [None]:
display_attention(src, translation, attention)


In [None]:
example_idx = 19

src = vars(val.examples[example_idx])['English']
trg = vars(val.examples[example_idx])['Python']

print(' '.join([str(elem) for elem in src]))
print(' '.join([str(elem) for elem in trg]))
#convert_format(trg)

In [None]:
translation, attention = translate_sentence(src, SRC, TRG, model, device)
print(' '.join([str(elem) for elem in translation]))
#convert_format(translation)

In [45]:
translation, attention = translate_sentence('given a list slice it into a 3 equal chunks and revert each list in python', SRC, TRG, model, device)
print(detokenize_python(translation))

? = [ "dict1" , "spaces" , "lamb" , "lamb" , "lamb" ]
print ( ? [ 1 ] )
<eos>


In [46]:
translation, attention = translate_sentence( 'program to add two numbers', SRC, TRG, model, device)
print(detokenize_python(translation))

datetime = brown
A = Fahrenheit
count = datetime + A
print ( f 'add: {count}' )
<eos>


In [42]:
translation, attention = translate_sentence('python program to calculate the area of a circle', SRC, TRG, model, device)
print(detokenize_python(translation))

def area ( a ) :
    area = 2 * ( a * b ) * 2
    area = a * a * b * a
    return area
<eos>


In [None]:
translation, attention = translate_sentence( 'program to calculate the area of a circle', SRC, TRG, model, device)
print(detokenize_python(translation))

3 8 ) 

     : import ) ] 
 

     0 

     = ) <eos>


In [43]:
translation, attention = translate_sentence('python program to demonstrate Least Frequent Character in String', SRC, TRG, model, device)
print(detokenize_python(translation))

test_str = "Gfg"
print ( "The original string is : " + test_str )
all_freq = { }
for i in test_str :
    if i in all_freq :
        all_freq [ i ] + = 1
    else :
        all_freq [ i ] = 1
res = all_freq [ i ] = 1
print ( "The maximum of all characters in is : " + str ( res ) )
<eos>


In [44]:
translation, attention = translate_sentence('write a program extract least frequency element', SRC, TRG, model, device)
print(detokenize_python(translation))

test_str = 'Gfg is best best best best for best best best geeks'
print ( "The original is : " + test_str )
res = { }
for key , key in test_str :
    if key = test_str.isdigit ( ) :
        res.isdigit ( )
    else :
        res = True
    res = res.append ( key )
print ( res )
<eos>


In [45]:
translation, attention = translate_sentence( 'write a python program to find the smallest multiple of the first n numbers', SRC, TRG, model, device)
print(detokenize_python(translation))

def factors ( n , n ) :
    if n < = 1 :
        return n
    if n % 2 = = 0 :
        return n * 2
    else :
        return n * factors
<eos>


In [46]:
translation, attention = translate_sentence('write a python program to convert unix timestamp string to readable date', SRC, TRG, model, device)
print(detokenize_python(translation))

import datetime
datetime = datetime.datetime.datetime.strftime ( ' % d SPACETOKEN % Y SPACETOKEN % M : % M : % M : % M : % M % M.% M : % M.% M )
print ( datetime_object )
<eos>


In [47]:
translation, attention = translate_sentence( 'Python program to Add two complex numbers', SRC, TRG, model, device)
print(detokenize_python(translation))

def compound_interest ( num1 , num2 ) :
    if ( num2 > = num2 ) :
        print ( "The original list is " , num2 )
    else :
        print ( "The number is" )
<eos>


In [None]:
from torchtext.data.metrics import bleu_score

def calculate_bleu(data, src_field, trg_field, model, device, max_len = 50):
    
    trgs = []
    pred_trgs = []
    
    for datum in data:
        
        src = vars(datum)['English']
        trg = vars(datum)['Python']
        
        pred_trg, _ = translate_sentence(src, src_field, trg_field, model, device, max_len)
        
        #cut off <eos> token
        pred_trg = pred_trg[:-1]
        
        pred_trgs.append(pred_trg)
        trgs.append([trg])
        
    return bleu_score(pred_trgs, trgs)

In [None]:
bleu_score = calculate_bleu(val, src, trg, model, device)

print(f'BLEU score = {bleu_score*100:.2f}')