In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

import torchtext
from torchtext.legacy.data import Field, BucketIterator, Example, Dataset

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import spacy
import numpy as np
import pandas as pd
import re

import random
import math
import time
from sklearn.model_selection import train_test_split
import tokenize
from io import BytesIO

In [4]:
# Seed every random number generator used below so that runs are repeatable.
SEED = 1234

random.seed(SEED)  # Python's built-in RNG
np.random.seed(SEED)  # NumPy's global RNG
torch.manual_seed(SEED)  # PyTorch RNG
torch.cuda.manual_seed(SEED)  # PyTorch RNG of the current CUDA device
torch.backends.cudnn.deterministic = True  # ask cuDNN for deterministic algorithms

In [5]:
!python -m spacy download en

[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')
[38;5;2m✔ Linking successful[0m
/usr/local/lib/python3.7/dist-packages/en_core_web_sm -->
/usr/local/lib/python3.7/dist-packages/spacy/data/en
You can now load the model via spacy.load('en')


In [6]:
# Read the raw corpus; every element of `lines` keeps its trailing newline.
with open('english_python_data.txt', encoding='utf-8') as f:
    lines = f.readlines()

# Discard boilerplate comment lines (matrix sizes, "result", "iterate",
# "initialize", "Driver", "This function") so that they are not mistaken for
# question prompts further down.
comment_re = re.compile(
    r'#\s*\dx\d\s*matrix|#\s*result|#\s*iterate|#\s*initialize|#\s*Driver|#\s*This function|#\s*Iterate',
    re.IGNORECASE)
lines = [raw_line for raw_line in lines if not comment_re.search(raw_line)]

In [7]:
# An example starts at a comment line ("#..." or " #...") that is either the
# very first line of the corpus or directly follows a blank line.  The explicit
# `counter == 0` test matters: without it the check would read `lines[-1]`
# (negative-index wrap-around), so whether the first example is kept would
# depend on the *last* line of the file.
example_start_id = [
    counter for counter, line in enumerate(lines)
    if (line.startswith("#") or line.startswith(" #"))
    and (counter == 0 or lines[counter - 1].strip() == '')
]

# Turn each [start, next start) slice into {'ques_prompt', 'source_code'}.
training_examples = []
last_num = len(example_start_id) - 1
for num, idx in enumerate(example_start_id):
    if num != last_num:
        example = lines[idx:example_start_id[num + 1]]
        if re.search(r"#\s*\d", example[0], re.IGNORECASE) and re.search(r"#", example[1], re.IGNORECASE) is not None:
            # First line is a numbered header ("# 12") followed by the real
            # prompt comment: the prompt is the second line.
            ques_prompt, code_lines = example[1], example[2:]
        elif re.search(r'#\s*In\[\d*\]', "".join(example), re.IGNORECASE) is not None:
            # Slice contains notebook cell markers ("# In[3]"): skip it.
            continue
        else:
            ques_prompt, code_lines = example[0], example[1:]
    else:
        # The final example runs to the end of the file and, as before, is
        # taken as-is without the header / cell-marker checks.
        example = lines[idx:]
        ques_prompt, code_lines = example[0], example[1:]

    training_examples.append({
        'ques_prompt': ques_prompt.strip(),
        'source_code': "".join(code_lines).strip(),
    })

In [8]:
# One row per parsed example, with columns 'ques_prompt' and 'source_code'.
full_data = pd.DataFrame(training_examples)

In [9]:
# Peek at the first rows to sanity-check the prompt / code pairing.
full_data.head()

Unnamed: 0,ques_prompt,source_code
0,# write a python program to add two numbers,num1 = 1.5\nnum2 = 6.3\nsum = num1 + num2\npri...
1,# write a python function to add two user prov...,"def add_two_numbers(num1, num2):\n sum = nu..."
2,# write a program to find and print the larges...,num1 = 10\nnum2 = 12\nnum3 = 14\nif (num1 >= n...
3,# write a program to find and print the smalle...,num1 = 10\nnum2 = 12\nnum3 = 14\nif (num1 <= n...
4,# Write a python function to merge two given l...,"def merge_lists(l1, l2):\n return l1 + l2"


In [10]:
# (number of examples, number of columns) before length filtering.
full_data.shape

(4343, 2)

In [11]:
# Count missing values per column.
full_data.isnull().sum()

ques_prompt    0
source_code    0
dtype: int64

In [12]:
# 81st percentile of the source-code length in characters; used to pick the
# length cut-off applied in the next cells.
full_data['source_code'].apply(lambda x:len(x)).quantile(0.81)

247.02000000000044

In [13]:
# Boolean mask: True for examples whose source code is longer than 250 characters.
len_filter = full_data['source_code'].apply(lambda x:len(x) > 250)
# Index labels of the rows that will be dropped.
len_filter[len_filter == True].index

Int64Index([   5,    7,   53,   62,   74,  101,  177,  181,  193,  197,
            ...
            4282, 4295, 4304, 4317, 4318, 4319, 4320, 4321, 4322, 4323],
           dtype='int64', length=808)

In [14]:
# Build the cleaned frame in a single non-mutating chain (no `inplace=True`):
#   1. keep only the rows NOT flagged by `len_filter` (code <= 250 characters),
#   2. renumber the index from 0,
#   3. strip digits and "# " comment markers from the prompts, using vectorised
#      string methods; `\d+` removes the same characters as the former `\d*`
#      without matching the empty string at every position,
#   4. rename the columns to the field names used by torchtext below.
full_data = (
    full_data
    .loc[~len_filter]
    .reset_index(drop = True)
    .assign(ques_prompt = lambda df: df['ques_prompt']
            .str.replace(r'\d+', '', regex = True)
            .str.replace(r'(#\s)+', '', regex = True))
    .rename(columns = {'ques_prompt': 'src', 'source_code': 'trg'})
)

In [15]:
# Inspect the cleaned prompts ('src') and code ('trg').
full_data.head()

Unnamed: 0,src,trg
0,write a python program to add two numbers,num1 = 1.5\nnum2 = 6.3\nsum = num1 + num2\npri...
1,write a python function to add two user provid...,"def add_two_numbers(num1, num2):\n sum = nu..."
2,write a program to find and print the largest ...,num1 = 10\nnum2 = 12\nnum3 = 14\nif (num1 >= n...
3,write a program to find and print the smallest...,num1 = 10\nnum2 = 12\nnum3 = 14\nif (num1 <= n...
4,Write a python function to merge two given lis...,"def merge_lists(l1, l2):\n return l1 + l2"


In [16]:
# Number of examples left after removing the long snippets.
full_data.shape

(3535, 2)

In [17]:
# Load the English spaCy pipeline once and share it: both names previously
# loaded the very same model, doubling load time and memory for no benefit.
# The explicit package name 'en_core_web_sm' is what the 'en' shortcut links to
# (see the download output above) and also works where the shortcut is absent.
en_tokenizer = spacy.load('en_core_web_sm')
# NOTE(review): Python code is tokenised with the English tokenizer, which
# yields tokens such as 'add_two_numbers(num1'; a Python-aware tokenizer may be
# worth evaluating.
py_tokenizer = en_tokenizer

In [18]:
def tokenize_py(text):
    """
    Split a Python source string into a list of token strings using the
    tokenizer of the module-level ``py_tokenizer`` pipeline.
    """
    pieces = []
    for tok in py_tokenizer.tokenizer(text):
        pieces.append(tok.text)
    return pieces

def tokenize_en(text):
    """
    Split an English string into a list of token strings using the tokenizer
    of the module-level ``en_tokenizer`` pipeline.
    """
    pieces = []
    for tok in en_tokenizer.tokenizer(text):
        pieces.append(tok.text)
    return pieces

In [19]:
# Source field: English prompts, lower-cased, wrapped in <sos>/<eos>,
# batched as [batch size, seq len].
SRC = Field(tokenize = tokenize_en, 
            init_token = '<sos>', 
            eos_token = '<eos>', 
            lower = True, 
            batch_first = True)

# Target field: Python code; case is preserved (lower = False) because
# identifiers and keywords are case-sensitive.
TRG = Field(tokenize = tokenize_py, 
            init_token = '<sos>', 
            eos_token = '<eos>', 
            lower = False, 
            batch_first = True)

In [27]:
# Show one target snippet next to the tokens produced by tokenize_py.
print(f'Target Sentence :\n{full_data.iloc[1, 1]}')
print(f'Tokens : {tokenize_py(full_data.iloc[1, 1])}')

Target Sentence :
def add_two_numbers(num1, num2):
    sum = num1 + num2
    return sum
Tokens : ['def', 'add_two_numbers(num1', ',', 'num2', ')', ':', '\n    ', 'sum', '=', 'num1', '+', 'num2', '\n    ', 'return', 'sum']


In [28]:
# (attribute name, Field) pairs, in the order the values are passed to Example.fromlist.
fields = [('src', SRC), ('trg', TRG)]

In [29]:
# One torchtext Example per (prompt, code) row of the cleaned frame.
examples = [
    Example.fromlist([question, code], fields)
    for question, code in zip(full_data.src, full_data.trg)
]

In [30]:
# Wrap all examples in a single torchtext Dataset so it can be split below.
complete_dataset = Dataset(examples, fields)

In [31]:
# torchtext reads a three-element `split_ratio` as [train, test, valid] while
# RETURNING the datasets as (train, valid, test).  The fractions are therefore
# named explicitly; the values reproduce the split this notebook has always
# produced (80% train, 15% validation, 5% test - see the sizes printed below).
# NOTE(review): if 5% validation / 15% test was intended, swap the two values.
TRAIN_FRAC, TEST_FRAC, VALID_FRAC = 0.80, 0.05, 0.15

# `random.seed` returns None, so passing its result as `random_state` only
# worked through its side effect.  Seed first, then hand over the actual state.
random.seed(SEED)
train_data, valid_data, test_data = complete_dataset.split(
    split_ratio=[TRAIN_FRAC, TEST_FRAC, VALID_FRAC],
    random_state=random.getstate())

In [32]:
# Sizes of the train / validation / test splits.
len(train_data), len(valid_data), len(test_data)

(2828, 530, 177)

In [33]:
# Tokenised 'src' and 'trg' of the first training example.
vars(train_data.examples[0])

{'src': ['replacing', 'a', 'string', 'with', 'another', 'string'],
 'trg': ['word',
  '=',
  '"',
  'Hello',
  'World',
  '"',
  '\n',
  'replace',
  '=',
  '"',
  'Bye',
  '"',
  '\n',
  'input',
  '=',
  '"',
  'Hello',
  '"',
  '\n',
  'after_replace',
  '=',
  'word.replace(input',
  ',',
  'replace',
  ')',
  '\n',
  'print(f"String',
  'ater',
  'replacement',
  ':',
  '{',
  'after_replace',
  '}',
  '"',
  ')']}

In [34]:
# Vocabularies are built from the training split only.
# Source tokens must occur at least twice to get their own entry;
# every target (code) token seen in training is kept.
SRC.build_vocab(train_data, min_freq = 2)
TRG.build_vocab(train_data, min_freq = 1)

In [35]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Uncomment to force CPU execution:
# device = 'cpu'
device

device(type='cuda')

In [36]:
# Number of examples per batch for all three iterators.
BATCH_SIZE = 64

# One iterator per split; sort_key is the source length in tokens, which
# BucketIterator uses to group examples of similar length.  Batches are placed
# on `device`.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data), sort_key = lambda x:len(x.src), sort_within_batch = False,
     batch_size = BATCH_SIZE,
     device = device)

In [37]:
class Encoder(nn.Module):
    """Transformer encoder: token + learned positional embeddings followed by
    a stack of ``EncoderLayer`` blocks.

    Args:
        input_dim: size of the source vocabulary.
        hid_dim: embedding / hidden dimension.
        n_layers: number of ``EncoderLayer`` blocks.
        n_heads: attention heads per layer.
        pf_dim: inner dimension of the position-wise feed-forward layers.
        dropout: dropout probability.
        device: kept as ``self.device`` and forwarded to the layers for
            backward compatibility; the forward pass itself follows the
            device of its input.
        max_length: number of positions in the positional embedding table
            (the longest source sequence that can be encoded).
    """

    def __init__(self, 
                 input_dim, 
                 hid_dim, 
                 n_layers, 
                 n_heads, 
                 pf_dim,
                 dropout, 
                 device,
                 max_length = 200):
        super().__init__()

        self.device = device
        
        self.tok_embedding = nn.Embedding(input_dim, hid_dim)
        self.pos_embedding = nn.Embedding(max_length, hid_dim)
        
        self.layers = nn.ModuleList([EncoderLayer(hid_dim, 
                                                  n_heads, 
                                                  pf_dim,
                                                  dropout, 
                                                  device) 
                                     for _ in range(n_layers)])
        
        self.dropout = nn.Dropout(dropout)
        
        # A plain Python float instead of a tensor pinned to `device`: a tensor
        # attribute is not moved by `model.to(...)`, which caused device
        # mismatches when the module ran on a different device than it was
        # constructed with.
        self.scale = math.sqrt(hid_dim)
        
    def forward(self, src, src_mask):
        """Encode a batch of source token ids.

        Args:
            src: [batch size, src len] token ids.
            src_mask: [batch size, 1, 1, src len], False/0 at padded positions.

        Returns:
            Tensor of shape [batch size, src len, hid dim].
        """
        batch_size = src.shape[0]
        src_len = src.shape[1]
        
        # Position ids are created directly on the input's device.
        pos = torch.arange(0, src_len, device = src.device).unsqueeze(0).repeat(batch_size, 1)
        
        #pos = [batch size, src len]
        
        # Scale the token embeddings by sqrt(hid_dim) before adding positions.
        src = self.dropout((self.tok_embedding(src) * self.scale) + self.pos_embedding(pos))
        
        #src = [batch size, src len, hid dim]
        
        for layer in self.layers:
            src = layer(src, src_mask)
            
        #src = [batch size, src len, hid dim]
            
        return src

In [38]:
class EncoderLayer(nn.Module):
    """One encoder block: self-attention and a position-wise feed-forward
    network, each followed by dropout, a residual connection and layer norm."""

    def __init__(self, 
                 hid_dim, 
                 n_heads, 
                 pf_dim,  
                 dropout, 
                 device):
        super().__init__()
        
        # Sub-modules are created in the same order as before so that
        # parameter ordering and random initialisation stay unchanged.
        self.self_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.ff_layer_norm = nn.LayerNorm(hid_dim)
        self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, pf_dim, dropout)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, src, src_mask):
        """Apply the block.

        Args:
            src: [batch size, src len, hid dim].
            src_mask: [batch size, 1, 1, src len].

        Returns:
            Tensor of shape [batch size, src len, hid dim].
        """
        # Self-attention sub-layer (attention weights are not needed here).
        attended, _ = self.self_attention(src, src, src, src_mask)
        src = self.self_attn_layer_norm(src + self.dropout(attended))
        
        # Position-wise feed-forward sub-layer.
        transformed = self.positionwise_feedforward(src)
        return self.ff_layer_norm(src + self.dropout(transformed))

In [39]:
class MultiHeadAttentionLayer(nn.Module):
    """Multi-head scaled dot-product attention.

    Args:
        hid_dim: model dimension; must be divisible by ``n_heads``.
        n_heads: number of attention heads.
        dropout: dropout probability applied to the attention weights.
        device: accepted for backward compatibility; no tensor is pinned to it
            any more.
    """

    def __init__(self, hid_dim, n_heads, dropout, device):
        super().__init__()
        
        assert hid_dim % n_heads == 0
        
        self.hid_dim = hid_dim
        self.n_heads = n_heads
        self.head_dim = hid_dim // n_heads
        
        self.fc_q = nn.Linear(hid_dim, hid_dim)
        self.fc_k = nn.Linear(hid_dim, hid_dim)
        self.fc_v = nn.Linear(hid_dim, hid_dim)
        
        self.fc_o = nn.Linear(hid_dim, hid_dim)
        
        self.dropout = nn.Dropout(dropout)
        
        # Plain float rather than a tensor created on `device`: such a tensor
        # is not moved by `model.to(...)` and broke the forward pass whenever
        # the inputs lived on a different device.
        self.scale = math.sqrt(self.head_dim)
        
    def forward(self, query, key, value, mask = None):
        """Attend from ``query`` positions over ``key``/``value`` positions.

        Args:
            query: [batch size, query len, hid dim].
            key: [batch size, key len, hid dim].
            value: [batch size, value len, hid dim].
            mask: optional tensor broadcastable to
                [batch size, n heads, query len, key len]; positions where it
                equals 0 receive (almost) no attention.

        Returns:
            (x, attention) where x is [batch size, query len, hid dim] and
            attention is [batch size, n heads, query len, key len].
        """
        batch_size = query.shape[0]
                
        Q = self.fc_q(query)
        K = self.fc_k(key)
        V = self.fc_v(value)
        
        #Q = [batch size, query len, hid dim]
        #K = [batch size, key len, hid dim]
        #V = [batch size, value len, hid dim]
                
        # Split the hidden dimension into heads and move heads next to batch.
        Q = Q.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        K = K.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        V = V.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        
        #Q = [batch size, n heads, query len, head dim]
        #K = [batch size, n heads, key len, head dim]
        #V = [batch size, n heads, value len, head dim]
                
        energy = torch.matmul(Q, K.permute(0, 1, 3, 2)) / self.scale
        
        #energy = [batch size, n heads, query len, key len]
        
        if mask is not None:
            # A large negative score drives the softmax weight to ~0.
            energy = energy.masked_fill(mask == 0, -1e10)
        
        attention = torch.softmax(energy, dim = -1)
                
        #attention = [batch size, n heads, query len, key len]
                
        x = torch.matmul(self.dropout(attention), V)
        
        #x = [batch size, n heads, query len, head dim]
        
        # Merge the heads back into a single hidden dimension.
        x = x.permute(0, 2, 1, 3).contiguous()
        
        #x = [batch size, query len, n heads, head dim]
        
        x = x.view(batch_size, -1, self.hid_dim)
        
        #x = [batch size, query len, hid dim]
        
        x = self.fc_o(x)
        
        #x = [batch size, query len, hid dim]
        
        return x, attention

In [40]:
class PositionwiseFeedforwardLayer(nn.Module):
    """Two-layer feed-forward network applied independently at every position:
    hid_dim -> pf_dim (ReLU, dropout) -> hid_dim."""

    def __init__(self, hid_dim, pf_dim, dropout):
        super().__init__()
        
        # Expansion and projection layers (created in this order).
        self.fc_1 = nn.Linear(hid_dim, pf_dim)
        self.fc_2 = nn.Linear(pf_dim, hid_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x):
        """Map [batch size, seq len, hid dim] to a tensor of the same shape."""
        # [batch size, seq len, pf dim]
        expanded = torch.relu(self.fc_1(x))
        expanded = self.dropout(expanded)
        
        # Back to [batch size, seq len, hid dim].
        return self.fc_2(expanded)

In [41]:
class Decoder(nn.Module):
    """Transformer decoder: token + learned positional embeddings, a stack of
    ``DecoderLayer`` blocks and a final projection onto the target vocabulary.

    Args:
        output_dim: size of the target vocabulary.
        hid_dim: embedding / hidden dimension.
        n_layers: number of ``DecoderLayer`` blocks.
        n_heads: attention heads per layer.
        pf_dim: inner dimension of the position-wise feed-forward layers.
        dropout: dropout probability.
        device: kept as ``self.device`` and forwarded to the layers for
            backward compatibility; the forward pass itself follows the
            device of its input.
        max_length: number of positions in the positional embedding table
            (the longest target sequence that can be decoded).
    """

    def __init__(self, 
                 output_dim, 
                 hid_dim, 
                 n_layers, 
                 n_heads, 
                 pf_dim, 
                 dropout, 
                 device,
                 max_length = 250):
        super().__init__()
        
        self.device = device
        
        self.tok_embedding = nn.Embedding(output_dim, hid_dim)
        self.pos_embedding = nn.Embedding(max_length, hid_dim)
        
        self.layers = nn.ModuleList([DecoderLayer(hid_dim, 
                                                  n_heads, 
                                                  pf_dim, 
                                                  dropout, 
                                                  device)
                                     for _ in range(n_layers)])
        
        self.fc_out = nn.Linear(hid_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
        # Plain float rather than a tensor pinned to `device` (a tensor
        # attribute would not follow `model.to(...)`).
        self.scale = math.sqrt(hid_dim)
        
    def forward(self, trg, enc_src, trg_mask, src_mask):
        """Decode a batch of target prefixes against the encoded source.

        Args:
            trg: [batch size, trg len] token ids.
            enc_src: [batch size, src len, hid dim] encoder output.
            trg_mask: [batch size, 1, trg len, trg len].
            src_mask: [batch size, 1, 1, src len].

        Returns:
            (output, attention): output is [batch size, trg len, output dim];
            attention is the encoder-attention of the last layer,
            [batch size, n heads, trg len, src len], or None when the decoder
            has no layers.
        """
        batch_size = trg.shape[0]
        trg_len = trg.shape[1]
        
        # Position ids are created directly on the input's device.
        pos = torch.arange(0, trg_len, device = trg.device).unsqueeze(0).repeat(batch_size, 1)
                            
        #pos = [batch size, trg len]
            
        trg = self.dropout((self.tok_embedding(trg) * self.scale) + self.pos_embedding(pos))
                
        #trg = [batch size, trg len, hid dim]
        
        # Defined up front so the return below is valid even with zero layers.
        attention = None
        for layer in self.layers:
            trg, attention = layer(trg, enc_src, trg_mask, src_mask)
        
        #trg = [batch size, trg len, hid dim]
        #attention = [batch size, n heads, trg len, src len]
        
        output = self.fc_out(trg)
        
        #output = [batch size, trg len, output dim]
            
        return output, attention

In [42]:
class DecoderLayer(nn.Module):
    """One decoder block: masked self-attention, attention over the encoder
    output and a position-wise feed-forward network, each followed by dropout,
    a residual connection and layer norm."""

    def __init__(self, 
                 hid_dim, 
                 n_heads, 
                 pf_dim, 
                 dropout, 
                 device):
        super().__init__()
        
        # Sub-modules are created in the same order as before so that
        # parameter ordering and random initialisation stay unchanged.
        self.self_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.enc_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.ff_layer_norm = nn.LayerNorm(hid_dim)
        self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.encoder_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, pf_dim, dropout)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, trg, enc_src, trg_mask, src_mask):
        """Apply the block.

        Args:
            trg: [batch size, trg len, hid dim].
            enc_src: [batch size, src len, hid dim].
            trg_mask: [batch size, 1, trg len, trg len].
            src_mask: [batch size, 1, 1, src len].

        Returns:
            (trg, attention): trg is [batch size, trg len, hid dim]; attention
            is the encoder-attention map,
            [batch size, n heads, trg len, src len].
        """
        # 1) Self-attention over the target prefix.
        self_attended, _ = self.self_attention(trg, trg, trg, trg_mask)
        trg = self.self_attn_layer_norm(trg + self.dropout(self_attended))
            
        # 2) Attention over the encoder output: query = trg, key = value = enc_src.
        cross_attended, attention = self.encoder_attention(trg, enc_src, enc_src, src_mask)
        trg = self.enc_attn_layer_norm(trg + self.dropout(cross_attended))
        
        # 3) Position-wise feed-forward.
        transformed = self.positionwise_feedforward(trg)
        trg = self.ff_layer_norm(trg + self.dropout(transformed))
        
        return trg, attention

In [43]:
class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that builds the padding / causal masks and runs
    the encoder followed by the decoder.

    Args:
        encoder: module called as ``encoder(src, src_mask)``.
        decoder: module called as ``decoder(trg, enc_src, trg_mask, src_mask)``.
        src_pad_idx: index of the padding token in the source vocabulary.
        trg_pad_idx: index of the padding token in the target vocabulary.
        device: kept as ``self.device`` for backward compatibility; masks are
            created on the device of the tensors they are derived from.
    """

    def __init__(self, 
                 encoder, 
                 decoder, 
                 src_pad_idx, 
                 trg_pad_idx, 
                 device):
        super().__init__()
        
        self.encoder = encoder
        self.decoder = decoder
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device
        
    def make_src_mask(self, src):
        """Return a boolean mask [batch size, 1, 1, src len] that is True at
        non-padding positions of ``src`` ([batch size, src len])."""
        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)

        #src_mask = [batch size, 1, 1, src len]

        return src_mask
    
    def make_trg_mask(self, trg):
        """Return a boolean mask [batch size, 1, trg len, trg len] combining
        the padding mask of ``trg`` ([batch size, trg len]) with a
        lower-triangular mask, so position i only sees positions <= i."""
        trg_pad_mask = (trg != self.trg_pad_idx).unsqueeze(1).unsqueeze(2)
        
        #trg_pad_mask = [batch size, 1, 1, trg len]
        
        trg_len = trg.shape[1]
        
        # Built on trg's own device (previously self.device), so the `&` below
        # cannot hit a device mismatch if the model was moved after
        # construction.
        trg_sub_mask = torch.tril(torch.ones((trg_len, trg_len), device = trg.device)).bool()
        
        #trg_sub_mask = [trg len, trg len]
            
        trg_mask = trg_pad_mask & trg_sub_mask
        
        #trg_mask = [batch size, 1, trg len, trg len]
        
        return trg_mask

    def forward(self, src, trg):
        """Run the full model.

        Args:
            src: [batch size, src len] source token ids.
            trg: [batch size, trg len] target token ids.

        Returns:
            (output, attention) exactly as returned by the decoder:
            output is [batch size, trg len, output dim] and attention is
            [batch size, n heads, trg len, src len].
        """
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        
        #src_mask = [batch size, 1, 1, src len]
        #trg_mask = [batch size, 1, trg len, trg len]
        
        enc_src = self.encoder(src, src_mask)
        
        #enc_src = [batch size, src len, hid dim]
                
        output, attention = self.decoder(trg, enc_src, trg_mask, src_mask)
        
        return output, attention

In [57]:
# Model hyper-parameters.
INPUT_DIM = len(SRC.vocab)   # source vocabulary size
OUTPUT_DIM = len(TRG.vocab)  # target vocabulary size
HID_DIM = 256                # embedding / hidden dimension (must be divisible by the head counts)
ENC_LAYERS = 3
DEC_LAYERS = 3
ENC_HEADS = 8
DEC_HEADS = 8
ENC_PF_DIM = 512             # inner size of the encoder feed-forward layers
DEC_PF_DIM = 512             # inner size of the decoder feed-forward layers
ENC_DROPOUT = 0.1
DEC_DROPOUT = 0.1

# Encoder over the English prompts.
enc = Encoder(INPUT_DIM, 
              HID_DIM, 
              ENC_LAYERS, 
              ENC_HEADS, 
              ENC_PF_DIM, 
              ENC_DROPOUT, 
              device)

# Decoder over the Python code tokens.
dec = Decoder(OUTPUT_DIM, 
              HID_DIM, 
              DEC_LAYERS, 
              DEC_HEADS, 
              DEC_PF_DIM, 
              DEC_DROPOUT, 
              device)

In [58]:
# Padding indices are needed to build the attention masks (and, for the target,
# to ignore padding in the loss).
SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

# Assemble the full model and move its parameters to the selected device.
model = Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX, device).to(device)

In [59]:
def count_parameters(model):
    """Return the total number of elements in the parameters of ``model``
    that have ``requires_grad`` set."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the model size with thousands separators.
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 7,738,316 trainable parameters


In [60]:
def initialize_weights(m):
    """Xavier-uniform initialise ``m.weight`` in place when the module has a
    weight with more than one dimension; any other module is left untouched.
    Intended to be passed to ``model.apply``."""
    if not hasattr(m, 'weight'):
        return
    if m.weight.dim() <= 1:
        # 1-D weights (e.g. LayerNorm) keep their default initialisation.
        return
    nn.init.xavier_uniform_(m.weight.data)

In [61]:
# Re-initialise every multi-dimensional weight in the model; the trailing ';'
# suppresses the module repr in the cell output.
model.apply(initialize_weights);

In [62]:
# Fixed learning rate for Adam (no scheduler is used).
LEARNING_RATE = 0.0001

optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)

In [63]:
# Token-level cross-entropy; padded target positions do not contribute to the loss.
criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)

In [64]:
def train(model, iterator, optimizer, criterion, clip):
    """Run one training pass over ``iterator`` and return the mean batch loss.

    Args:
        model: called as ``model(src, trg)`` and returning ``(output, _)`` with
            output of shape [batch size, trg len, output dim].
        iterator: sized iterable of batches exposing ``.src`` and ``.trg``.
        optimizer: optimizer over ``model.parameters()``.
        criterion: loss taking (flattened logits, flattened targets).
        clip: maximum gradient norm passed to ``clip_grad_norm_``.
    """
    model.train()
    
    running_loss = 0
    
    for batch in iterator:
        source, target = batch.src, batch.trg
        
        optimizer.zero_grad()
        
        # The model sees the target without its last token and must predict
        # the target shifted by one (i.e. without its first token).
        logits, _ = model(source, target[:, :-1])
        
        #logits = [batch size, trg len - 1, output dim]
        vocab_size = logits.shape[-1]
        
        flat_logits = logits.contiguous().view(-1, vocab_size)
        flat_target = target[:, 1:].contiguous().view(-1)
        
        #flat_logits = [batch size * (trg len - 1), output dim]
        #flat_target = [batch size * (trg len - 1)]
        
        loss = criterion(flat_logits, flat_target)
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        
        running_loss += loss.item()
        
    return running_loss / len(iterator)

In [65]:
def evaluate(model, iterator, criterion):
    """Return the mean batch loss of ``model`` over ``iterator`` without
    updating any parameter (eval mode, gradients disabled).

    Args:
        model: called as ``model(src, trg)`` and returning ``(output, _)`` with
            output of shape [batch size, trg len, output dim].
        iterator: sized iterable of batches exposing ``.src`` and ``.trg``.
        criterion: loss taking (flattened logits, flattened targets).
    """
    model.eval()
    
    running_loss = 0
    
    with torch.no_grad():
        for batch in iterator:
            source, target = batch.src, batch.trg

            # Same input/label shift as in training.
            logits, _ = model(source, target[:, :-1])
            
            #logits = [batch size, trg len - 1, output dim]
            vocab_size = logits.shape[-1]
            
            flat_logits = logits.contiguous().view(-1, vocab_size)
            flat_target = target[:, 1:].contiguous().view(-1)
            
            running_loss += criterion(flat_logits, flat_target).item()
        
    return running_loss / len(iterator)

In [66]:
def epoch_time(start_time, end_time):
    """Split the interval between two timestamps (in seconds) into whole
    minutes and the remaining whole seconds, returned as ``(mins, secs)``."""
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - (whole_minutes * 60))
    return whole_minutes, leftover_seconds

In [67]:
# Training schedule: number of passes over the training set and the
# gradient-norm clipping threshold handed to train().
N_EPOCHS = 100
CLIP = 1

# Track the best validation loss so that only the best checkpoint is kept,
# and record both loss curves for later inspection.
best_valid_loss = float('inf')
train_losses = []
valid_losses = []
for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()
    
    # Wall-clock time of this epoch (training + validation).
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    # Overwrite the checkpoint whenever the validation loss improves.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'Capstone_Exp2.pt')
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)
    # Perplexity is reported as exp(loss).
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

Epoch: 01 | Time: 0m 4s
	Train Loss: 7.731 | Train PPL: 2277.700
	 Val. Loss: 6.955 |  Val. PPL: 1048.098
Epoch: 02 | Time: 0m 4s
	Train Loss: 6.420 | Train PPL: 613.977
	 Val. Loss: 5.816 |  Val. PPL: 335.659
Epoch: 03 | Time: 0m 4s
	Train Loss: 5.456 | Train PPL: 234.083
	 Val. Loss: 5.077 |  Val. PPL: 160.247
Epoch: 04 | Time: 0m 4s
	Train Loss: 4.812 | Train PPL: 122.963
	 Val. Loss: 4.546 |  Val. PPL:  94.269
Epoch: 05 | Time: 0m 4s
	Train Loss: 4.345 | Train PPL:  77.109
	 Val. Loss: 4.212 |  Val. PPL:  67.479
Epoch: 06 | Time: 0m 4s
	Train Loss: 4.008 | Train PPL:  55.013
	 Val. Loss: 3.979 |  Val. PPL:  53.469
Epoch: 07 | Time: 0m 4s
	Train Loss: 3.756 | Train PPL:  42.757
	 Val. Loss: 3.798 |  Val. PPL:  44.621
Epoch: 08 | Time: 0m 4s
	Train Loss: 3.560 | Train PPL:  35.162
	 Val. Loss: 3.663 |  Val. PPL:  38.963
Epoch: 09 | Time: 0m 4s
	Train Loss: 3.390 | Train PPL:  29.653
	 Val. Loss: 3.545 |  Val. PPL:  34.641
Epoch: 10 | Time: 0m 4s
	Train Loss: 3.245 | Train PPL:  25.66

In [68]:
# Restore the checkpoint with the best validation loss.  `map_location` remaps
# the stored tensors onto the active device, so a checkpoint written on a GPU
# can also be loaded on a CPU-only runtime instead of raising.
model.load_state_dict(torch.load('Capstone_Exp2.pt', map_location = device))

# Final, held-out evaluation.
test_loss = evaluate(model, test_iterator, criterion)

print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')

| Test Loss: 2.416 | Test PPL:  11.203 |


In [69]:
def translate_sentence(sentence, src_field, trg_field, model, device, max_len = 50):
    """Greedily decode a target token sequence for one source sentence.

    Args:
        sentence: raw string or a list of already-tokenised source tokens.
        src_field: source field providing ``tokenize``, ``init_token``,
            ``eos_token`` and ``vocab.stoi``.
        trg_field: target field providing ``init_token``, ``eos_token``,
            ``vocab.stoi`` and ``vocab.itos``.
        model: object exposing ``eval``, ``make_src_mask``, ``make_trg_mask``,
            ``encoder`` and ``decoder``.
        device: device the input tensors are created on.
        max_len: maximum number of tokens to generate.

    Returns:
        (tokens, attention): the generated tokens without the leading start
        token (the end token is included when it was produced), and the
        attention returned by the last decoder call, or None when
        ``max_len`` is 0.
    """
    model.eval()
        
    if isinstance(sentence, str):
        # Use the field's own tokenizer - the one the training data went
        # through - instead of loading a spaCy pipeline on every call.
        raw_tokens = src_field.tokenize(sentence)
    else:
        raw_tokens = sentence

    tokens = [src_field.init_token] + [token.lower() for token in raw_tokens] + [src_field.eos_token]
        
    src_indexes = [src_field.vocab.stoi[token] for token in tokens]

    src_tensor = torch.LongTensor(src_indexes).unsqueeze(0).to(device)
    
    src_mask = model.make_src_mask(src_tensor)

    eos_idx = trg_field.vocab.stoi[trg_field.eos_token]
    trg_indexes = [trg_field.vocab.stoi[trg_field.init_token]]

    # Defined up front so the return value is valid even when max_len == 0.
    attention = None

    with torch.no_grad():
        # The source is encoded once; only the decoder is re-run per step.
        enc_src = model.encoder(src_tensor, src_mask)

        for _ in range(max_len):

            trg_tensor = torch.LongTensor(trg_indexes).unsqueeze(0).to(device)

            trg_mask = model.make_trg_mask(trg_tensor)

            output, attention = model.decoder(trg_tensor, enc_src, trg_mask, src_mask)

            # Greedy choice: most likely token at the last position.
            pred_token = output.argmax(2)[:,-1].item()

            trg_indexes.append(pred_token)

            if pred_token == eos_idx:
                break
    
    trg_tokens = [trg_field.vocab.itos[i] for i in trg_indexes]
    
    return trg_tokens[1:], attention

In [70]:
def display_attention(sentence, translation, attention, n_heads = 8, n_rows = 8, n_cols = 1):
    """Plot one attention heat-map per head in an n_rows x n_cols grid.

    Args:
        sentence: list of source tokens (lower-cased for the x-axis labels).
        translation: list of generated target tokens (y-axis labels).
        attention: attention tensor; after ``squeeze(0)`` it is indexed by
            head - presumably [1, n heads, trg len, src len] as returned by
            translate_sentence (TODO confirm).
        n_heads: number of heads to draw.
        n_rows, n_cols: subplot grid; their product must equal ``n_heads``.
    """
    
    assert n_rows * n_cols == n_heads
    
    # NOTE(review): figsize is in inches - (200, 100) is an extremely large canvas.
    fig = plt.figure(figsize=(200,100))
    
    for i in range(n_heads):
        
        ax = fig.add_subplot(n_rows, n_cols, i+1)
        
        # Attention weights of head i as a NumPy array on the CPU.
        _attention = attention.squeeze(0)[i].cpu().detach().numpy()

        cax = ax.matshow(_attention, cmap='bone')

        ax.tick_params(labelsize=12)
        # The leading '' is an extra label placed before the first token on
        # each axis; the source axis is wrapped in <sos>/<eos>.
        ax.set_xticklabels(['']+['<sos>']+[t.lower() for t in sentence]+['<eos>'], 
                           rotation=45)
        ax.set_yticklabels(['']+translation)

        # One tick per token on both axes.
        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()

In [71]:
def show_results(infer_data, example_idx):
    """Translate example ``example_idx`` of ``infer_data`` with the global
    model and return ``(source, reference, prediction)``, each as one
    space-joined string of tokens."""
    example_fields = vars(infer_data.examples[example_idx])
    source_tokens = example_fields['src']
    reference_tokens = example_fields['trg']

    # The attention map is returned as well but not used here.
    predicted_tokens, attention = translate_sentence(source_tokens, SRC, TRG, model, device)

    joined_source = " ".join(source_tokens)
    joined_reference = " ".join(reference_tokens)
    joined_prediction = " ".join(predicted_tokens)
    return joined_source, joined_reference, joined_prediction

In [72]:
from tqdm.notebook import tqdm

In [73]:
# Collect one record per test example and build the frame once at the end:
# concatenating inside the loop copies the whole frame on every iteration
# (quadratic in the number of examples).
result_records = []
for i in tqdm(range(len(test_data)), total = len(test_data)):
    src, target, translation = show_results(test_data, i)
    result_records.append({'src': src, 'target': target, 'translation': translation})

# Explicit columns keep the schema stable even when there are no records.
results_df = pd.DataFrame(result_records, columns = ['src', 'target', 'translation'])

HBox(children=(FloatProgress(value=0.0, max=177.0), HTML(value='')))




In [74]:
# Give the rows a clean 0..n-1 index.
results_df.reset_index(drop = True, inplace = True)

In [75]:
# Source prompt, reference code and model prediction for every test example.
results_df

Unnamed: 0,src,target,translation
0,write a python function to get the surface_are...,"def rec_prism_surface_area(length , width , he...","def pyramid_surface_area(base_area , height ) ..."
1,write a python function to add elements of two...,"def add_two_lists(list1 , list2 ) : \n list...","def add_two_lists(list1 , list2 ) : \n list..."
2,write a lambda function to multiply two numbers,"multiply = lambda a , b : a*b",def multiply(numbers ) : \n total = 1 \n ...
3,write a python function to generate cube numbe...,def cube_numbers(n ) : \n for i in range(n...,def cube(num ) \n return num * num * num <...
4,define a custom exception class which takes a ...,class MyError(Exception ) : \n def _ _ ini...,class American(object ) : \n @staticmethod...
...,...,...,...
172,write a python function that returns the h...,"def calculate_hcf(x1 , x2 ) : \n if x1 = =...","def lcm(x , y ) : \n if x > y : \n g..."
173,write a program to compute /+/+/+ ... +n / n+ ...,n = int(raw_input ( ) ) \n sum=0.0 \n for i in...,"num = int(input(""Enter a number : "") ) \n sum ..."
174,write a python program which takes input a num...,"N = int(input(""Please enter a number "") ) \n f...","n = int(input(""Enter a number : "") ) \n n = in..."
175,write a python class that will initiate a numb...,"class Number : \n\t def _ _ init__(self , num ...",class CircleArea ( ) : \n def _ _ init__(s...


In [87]:
# Print a single test example in full: prompt, reference code, then prediction.
idx = 51
print(results_df.iloc[idx]['src'])
print("\n")
print(results_df.iloc[idx]['target'])
print("\n")
print(results_df.iloc[idx]['translation'])

write a function to print given interger to binary


def int_to_binary(num : int ) : 
     " " " 
     function to print number to binary 
     " " " 
     if isinstance(num , int ) : 
         print(f'The binary of { num } is { bin(num).replace("0b " , " " ) } ' ) 
     else : 
         raise ValueError('Invalid Input ' )


def convertToBinary(n ) : 
    if n > 1 : 
        convertToBinary(n//2 ) 
    print(n % 2,end = '' ) <eos>


In [None]:
# Persist the predictions for offline inspection (the index is not written).
results_df.to_csv('Test.csv', index = False)