<a href="https://colab.research.google.com/github/kumarchandan/END/blob/main/week14/English-to-Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Perparation

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

import torchtext
from torchtext import data
from torchtext.legacy.data import Field, BucketIterator, TabularDataset

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import spacy
import gensim
import numpy as np

import random
import math
import time
import os
import tokenize
import re
import pandas as pd
import csv
from tqdm import tqdm_notebook

In [2]:
torchtext.__version__

'0.9.0'

In [3]:
SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Reading the text file

In [5]:
corpus_name = 'english-python'
corpus = os.path.join('/content/drive/MyDrive/1-Projects/END/week14/', corpus_name)

def printLines(file,):
  count = 0
  with open(file, 'rb') as f:
    for line in f:
      print(line)
      count += 1
      if count == 10: break

printLines(os.path.join(corpus, 'data_refined_v1.txt'))

b'# write a python program to add two numbers\n'
b'num1 = 1.5\n'
b'num2 = 6.3\n'
b'sum = num1 + num2\n'
b"print(f'Sum: {sum}')\n"
b'\n'
b'\n'
b'# write a python function to add two user provided numbers and return the sum\n'
b'def add_two_numbers(num1, num2):\n'
b'    sum = num1 + num2\n'


In [6]:
corpus_name = 'english-python'
corpus = os.path.join('/content/drive/MyDrive/1-Projects/END/week14/', corpus_name)

def extractQnAPair(file):
  qna_pair_list = []
  qna_pair = { 'src': '', 'trg': ''}
  count = 0
  regex = r"#"
  str = ''
  with open(file, 'r') as f:
    for line in f:
      line = line.lower()
      line = re.sub(r'"#[0123456789]','#', line)
      line = line.replace("\n\n","\n")
      line = line.replace("\n\n\n","\n\n")
      ques_found = re.search(regex, line)
      if ques_found:
        if (qna_pair['src'] and qna_pair['trg']): qna_pair_list.append(qna_pair)
        qna_pair = { 'src': '', 'trg': ''}
        qna_pair['id'] = count
        qna_pair['src'] = line
        qna_pair['trg'] = ''
        count += 1
      else:
        qna_pair['trg'] += line
  return qna_pair_list

qna_pair_list = extractQnAPair(os.path.join(corpus, 'data_refined_v1.txt'))

In [7]:
len(qna_pair_list), qna_pair_list[0]

(4445,
 {'id': 0,
  'src': '# write a python program to add two numbers\n',
  'trg': "num1 = 1.5\nnum2 = 6.3\nsum = num1 + num2\nprint(f'Sum: {sum}')\n\n\n"})

Create a dataframe

In [8]:
df = pd.DataFrame(qna_pair_list)
df.head()

Unnamed: 0,src,trg,id
0,# write a python program to add two numbers\n,num1 = 1.5\nnum2 = 6.3\nsum = num1 + num2\npri...,0
1,# write a python function to add two user prov...,"def add_two_numbers(num1, num2):\n sum = nu...",1
2,# write a program to find and print the larges...,num1 = 10\nnum2 = 12\nnum3 = 14\nif (num1 >= n...,2
3,# write a program to find and print the smalle...,num1 = 10\nnum2 = 12\nnum3 = 14\nif (num1 <= n...,3
4,# Write a python function to merge two given l...,"def merge_lists(l1, l2):\n return l1 + l2\n...",4


In [9]:
# print(qna_pair_list[0].keys()) # dict_keys(['question_id', 'question', 'answer'])
keys = qna_pair_list[0].keys()

shouldExecute = True
if shouldExecute:
  with open('python_qna.csv', 'w', newline='') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(qna_pair_list)
else:
  print('did not execute')

In [10]:
!python -m spacy download en

[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')
[38;5;2m✔ Linking successful[0m
/usr/local/lib/python3.7/dist-packages/en_core_web_sm -->
/usr/local/lib/python3.7/dist-packages/spacy/data/en
You can now load the model via spacy.load('en')


In [11]:
spacy_en = spacy.load('en')

In [12]:
def tokenize_en(text):
    """
    Tokenizes English text from a string into a list of strings
    """
    return [tok.text for tok in spacy_en.tokenizer(text)]

In [13]:
SRC = Field(tokenize = 'spacy', 
            init_token = '<sos>', 
            eos_token = '<eos>', 
            lower = True, 
            batch_first = True)

TRG = Field(tokenize = 'spacy', 
            init_token = '<sos>', 
            eos_token = '<eos>', 
            lower = True, 
            batch_first = True)

fields = [("src", SRC), ("trg", TRG)]

In [14]:
dataset = TabularDataset(
    path='python_qna.csv',
    format='csv',
    fields=fields,
    skip_header=True
)
(train_data, valid_data) = dataset.split(split_ratio=[0.85, 0.15])

In [15]:
vars(train_data[0])

{'src': ['#',
  'write',
  'a',
  'python',
  'program',
  'to',
  'print',
  'the',
  'character',
  'of',
  'an',
  'ascii',
  'value'],
 'trg': ['value',
  '=',
  '65',
  '\n',
  "print(f'the",
  'ascii',
  'value',
  '{',
  'value',
  '}',
  'is',
  'of',
  'the',
  'character',
  '{',
  'chr(value',
  ')',
  '}',
  "'",
  ')']}

To decide maxlength for encoder and decoder

In [16]:
max_len_src = 0
max_len_trg = 0

for data in train_data:
  if len(data.src) > max_len_src:
    max_len_src = len(data.src)
  if len(data.trg) > max_len_trg:
    max_len_trg = len(data.trg)

max_len_src, max_len_trg

(84, 488)

In [17]:
SRC.build_vocab(train_data, min_freq = 2)
TRG.build_vocab(train_data, min_freq = 1)

In [18]:
print('Source vocab size: ', len(SRC.vocab))
print('Target vocab size: ', len(TRG.vocab))

Source vocab size:  1510
Target vocab size:  10034


In [19]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [20]:
BATCH_SIZE = 8

train_iterator, valid_iterator = BucketIterator.splits(
    (train_data, valid_data), 
     batch_size = BATCH_SIZE,
     sort=False,
     device = device)

In [21]:
vars(train_iterator.dataset.examples[0])

{'src': ['#',
  'write',
  'a',
  'python',
  'program',
  'to',
  'print',
  'the',
  'character',
  'of',
  'an',
  'ascii',
  'value'],
 'trg': ['value',
  '=',
  '65',
  '\n',
  "print(f'the",
  'ascii',
  'value',
  '{',
  'value',
  '}',
  'is',
  'of',
  'the',
  'character',
  '{',
  'chr(value',
  ')',
  '}',
  "'",
  ')']}

Tokenize

In [22]:
def tokenize_code(text):
    """
    Replace and Tokenize
    """
    text = str(text).replace('\n', '\t\t')
    return [tok.text for tok in spacy_en.tokenizer(text)]

https://medium.com/@rohit_agrawal/using-fine-tuned-gensim-word2vec-embeddings-with-torchtext-and-pytorch-17eea2883cd

In [23]:
# WORD2VEC
W2V_SIZE = 256
W2V_WINDOW = 3
# W2V_EPOCH = 100
W2V_MIN_COUNT = 2

target = []
for code in df['trg'].values:
  code_token = tokenize_code(code)
  target.append(code_token)

In [24]:
target[:1]

[['num1',
  '=',
  '1.5',
  '\t\t',
  'num2',
  '=',
  '6.3',
  '\t\t',
  'sum',
  '=',
  'num1',
  '+',
  'num2',
  '\t\t',
  "print(f'Sum",
  ':',
  '{',
  'sum',
  '}',
  "'",
  ')',
  '\t\t\t\t\t\t']]

In [25]:
w2v_model = gensim.models.Word2Vec(target, size=W2V_SIZE, window=W2V_WINDOW, min_count=W2V_MIN_COUNT)

In [26]:
word2vec_vectors = []

for token, idx in tqdm_notebook(TRG.vocab.stoi.items()):
  if token in w2v_model.wv.vocab.keys():
    word2vec_vectors.append(torch.FloatTensor(w2v_model[token]))
  else:
    word2vec_vectors.append(torch.zeros(W2V_SIZE))

TRG.vocab.set_vectors(TRG.vocab.stoi, word2vec_vectors, W2V_SIZE)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  This is separate from the ipykernel package so we can avoid doing imports until


HBox(children=(FloatProgress(value=0.0, max=10034.0), HTML(value='')))




  """
  """


In [27]:
w2v_model.save('code_embeddings.txt')

![](https://raw.githubusercontent.com/bentrevett/pytorch-seq2seq/9479fcb532214ad26fd4bda9fcf081a05e1aaf4e/assets/transformer-encoder.png)

In [28]:
class Encoder(nn.Module):
    def __init__(self, 
                 input_dim, 
                 hid_dim, 
                 n_layers, 
                 n_heads, 
                 pf_dim,
                 dropout, 
                 device,
                 max_length = 250):
        super().__init__()

        self.device = device
        
        self.tok_embedding = nn.Embedding(input_dim, hid_dim)
        self.pos_embedding = nn.Embedding(max_length, hid_dim)
        
        self.layers = nn.ModuleList([EncoderLayer(hid_dim, 
                                                  n_heads, 
                                                  pf_dim,
                                                  dropout, 
                                                  device) 
                                     for _ in range(n_layers)])
        
        self.dropout = nn.Dropout(dropout)
        
        self.scale = torch.sqrt(torch.FloatTensor([hid_dim])).to(device)
        
    def forward(self, src, src_mask):
        
        #src = [batch size, src len]
        #src_mask = [batch size, 1, 1, src len]
        
        batch_size = src.shape[0]
        src_len = src.shape[1]

        pos = torch.arange(0, src_len).unsqueeze(0).repeat(batch_size, 1).to(self.device)
        
        #pos = [batch size, src len]
        src = self.dropout((self.tok_embedding(src) * self.scale) + self.pos_embedding(pos))
        
        #src = [batch size, src len, hid dim]
        
        for layer in self.layers:
            src = layer(src, src_mask)
            
        #src = [batch size, src len, hid dim]
            
        return src

In [29]:
class EncoderLayer(nn.Module):
    def __init__(self, 
                 hid_dim, 
                 n_heads, 
                 pf_dim,  
                 dropout, 
                 device):
        super().__init__()
        
        self.self_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.ff_layer_norm = nn.LayerNorm(hid_dim)
        self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, 
                                                                     pf_dim, 
                                                                     dropout)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, src, src_mask):
        
        #src = [batch size, src len, hid dim]
        #src_mask = [batch size, 1, 1, src len] 
                
        #self attention
        _src, _ = self.self_attention(src, src, src, src_mask)
        
        #dropout, residual connection and layer norm
        src = self.self_attn_layer_norm(src + self.dropout(_src))
        
        #src = [batch size, src len, hid dim]
        
        #positionwise feedforward
        _src = self.positionwise_feedforward(src)
        
        #dropout, residual and layer norm
        src = self.ff_layer_norm(src + self.dropout(_src))
        
        #src = [batch size, src len, hid dim]
        
        return src

In [30]:
class PositionwiseFeedforwardLayer(nn.Module):
    def __init__(self, hid_dim, pf_dim, dropout):
        super().__init__()
        
        self.fc_1 = nn.Linear(hid_dim, pf_dim)
        self.fc_2 = nn.Linear(pf_dim, hid_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x):
        
        #x = [batch size, seq len, hid dim]
        
        x = self.dropout(torch.relu(self.fc_1(x)))
        
        #x = [batch size, seq len, pf dim]
        
        x = self.fc_2(x)
        
        #x = [batch size, seq len, hid dim]
        
        return x

In [31]:
class MultiHeadAttentionLayer(nn.Module):
    def __init__(self, hid_dim, n_heads, dropout, device):
        super().__init__()
        
        assert hid_dim % n_heads == 0
        
        self.hid_dim = hid_dim
        self.n_heads = n_heads
        self.head_dim = hid_dim // n_heads
        
        self.fc_q = nn.Linear(hid_dim, hid_dim)
        self.fc_k = nn.Linear(hid_dim, hid_dim)
        self.fc_v = nn.Linear(hid_dim, hid_dim)
        
        self.fc_o = nn.Linear(hid_dim, hid_dim)
        
        self.dropout = nn.Dropout(dropout)
        
        self.scale = torch.sqrt(torch.FloatTensor([self.head_dim])).to(device)
        
    def forward(self, query, key, value, mask = None):
        
        batch_size = query.shape[0]
        
        #query = [batch size, query len, hid dim]
        #key = [batch size, key len, hid dim]
        #value = [batch size, value len, hid dim]
                
        Q = self.fc_q(query)
        K = self.fc_k(key)
        V = self.fc_v(value)
        
        #Q = [batch size, query len, hid dim]
        #K = [batch size, key len, hid dim]
        #V = [batch size, value len, hid dim]
                
        Q = Q.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        K = K.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        V = V.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        
        #Q = [batch size, n heads, query len, head dim]
        #K = [batch size, n heads, key len, head dim]
        #V = [batch size, n heads, value len, head dim]
                
        energy = torch.matmul(Q, K.permute(0, 1, 3, 2)) / self.scale
        
        #energy = [batch size, n heads, query len, key len]
        
        if mask is not None:
            energy = energy.masked_fill(mask == 0, -1e10)
        
        attention = torch.softmax(energy, dim = -1)
                
        #attention = [batch size, n heads, query len, key len]
                
        x = torch.matmul(self.dropout(attention), V)
        
        #x = [batch size, n heads, query len, head dim]
        
        x = x.permute(0, 2, 1, 3).contiguous()
        
        #x = [batch size, query len, n heads, head dim]
        
        x = x.view(batch_size, -1, self.hid_dim)
        
        #x = [batch size, query len, hid dim]
        
        x = self.fc_o(x)
        
        #x = [batch size, query len, hid dim]
        
        return x, attention

## Decoder

![](https://raw.githubusercontent.com/bentrevett/pytorch-seq2seq/9479fcb532214ad26fd4bda9fcf081a05e1aaf4e/assets/transformer-decoder.png)

In [32]:
class Decoder(nn.Module):
    def __init__(self, 
                 output_dim, 
                 hid_dim, 
                 n_layers, 
                 n_heads, 
                 pf_dim, 
                 dropout, 
                 device,
                 pre_trained_emb,
                 max_length = 500):
        super().__init__()
        
        self.device = device
        
        # self.pre_trained_emb = pre_trained_emb
        self.tok_embedding = nn.Embedding.from_pretrained(pre_trained_emb)
        self.pos_embedding = nn.Embedding(max_length, hid_dim)
        
        self.layers = nn.ModuleList([DecoderLayer(hid_dim, 
                                                  n_heads, 
                                                  pf_dim, 
                                                  dropout, 
                                                  device)
                                     for _ in range(n_layers)])
        
        self.fc_out = nn.Linear(hid_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
        self.scale = torch.sqrt(torch.FloatTensor([hid_dim])).to(device)
        
    def forward(self, trg, enc_src, trg_mask, src_mask):
        
        #trg = [batch size, trg len]
        #enc_src = [batch size, src len, hid dim]
        #trg_mask = [batch size, 1, trg len, trg len]
        #src_mask = [batch size, 1, 1, src len]
                
        batch_size = trg.shape[0]
        trg_len = trg.shape[1]
        
        pos = torch.arange(0, trg_len).unsqueeze(0).repeat(batch_size, 1).to(self.device)
                            
        #pos = [batch size, trg len]

        trg = self.dropout((self.tok_embedding(trg) * self.scale) + self.pos_embedding(pos))
                
        #trg = [batch size, trg len, hid dim]
        
        for layer in self.layers:
            trg, attention = layer(trg, enc_src, trg_mask, src_mask)
        
        #trg = [batch size, trg len, hid dim]
        #attention = [batch size, n heads, trg len, src len]
        
        output = self.fc_out(trg)
        
        #output = [batch size, trg len, output dim]
            
        return output, attention

In [33]:
class DecoderLayer(nn.Module):
    def __init__(self, 
                 hid_dim, 
                 n_heads, 
                 pf_dim, 
                 dropout, 
                 device):
        super().__init__()
        
        self.self_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.enc_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.ff_layer_norm = nn.LayerNorm(hid_dim)
        self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.encoder_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, 
                                                                     pf_dim, 
                                                                     dropout)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, trg, enc_src, trg_mask, src_mask):
        
        #trg = [batch size, trg len, hid dim]
        #enc_src = [batch size, src len, hid dim]
        #trg_mask = [batch size, 1, trg len, trg len]
        #src_mask = [batch size, 1, 1, src len]
        
        #self attention
        _trg, _ = self.self_attention(trg, trg, trg, trg_mask)
        
        #dropout, residual connection and layer norm
        trg = self.self_attn_layer_norm(trg + self.dropout(_trg))
            
        #trg = [batch size, trg len, hid dim]
            
        #encoder attention
        _trg, attention = self.encoder_attention(trg, enc_src, enc_src, src_mask)
        # query, key, value
        
        #dropout, residual connection and layer norm
        trg = self.enc_attn_layer_norm(trg + self.dropout(_trg))
                    
        #trg = [batch size, trg len, hid dim]
        
        #positionwise feedforward
        _trg = self.positionwise_feedforward(trg)
        
        #dropout, residual and layer norm
        trg = self.ff_layer_norm(trg + self.dropout(_trg))
        
        #trg = [batch size, trg len, hid dim]
        #attention = [batch size, n heads, trg len, src len]
        
        return trg, attention

In [34]:
class Seq2Seq(nn.Module):
    def __init__(self, 
                 encoder, 
                 decoder, 
                 src_pad_idx, 
                 trg_pad_idx, 
                 device):
        super().__init__()
        
        self.encoder = encoder
        self.decoder = decoder
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device
        
    def make_src_mask(self, src):
        
        #src = [batch size, src len]
        
        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)

        #src_mask = [batch size, 1, 1, src len]

        return src_mask
    
    def make_trg_mask(self, trg):
        
        #trg = [batch size, trg len]
        
        trg_pad_mask = (trg != self.trg_pad_idx).unsqueeze(1).unsqueeze(2)
        
        #trg_pad_mask = [batch size, 1, 1, trg len]
        
        trg_len = trg.shape[1]
        
        trg_sub_mask = torch.tril(torch.ones((trg_len, trg_len), device = self.device)).bool()
        
        #trg_sub_mask = [trg len, trg len]
            
        trg_mask = trg_pad_mask & trg_sub_mask
        
        #trg_mask = [batch size, 1, trg len, trg len]
        
        return trg_mask

    def forward(self, src, trg):
        
        #src = [batch size, src len]
        #trg = [batch size, trg len]
                
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        
        #src_mask = [batch size, 1, 1, src len]
        #trg_mask = [batch size, 1, trg len, trg len]
        
        enc_src = self.encoder(src, src_mask)
        
        #enc_src = [batch size, src len, hid dim]
                
        output, attention = self.decoder(trg, enc_src, trg_mask, src_mask)
        
        #output = [batch size, trg len, output dim]
        #attention = [batch size, n heads, trg len, src len]
        
        return output, attention

# Training

In [35]:
INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)
HID_DIM = 256
ENC_LAYERS = 3
DEC_LAYERS = 3
ENC_HEADS = 8
DEC_HEADS = 8
ENC_PF_DIM = 512
DEC_PF_DIM = 512
ENC_DROPOUT = 0.1
DEC_DROPOUT = 0.1

pre_trained_emb = torch.FloatTensor(TRG.vocab.vectors)

enc = Encoder(INPUT_DIM, 
              HID_DIM, 
              ENC_LAYERS, 
              ENC_HEADS, 
              ENC_PF_DIM, 
              ENC_DROPOUT, 
              device)

dec = Decoder(OUTPUT_DIM,
              HID_DIM,
              DEC_LAYERS,
              DEC_HEADS,
              DEC_PF_DIM,
              DEC_DROPOUT,
              device,
              pre_trained_emb)

In [36]:
SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

model = Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX, device).to(device)

In [37]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 7,110,962 trainable parameters


In [38]:
def initialize_weights(m):
    if hasattr(m, 'weight') and m.weight.dim() > 1:
        nn.init.xavier_uniform_(m.weight.data)

In [39]:
model.apply(initialize_weights);

In [40]:
LEARNING_RATE = 0.0005

optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)

In [41]:
criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)

In [42]:
def train(model, iterator, optimizer, criterion, clip):
    
    model.train()
    
    epoch_loss = 0
    
    for i, batch in enumerate(iterator):
        
        src = batch.src
        trg = batch.trg
        
        optimizer.zero_grad()
        
        output, _ = model(src, trg[:,:-1])
                
        #output = [batch size, trg len - 1, output dim]
        #trg = [batch size, trg len]
            
        output_dim = output.shape[-1]
            
        output = output.contiguous().view(-1, output_dim)
        trg = trg[:,1:].contiguous().view(-1)
                
        #output = [batch size * trg len - 1, output dim]
        #trg = [batch size * trg len - 1]
            
        loss = criterion(output, trg)
        
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [43]:
def evaluate(model, iterator, criterion):
    
    model.eval()
    
    epoch_loss = 0
    
    with torch.no_grad():
    
        for i, batch in enumerate(iterator):

            src = batch.src
            trg = batch.trg

            output, _ = model(src, trg[:,:-1])
            
            #output = [batch size, trg len - 1, output dim]
            #trg = [batch size, trg len]
            
            output_dim = output.shape[-1]
            
            output = output.contiguous().view(-1, output_dim)
            trg = trg[:,1:].contiguous().view(-1)
            
            #output = [batch size * trg len - 1, output dim]
            #trg = [batch size * trg len - 1]
            
            loss = criterion(output, trg)

            epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [44]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [45]:
N_EPOCHS = 50
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut6-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

Epoch: 01 | Time: 0m 17s
	Train Loss: 4.700 | Train PPL: 109.902
	 Val. Loss: 3.833 |  Val. PPL:  46.209
Epoch: 02 | Time: 0m 16s
	Train Loss: 3.564 | Train PPL:  35.295
	 Val. Loss: 3.344 |  Val. PPL:  28.342
Epoch: 03 | Time: 0m 16s
	Train Loss: 3.114 | Train PPL:  22.510
	 Val. Loss: 3.071 |  Val. PPL:  21.563
Epoch: 04 | Time: 0m 16s
	Train Loss: 2.767 | Train PPL:  15.903
	 Val. Loss: 2.854 |  Val. PPL:  17.363
Epoch: 05 | Time: 0m 16s
	Train Loss: 2.474 | Train PPL:  11.869
	 Val. Loss: 2.695 |  Val. PPL:  14.804
Epoch: 06 | Time: 0m 16s
	Train Loss: 2.226 | Train PPL:   9.260
	 Val. Loss: 2.584 |  Val. PPL:  13.252
Epoch: 07 | Time: 0m 16s
	Train Loss: 2.007 | Train PPL:   7.438
	 Val. Loss: 2.452 |  Val. PPL:  11.610
Epoch: 08 | Time: 0m 16s
	Train Loss: 1.822 | Train PPL:   6.187
	 Val. Loss: 2.388 |  Val. PPL:  10.897
Epoch: 09 | Time: 0m 16s
	Train Loss: 1.670 | Train PPL:   5.311
	 Val. Loss: 2.350 |  Val. PPL:  10.485
Epoch: 10 | Time: 0m 16s
	Train Loss: 1.540 | Train PPL

In [47]:
!cp tut6-model.pt '/content/drive/MyDrive/1-Projects/END/week14/capstone'

In [51]:
def generate_code(sentence, src_field, trg_field, model, device, max_len = 50):
    
    model.eval()
        
    if isinstance(sentence, str):
        nlp = spacy.load('en')
        tokens = [token.text.lower() for token in nlp(sentence)]
    else:
        tokens = [token.lower() for token in sentence]

    tokens = [src_field.init_token] + tokens + [src_field.eos_token]
        
    src_indexes = [src_field.vocab.stoi[token] for token in tokens]

    src_tensor = torch.LongTensor(src_indexes).unsqueeze(0).to(device)
    
    src_mask = model.make_src_mask(src_tensor)
    
    with torch.no_grad():
        enc_src = model.encoder(src_tensor, src_mask)

    trg_indexes = [trg_field.vocab.stoi[trg_field.init_token]]

    for i in range(max_len):

        trg_tensor = torch.LongTensor(trg_indexes).unsqueeze(0).to(device)

        trg_mask = model.make_trg_mask(trg_tensor)
        
        with torch.no_grad():
            output, attention = model.decoder(trg_tensor, enc_src, trg_mask, src_mask)
        
        pred_token = output.argmax(2)[:,-1].item()
        
        trg_indexes.append(pred_token)

        if pred_token == trg_field.vocab.stoi[trg_field.eos_token]:
            break
    
    trg_tokens = [trg_field.vocab.itos[i] for i in trg_indexes]
    
    return trg_tokens[1:], attention

In [59]:
input_text = 'program to sort a list of dictionaries by key'

print(input_text)

code_snippet, attention = generate_code(input_text, SRC, TRG, model, device)

for i in range(len(code_snippet)):
  print(end ="")
  print(code_snippet[i])

program to sort a list of dictionaries by key


test_list
=
[
"
gfg
"
,
"
  
"
,
"
is
"
,
"
best
"
,
"
for
"
]



print("the
original
list
is
:
"
+
str(test_list
)
)



k
=
2



res
=
list
(
filter(none
,
test_list))[-k
]



print("the
