In [1]:
# Notebook setup: imports, logging configuration, device selection and config loading.

# from model import *
from train import train
from evaluate import evaluate
import torch

import logging
# Configure the root logger: INFO and above, timestamped "time : level : message" format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s : %(levelname)s : %(message)s')
# Named logger for this notebook (not referenced again in the cells visible here).
logger = logging.getLogger('PythonCodeGen')
from utils import load_config


# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = 'cpu'
# Load the run configuration; later cells index the result like a nested dict
# (e.g. config['train_iterator']['args']['batch_size']).
config = load_config('pycodechatbot.yml')
import torch.nn as nn

from data import GetData

# Report the selected device so the run environment is visible in the output.
print(f'Currently running on {device}')

Currently running on cuda


In [2]:
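# Config -- commented-out reference copy of pycodechatbot.yml, the file read by load_config().
# To regenerate the file, uncomment everything below; the %%writefile magic must be the first line of the cell.

# %%writefile pycodechatbot.yml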
# name: PyCodeGen
# save_dir: saved/
# seed: 1
# target_device: 0

# modelpath:
#     type: PathModel
#     args: savedmodel.h5    
      

# train_iterator:
#     type: Train_Iterator
#     args:
#         batch_size: 10
#         num_workers: 4
#         shuffle: True


# criterion: cross_entropy_loss

# optimizer:
#     type: SGD
#     args:
#         lr: 0.01
#         momentum: 0.95
#         weight_decay: 0.000003


# data:
#     file_name: 'data\english_python_data.txt'

# epochs: 10

In [3]:
# Hyper-parameters and paths taken from the loaded configuration.
# The right-hand side is evaluated left to right, i.e. in the same order as
# three separate assignments would be.
BATCH_SIZE, FILEPATH, LEARNING_RATE = (
    config['train_iterator']['args']['batch_size'],
    config['data']['file_name'],
    config['optimizer']['args']['lr'],
)

In [5]:
# Build the data pipeline from the configured file. load_iterator() returns four
# objects that are unpacked here: STAT and CODE (both expose `.vocab` and
# `.pad_token` in later cells) plus the training and validation iterators.
# Note that no test iterator is produced by this call.
STAT, CODE, train_iterator, valid_iterator = GetData(FILEPATH).load_iterator(batch_size=BATCH_SIZE)

In [6]:
# Pull a single batch to sanity-check the iterator; as the last expression of
# the cell, the batch's repr is displayed as the cell output.
next(iter(train_iterator))


[torchtext.legacy.data.batch.Batch of size 2]
	[.code]:[torch.cuda.LongTensor of size 2x19 (GPU 0)]
	[.statement]:[torch.cuda.LongTensor of size 2x475 (GPU 0)]

In [7]:
from model import Encoder, Decoder, Seq2Seq

# Sizes of the statement (source) and code (target) vocabularies.
INPUT_DIM, OUTPUT_DIM = len(STAT.vocab), len(CODE.vocab)

# Architecture hyper-parameters; the encoder and decoder use the same values.
HID_DIM = 256
ENC_LAYERS = DEC_LAYERS = 3
ENC_HEADS = DEC_HEADS = 8
ENC_PF_DIM = DEC_PF_DIM = 512
ENC_DROPOUT = DEC_DROPOUT = 0.1

# Arguments are passed positionally, in the order the model module expects them.
enc = Encoder(INPUT_DIM, HID_DIM, ENC_LAYERS, ENC_HEADS, ENC_PF_DIM, ENC_DROPOUT, device)
dec = Decoder(OUTPUT_DIM, HID_DIM, DEC_LAYERS, DEC_HEADS, DEC_PF_DIM, DEC_DROPOUT, device)

In [8]:
# Look up the integer index of the padding token in each vocabulary
# (source first, then target).
SRC_PAD_IDX, TRG_PAD_IDX = (
    field.vocab.stoi[field.pad_token] for field in (STAT, CODE)
)

# Assemble the full model, then move it onto the selected device.
model = Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX, device)
model = model.to(device)

In [9]:
# Index of the padding token in the source vocabulary (the same lookup that
# produced SRC_PAD_IDX in the previous cell).
print(STAT.vocab.stoi[STAT.pad_token])

# The saved output of this cell also reports the model size, but no code here
# produced that line. Compute it explicitly so a fresh Restart & Run All
# reproduces the recorded output.
n_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'The model has {n_trainable_params:,} trainable parameters')

1


The model has 10,910,721 trainable parameters


In [10]:
def initialize_weights(m):
    """Apply Xavier-uniform initialisation to a module's weight matrix, if it has one.

    Intended to be passed to ``nn.Module.apply`` so that it is called once for
    every sub-module of a model.

    Args:
        m: A module-like object. It is left untouched when it has no ``weight``
            attribute, when ``weight`` is ``None`` (e.g. a normalisation layer
            built without affine parameters), or when the weight has fewer
            than two dimensions.

    Returns:
        None. ``m.weight`` is modified in place.
    """
    weight = getattr(m, 'weight', None)
    # `weight` may be registered but set to None; guard before calling .dim().
    if weight is None or weight.dim() <= 1:
        return
    # nn.init functions already run without gradient tracking, so the parameter
    # can be passed directly instead of going through `.data`.
    nn.init.xavier_uniform_(weight)

In [11]:
# Run initialize_weights over the model via .apply() (assuming Seq2Seq is an
# nn.Module, this visits every sub-module as well as the model itself).
# As the cell's last expression, the returned model's repr is displayed below.
model.apply(initialize_weights)

Seq2Seq(
  (encoder): Encoder(
    (tok_embd): Embedding(6478, 256)
    (pos_embd): Embedding(10000, 256)
    (layers): ModuleList(
      (0): EncoderLayer(
        (self_attn_layer_norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (self_ff_layer_norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (self_attention): MultiHeadAttention(
          (fc_q): Linear(in_features=256, out_features=256, bias=True)
          (fc_k): Linear(in_features=256, out_features=256, bias=True)
          (fc_v): Linear(in_features=256, out_features=256, bias=True)
          (fc_o): Linear(in_features=256, out_features=256, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (positionwise_feedforward): PositionwiseFeedforwardLayer(
          (fc1): Linear(in_features=256, out_features=512, bias=True)
          (fc2): Linear(in_features=512, out_features=256, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (dropou

In [12]:
# Loss: cross-entropy that skips target positions equal to the padding index.
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

# Optimiser: Adam over all model parameters, with the learning rate read from the config.
# NOTE(review): the commented-out reference config describes an SGD optimizer
# (lr / momentum / weight_decay), while only its `lr` is used here with Adam --
# confirm that this learning rate is intended for Adam.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [13]:
import gc
# del variables
# Force a full garbage-collection pass before training; the number shown as the
# cell output is gc.collect()'s return value (unreachable objects found).
gc.collect()

75

In [14]:
# Re-check which device was selected in the first cell before starting training.
print(device)

cuda


In [23]:
import math
import time

from train import train
from evaluate import evaluate
from utils import epoch_time

# Training schedule. CLIP is handed straight to train(); presumably it is the
# gradient-clipping threshold -- confirm in train.py.
N_EPOCHS = 10
CLIP = 1

# Lowest validation loss seen so far; starts at +inf so the first epoch always checkpoints.
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    # Time one full training pass plus one validation pass.
    start_time = time.time()
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iterator, criterion)
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    # Keep only the weights of the best-performing epoch on the validation set.
    if best_valid_loss > valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut6-model.pt')

    # Per-epoch report: elapsed time, losses and the corresponding perplexities (exp of the loss).
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

 13%|█▎        | 285/2148 [00:13<01:24, 22.06it/s]

In [None]:
# Restore the best checkpoint written by the training loop. map_location makes
# the load work on whichever device this session selected, even if the
# checkpoint was saved from a different one (e.g. saved on GPU, reloaded on CPU).
model.load_state_dict(torch.load('tut6-model.pt', map_location=device))

# GetData(...).load_iterator() only returns training and validation iterators,
# so no `test_iterator` exists in this notebook and referencing it raised a
# NameError. Evaluate the restored checkpoint on the validation iterator
# instead. The variable keeps its original name so later cells that read
# `test_loss` continue to work.
test_loss = evaluate(model, valid_iterator, criterion)

# Labelled as validation metrics because that is the split actually evaluated.
print(f'| Val. Loss: {test_loss:.3f} | Val. PPL: {math.exp(test_loss):7.3f} |')