In [1]:
from Preprocessing import construct_pairs
from Preprocessing import prepare_data
from Preprocessing import load_senses
from Preprocessing import random_batch

from Evaluator import Evaluator

from Model import Encoder_rnn
from Model import Global_attn
from Model import Attn_decoder_rnn
from Model import train

from Utils import time_since
from Utils import load_json

import time
import torch
import numpy as np
import torch.nn as nn
from torch import optim

In [2]:
MAX_LENGTH = 600
USE_CUDA = True

# Loading data

In [3]:
file_path_train = 'data_train_con_poda.json'
pairs_train = load_json(file_path_train)
pairs_train.shape

(8642, 4)

In [4]:
file_path_test = 'data_test_con_poda.json'
pairs_test = load_json(file_path_test)
pairs_test.shape

(4252, 4)

In [5]:
sentence, sense = prepare_data(pairs_train, pairs_test, MAX_LENGTH)

Reading pairs 8642
Filtered to 8642 pairs
Indexing words...
Indexed 17051 words in input language, 17844 words in output


In [6]:
answers_senses_test = load_senses('corpus/test/EnglishLS.test.key')

# Training

In [7]:
# Configure models
hidden_size = 500
n_layers = 2
dropout = 0.1
batch_size = 50

# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 0.5
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_epochs = 5000
epoch = 0
print_every = 10
validate_loss_every = 50
validate_acc_every = 2 * validate_loss_every
epoch_finish_teacher_forcing = 3500
tf_ratio_arr = np.linspace(1.0, 0.0, epoch_finish_teacher_forcing)
tf_limit = 0.5

# Initialize models
encoder = Encoder_rnn(sentence.n_words, hidden_size, n_layers, dropout=dropout, USE_CUDA=USE_CUDA)
decoder = Attn_decoder_rnn('general', hidden_size, sense.n_words, 2 * n_layers, dropout=dropout, USE_CUDA=USE_CUDA)


# Initialize optimizers and criterion
encoder_optimizer = optim.Adam(encoder.parameters())
decoder_optimizer = optim.Adam(decoder.parameters())
criterion = nn.NLLLoss()

# Move models to GPU
if USE_CUDA:
    encoder = encoder.cuda()
    decoder = decoder.cuda()
    
# Keep track of time elapsed and running averages
start = time.time()
plot_losses = []
validation_losses = []
validation_acc = []

print_loss_total = 0 # Reset every print_every
plot_loss_total = 0 # Reset every plot_every

In [8]:
while epoch < n_epochs:
    epoch += 1
    
    # Get training data for this cycle
    input_batches, input_lengths, target_batches, target_lengths, _ = random_batch(sentence, sense, batch_size, pairs_train, USE_CUDA)
    
    if epoch < epoch_finish_teacher_forcing:
        tf_ratio = tf_ratio_arr[epoch]
    else:
        tf_ratio = 0
    
    if tf_ratio < tf_limit:
        use_tf = False
    else:
        use_tf = True
    
    # Run the train function
    loss = train(sentence, sense,
        input_batches, input_lengths, target_batches, target_lengths, batch_size,
        encoder, decoder,
        encoder_optimizer, decoder_optimizer, criterion, use_tf, MAX_LENGTH, clip, USE_CUDA=USE_CUDA)
    
    torch.cuda.empty_cache()

    # Keep track of loss
    print_loss_total += loss
    plot_loss_total += loss

    if epoch == 0: continue
    
    if epoch % print_every == 0:
        print_loss_avg = print_loss_total / print_every
        print_loss_total = 0
        print_summary = '%s (%d %d%%) %.4f' % (time_since(start, epoch / n_epochs), epoch, epoch / n_epochs * 100, print_loss_avg)
        plot_losses.append(loss)
        
        print(f'{time_since(start, epoch / n_epochs)} ({epoch} {epoch / n_epochs * 100:.2f}%) train_loss: {print_loss_avg:.4f}', end=' ')

    if epoch % validate_loss_every == 0:
        input_batches, input_lengths, target_batches, target_lengths, id_pairs = random_batch(sentence, sense,
                batch_size, pairs_test, USE_CUDA)
    
        eval_loss = train(sentence, sense, input_batches, input_lengths, target_batches, target_lengths,batch_size,\
                     encoder, decoder, encoder_optimizer, decoder_optimizer, criterion,\
                          0, MAX_LENGTH, train=False, USE_CUDA=USE_CUDA)
        validation_losses.append(eval_loss)
        
        print(f'- val_loss: {eval_loss:.4f}', end=' ')
        
    if epoch % validate_acc_every == 0:
        evaluator = Evaluator(encoder, decoder, sentence, sense, MAX_LENGTH, USE_CUDA)
        eval_acc = evaluator.evaluate_acc(id_pairs, pairs_test, answers_senses_test)
        
        validation_acc.append(eval_acc)
        
        print(f'- val_acc: {eval_acc:.4f}', end='')
        
    if epoch % print_every == 0:
        print()

6m 28s (- 3231m 29s) (10 0.20%) train_loss: 4.6649 
11m 30s (- 2865m 33s) (20 0.40%) train_loss: 2.8472 
17m 44s (- 2940m 20s) (30 0.60%) train_loss: 2.4973 
23m 5s (- 2864m 11s) (40 0.80%) train_loss: 2.5172 
28m 14s (- 2795m 13s) (50 1.00%) train_loss: 2.3393 - val_loss: 3.2474 
32m 38s (- 2687m 40s) (60 1.20%) train_loss: 2.5140 
37m 26s (- 2636m 32s) (70 1.40%) train_loss: 2.4002 
41m 47s (- 2569m 41s) (80 1.60%) train_loss: 2.4846 
46m 17s (- 2525m 29s) (90 1.80%) train_loss: 2.3986 
51m 9s (- 2506m 51s) (100 2.00%) train_loss: 2.3130 - val_loss: 2.6925 

AttributeError: 'Lang' object has no attribute 'index2word'