In [1]:
import random
import time
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

In [2]:
import import_ipynb
import feature_generation
import model
import helper

importing Jupyter notebook from feature_generation.ipynb
importing Jupyter notebook from clean.ipynb
importing Jupyter notebook from load.ipynb
Size of text: 94403
Size of title: 94403
Size of data variable - X: 94403
Size of target variable - Y: 94403
importing Jupyter notebook from helper.ipynb
importing Jupyter notebook from model.ipynb


In [3]:
MAX_LENGTH = helper.TEXT_MAX_LENGTH
ENCODER_MODEL_WEIGHTS = 'enc_model.pth'
DECODER_MODEL_WEIGHTS = 'dec_model.pth'

In [4]:
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=model.device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[helper.SOS_token]], device=model.device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == helper.EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length


In [5]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [model.tensorsFromPair(random.choice(pairs),input_features,output_features)
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
#             print('%s (%d %d%%) %.4f' % (helper.timeSince(start, iter / n_iters),
#                                          iter, iter / n_iters * 100, print_loss_avg))
            print("Iteration %d , Average Loss %.4f",iter,print_loss_avg)
    
        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

#     helper.showPlot(plot_losses)

In [6]:
pairs = feature_generation.text_summary_pairs
input_features = feature_generation.input_features
output_features = feature_generation.output_features

In [7]:
hidden_size = 300
encoder = model.EncoderRNN(input_features.n_words, hidden_size).to(model.device)
attn_decoder = model.AttnDecoderRNN(hidden_size, output_features.n_words, dropout_p=0.1).to(model.device)

trainIters(encoder, attn_decoder, 50, print_every=5, plot_every=5)

Iteration %d , Average Loss %.4f 5 10.241721767849393
Iteration %d , Average Loss %.4f 10 8.866782636455461
Iteration %d , Average Loss %.4f 15 8.691952611529638
Iteration %d , Average Loss %.4f 20 6.870512890285914
Iteration %d , Average Loss %.4f 25 5.378276605757455
Iteration %d , Average Loss %.4f 30 5.224963353474935
Iteration %d , Average Loss %.4f 35 6.730844328138564
Iteration %d , Average Loss %.4f 40 2.5969589551289873
Iteration %d , Average Loss %.4f 45 6.051167648488826
Iteration %d , Average Loss %.4f 50 5.3041010663966945


In [8]:
torch.save(encoder.state_dict(), ENCODER_MODEL_WEIGHTS)
torch.save(attn_decoder.state_dict(), DECODER_MODEL_WEIGHTS)