In [None]:
from __future__ import unicode_literals, print_function, division
from io import open
from os import system
from dataloader import *
from VAE import *
from scores import *

import unicodedata
import string
import re
import random
import time
import math
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
plt.switch_backend('agg')

In [None]:
# Select the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Bare expression so the chosen device is shown as the cell output.
device

# Prepare data

In [None]:
# Load the training and test vocabularies with `load_data`
# (star-imported above; presumably defined in `dataloader` -- confirm).
# NOTE(review): later cells index every entry at positions 0..3, so each entry
# is assumed to hold several tense forms of one word -- confirm against the loader.
train_vocab = load_data('./data/train.txt')
test_vocab = load_data('./data/test.txt')

## Get different tense pairs for (unconditional) VAE training

In [None]:
def get_tense_paris(train_vocab, source_index, target_index):
    """Build (source, target) tuples by picking two positions from every entry.

    Args:
        train_vocab: iterable of indexable entries (e.g. lists of word forms).
        source_index: position of the form used as the input side of a pair.
        target_index: position of the form used as the target side of a pair.

    Returns:
        A list of ``(entry[source_index], entry[target_index])`` tuples, in the
        same order as ``train_vocab``.
    """
    return [(entry[source_index], entry[target_index]) for entry in train_vocab]

### Simple Present -> Third Person

In [None]:
# Pair position 0 with position 1 of every training entry
# (simple present -> third person, per the heading of this section).
train_st_tp  = get_tense_paris(train_vocab, 0, 1)

### Simple Present -> Present Progressive

In [None]:
# Pair position 0 with position 2 of every training entry
# (simple present -> present progressive, per the heading of this section).
train_st_pp  = get_tense_paris(train_vocab, 0, 2)

### Simple Present -> Past

In [None]:
# Pair position 0 with position 3 of every training entry
# (simple present -> past, per the heading of this section).
train_st_past  = get_tense_paris(train_vocab, 0, 3)

# Train VAE

In [None]:
# Token ids: seqFromPair maps 'a'..'z' to 1..26, id 0 is the start-of-sequence
# marker and the last id (27) is the end-of-sequence marker.
vocab_size = 28 # Total number of distinct token ids (26 letters + SOS + EOS)
# Start-of-sequence token id.
SOS_token = 0
# End-of-sequence token id; appended to every target before computing the loss.
EOS_token = vocab_size-1

## Setting hyperparameters

In [None]:
#----------Hyper Parameters----------#
# Size of the initial hidden state built in train()/val(); also passed to the VAE constructor.
hidden_size = 256
# Probability of using teacher forcing in each training iteration (1.0 = always).
teacher_forcing_ratio = 1.0
# NOTE(review): empty_input_ratio and KLD_weight are not referenced by any cell shown
# in this notebook; they may be unused or read elsewhere -- confirm.
empty_input_ratio = 0.1
KLD_weight = 0.0
# Learning rate handed to the SGD optimizer.
lr = 0.05

In [None]:
def seqFromPair(pair):
    """Encode both words of a pair as lists of letter indices.

    Each character is mapped to ``ord(char) - ord('a') + 1``, so 'a' becomes 1,
    'b' becomes 2, and so on up to 'z' -> 26.

    Args:
        pair: indexable holding the source word at position 0 and the target
            word at position 1.

    Returns:
        A tuple ``(input_seq, target_seq)`` of two freshly created lists of ints.
    """
    # Subtracting this offset is the same as "ord(ch) - ord('a') + 1".
    offset = ord('a') - 1

    def encode(word):
        return [ord(ch) - offset for ch in word]

    source_word, target_word = pair[0], pair[1]
    return encode(source_word), encode(target_word)

In [None]:
def train(vae_model, input_seq, target_seq, use_teacher_forcing, optimizer, criterion):
    """Run one optimisation step on a single (input, target) pair.

    Args:
        vae_model: model called as
            ``vae_model(input_seq, hidden, use_teacher_forcing, target)`` and
            returning ``(result, mu, logvar)``.
        input_seq: list of token ids for the source word.
        target_seq: list of token ids for the target word, without EOS.
            The list is left untouched by this function.
        use_teacher_forcing: when true, ``target_seq`` is also handed to the
            model; otherwise ``None`` is passed in its place.
        optimizer: optimizer whose gradients are cleared and stepped here.
        criterion: callable ``criterion(hat_y, y, mu, logvar)`` returning a
            scalar loss tensor.

    Returns:
        The loss of this step as a Python float.
    """
    optimizer.zero_grad()

    # Check device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize hidden feature
    hidden = torch.zeros(1, 1, hidden_size, device=device)

    # Run model; the ground truth is only exposed to it under teacher forcing.
    decoder_target = target_seq if use_teacher_forcing else None
    result, mu, logvar = vae_model(input_seq, hidden, use_teacher_forcing, decoder_target)

    # Ground truth should end with EOS. Build a copy instead of appending to the
    # caller's list, so a pair that is trained on more than once does not
    # accumulate extra EOS tokens.
    target_with_eos = list(target_seq) + [EOS_token]

    # Calculate loss
    # First, trim both sequences to the length of the shorter one.
    min_len = min(len(target_with_eos), len(result))

    # hat_y is passed as raw model output (no one-hot encoding of the target needed).
    hat_y = result[:min_len]
    y = torch.tensor(target_with_eos[:min_len], device=device)

    loss = criterion(hat_y, y, mu, logvar)

    loss.backward()
    optimizer.step()

    return loss.item()

In [None]:
def trainIter(vae_model, data_pairs, n_iters, print_every=1000, learning_rate=0.01,
              teacher_forcing_ratio=1.0, optimizer=None, criterion=VAE_Loss):
    """Train ``vae_model`` for ``n_iters`` single-pair steps and collect the losses.

    Args:
        vae_model: model to train; switched to training mode before the loop.
        data_pairs: sequence of (source word, target word) pairs to sample from.
        n_iters: number of training steps (one randomly drawn pair per step).
        print_every: print the current loss every this many steps.
        learning_rate: learning rate of the default SGD optimizer; only used
            when ``optimizer`` is None.
        teacher_forcing_ratio: per-step probability of using teacher forcing.
        optimizer: optimizer to use; an SGD optimizer is created when None.
        criterion: loss callable forwarded to ``train``.

    Returns:
        A list with the loss of every step, in order.
    """
    # Draw and encode every training sample up front (with replacement).
    sampled_pairs = []
    for _ in range(n_iters):
        sampled_pairs.append(seqFromPair(random.choice(data_pairs)))

    # Fall back to plain SGD when the caller did not supply an optimizer.
    if optimizer is None:
        optimizer = optim.SGD(vae_model.parameters(), lr=learning_rate)

    vae_model.train()

    losses = []
    for step, (src_seq, tgt_seq) in enumerate(sampled_pairs, start=1):
        # Decide per step whether the decoder is fed the ground truth.
        forced = random.random() < teacher_forcing_ratio

        step_loss = train(vae_model, src_seq, tgt_seq, forced, optimizer, criterion)
        losses.append(step_loss)

        if step % print_every == 0:
            print('Iter %d: loss = %.4f' % (step, step_loss))

    return losses

In [None]:
# Build the model and move it to the selected device.
# NOTE(review): VAE comes from a star import; the arguments presumably mean
# (input size, hidden size, output size, teacher forcing ratio) -- confirm against its definition.
my_vae = VAE(vocab_size, hidden_size, vocab_size, teacher_forcing_ratio).to(device)

In [None]:
# Plain SGD over all model parameters, using the learning rate from the hyperparameter cell.
optimizer = optim.SGD(my_vae.parameters(), lr=lr)

## Train with Simple Present -> Present Progressive

In [None]:
# Train on the simple present -> present progressive pairs and keep the per-step losses.
# Because an optimizer is passed explicitly, trainIter does not use `learning_rate`;
# the effective learning rate is the one the optimizer was created with.
loss_list = trainIter(my_vae, train_st_pp, n_iters=100000, print_every=500, learning_rate=lr, \
      teacher_forcing_ratio=teacher_forcing_ratio, optimizer= optimizer, criterion = VAE_Loss)

In [None]:
# Show only the most recent losses; displaying every per-step value would flood the cell output.
loss_list[-10:]

# Evaluation

In [None]:
def val(vae_model, data_pairs, criterion = VAE_Loss):
    """Evaluate the model on every pair, print the predictions and return the losses.

    For each pair the source word is encoded, run through the model without
    teacher forcing, and compared with the EOS-terminated target over the
    length of the shorter of the two sequences.

    Args:
        vae_model: model called as ``vae_model(input_seq, hidden)`` and
            returning ``(result, mu, logvar)``; switched to eval mode here.
        data_pairs: iterable of (source word, target word) pairs.
        criterion: callable ``criterion(hat_y, y, mu, logvar)`` returning a
            scalar loss tensor.

    Returns:
        A list with one float loss per pair, in the order of ``data_pairs``.
    """
    loss_list = []

    # Check device once; it does not change between pairs.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    vae_model.eval()
    # Evaluation needs no gradients, so skip building the autograd graph.
    with torch.no_grad():
        for data_pair in data_pairs:
            # Separate pair for input
            input_seq, target_seq = seqFromPair(data_pair)

            # Initialize hidden feature
            hidden = torch.zeros(1, 1, hidden_size, device=device)

            result, mu, logvar = vae_model(input_seq, hidden)

            # Ground truth should end with EOS (target_seq is a fresh list
            # from seqFromPair, so appending does not affect the caller's data).
            target_seq.append(EOS_token)

            # Calculate loss
            # First, trim both sequences to the length of the shorter one.
            min_len = min(len(target_seq),len(result))

            # hat_y is passed as raw model output (no one-hot encoding of the target needed).
            hat_y = result[:min_len]
            y = torch.tensor(target_seq[:min_len], device=device)

            loss = criterion(hat_y, y, mu, logvar)
            loss_list.append(loss.item())

            # Decode the highest-scoring id of every step back to a character
            # (id k -> chr(k + ord('a') - 1), the inverse of seqFromPair).
            # Ids outside 1..26 decode to non-letter characters
            # (e.g. id 0 -> '`'; with vocab_size = 28 the EOS id 27 -> '{').
            pred_seq = ''
            for output in hat_y:
                _, c = output.topk(1)
                pred_seq += chr(c.item()+ord('a')-1)
            print('-----------------')
            print('loss = ', loss)
            print('pred_seq = ', pred_seq)
            print('target_seq = ', data_pair[1][:min_len])

    return loss_list

In [None]:
# Evaluate on the same pairs the model was trained on, so this reports
# training-set fit rather than held-out performance.
val(my_vae, train_st_pp, criterion = VAE_Loss)