In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
from os import system
from dataloader import *
from VAE import *
from scores import *

import unicodedata
import string
import re
import random
import time
import math
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
plt.switch_backend('agg')

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

# Prepare data

In [3]:
train_vocab = load_data('./data/train.txt')
test_vocab = load_data('./data/test.txt')

## Get different tense pairs for (unconditional) VAE training

In [4]:
def get_tense_paris(train_vocab, source_index, target_index):
    pairs = []

    for vocabs in train_vocab:
        pairs.append((vocabs[source_index],vocabs[target_index]))
        
    return pairs

### Simple Present -> Third Person

In [5]:
train_st_tp  = get_tense_paris(train_vocab, 0, 1)

### Simple Present -> Present Progressive

In [6]:
train_st_pp  = get_tense_paris(train_vocab, 0, 2)

### Simple Present -> Past

In [7]:
train_st_past  = get_tense_paris(train_vocab, 0, 3)

# Train VAE

In [8]:
vocab_size = 28 #The number of vocabulary
SOS_token = 0
EOS_token = vocab_size-1

In [9]:
#----------Hyper Parameters----------#
hidden_size = 256
teacher_forcing_ratio = 1.0
empty_input_ratio = 0.1
KLD_weight = 0.0
lr = 0.05

In [10]:
def seqFromPair(pair):
    ord_a = ord('a')
    input_seq = [ord(c) - ord_a + 1 for c in pair[0]]
    target_seq = [ord(c) - ord_a + 1 for c in pair[1]]
    
    return input_seq, target_seq

In [11]:
def make_onehot(idx,num_classes=vocab_size):
    idx = torch.LongTensor(idx)
    return torch.zeros(len(idx), num_classes).scatter_(1, idx.unsqueeze(1), 1.)

In [12]:
def train(vae_model, input_seq, target_seq, use_teacher_forcing, optimizer, criterion):
    optimizer.zero_grad()
    
    # Initialize hidden feature
    hidden = torch.zeros(1, 1, hidden_size, device=device)
        
    # Run model
    if use_teacher_forcing:
        result, mu, logvar = vae_model(input_seq, hidden, use_teacher_forcing, target_seq)
    else:
        result, mu, logvar = vae_model(input_seq, hidden, use_teacher_forcing, None)
            
            
    # Ground truth should have EOS in the end
    target_seq.append(EOS_token)
        
    # Calculate loss
    # First, we should strim the sequences by the length of smaller one
    min_len = min(len(target_seq),len(result))
        
    # hat_y need not to do one-hot encoding
    hat_y = result[:min_len]
    y = torch.tensor(target_seq[:min_len], device=device)
        
    loss = criterion(hat_y, y, mu, logvar)
        
    loss.backward()
    optimizer.step()
    
    return loss.item()

In [13]:
def trainIter(vae_model, data_pairs, n_iters, print_every=1000, learning_rate=0.01, teacher_forcing_ratio = 1.0,\
         optimizer = None, criterion = VAE_Loss):
    loss_list = []
    
    # Randomly generate training pairs from data
    training_pairs = [seqFromPair(random.choice(data_pairs))
                      for i in range(n_iters)]    
    
    # Check device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Check optimizer; default: SGD
    if optimizer is None:
        optimizer = optim.SGD(vae_model.parameters(), lr=learning_rate)
    
    for i in range(n_iters):        
        use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
        # Seperate pair for input
        input_seq, target_seq = training_pairs[i] 
        
        loss = train(vae_model, input_seq, target_seq, use_teacher_forcing, optimizer, criterion)
    
        if (i+1) % print_every == 0:
            print('Iter %d: loss = %.4f' % (i+1, loss))
    
    return loss_list

## Setting hyperparameters

In [14]:
my_vae = VAE(vocab_size, hidden_size, vocab_size, teacher_forcing_ratio).to(device)

In [15]:
optimizer = optim.SGD(my_vae.parameters(), lr=lr)

## Train with Simple Present -> Present Progressive

In [16]:
loss_list = trainIter(my_vae, train_st_pp, n_iters=10000, print_every=500, learning_rate=lr, \
      teacher_forcing_ratio=teacher_forcing_ratio, optimizer= optimizer, criterion = VAE_Loss)

Iter 0: loss = 4.2593
Iter 500: loss = 2.2739
Iter 1000: loss = 1.7394
Iter 1500: loss = 1.7066
Iter 2000: loss = 1.8517
Iter 2500: loss = 1.7109
Iter 3000: loss = 1.5565
Iter 3500: loss = 1.8507
Iter 4000: loss = 1.6925
Iter 4500: loss = 1.8890
Iter 5000: loss = 1.4966
Iter 5500: loss = 1.9068
Iter 6000: loss = 1.8241
Iter 6500: loss = 1.4384
Iter 7000: loss = 1.7977
Iter 7500: loss = 1.3636
Iter 8000: loss = 1.3629
Iter 8500: loss = 1.2114
Iter 9000: loss = 1.5430
Iter 9500: loss = 1.0907


In [17]:
loss_list

[]

In [18]:
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)
output

tensor(2.7124, grad_fn=<NllLossBackward>)