In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
from os import system
from dataloader import *
from VAE import *
from scores import *

import unicodedata
import string
import re
import random
import time
import math
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
plt.switch_backend('agg')

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

# Prepare data

In [3]:
train_vocab = load_data('./data/train.txt')
test_vocab = load_data('./data/test.txt')

## Get different tense pairs for (unconditional) VAE training

In [4]:
def get_tense_paris(train_vocab, source_index, target_index):
    pairs = []

    for vocabs in train_vocab:
        pairs.append((vocabs[source_index],vocabs[target_index]))
        
    return pairs

### Simple Present -> Third Person

In [5]:
train_st_tp  = get_tense_paris(train_vocab, 0, 1)

### Simple Present -> Present Progressive

In [6]:
train_st_pp  = get_tense_paris(train_vocab, 0, 2)

### Simple Present -> Past

In [7]:
train_st_past  = get_tense_paris(train_vocab, 0, 3)

# Train VAE

In [8]:
def seqFromPair(pair):
    ord_a = ord('a')
    input_seq = [ord(c) - ord_a + 1 for c in pair[0]]
    target_seq = [ord(c) - ord_a + 1 for c in pair[1]]
    
    return input_seq, target_seq

In [25]:
def make_onehot(idx,num_classes=vocab_size):
    return torch.zeros(len(idx), num_classes).scatter_(1, idx.unsqueeze(1), 1.)

In [26]:
vocab_size = 28 #The number of vocabulary
SOS_token = 0
EOS_token = vocab_size-1

def train(vae_model, data_pairs, n_iters, print_every=1000, learning_rate=0.01, teacher_forcing_ratio = 1.0,\
         optimizer = None):
    loss_list = []
    
    # Randomly generate training pairs from data
    training_pairs = [seqFromPair(random.choice(data_pairs))
                      for i in range(n_iters)]    
    
    # Check device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Check optimizer; default: SGD
    if optimizer is None:
        optimizer = optim.SGD(vae_model.parameters(), lr=learning_rate)
    
    for i in range(n_iters):
        # Initialize hidden feature
        hidden = torch.zeros(1, 1, hidden_size, device=device)
        
        # Seperate pair for input
        input_seq, target_seq = training_pairs[i]   
        
        # Determine whether to use teacher forcing
        use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
        
        # Run model
        if use_teacher_forcing:
            result, mu, logvar = vae_model(input_seq, hidden, use_teacher_forcing, target_seq)
        else:
            result, mu, logvar = vae_model(input_seq, hidden, use_teacher_forcing, None)
            
        # ground truth should add EOS in the end
        target_seq.append(EOS_token)
        
        # Calculate loss
        min_len = min(len(target_seq),len(result))
        
        #-------- make_onehot will have problem back-propagating.. --------#
        hat_y = make_onehot(result[:min_len])
        y = make_onehot(target_seq[:min_len])
            
        loss = VAE_Loss(hat_y, y, mu, logvar)
        loss_list.append(loss)
        
        loss.backward()
        optimizer.step()
    
        if i % print_every == 0:
            print('Iter %d: loss = %.4f' % (i, loss))
    
    return loss_list

## Setting hyperparameters

In [27]:
#----------Hyper Parameters----------#
hidden_size = 256
teacher_forcing_ratio = 1.0
empty_input_ratio = 0.1
KLD_weight = 0.0
lr = 0.05

In [28]:
my_vae = VAE(vocab_size, hidden_size, vocab_size, teacher_forcing_ratio).to(device)

In [29]:
optimizer = optim.SGD(my_vae.parameters(), lr=lr)

## Train with Simple Present -> Present Progressive

In [30]:
train(my_vae, train_st_pp, n_iters=100, print_every=1, learning_rate=lr, \
      teacher_forcing_ratio=teacher_forcing_ratio, optimizer= optimizer)

RuntimeError: Function AddBackward0 returned an invalid gradient at index 1 - expected type TensorOptions(dtype=float, device=cuda:0, layout=Strided, requires_grad=false) but got TensorOptions(dtype=float, device=cpu, layout=Strided, requires_grad=false) (validate_outputs at /pytorch/torch/csrc/autograd/engine.cpp:484)
frame #0: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x46 (0x7f6c80de3536 in /home/tl32rodan/.local/lib/python3.6/site-packages/torch/lib/libc10.so)
frame #1: <unknown function> + 0x2d83d24 (0x7f6c63fe7d24 in /home/tl32rodan/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #2: torch::autograd::Engine::evaluate_function(std::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&) + 0x548 (0x7f6c63fe9858 in /home/tl32rodan/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #3: torch::autograd::Engine::thread_main(std::shared_ptr<torch::autograd::GraphTask> const&, bool) + 0x3d2 (0x7f6c63feb7e2 in /home/tl32rodan/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #4: torch::autograd::Engine::thread_init(int) + 0x39 (0x7f6c63fe3e59 in /home/tl32rodan/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #5: torch::autograd::python::PythonEngine::thread_init(int) + 0x38 (0x7f6c81b6a5f8 in /home/tl32rodan/.local/lib/python3.6/site-packages/torch/lib/libtorch_python.so)
frame #6: <unknown function> + 0xc819d (0x7f6c94e0f19d in /opt/conda/bin/../lib/libstdc++.so.6)
frame #7: <unknown function> + 0x76db (0x7f6c983d66db in /lib/x86_64-linux-gnu/libpthread.so.0)
frame #8: clone + 0x3f (0x7f6c980ffa3f in /lib/x86_64-linux-gnu/libc.so.6)
