In [6]:
# Select the compute device: CUDA when torch reports it as available, CPU otherwise.
# NOTE(review): this cell uses `torch`, which is imported in a later cell of this
# notebook, so it only runs once that import has been executed in the kernel.
if not torch.cuda.is_available():
    device = torch.device("cpu")
    print("GPU not available, running on CPU.")
else:
    device = torch.device("cuda")

GPU not available, running on CPU.


In [7]:
import re
import torchtext
from torchtext import data
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
import argparse
import math

# Punctuation characters that clean_str() replaces with a single space.
# The backslash is spelled as an explicit "\\" escape: the previous "\(" was an
# invalid escape sequence that only produced a backslash because Python keeps
# unknown escapes verbatim (newer interpreters emit a warning for it).
my_punc = "!\"#$%&\\()*+?_/:;[]{}|~,`"
# Mapping for str.translate(): code point of each character above -> " ".
table = {ord(char): u' ' for char in my_punc}

def clean_str(string):
    """Normalise a raw sentence and return the cleaned text.

    Steps, applied in this order:
      1. regex substitutions that drop common English contraction suffixes
         followed by a space ("'s ", "'m ", "'ve ", "'re ", "'d ", "'ll ") and
         expand "n't " to " not ";
      2. hyphens and "@" become spaces, remaining apostrophes are removed;
      3. every character listed in the module-level ``table`` is translated to
         a space;
      4. each ".." pair is deleted and surrounding whitespace is stripped.
    """
    # (pattern, replacement) pairs; the order matters because later patterns
    # see the output of earlier ones.
    substitutions = (
        (r"\'s ", " "),
        (r"\'m ", " "),
        (r"\'ve ", " "),
        (r"n\'t ", " not "),
        (r"\'re ", " "),
        (r"\'d ", " "),
        (r"\'ll ", " "),
        ("-", " "),
        (r"@", " "),
        ('\'', ''),
    )
    for pattern, replacement in substitutions:
        string = re.sub(pattern, replacement, string)

    without_punc = string.translate(table)
    return without_punc.replace("..", "").strip()

def tokenizer_function(text):
    """Split ``text`` on single spaces and return the non-empty pieces as a list.

    Only the space character separates tokens; other whitespace (tabs,
    newlines) stays inside the token it belongs to.
    """
    tokens = []
    for piece in text.split(" "):
        # Consecutive spaces yield empty pieces, which are discarded.
        if piece == "" or " " in piece:
            continue
        tokens.append(piece)
    return tokens

class MyDataset(data.Dataset):
    """Line-per-example text dataset with a single ``text`` field."""

    def __init__(self, path, text_field, **kwargs):
        """Read ``path`` and turn every line into one example.

        Args:
            path: file to read; each line becomes one ``data.Example``.
            text_field: field object attached to the ``text`` attribute.
            **kwargs: forwarded unchanged to ``data.Dataset.__init__``.
        """
        fields = [('text', text_field)]
        with open(path, 'r') as handle:
            examples = [data.Example.fromlist([line], fields) for line in handle]
        super().__init__(examples, fields, **kwargs)

    @classmethod
    def splits(cls, text_field, train='train', **kwargs):
        """Delegate to ``data.Dataset.splits`` with ``text_field`` and ``train`` passed by keyword."""
        return super().splits(text_field=text_field, train=train, **kwargs)

def get_iterators(opt):
    """Build the train/validation batch iterators and the vocabulary.

    Reads ``train.txt`` and ``test.txt`` through ``MyDataset.splits`` (the
    ``test`` split is used as validation data here), builds the vocabulary over
    both with ``glove.6B.300d`` vectors, and wraps both datasets in bucket
    iterators.

    Args:
        opt: options object; reads ``start_token``, ``end_token``, ``n_vocab``
            and ``batch_size``.

    Returns:
        ``(train_iter, val_iter, vocab)``.
    """
    text_field = data.Field(
        init_token=opt.start_token,
        eos_token=opt.end_token,
        lower=True,
        tokenize=tokenizer_function,
        batch_first=True,
    )
    datasets = MyDataset.splits(path="", train="train.txt", test="test.txt", text_field=text_field)
    train_data, val_data = datasets

    # max_size is n_vocab - 4 — presumably to leave room for the unk/pad/sos/eos
    # special tokens; confirm against the torchtext version in use.
    text_field.build_vocab(train_data, val_data, max_size=opt.n_vocab-4, vectors='glove.6B.300d')

    iterators = data.BucketIterator.splits(
        (train_data, val_data),
        batch_size=opt.batch_size,
        sort_key=lambda x: len(x.text),
        repeat=False,
    )
    train_iter, val_iter = iterators
    return train_iter, val_iter, text_field.vocab

def get_cuda(tensor):
    """Return ``tensor`` unchanged.

    Despite its name this helper performs no device transfer; whatever object
    is passed in is handed straight back to the caller.
    """
    return tensor

def get_sentences_in_batch(x, vocab):
    """Print every row of ``x`` as text, one line per row.

    Each entry of a row is looked up in ``vocab.itos`` and followed by a single
    space (so every printed line ends with a trailing space).
    """
    for sent in x:
        print("".join(vocab.itos[word] + " " for word in sent))

class Highway(nn.Module):
    """Stack of highway layers operating on the last (``opt.n_embed``) dimension.

    Every layer mixes a ReLU-activated projection with a plain linear
    projection of its input, weighted element-wise by a sigmoid gate:
    ``gate * relu(W_n x) + (1 - gate) * (W_l x)``.
    """

    def __init__(self, opt):
        """Create ``opt.n_highway_layers`` layers of width ``opt.n_embed``."""
        super().__init__()
        self.n_layers = opt.n_highway_layers
        width = opt.n_embed

        def square_stack():
            # One width x width linear map per highway layer.
            return nn.ModuleList([nn.Linear(width, width) for _ in range(self.n_layers)])

        # Creation order (non_linear, linear, gate) is kept so that seeded
        # initialisation stays reproducible.
        self.non_linear = square_stack()
        self.linear = square_stack()
        self.gate = square_stack()

    def forward(self, x):
        """Apply all highway layers in sequence and return the result."""
        for idx in range(self.n_layers):
            transform_gate = torch.sigmoid(self.gate[idx](x))
            activated = F.relu(self.non_linear[idx](x))
            projected = self.linear[idx](x)
            x = transform_gate * activated + (1 - transform_gate) * projected
        return x

class Encoder(nn.Module):
    """Highway network followed by a bidirectional LSTM sentence encoder.

    ``forward`` returns, for every batch element, the final hidden states of
    the last LSTM layer in both directions concatenated along the feature
    dimension, i.e. a ``(batch, 2 * n_hidden_E)`` tensor.
    """

    def __init__(self, opt):
        """Build the sub-modules from ``opt.n_embed``, ``opt.n_hidden_E`` and ``opt.n_layers_E``."""
        super(Encoder, self).__init__()
        self.highway = Highway(opt)
        self.n_hidden_E = opt.n_hidden_E
        self.n_layers_E = opt.n_layers_E
        self.lstm = nn.LSTM(input_size=opt.n_embed, hidden_size=opt.n_hidden_E, num_layers=opt.n_layers_E, batch_first=True, bidirectional=True)

    def init_hidden(self, batch_size):
        """Store a zero ``(h_0, c_0)`` pair for ``batch_size`` sequences in ``self.hidden``.

        The state is created with the dtype and device of the LSTM weights, so
        the module keeps working after ``.to(device)`` / ``.double()`` instead
        of always producing float32 CPU tensors.
        """
        ref = next(self.lstm.parameters())
        # First dimension: one slot per (layer, direction) pair.
        shape = (2*self.n_layers_E, batch_size, self.n_hidden_E)
        h_0 = torch.zeros(shape, dtype=ref.dtype, device=ref.device)
        c_0 = torch.zeros(shape, dtype=ref.dtype, device=ref.device)
        self.hidden = (h_0, c_0)

    def forward(self, x):
        """Encode a 3-D input ``(batch, seq, embed)`` into ``(batch, 2 * n_hidden_E)``.

        Side effect: ``self.hidden`` ends up holding the ``(2, batch,
        n_hidden_E)`` final hidden states of the last layer.
        """
        batch_size, n_seq, n_embed = x.size()
        x = self.highway(x)
        self.init_hidden(batch_size)
        _, (h_n, _) = self.lstm(x, self.hidden)
        # Separate the layer and direction axes, then keep only the last layer.
        h_n = h_n.view(self.n_layers_E, 2, batch_size, self.n_hidden_E)
        self.hidden = h_n[-1]
        # Forward- and backward-direction states side by side per batch element.
        e_hidden = torch.cat(list(self.hidden), dim=1)
        return e_hidden

class Generator(nn.Module):
    """LSTM decoder that conditions every time step on a latent vector ``z``.

    The latent vector is appended to the embedding of every input token, the
    result is run through a unidirectional LSTM, and a linear layer maps each
    LSTM output to ``opt.n_vocab`` logits.
    """

    def __init__(self, opt):
        """Build the LSTM and output projection from ``opt``."""
        super(Generator, self).__init__()
        self.n_hidden_G = opt.n_hidden_G
        self.n_layers_G = opt.n_layers_G
        self.n_z = opt.n_z
        self.lstm = nn.LSTM(input_size=opt.n_embed+opt.n_z, hidden_size=opt.n_hidden_G, num_layers=opt.n_layers_G, batch_first=True)
        self.fc = nn.Linear(opt.n_hidden_G, opt.n_vocab)

    def init_hidden(self, batch_size):
        """Store a zero ``(h_0, c_0)`` pair for ``batch_size`` sequences in ``self.hidden``.

        The state is created with the dtype and device of the LSTM weights, so
        the module keeps working after ``.to(device)`` / ``.double()`` instead
        of always producing float32 CPU tensors.
        """
        ref = next(self.lstm.parameters())
        shape = (self.n_layers_G, batch_size, self.n_hidden_G)
        h_0 = torch.zeros(shape, dtype=ref.dtype, device=ref.device)
        c_0 = torch.zeros(shape, dtype=ref.dtype, device=ref.device)
        self.hidden = (h_0, c_0)

    def forward(self, x, z, g_hidden=None):
        """Decode one batch.

        Args:
            x: 3-D tensor ``(batch, seq, embed)`` of input embeddings.
            z: latent vectors, reshaped below to ``(batch, seq, n_z)``.
            g_hidden: optional ``(h, c)`` state to continue from; a zero state
                is created when it is ``None``.

        Returns:
            ``(logits, hidden)`` where ``logits`` has one ``n_vocab``-sized
            vector per input position and ``hidden`` is the LSTM state after
            the last position.
        """
        batch_size, n_seq, n_embed = x.size()
        # Repeat z for every time step so it can be concatenated to each token.
        z = torch.cat([z]*n_seq, 1).view(batch_size, n_seq, self.n_z)
        x = torch.cat([x, z], dim=2)

        if g_hidden is None:
            self.init_hidden(batch_size)
        else:
            self.hidden = g_hidden

        output, self.hidden = self.lstm(x, self.hidden)
        output = self.fc(output)

        return output, self.hidden

class VAE(nn.Module):
    """Sentence variational auto-encoder: embedding, encoder, latent heads, generator."""

    def __init__(self, opt):
        """Build all sub-modules from ``opt``.

        Both latent heads consume the encoder output, whose width is
        ``2 * opt.n_hidden_E``. The log-variance head used to be sized with
        ``opt.n_hidden_G``, which only worked while the encoder and generator
        hidden sizes happened to be equal.
        """
        super(VAE, self).__init__()
        self.embedding = nn.Embedding(opt.n_vocab, opt.n_embed)
        self.encoder = Encoder(opt)
        encoder_width = 2*opt.n_hidden_E
        self.hidden_to_mu = nn.Linear(encoder_width, opt.n_z)
        self.hidden_to_logvar = nn.Linear(encoder_width, opt.n_z)
        self.generator = Generator(opt)
        self.n_z = opt.n_z

    def forward(self, x, G_inp, z=None, G_hidden=None):
        """Run the model in encode-and-decode or decode-only mode.

        Args:
            x: token ids to encode; only read when ``z`` is ``None``.
            G_inp: token ids fed to the generator.
            z: optional latent vectors. When given, the encoder is skipped.
            G_hidden: optional generator state passed through to the generator.

        Returns:
            ``(logit, G_hidden, kld)``; ``kld`` is the batch-mean KL term when
            ``z`` was sampled from the encoder and ``None`` when ``z`` was
            supplied by the caller.
        """
        if z is None:
            x = self.embedding(x)
            E_hidden = self.encoder(x)
            mu = self.hidden_to_mu(E_hidden)
            logvar = self.hidden_to_logvar(E_hidden)
            # Reparameterisation noise, created with mu's dtype and device so
            # the sampling step also works when the model is not on the CPU.
            eps = torch.randn(mu.size(), dtype=mu.dtype, device=mu.device)
            z = mu + eps * torch.exp(0.5 * logvar)
            # KL divergence to a standard normal, summed over latent
            # dimensions and averaged over the batch.
            kld = -0.5 * torch.sum(logvar - mu.pow(2) - logvar.exp() + 1, 1).mean()
        else:
            kld = None

        G_inp = self.embedding(G_inp)

        logit, G_hidden = self.generator(G_inp, z, G_hidden)
        return logit, G_hidden, kld

# Hyper-parameters and run options, exposed as command-line style arguments.
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('--n_vocab', type=int, default=12000)
parser.add_argument('--epochs', type=int, default=121)
parser.add_argument('--n_hidden_G', type=int, default=512)
parser.add_argument('--n_layers_G', type=int, default=2)
parser.add_argument('--n_hidden_E', type=int, default=512)
parser.add_argument('--n_layers_E', type=int, default=1)
parser.add_argument('--n_z', type=int, default=100)
parser.add_argument('--word_dropout', type=float, default=0.5)
parser.add_argument('--rec_coef', type=float, default=7)
parser.add_argument('--lr', type=float, default=0.0001)
parser.add_argument('--gpu_device', type=int, default=1)
parser.add_argument('--n_highway_layers', type=int, default=2)
parser.add_argument('--n_embed', type=int, default=300)
parser.add_argument('--unk_token', type=str, default="<unk>")
parser.add_argument('--pad_token', type=str, default="<pad>")
parser.add_argument('--start_token', type=str, default="<sos>")
parser.add_argument('--end_token', type=str, default="<eos>")

def str2bool(v):
    """Return True when ``v`` is the text "true" in any letter case, False otherwise."""
    return v.lower() == 'true'

# These flags are parsed with str2bool rather than the builtin bool: argparse
# calls ``type`` on the raw argument string and bool() of any non-empty string
# is True, so "--to_train False" used to enable training.
parser.add_argument('--resume_training', type=str2bool, default=False)
parser.add_argument('--to_train', type=str2bool, default=True)


# Parse the options; parse_known_args() returns unrecognised arguments in
# `unknown` instead of failing on them.
opt, unknown = parser.parse_known_args()
print(opt)
# Checkpoint file written by training() and read back when resuming/generating.
save_path = "saved_models/vae_model.tar"
if not os.path.exists("saved_models"):
    os.makedirs("saved_models")

# Loads the datasets and builds the vocabulary (see get_iterators).
train_iter, val_iter, vocab = get_iterators(opt)

vae = VAE(opt)
# Initialise the embedding weights from the vocabulary's vectors.
# NOTE(review): copy_ presumably needs vocab.vectors to match the
# (n_vocab, n_embed) embedding shape — confirm the vocabulary reaches n_vocab entries.
vae.embedding.weight.data.copy_(vocab.vectors)
vae = get_cuda(vae)
trainer_vae = torch.optim.Adam(vae.parameters(), lr=opt.lr)

def create_generator_input(x, train):
    """Build the generator input from a batch of token ids.

    The input is ``x`` without its last column (a copy; ``x`` is not
    modified). When training, word dropout is applied: every position except
    the first column is replaced by the unknown-token id with probability
    ``opt.word_dropout``, unless it holds the padding or end token.

    Args:
        x: 2-D tensor of token ids.
        train: dropout is skipped only when this is exactly ``False``.

    Returns:
        Tensor with the same number of rows as ``x`` and one column fewer.
    """
    G_inp = x[:, 0:x.size(1)-1].clone()
    if train is False:
        return G_inp

    # One uniform draw per position (same RNG consumption as a per-element
    # loop), turned into a boolean mask so the replacement happens in a single
    # vectorised assignment instead of a Python loop over tensor elements.
    r = np.random.rand(G_inp.size(0), G_inp.size(1))
    drop = torch.from_numpy(r < opt.word_dropout).to(G_inp.device)
    # The first column is never dropped; the slice also copes with zero columns.
    drop[:, :1] = False
    for protected_id in (vocab.stoi[opt.pad_token], vocab.stoi[opt.end_token]):
        drop &= (G_inp != protected_id)
    G_inp[drop] = vocab.stoi[opt.unk_token]

    return G_inp

def train_batch(x, G_inp, step, train=True):
    """Compute the losses for one batch and optionally take an optimiser step.

    The reconstruction loss is the cross-entropy between the model logits and
    ``x`` shifted left by one position. The KL weight follows
    ``(tanh((step - 15000) / 1000) + 1) / 2``, which equals 0.5 at step 15000.

    Args:
        x: 2-D tensor of token ids.
        G_inp: generator input ids for the same batch.
        step: global step counter used for the KL weight.
        train: parameters are updated only when this is exactly ``True``.

    Returns:
        ``(reconstruction_loss, kl_loss)`` as Python floats.
    """
    logit, _, kld = vae(x, G_inp, None, None)
    flat_logit = logit.view(-1, opt.n_vocab)
    # Targets are the tokens following each generator input position.
    target = x[:, 1:x.size(1)].contiguous().view(-1)
    rec_loss = F.cross_entropy(flat_logit, target)

    kld_coef = (math.tanh((step - 15000)/1000) + 1) / 2
    loss = opt.rec_coef * rec_loss + kld_coef * kld

    if train is True:
        trainer_vae.zero_grad()
        loss.backward()
        trainer_vae.step()

    return rec_loss.item(), kld.item()

def load_model_from_checkpoint():
    """Restore the model and optimiser from ``save_path``.

    The checkpoint is loaded onto the CPU; its ``'vae_dict'`` entry is loaded
    into ``vae`` and its ``'vae_trainer'`` entry into ``trainer_vae``.

    Returns:
        ``(step, epoch)`` as stored in the checkpoint.
    """
    state = torch.load(save_path, map_location='cpu')
    for target, key in ((vae, 'vae_dict'), (trainer_vae, 'vae_trainer')):
        target.load_state_dict(state[key])
    resume_step, resume_epoch = state['step'], state['epoch']
    return resume_step, resume_epoch

def training():
    """Run the train/validate loop and periodically write a checkpoint.

    Starts from epoch 0 / step 0, or from the values stored in the checkpoint
    when ``opt.resume_training`` is set. Each epoch performs one pass over
    ``train_iter`` with parameter updates and one pass over ``val_iter``
    without gradients, prints the mean losses, and saves a checkpoint to
    ``save_path`` whenever the epoch index is a multiple of 5.
    """
    step = 0
    start_epoch = 0
    if opt.resume_training:
        step, start_epoch = load_model_from_checkpoint()

    for epoch in range(start_epoch, opt.epochs):
        # Optimisation pass.
        vae.train()
        train_rec_loss, train_kl_loss = [], []
        for batch in train_iter:
            tokens = batch.text
            generator_input = create_generator_input(tokens, train=True)
            rec_loss, kl_loss = train_batch(tokens, generator_input, step, train=True)
            train_rec_loss.append(rec_loss)
            train_kl_loss.append(kl_loss)
            step += 1

        # Validation pass: same loss computation, no parameter updates.
        vae.eval()
        valid_rec_loss, valid_kl_loss = [], []
        for batch in val_iter:
            tokens = batch.text
            generator_input = create_generator_input(tokens, train=False)
            with torch.no_grad():
                rec_loss, kl_loss = train_batch(tokens, generator_input, step, train=False)
            valid_rec_loss.append(rec_loss)
            valid_kl_loss.append(kl_loss)

        # Collapse the per-batch losses into epoch means.
        train_rec_loss, train_kl_loss, valid_rec_loss, valid_kl_loss = [
            np.mean(values)
            for values in (train_rec_loss, train_kl_loss, valid_rec_loss, valid_kl_loss)
        ]

        print("No.", epoch, "T_rec:", '%.2f' % train_rec_loss, "T_kld:", '%.2f' % train_kl_loss, "V_rec:", '%.2f' % valid_rec_loss, "V_kld:", '%.2f' % valid_kl_loss)

        if epoch % 5 != 0:
            continue
        # 'epoch' stores the next epoch to run, so a resumed run does not
        # repeat the epoch that was just completed.
        checkpoint = {
            'epoch': epoch + 1,
            'vae_dict': vae.state_dict(),
            'vae_trainer': trainer_vae.state_dict(),
            'step': step
        }
        torch.save(checkpoint, save_path)

def generate_sentences(n_examples):
    """Sample ``n_examples`` sentences from the saved model and print them.

    The model weights are loaded from ``save_path``. For every sentence a
    latent vector is drawn from a standard normal, decoding starts from the
    start token with a zero generator state, and tokens are sampled from the
    softmax distribution one at a time until the end token is produced. Each
    sentence is printed as UTF-8 encoded bytes.
    """
    checkpoint = torch.load(save_path, map_location='cpu')
    vae.load_state_dict(checkpoint['vae_dict'])
    vae.eval()
    del checkpoint

    for _ in range(n_examples):
        z = torch.randn([1, opt.n_z])
        state_shape = (opt.n_layers_G, 1, opt.n_hidden_G)
        G_hidden = (torch.zeros(*state_shape), torch.zeros(*state_shape))
        G_inp = get_cuda(torch.LongTensor(1, 1).fill_(vocab.stoi[opt.start_token]))

        # Collected tokens; each is followed by one space when joined below.
        pieces = [opt.start_token]
        while G_inp[0][0].item() != vocab.stoi[opt.end_token]:
            with torch.no_grad():
                logit, G_hidden, _ = vae(None, G_inp, z, G_hidden)
            probs = F.softmax(logit[0], dim=1)
            # The sampled token becomes the next generator input.
            G_inp = torch.multinomial(probs, 1)
            pieces.append(vocab.itos[G_inp[0][0].item()])

        sentence = "".join(piece + " " for piece in pieces)
        print(sentence.encode('utf-8'))

if __name__ == '__main__':
    # Train when opt.to_train is set; otherwise sample ten sentences from the
    # saved checkpoint.
    if not opt.to_train:
        generate_sentences(10)
    else:
        training()

Namespace(batch_size=128, n_vocab=12000, epochs=121, n_hidden_G=512, n_layers_G=2, n_hidden_E=512, n_layers_E=1, n_z=100, word_dropout=0.5, rec_coef=7, lr=0.0001, gpu_device=1, n_highway_layers=2, n_embed=300, unk_token='<unk>', pad_token='<pad>', start_token='<sos>', end_token='<eos>', resume_training=False, to_train=True)


AssertionError: Torch not compiled with CUDA enabled

In [3]:
# Pin `device` to the CPU.
# NOTE(review): no code visible in this notebook reads `device`; confirm whether
# it is meant to be passed to the model and tensors.
device = torch.device("cpu")