In [1]:
# copyright @ Ziming Li
# version 1.0
# This code was written solely by Ziming Li, a Ph.D. student at Tsinghua University, China.
# This code is for the final project of my summer internship at Yunzhixin'an Technology Co., LTD, Zhengzhou, China.
# If you have any questions, please contact me by email: lzm22@mails.tsinghua.edu.cn

In [16]:
import pickle
import numpy as np
import time
import random
import torch
from torch import optim
import torch.utils.data as Data
import torch.nn.functional as F
import os
import matplotlib.pyplot as plt

# Seed every random number generator this notebook draws from, not just
# Python's `random` module: torch's generator drives parameter
# initialisation and DataLoader shuffling, so leaving it unseeded makes
# runs differ from one kernel restart to the next.
SEED = 1229
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

from model import RNN

In [17]:
# load data from ./data

def _load_pickle(path):
    """Return the object unpickled from `path`, closing the file afterwards."""
    # NOTE(review): pickle.load can execute arbitrary code while
    # deserialising; only point this at files from a trusted source.
    with open(path, 'rb') as fin:
        return pickle.load(fin)


training_set = _load_pickle('./data/training_set.pkl')
training_labels = _load_pickle('./data/training_labels.pkl')

validation_set = _load_pickle('./data/validation_set.pkl')
validation_labels = _load_pickle('./data/validation_labels.pkl')

mapper = _load_pickle('./data/mapper_between_index_and_label.pkl')

# Dimensions are read off the first training sample.
# NOTE(review): presumably every sentence has the same shape as the first
# one -- confirm against the preprocessing step.
length_of_word_vector = len(training_set[0][0])
length_of_sentence = len(training_set[0])
number_of_labels = len(mapper)

print('length of word vector:', length_of_word_vector)
print('length of sentence:', length_of_sentence)
print('number of labels:', number_of_labels)

length of word vector: 50
length of sentence: 11
number of labels: 22


In [18]:
# define hyperparameters

# --- run bookkeeping ---
experiment_name = "run"        # becomes part of the checkpoint file name
train_dir = './trained_model'  # directory the checkpoint is written to

# --- optimisation ---
num_epochs = 5
learning_rate = 1e-3
batch_size = 32       # training mini-batch size
val_batch_size = 320  # validation samples drawn after each epoch

# --- model dimensions (the first two come from the loaded data) ---
embed_units = length_of_word_vector
number_of_classes = number_of_labels
units = 64

In [19]:
# define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Make sure the checkpoint directory exists. makedirs(exist_ok=True) also
# creates missing parent directories and does not fail if the directory
# appears between an existence check and the creation call.
os.makedirs(train_dir, exist_ok=True)

print("Created model with fresh parameters.")

# model definition
model = RNN(
    embed_units,        # word-vector size
    units,              # hidden size
    number_of_classes)  # number of output classes
model.to(device)

# optimizer definition
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0)

# training preparation
best_val_ppl = float("inf")
best_epoch = -1
# Per-epoch loss histories, filled in by the training loop.
train_loss_record, valid_loss_record = [], []

Created model with fresh parameters.


In [20]:
# convert data to DataLoader

def _to_tensor(values, dtype):
    """Copy `values` into a numpy array of `dtype` and wrap it as a tensor."""
    return torch.from_numpy(np.array(values, dtype=dtype))


# Features become float32 tensors, labels become int64 tensors.
training_set = _to_tensor(training_set, np.float32)
training_labels = _to_tensor(training_labels, np.int64)
validation_set = _to_tensor(validation_set, np.float32)
validation_labels = _to_tensor(validation_labels, np.int64)

# Only the training split gets a DataLoader; it is reshuffled every epoch.
train_dataset = Data.TensorDataset(training_set, training_labels)
train_loader = Data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# for step,(batch_x,batch_y) in enumerate(train_loader):
#     print('| Step: ', step, '| batch x: ', batch_x.numpy(), '| batch y: ', batch_y.numpy())

In [21]:
def choose_random_sample(dataset, labels, batch_size):
    """Draw `batch_size` distinct examples at random, without replacement.

    Args:
        dataset: indexable collection of samples; must accept a list of
            integer positions as an index (e.g. a tensor or numpy array).
        labels: collection of labels aligned with `dataset`, indexable the
            same way; its length defines the population to sample from.
        batch_size: number of examples to draw.

    Returns:
        A `(samples, labels)` pair selected at the same random positions.

    Raises:
        ValueError: from `random.sample` if `batch_size` exceeds
            `len(labels)`.
    """
    population = range(len(labels))
    picked = random.sample(population, batch_size)
    sampled_data = dataset[picked]
    sampled_labels = labels[picked]
    return sampled_data, sampled_labels

# choose_random_sample(training_set, training_labels, 5)

In [22]:
# begin training
# Each epoch runs one pass over train_loader, then scores the model on a
# random sample of the validation set. The model is checkpointed whenever
# that score beats the best one seen so far.
best_epoch = -1
best_val_correct_percentage = 0

# The checkpoint path does not change between epochs, so build it once.
checkpoint_path = os.path.join(train_dir, 'checkpoint_%s.pth' % experiment_name)

for epoch in range(1, num_epochs + 1):
    start_time = time.time()

    losses = []

    model.train()

    for batch, (batched_data, batched_label) in enumerate(train_loader):
        optimizer.zero_grad()
        loss, _ = model(batched_data, batched_label, device)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

        # Report the running mean over the last 100 batches.
        if (batch + 1) % 100 == 0:
            print("Epoch %d Batch %d, train loss %f" % (epoch, batch + 1, np.mean(losses[-100:])))

    train_loss = np.mean(losses)
    train_loss_record.append(train_loss)

    # A fresh random validation sample is drawn every epoch, so scores from
    # different epochs are measured on different samples.
    validation_batched_data, validation_batched_label = choose_random_sample(
        validation_set, validation_labels, val_batch_size)

    model.eval()
    with torch.no_grad():
        predicted_ans = model.predict(validation_batched_data, device)

    # The labels were never moved to `device`, so compare on the labels'
    # device; comparing tensors on different devices raises an error.
    # NOTE(review): assumes model.predict returns a tensor -- confirm in model.py.
    predicted_ans = predicted_ans.to(validation_batched_label.device)
    num_correct = torch.sum(predicted_ans == validation_batched_label).item()
    # Fraction (0..1) of the sampled validation examples predicted correctly.
    val_correct_percentage = num_correct / len(validation_batched_label)

    if val_correct_percentage > best_val_correct_percentage:
        best_val_correct_percentage = val_correct_percentage
        best_epoch = epoch

        # This pickles the whole model object rather than a state_dict, so
        # loading it later needs the model's class to be importable.
        torch.save(model, checkpoint_path)

    epoch_time = time.time() - start_time
    print("Epoch %s of %s took %ss" % (epoch, num_epochs, epoch_time))
    print("  training loss:                 %s" % train_loss)
    print("  validation correct percentage: %s" % val_correct_percentage)
    print("  best epoch:                    %s" % best_epoch)
    # This value is the best validation accuracy, not a perplexity.
    print("  best validation accuracy:      %s" % best_val_correct_percentage)


Epoch 1 Batch 100, train loss 2.627065
Epoch 1 Batch 200, train loss 1.469127
Epoch 1 Batch 300, train loss 0.894586
Epoch 1 Batch 400, train loss 0.610579
Epoch 1 Batch 500, train loss 0.472886
Epoch 1 Batch 600, train loss 0.399139
Epoch 1 Batch 700, train loss 0.352792
Epoch 1 Batch 800, train loss 0.360567
Epoch 1 Batch 900, train loss 0.320150
Epoch 1 Batch 1000, train loss 0.302620
Epoch 1 Batch 1100, train loss 0.293878
Epoch 1 Batch 1200, train loss 0.260569
Epoch 1 Batch 1300, train loss 0.261530
Epoch 1 Batch 1400, train loss 0.222316
Epoch 1 Batch 1500, train loss 0.239415
Epoch 1 Batch 1600, train loss 0.218006
Epoch 1 Batch 1700, train loss 0.199954
Epoch 1 Batch 1800, train loss 0.183520
Epoch 1 Batch 1900, train loss 0.207169
Epoch 1 Batch 2000, train loss 0.194234
Epoch 1 Batch 2100, train loss 0.181548
Epoch 1 Batch 2200, train loss 0.184535
Epoch 1 Batch 2300, train loss 0.191008
Epoch 1 Batch 2400, train loss 0.188694
Epoch 1 Batch 2500, train loss 0.190797
Epoch 1 B