In [2]:
import numpy as np
import torchtext
from torchtext.data import Field, Dataset, BucketIterator
import pandas as pd
from DataFrameDataSet import DataFrameDataset

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.nn.functional import log_softmax, relu
from torch.optim import Adam, SGD

from random import random, seed, shuffle

In [3]:
train_df = pd.read_csv("./data/train.csv")
train_df.head()

Unnamed: 0,utts,labels
0,"Guten Tag, I am staying overnight in Cambridge...",find_hotel
1,Hi there! Can you give me some info on Cityroomz?,find_hotel
2,I am looking for a hotel named alyesbray lodge...,find_hotel
3,I'm looking for a places to go and see during ...,find_hotel
4,I need a place to stay that has free wifi.,find_hotel


In [34]:
PAD="<pad>"
UNK="<unk>"
START="<start>"
END="<end>"

TEXT = Field(
    sequential=True,
    init_token = START,
    eos_token=END,
    pad_token=PAD,
    unk_token=UNK,
    tokenize=lambda x : x.lower().split())
LABEL = Field(sequential=False, unk_token=None)



In [35]:
fields = {'labels' : LABEL,'utts' : TEXT}
train = DataFrameDataset(train_df, fields)

TEXT.build_vocab(train) 
LABEL.build_vocab(train)

In [37]:
from torchtext.data import Iterator, BucketIterator

train_iter = BucketIterator(
        train,
        batch_size=16,
        device="cpu",
        shuffle=True)



In [42]:
# Ensure reproducible results.
seed(0)
torch.manual_seed(0)
np.random.seed(0)

class BiLSTM(nn.Module):
  
  def __init__(self, embedding_size, vocab_size, output_size, hidden_size, num_layers):
    super(BiLSTM, self).__init__()

    self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_size)
    self.rnn_layer = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers, bidirectional=True) 
    self.activation_fn = nn.ReLU()
    self.linear_layer = nn.Linear(hidden_size*2, output_size) 
    self.softmax_layer = nn.LogSoftmax(dim=1)
  
  def forward(self, x):
    out = self.embedding(x)
    out, _ = self.rnn_layer(out) # since we are not feeding h_0 explicitly, h_0 will be initialized to zeros by default
    out = out[-1]
    out = self.activation_fn(out)
    out = self.linear_layer(out)
    out = self.softmax_layer(out) # accepts 2D or more dimensional inputs
    return out

In [50]:
model = BiLSTM(embedding_size=100, vocab_size=len(TEXT.vocab.stoi), output_size=2, hidden_size=100, num_layers=1)
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
device = "cuda" if torch.cuda.is_available() else "cpu"
MAX_EPOCHS = 10

In [55]:
from sklearn.metrics import accuracy_score

def train_function(loader):
    total_loss = 0.0
    # iterate throught the data loader
    num_sample = 0
    for batch in loader:
        # load the current batch
        batch_input = batch.utts
        batch_output = batch.labels
        
        batch_input = batch_input.to(device)
        batch_output = batch_output.to(device)
        # forward propagation
        # pass the data through the model
        model_outputs = model(batch_input)
        # compute the loss
        cur_loss = criterion(model_outputs, batch_output)
        total_loss += cur_loss.item()

        # backward propagation (compute the gradients and update the model)
        # clear the buffer
        optimizer.zero_grad()
        # compute the gradients
        cur_loss.backward()
        # update the weights
        optimizer.step()

        num_sample += batch_output.shape[0]
    return total_loss/num_sample

def evaluate(loader):
    all_pred=[]
    all_label = []
    with torch.no_grad(): # impacts the autograd engine and deactivate it. reduces memory usage and speeds up computation
        for batch in loader:
             # load the current batch
            batch_input = batch.utts
            batch_output = batch.labels

            batch_input = batch_input.to(device)
            # forward propagation
            # pass the data through the model
            model_outputs = model(batch_input)
            # identify the predicted class for each example in the batch
            probabilities, predicted = torch.max(model_outputs.cpu().data, 1)
            # put all the true labels and predictions to two lists
            all_pred.extend(predicted)
            all_label.extend(batch_output)
            
    accuracy = accuracy_score(all_label, all_pred)
    return accuracy

In [56]:
for epoch in range(MAX_EPOCHS):
    # train the model for one pass over the data
    train_loss = train_function(train_iter)  
    # compute the training accuracy
    train_acc = evaluate(train_iter)
    # compute the validation accuracy
#     val_acc = evaluate(val_iter)
    
    # print the loss for every epoch
#     print('Epoch [{}/{}], Loss: {:.4f}, Training Accuracy: {:.4f}, Validation Accuracy: {:.4f}'.format(epoch+1, MAX_EPOCHS, train_loss, train_acc, val_acc))
    print('Epoch [{}/{}], Loss: {:.4f}, Training Accuracy: {:.4f}'.format(epoch+1, MAX_EPOCHS, train_loss, train_acc))
    
    # save model, optimizer, and number of epoch to a dictionary
    model_save = {
            'epoch': epoch,  # number of epoch
            'model_state_dict': model.state_dict(), # model parameters 
            'optimizer_state_dict': optimizer.state_dict(), # save optimizer 
            'loss': train_loss # training loss
            }

Epoch [1/10], Loss: 0.0615, Training Accuracy: 0.4279
Epoch [2/10], Loss: 0.0499, Training Accuracy: 0.4354
Epoch [3/10], Loss: 0.0447, Training Accuracy: 0.7045
Epoch [4/10], Loss: 0.0398, Training Accuracy: 0.7037
Epoch [5/10], Loss: 0.0401, Training Accuracy: 0.7431
Epoch [6/10], Loss: 0.0370, Training Accuracy: 0.7824
Epoch [7/10], Loss: 0.0373, Training Accuracy: 0.5822
Epoch [8/10], Loss: 0.0399, Training Accuracy: 0.7968
Epoch [9/10], Loss: 0.0372, Training Accuracy: 0.7731
Epoch [10/10], Loss: 0.0376, Training Accuracy: 0.7178
