In [56]:
import torch
import torch.nn as nn
from data_utils import build_tokenizer, build_embedding_matrix, SentenceDataset,Tokenizer, Vocab
from torch.utils.data import DataLoader
from sklearn import metrics
import os

In [20]:
device = 'gpu' if torch.cuda.is_available() else 'cpu'

In [14]:
data_files = ['../data/td_lstm_datasets/Laptops_Train.xml', '../data/td_lstm_datasets/Laptops_Train.xml']
tokenizer = build_tokenizer(
    fnames=data_files,
    max_length=80,
    data_file='datasets/{0}_tokenizer.dat'.format('restaurant'))
embedding_matrix = build_embedding_matrix(
    vocab=tokenizer.vocab,
    embed_dim=200,
    data_file='datasets/{0}d_{1}_embedding_matrix.dat'.format('200', 'restaurant'))
trainset = SentenceDataset(data_files[0] , tokenizer, target_dim=3)
testset = SentenceDataset(data_files[1] , tokenizer, target_dim=3)

loading tokenizer: datasets/restaurant_tokenizer.dat
loading embedding matrix: datasets/200d_restaurant_embedding_matrix.dat


In [17]:
batch_size = 64
embed_dim = 200
hidden_dim = 200
polarities_dim = 3

In [16]:
train_dataloader = DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(dataset=testset, batch_size=batch_size, shuffle=False)

In [22]:
class DynamicLSTM(nn.Module):
    '''
    LSTM which can hold variable length sequence, use like TensorFlow's RNN(input, lenght...).
    '''
    def __init__(self, input_size, hidden_size, num_layers=1, bias=True, batch_first=True, dropout=0,
                 bidirectional=False, only_use_last_hidden_state=False, rnn_type='LSTM'):
        super(DynamicLSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first
        self.dropout = dropout
        self.bidirectional = bidirectional
        self.only_use_last_hidden_state = only_use_last_hidden_state
        self.rnn_type = rnn_type
        
        if self.rnn_type == 'LSTM':
            self.RNN = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                               bias=bias, batch_first=batch_first, dropout=dropout, bidirectional=bidirectional)
        elif self.rnn_type == 'GRU':
            self.RNN = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                              bias=bias, batch_first=batch_first, dropout=dropout, bidirectional=bidirectional)
        elif self.rnn_type == 'RNN':
            self.RNN = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                              bias=bias, batch_first=batch_first, dropout=dropout, bidirectional=bidirectional)
    
    def forward(self, x, x_len):
        '''
        sequence -> sort -> pad and pack -> process using RNN -> unpack -> unsort
        '''
        '''sort'''
        x_sort_idx = torch.sort(x_len, descending=True)[1].long()
        x_unsort_idx = torch.sort(x_sort_idx)[1].long()
        x_len = x_len[x_sort_idx]
        x = x[x_sort_idx]
        '''pack'''
        x_emb_p = torch.nn.utils.rnn.pack_padded_sequence(x, x_len, batch_first=self.batch_first)
        ''' process '''
        if self.rnn_type == 'LSTM':
            out_pack, (ht, ct) = self.RNN(x_emb_p, None)
        else:
            out_pack, ht = self.RNN(x_emb_p, None)
            ct = None
        '''unsort'''
        ht = ht[:, x_unsort_idx]
        if self.only_use_last_hidden_state:
            return ht
        else:
            out, _ = torch.nn.utils.rnn.pad_packed_sequence(out_pack, batch_first=self.batch_first)
            if self.batch_first:
                out = out[x_unsort_idx]
            else:
                out = out[:, x_unsort_idx]
            if self.rnn_type == 'LSTM':
                ct = ct[:, x_unsort_idx]
            return out, (ht, ct)


In [23]:
class LSTM(nn.Module):
    ''' Standard LSTM '''
    def __init__(self, embedding_matrix):
        super(LSTM, self).__init__()
        self.embed = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float))
        self.lstm = DynamicLSTM(embed_dim, hidden_dim, num_layers=1, batch_first=True)
        self.dense = nn.Linear(hidden_dim, polarities_dim)
    
    def forward(self, inputs):
        text = inputs[0]
        x = self.embed(text)
        x_len = torch.sum(text != 0, dim=-1)
        _, (h_n, _) = self.lstm(x, x_len)
        out = self.dense(h_n[0])
        return out

In [26]:
model = LSTM(embedding_matrix).to(device)

In [34]:
def reset_params():
    for p in model.parameters():
        if p.requires_grad:
            if len(p.shape) > 1:
                torch.nn.init.xavier_normal_(p)
            else:
                stdv = 1. / (p.shape[0]**0.5)
                torch.nn.init.uniform_(p, a=-stdv, b=stdv)

In [30]:
n_trainable_params, n_nontrainable_params = 0, 0
for p in model.parameters():
    n_params = torch.prod(torch.tensor(p.shape))
    if p.requires_grad:
        n_trainable_params += n_params
    else:
        n_nontrainable_params += n_params
print('n_trainable_params: {0}, n_nontrainable_params: {1}'.format(n_trainable_params, n_nontrainable_params))

n_trainable_params: 322203, n_nontrainable_params: 627200


In [45]:
epoch = 1
lr=0.001
l2_reg=1e-5
criterion = nn.CrossEntropyLoss()
params = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adam(params, lr=lr, weight_decay=l2_reg)
num_epoch = 20
input_cols = ['text']
log_step = 5
model_name = 'pban'
dataset = 'restaurant'

In [51]:
def train(criterion, optimizer, max_test_acc_overall=0):
    max_test_acc = 0
    max_f1 = 0
    global_step = 0
    for epoch in range(num_epoch):
        print('>' * 50)
        print('epoch:', epoch)
        n_correct, n_total = 0, 0
        for i_batch, sample_batched in enumerate(train_dataloader):
            global_step += 1
            # switch model to training mode, clear gradient accumulators
            model.train()
            optimizer.zero_grad()
            
            inputs = [sample_batched[col].to(device) for col in input_cols]
            outputs = model(inputs)
            targets = sample_batched['polarity'].to(device)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            if global_step % log_step == 0:
                n_correct += (torch.argmax(outputs, -1) == targets).sum().item()
                n_total += len(outputs)
                train_acc = n_correct / n_total
                test_acc, f1 = evaluate()
                if test_acc > max_test_acc:
                    max_test_acc = test_acc
                    if test_acc > max_test_acc_overall:
                        if not os.path.exists('state_dict'):
                            os.mkdir('state_dict')
                        path = './state_dict/{0}_{1}_{2}class_acc{3:.4f}'.format(model_name, dataset, polarities_dim, test_acc)
                        torch.save(model.state_dict(), path)
                        print('model saved:', path)
                if f1 > max_f1:
                    max_f1 = f1
                print('loss: {:.4f}, acc: {:.4f}, test_acc: {:.4f}, f1: {:.4f}'.format(loss.item(), train_acc, test_acc, f1))
    return max_test_acc, max_f1

In [103]:
def evaluate():
    # switch model to evaluation mode
    model.eval()
    n_test_correct, n_test_total = 0, 0
    t_targets_all, t_outputs_all = None, None
    with torch.no_grad():
        for t_batch, t_sample_batched in enumerate(test_dataloader):
            t_inputs = [t_sample_batched[col].to(device) for col in input_cols]
            t_targets = t_sample_batched['polarity'].to(device)
            print(t_inputs)
            break
            t_outputs = model(t_inputs)

            n_test_correct += (torch.argmax(t_outputs, -1) == t_targets).sum().item()
            n_test_total += len(t_outputs)

            t_targets_all = torch.cat((t_targets_all, t_targets), dim=0) if t_targets_all is not None else t_targets
            t_outputs_all = torch.cat((t_outputs_all, t_outputs), dim=0) if t_outputs_all is not None else t_outputs
    test_acc = n_test_correct / n_test_total
    f1 = metrics.f1_score(t_targets_all.cpu(), torch.argmax(t_outputs_all, -1).cpu(), labels=[0, 1, 2], average='macro')
    return test_acc, f1

In [57]:
max_test_acc_overall = 0
max_f1_overall = 0
repeats = 1
for i in range(repeats):
    print('repeat:', i)
    reset_params()
    max_test_acc, max_f1 = train(criterion, optimizer, max_test_acc_overall)
    print('max_test_acc: {0}, max_f1: {1}'.format(max_test_acc, max_f1))
    max_test_acc_overall = max(max_test_acc, max_test_acc_overall)
    max_f1_overall = max(max_f1, max_f1_overall)
    print('#' * 50)
print('max_test_acc_overall:', max_test_acc_overall)
print('max_f1_overall:', max_f1_overall)

repeat: 0
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
epoch: 0
model saved: ./state_dict/pban_restaurant_3class_acc0.4240
loss: 0.9940, acc: 0.5000, test_acc: 0.4240, f1: 0.2005
model saved: ./state_dict/pban_restaurant_3class_acc0.4274
loss: 1.0281, acc: 0.4766, test_acc: 0.4274, f1: 0.2003
model saved: ./state_dict/pban_restaurant_3class_acc0.5249
loss: 1.0342, acc: 0.5000, test_acc: 0.5249, f1: 0.3840
loss: 0.9486, acc: 0.5117, test_acc: 0.4979, f1: 0.3581
loss: 0.9976, acc: 0.4938, test_acc: 0.5026, f1: 0.3358
model saved: ./state_dict/pban_restaurant_3class_acc0.5997
loss: 1.0829, acc: 0.4870, test_acc: 0.5997, f1: 0.4544
model saved: ./state_dict/pban_restaurant_3class_acc0.6280
loss: 0.8143, acc: 0.5134, test_acc: 0.6280, f1: 0.4829
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
epoch: 1
model saved: ./state_dict/pban_restaurant_3class_acc0.6366
loss: 0.9876, acc: 0.5469, test_acc: 0.6366, f1: 0.4911
model saved: ./state_dict/pban_restaurant_3class_acc0.6628
loss: 0.7

loss: 0.2797, acc: 0.9115, test_acc: 0.8909, f1: 0.8777
loss: 0.4040, acc: 0.8984, test_acc: 0.8883, f1: 0.8663
loss: 0.2551, acc: 0.9000, test_acc: 0.8900, f1: 0.8780
loss: 0.3308, acc: 0.8854, test_acc: 0.8926, f1: 0.8818
loss: 0.2624, acc: 0.8884, test_acc: 0.8913, f1: 0.8680
model saved: ./state_dict/pban_restaurant_3class_acc0.9021
loss: 0.3938, acc: 0.8809, test_acc: 0.9021, f1: 0.8837
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
epoch: 13
loss: 0.2881, acc: 0.8906, test_acc: 0.8956, f1: 0.8836
loss: 0.2523, acc: 0.8984, test_acc: 0.9021, f1: 0.8868
loss: 0.1886, acc: 0.9115, test_acc: 0.8965, f1: 0.8736
model saved: ./state_dict/pban_restaurant_3class_acc0.9068
loss: 0.2725, acc: 0.9102, test_acc: 0.9068, f1: 0.8950
loss: 0.2241, acc: 0.9062, test_acc: 0.9042, f1: 0.8926
loss: 0.2486, acc: 0.9010, test_acc: 0.8999, f1: 0.8778
loss: 0.3338, acc: 0.8996, test_acc: 0.9029, f1: 0.8860
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
epoch: 14
loss: 0.2134, acc: 0.9062, test_

In [80]:
for i in test_dataloader:
    print(i)
    break

{'text': tensor([[ 820, 1214, 2046,  ...,    0,    0,    0],
        [ 820, 1214, 2046,  ...,    0,    0,    0],
        [1323,  243, 2856,  ...,    0,    0,    0],
        ...,
        [ 311, 2171, 1323,  ...,    0,    0,    0],
        [1323, 1286, 1677,  ...,    0,    0,    0],
        [  68, 2888, 2209,  ...,    0,    0,    0]]), 'aspect': tensor([[2434,    0,    0,  ...,    0,    0,    0],
        [1787, 1888,    0,  ...,    0,    0,    0],
        [2512, 2148,    0,  ...,    0,    0,    0],
        ...,
        [ 621,    0,    0,  ...,    0,    0,    0],
        [1286,    0,    0,  ...,    0,    0,    0],
        [1726, 1884,    0,  ...,    0,    0,    0]]), 'position': tensor([[ 9,  8,  7,  ...,  0,  0,  0],
        [16, 15, 14,  ...,  0,  0,  0],
        [ 6,  5,  4,  ...,  0,  0,  0],
        ...,
        [ 3,  2,  1,  ...,  0,  0,  0],
        [ 1,  0,  1,  ...,  0,  0,  0],
        [ 8,  7,  6,  ...,  0,  0,  0]]), 'polarity': tensor([2, 0, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [140]:
polarity_dict = {0: 'positive', 1: 'negative', 2:'neutral'}
sample_data = torch.tensor(tokenizer.text_to_sequence("Keyboard is great, very quiet for all the typing that I do."))
output = model(sample_data.reshape(1,1,-1))
polarity_dict[int(torch.argmax(output, -1))]

'positive'