In [1]:
import torch
import torch.nn as nn
from data_utils import build_tokenizer, build_embedding_matrix, SentenceDataset,Tokenizer, Vocab
from torch.utils.data import DataLoader
from sklearn import metrics
import os
from torch.utils.tensorboard import SummaryWriter
import torch.nn.functional as F


In [2]:
device = 'gpu' if torch.cuda.is_available() else 'cpu'

In [3]:
# data_files = ['../data/td_lstm_datasets/Laptops_Train.xml', '../data/td_lstm_datasets/Laptops_Test.xml']
data_files = ['../data/td_lstm_datasets/Restaurants_Train.xml', '../data/td_lstm_datasets/Restaurants_Test.xml']
tokenizer = build_tokenizer(
    fnames=data_files,
    max_length=80,
    data_file='datasets/{0}_tokenizer.dat'.format('restaurants'))
embedding_matrix = build_embedding_matrix(
    vocab=tokenizer.vocab,
    embed_dim=200,
    data_file='datasets/{0}d_{1}_embedding_matrix.dat'.format('200', 'restaurants'))
trainset = SentenceDataset(data_files[0] , tokenizer, target_dim=3)
testset = SentenceDataset(data_files[1] , tokenizer, target_dim=3)

loading tokenizer: datasets/restaurants_tokenizer.dat
loading embedding matrix: datasets/200d_restaurants_embedding_matrix.dat


#### Parameters needs to be set before runnning this model

In [4]:
epoch = 1
lr=0.001
l2_reg=1e-5
num_epoch = 20
input_cols = ['text']
log_step = 5
model_name = 'lstm'
dataset = 'restaurant'
batch_size = 64
embed_dim = 200
hidden_dim = 200
polarities_dim = 3
polarity_dict = {0: 'positive', 1: 'negative', 2:'neutral'}

In [5]:
train_dataloader = DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(dataset=testset, batch_size=batch_size, shuffle=False)

In [6]:
polarity_count_train = {'positive':0, 'negative': 1, 'neutral':2}
polarity_count_test = {'positive':0, 'negative': 1, 'neutral':2}
for i in train_dataloader:
    for polarity in [polarity_dict[int(j)] for j in i['polarity']]:
        polarity_count_train[polarity] += 1
for i in test_dataloader:
    for polarity in [polarity_dict[int(j)] for j in i['polarity']]:
        polarity_count_test[polarity] += 1
print("Training dataset : " , polarity_count_train)
print("Testing dataset : " , polarity_count_test)

Training dataset :  {'positive': 2164, 'negative': 808, 'neutral': 639}
Testing dataset :  {'positive': 728, 'negative': 197, 'neutral': 198}


In [7]:
class DynamicLSTM(nn.Module):
    '''
    LSTM which can hold variable length sequence, use like TensorFlow's RNN(input, lenght...).
    '''
    def __init__(self, input_size, hidden_size, num_layers=1, bias=True, batch_first=True, dropout=0,
                 bidirectional=False, only_use_last_hidden_state=False, rnn_type='LSTM'):
        super(DynamicLSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first
        self.dropout = dropout
        self.bidirectional = bidirectional
        self.only_use_last_hidden_state = only_use_last_hidden_state
        self.rnn_type = rnn_type
        
        if self.rnn_type == 'LSTM':
            self.RNN = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                               bias=bias, batch_first=batch_first, dropout=dropout, bidirectional=bidirectional)
        elif self.rnn_type == 'GRU':
            self.RNN = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                              bias=bias, batch_first=batch_first, dropout=dropout, bidirectional=bidirectional)
        elif self.rnn_type == 'RNN':
            self.RNN = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                              bias=bias, batch_first=batch_first, dropout=dropout, bidirectional=bidirectional)
    
    def forward(self, x, x_len):
        '''
        sequence -> sort -> pad and pack -> process using RNN -> unpack -> unsort
        '''
        '''sort'''
        x_sort_idx = torch.sort(x_len, descending=True)[1].long()
        x_unsort_idx = torch.sort(x_sort_idx)[1].long()
        x_len = x_len[x_sort_idx]
        x = x[x_sort_idx]
        '''pack'''
        x_emb_p = torch.nn.utils.rnn.pack_padded_sequence(x, x_len, batch_first=self.batch_first)
        ''' process '''
        if self.rnn_type == 'LSTM':
            out_pack, (ht, ct) = self.RNN(x_emb_p, None)
        else:
            out_pack, ht = self.RNN(x_emb_p, None)
            ct = None
        '''unsort'''
        ht = ht[:, x_unsort_idx]
        if self.only_use_last_hidden_state:
            return ht
        else:
            out, _ = torch.nn.utils.rnn.pad_packed_sequence(out_pack, batch_first=self.batch_first)
            if self.batch_first:
                out = out[x_unsort_idx]
            else:
                out = out[:, x_unsort_idx]
            if self.rnn_type == 'LSTM':
                ct = ct[:, x_unsort_idx]
            return out, (ht, ct)


In [8]:
class LSTM(nn.Module):
    ''' Standard LSTM '''
    def __init__(self, embedding_matrix):
        super(LSTM, self).__init__()
        self.embed = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float))
        self.lstm = DynamicLSTM(embed_dim, hidden_dim, num_layers=1, batch_first=True)
        self.dense = nn.Linear(hidden_dim, polarities_dim)
    
    def forward(self, inputs):
        text = inputs[0]
        x = self.embed(text)
        x_len = torch.sum(text != 0, dim=-1)
        _, (h_n, _) = self.lstm(x, x_len)
        out = self.dense(h_n[0])
        return out

In [9]:
model = LSTM(embedding_matrix).to(device)

In [10]:
criterion = nn.CrossEntropyLoss()
params = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adam(params, lr=lr, weight_decay=l2_reg)
writer = SummaryWriter(f"runs/LSTM/BatchSize {batch_size} LR {lr}")

In [11]:
def reset_params(model):
    for p in model.parameters():
        if p.requires_grad:
            if len(p.shape) > 1:
                torch.nn.init.xavier_normal_(p)
            else:
                stdv = 1. / (p.shape[0]**0.5)
                torch.nn.init.uniform_(p, a=-stdv, b=stdv)

In [12]:
n_trainable_params, n_nontrainable_params = 0, 0
for p in model.parameters():
    n_params = torch.prod(torch.tensor(p.shape))
    if p.requires_grad:
        n_trainable_params += n_params
    else:
        n_nontrainable_params += n_params
print('n_trainable_params: {0}, n_nontrainable_params: {1}'.format(n_trainable_params, n_nontrainable_params))

n_trainable_params: 322203, n_nontrainable_params: 889000


In [13]:
def train(model, criterion, optimizer, writer, max_test_acc_overall=0, model_name='LSTM'):
    max_test_acc = 0
    max_f1 = 0
    global_step = 0
    for epoch in range(num_epoch):
        print('>' * 50)
        print('epoch:', epoch)
        n_correct, n_total = 0, 0
        for i_batch, sample_batched in enumerate(train_dataloader):
            global_step += 1
            # switch model to training mode, clear gradient accumulators
            model.train()
            optimizer.zero_grad()
            
            inputs = [sample_batched[col].to(device) for col in input_cols]
            outputs = model(inputs)
            targets = sample_batched['polarity'].to(device)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            
            writer.add_scalar("Training loss", loss, global_step=global_step)
            

            if global_step % log_step == 0:
                n_correct += (torch.argmax(outputs, -1) == targets).sum().item()
                n_total += len(outputs)
                train_acc = n_correct / n_total
                writer.add_scalar("Training Accuracy", 
                                  train_acc,
                                  global_step=global_step)
                test_acc, f1 = evaluate(model, writer, global_step)
                if test_acc > max_test_acc:
                    max_test_acc = test_acc
                    if test_acc > max_test_acc_overall:
                        if not os.path.exists('state_dict'):
                            os.mkdir('state_dict')
                        path = './state_dict/{0}_{1}_{2}class_acc{3:.4f}'.format(model_name, dataset, polarities_dim, test_acc)
                        torch.save(model.state_dict(), path)
                        print('model saved:', path)
                if f1 > max_f1:
                    max_f1 = f1
                print('loss: {:.4f}, acc: {:.4f}, test_acc: {:.4f}, f1: {:.4f}'.format(loss.item(), train_acc, test_acc, f1))
    return max_test_acc, max_f1

In [14]:
def evaluate(model, writer, step):
    # switch model to evaluation mode
    model.eval()
    n_test_correct, n_test_total = 0, 0
    t_targets_all, t_outputs_all = None, None
    with torch.no_grad():
        for t_batch, t_sample_batched in enumerate(test_dataloader):
            t_inputs = [t_sample_batched[col].to(device) for col in input_cols]
            t_targets = t_sample_batched['polarity'].to(device)
            t_outputs = model(t_inputs)

            n_test_correct += (torch.argmax(t_outputs, -1) == t_targets).sum().item()
            n_test_total += len(t_outputs)

            t_targets_all = torch.cat((t_targets_all, t_targets), dim=0) if t_targets_all is not None else t_targets
            t_outputs_all = torch.cat((t_outputs_all, t_outputs), dim=0) if t_outputs_all is not None else t_outputs
    test_acc = n_test_correct / n_test_total
    writer.add_scalar("Testing Accuracy", 
                                  test_acc,
                                  global_step=step)
    f1 = metrics.f1_score(t_targets_all.cpu(), torch.argmax(t_outputs_all, -1).cpu(), labels=[0, 1, 2], average='macro')
    return test_acc, f1

In [15]:
def run(model, writer):
    max_test_acc_overall = 0
    max_f1_overall = 0
    repeats = 1
    for i in range(repeats):
        print('repeat:', i)
        reset_params(model)
        max_test_acc, max_f1 = train(model, criterion, optimizer, writer, max_test_acc_overall)
        print('max_test_acc: {0}, max_f1: {1}'.format(max_test_acc, max_f1))
        max_test_acc_overall = max(max_test_acc, max_test_acc_overall)
        max_f1_overall = max(max_f1, max_f1_overall)
        print('#' * 50)
    print('max_test_acc_overall:', max_test_acc_overall)
    print('max_f1_overall:', max_f1_overall)

In [16]:
run(model, writer)

repeat: 0
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
epoch: 0
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6500
loss: 0.9797, acc: 0.5938, test_acc: 0.6500, f1: 0.2626
loss: 1.0987, acc: 0.5391, test_acc: 0.6500, f1: 0.2626
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6527
loss: 1.0126, acc: 0.5156, test_acc: 0.6527, f1: 0.2733
loss: 0.8358, acc: 0.5547, test_acc: 0.6518, f1: 0.2730
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6554
loss: 1.0673, acc: 0.5375, test_acc: 0.6554, f1: 0.2953
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6687
loss: 0.9604, acc: 0.5443, test_acc: 0.6687, f1: 0.4316
loss: 0.8586, acc: 0.5625, test_acc: 0.6687, f1: 0.3545
model saved: ./state_dict/LSTM_restaurant_3class_acc0.7036
loss: 0.9229, acc: 0.5586, test_acc: 0.7036, f1: 0.4815
model saved: ./state_dict/LSTM_restaurant_3class_acc0.7116
loss: 0.9131, acc: 0.5625, test_acc: 0.7116, f1: 0.4838
loss: 0.6657, acc: 0.5781, test_acc: 0.6857, f1: 0.3936
loss: 0.6724, ac

loss: 0.3285, acc: 0.8496, test_acc: 0.7366, f1: 0.5942
loss: 0.4388, acc: 0.8385, test_acc: 0.7411, f1: 0.5921
loss: 0.3605, acc: 0.8453, test_acc: 0.7429, f1: 0.6105
loss: 0.3523, acc: 0.8466, test_acc: 0.7411, f1: 0.6236
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
epoch: 11
loss: 0.4242, acc: 0.8438, test_acc: 0.7429, f1: 0.5817
loss: 0.3894, acc: 0.8438, test_acc: 0.7384, f1: 0.5831
loss: 0.3296, acc: 0.8490, test_acc: 0.7295, f1: 0.6007
loss: 0.2688, acc: 0.8516, test_acc: 0.7286, f1: 0.5831
loss: 0.4536, acc: 0.8500, test_acc: 0.7286, f1: 0.5819
loss: 0.5041, acc: 0.8255, test_acc: 0.7312, f1: 0.5923
loss: 0.3244, acc: 0.8326, test_acc: 0.7259, f1: 0.6152
loss: 0.3268, acc: 0.8359, test_acc: 0.7464, f1: 0.6082
loss: 0.3493, acc: 0.8403, test_acc: 0.7214, f1: 0.5734
loss: 0.2356, acc: 0.8438, test_acc: 0.7286, f1: 0.5884
loss: 0.4331, acc: 0.8381, test_acc: 0.7170, f1: 0.5691
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
epoch: 12
loss: 0.2426, acc: 0.9219, test_acc: 0

### Run the latest saved model 

In [17]:
latest_file = sorted([os.path.join('state_dict',path) for path in os.listdir('state_dict')], key=os.path.getmtime)[-1]
checkpoints = torch.load(latest_file)
model.load_state_dict(checkpoints)
model

LSTM(
  (embed): Embedding(4445, 200)
  (lstm): DynamicLSTM(
    (RNN): LSTM(200, 200, batch_first=True)
  )
  (dense): Linear(in_features=200, out_features=3, bias=True)
)

In [18]:
sample_data = torch.tensor(tokenizer.text_to_sequence("Keyboard is great, very quiet for all the typing that I do."))
output = model(sample_data.reshape(1,1,-1))
polarity_dict[int(torch.argmax(output, -1))]

'positive'

#### Parameters needs to be set before runnning this model

In [19]:
epoch = 1
lr=0.001
l2_reg=1e-5
num_epoch = 20
input_cols = ['text', 'aspect']
log_step = 5
model_name = 'ae_lstm'
dataset = 'restaurant'
batch_size = 64
embed_dim = 200
hidden_dim = 200
polarities_dim = 3
polarity_dict = {0: 'positive', 1: 'negative', 2:'neutral'}

In [20]:
class SqueezeEmbedding(nn.Module):
    '''
    Squeeze sequence embedding length to the longest one in the batch
    '''
    def __init__(self, batch_first=True):
        super(SqueezeEmbedding, self).__init__()
        self.batch_first = batch_first
    
    def forward(self, x, x_len):
        '''
        sequence -> sort -> pad and pack -> unpack -> unsort
        '''
        '''sort'''
        x_sort_idx = torch.sort(x_len, descending=True)[1].long()
        x_unsort_idx = torch.sort(x_sort_idx)[1].long()
        x_len = x_len[x_sort_idx]
        x = x[x_sort_idx]
        '''pack'''
        x_emb_p = torch.nn.utils.rnn.pack_padded_sequence(x, x_len, batch_first=self.batch_first)
        '''unpack'''
        out, _ = torch.nn.utils.rnn.pad_packed_sequence(x_emb_p, batch_first=self.batch_first)
        if self.batch_first:
            out = out[x_unsort_idx]
        else:
            out = out[:, x_unsort_idx]
        return out

In [21]:
class AE_LSTM(nn.Module):
    ''' LSTM with Aspect Embedding '''
    def __init__(self, embedding_matrix):
        super(AE_LSTM, self).__init__()
        self.embed = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float))
        self.squeeze_embedding = SqueezeEmbedding()
        self.lstm = DynamicLSTM(embed_dim*2, hidden_dim, num_layers=1, batch_first=True)
        self.dense = nn.Linear(hidden_dim, polarities_dim)
    
    def forward(self, inputs):
        text, aspect_text = inputs[0], inputs[1]
        x_len = torch.sum(text != 0, dim=-1)
        x_len_max = torch.max(x_len)
        aspect_len = torch.sum(aspect_text != 0, dim=-1).float()
        
        x = self.embed(text)
        x = self.squeeze_embedding(x, x_len)
        aspect = self.embed(aspect_text)
        aspect_pool = torch.div(torch.sum(aspect, dim=1), aspect_len.view(aspect_len.size(0), 1))
        aspect = torch.unsqueeze(aspect_pool, dim=1).expand(-1, x_len_max, -1)
        x = torch.cat((aspect, x), dim=-1)
        
        _, (h_n, _) = self.lstm(x, x_len)
        out = self.dense(h_n[0])
        return out

In [22]:
model_AE = AE_LSTM(embedding_matrix).to(device)

In [23]:
model_AE

AE_LSTM(
  (embed): Embedding(4445, 200)
  (squeeze_embedding): SqueezeEmbedding()
  (lstm): DynamicLSTM(
    (RNN): LSTM(400, 200, batch_first=True)
  )
  (dense): Linear(in_features=200, out_features=3, bias=True)
)

In [24]:
criterion = nn.CrossEntropyLoss()
params = filter(lambda p: p.requires_grad, model_AE.parameters())
optimizer = torch.optim.Adam(params, lr=lr, weight_decay=l2_reg)
writer_AE = SummaryWriter(f"runs/AE_LSTM/BatchSize {batch_size} LR {lr}")

In [25]:
run(model_AE , writer_AE)

repeat: 0
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
epoch: 0
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6500
loss: 1.0001, acc: 0.6250, test_acc: 0.6500, f1: 0.2626
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6589
loss: 0.9891, acc: 0.5781, test_acc: 0.6589, f1: 0.3261
loss: 0.8990, acc: 0.6042, test_acc: 0.6491, f1: 0.4143
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6696
loss: 0.9310, acc: 0.6016, test_acc: 0.6696, f1: 0.3654
loss: 0.9313, acc: 0.6000, test_acc: 0.6607, f1: 0.3242
loss: 0.8754, acc: 0.6094, test_acc: 0.6607, f1: 0.3262
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6732
loss: 0.7158, acc: 0.6250, test_acc: 0.6732, f1: 0.3840
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6830
loss: 0.8197, acc: 0.6289, test_acc: 0.6830, f1: 0.4338
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6848
loss: 0.8430, acc: 0.6337, test_acc: 0.6848, f1: 0.4355
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6893
loss: 0.7542,

loss: 0.1931, acc: 0.9375, test_acc: 0.7580, f1: 0.6364
loss: 0.1698, acc: 0.9453, test_acc: 0.7598, f1: 0.6450
loss: 0.0787, acc: 0.9531, test_acc: 0.7607, f1: 0.6342
loss: 0.0822, acc: 0.9570, test_acc: 0.7536, f1: 0.6240
loss: 0.0526, acc: 0.9656, test_acc: 0.7482, f1: 0.6347
loss: 0.1479, acc: 0.9609, test_acc: 0.7580, f1: 0.6423
loss: 0.1523, acc: 0.9554, test_acc: 0.7554, f1: 0.6203
loss: 0.1667, acc: 0.9531, test_acc: 0.7518, f1: 0.6303
loss: 0.1054, acc: 0.9531, test_acc: 0.7268, f1: 0.6261
loss: 0.2059, acc: 0.9500, test_acc: 0.7464, f1: 0.6152
loss: 0.2988, acc: 0.9460, test_acc: 0.7446, f1: 0.5984
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
epoch: 11
loss: 0.1377, acc: 0.9844, test_acc: 0.7411, f1: 0.6329
loss: 0.0771, acc: 0.9844, test_acc: 0.7366, f1: 0.6107
loss: 0.0763, acc: 0.9740, test_acc: 0.7509, f1: 0.6210
loss: 0.1150, acc: 0.9727, test_acc: 0.7527, f1: 0.6233
loss: 0.1075, acc: 0.9688, test_acc: 0.7473, f1: 0.6362
loss: 0.0590, acc: 0.9714, test_acc: 0.7562

In [26]:
sample_data = torch.tensor(tokenizer.text_to_sequence("MS Office 2011 for Mac is wonderful, well worth it.")).reshape(1,-1)
sample_aspect = torch.tensor(tokenizer.text_to_sequence('MS Office 2011 for Mac').reshape(1,-1))
data = [sample_data, sample_aspect]
output = model_AE(data)
polarity_dict[int(torch.argmax(output, -1))]

'positive'

In [27]:
max_length = 80
position_dim = 100

In [28]:
max_length

80

In [29]:
class PBAN(nn.Module):
    ''' Position-aware bidirectional attention network '''
    def __init__(self, embedding_matrix):
        super(PBAN, self).__init__()
        self.text_embed = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float))
        self.pos_embed = nn.Embedding(max_length, position_dim)
        self.left_gru = DynamicLSTM(embed_dim, hidden_dim, num_layers=1, 
                                    batch_first=True, bidirectional=True, rnn_type='GRU')
        self.right_gru = DynamicLSTM(embed_dim+position_dim, hidden_dim, num_layers=1, 
                                     batch_first=True, bidirectional=True, rnn_type='GRU')
        self.weight_m = nn.Parameter(torch.Tensor(hidden_dim*2, hidden_dim*2))
        self.bias_m = nn.Parameter(torch.Tensor(1))
        self.weight_n = nn.Parameter(torch.Tensor(hidden_dim*2, hidden_dim*2))
        self.bias_n = nn.Parameter(torch.Tensor(1))
        self.w_r = nn.Linear(hidden_dim*2, hidden_dim)
        self.w_s = nn.Linear(hidden_dim, polarities_dim)
    
    def forward(self, inputs):
        text, aspect_text, position_tag = inputs[0], inputs[1], inputs[2]
        ''' Sentence representation '''
        x = self.text_embed(text)
        position = self.pos_embed(position_tag)
        x_len = torch.sum(text != 0, dim=-1)
        x = torch.cat((position, x), dim=-1)
        h_x, _ = self.right_gru(x, x_len)
        ''' Aspect term representation '''
        aspect = self.text_embed(aspect_text)
        aspect_len = torch.sum(aspect_text != 0, dim=-1)
        h_t, _ = self.left_gru(aspect, aspect_len)
        ''' Aspect term to position-aware sentence attention '''
        alpha = F.softmax(torch.tanh(torch.add(torch.bmm(torch.matmul(h_t, self.weight_m), torch.transpose(h_x, 1, 2)), self.bias_m)), dim=1)
        s_x = torch.bmm(alpha, h_x)
        ''' Position-aware sentence attention to aspect term '''
        h_x_pool = torch.unsqueeze(torch.div(torch.sum(h_x, dim=1), x_len.float().view(x_len.size(0), 1)), dim=1)
        gamma = F.softmax(torch.tanh(torch.add(torch.bmm(torch.matmul(h_x_pool, self.weight_n), torch.transpose(h_t, 1, 2)), self.bias_n)), dim=1)
        h_r = torch.squeeze(torch.bmm(gamma, s_x), dim=1)
        ''' Output transform '''
        out = torch.tanh(self.w_r(h_r))
        out = self.w_s(out)
        return out

In [30]:
model_PBAN = PBAN(embedding_matrix).to(device)

In [31]:
model_PBAN

PBAN(
  (text_embed): Embedding(4445, 200)
  (pos_embed): Embedding(80, 100)
  (left_gru): DynamicLSTM(
    (RNN): GRU(200, 200, batch_first=True, bidirectional=True)
  )
  (right_gru): DynamicLSTM(
    (RNN): GRU(300, 200, batch_first=True, bidirectional=True)
  )
  (w_r): Linear(in_features=400, out_features=200, bias=True)
  (w_s): Linear(in_features=200, out_features=3, bias=True)
)

In [32]:
epoch = 1
lr=0.001
l2_reg=1e-5
num_epoch = 20
input_cols = ['text', 'aspect', 'position']
log_step = 5
model_name = 'pban_lstm'
dataset = 'restaurant'
batch_size = 64
embed_dim = 200
hidden_dim = 200
polarities_dim = 3
polarity_dict = {0: 'positive', 1: 'negative', 2:'neutral'}

In [33]:
criterion = nn.CrossEntropyLoss()
params = filter(lambda p: p.requires_grad, model_PBAN.parameters())
optimizer = torch.optim.Adam(params, lr=lr, weight_decay=l2_reg)
writer_PBAN = SummaryWriter(f"runs/PBAN_LSTM/BatchSize {batch_size} LR {lr}")

In [34]:
run(model_PBAN , writer_PBAN)

repeat: 0
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
epoch: 0
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6500
loss: 1.1307, acc: 0.5469, test_acc: 0.6500, f1: 0.2691
loss: 0.8421, acc: 0.5938, test_acc: 0.6500, f1: 0.2626
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6562
loss: 1.0627, acc: 0.5469, test_acc: 0.6562, f1: 0.2867
loss: 0.8875, acc: 0.5586, test_acc: 0.6500, f1: 0.2626
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6759
loss: 0.8043, acc: 0.5875, test_acc: 0.6759, f1: 0.3940
model saved: ./state_dict/LSTM_restaurant_3class_acc0.6920
loss: 0.8958, acc: 0.5781, test_acc: 0.6920, f1: 0.4041
loss: 0.8117, acc: 0.5848, test_acc: 0.6696, f1: 0.3415
loss: 0.9652, acc: 0.5801, test_acc: 0.6830, f1: 0.3817
model saved: ./state_dict/LSTM_restaurant_3class_acc0.7036
loss: 0.7061, acc: 0.5938, test_acc: 0.7036, f1: 0.4427
loss: 0.6050, acc: 0.6094, test_acc: 0.7027, f1: 0.4424
loss: 0.6967, acc: 0.6222, test_acc: 0.6991, f1: 0.4672
>>>>>>>>>>>>>>>>>>>

loss: 0.2652, acc: 0.9062, test_acc: 0.7634, f1: 0.6302
loss: 0.3887, acc: 0.8906, test_acc: 0.7812, f1: 0.6654
model saved: ./state_dict/LSTM_restaurant_3class_acc0.7839
loss: 0.2881, acc: 0.8958, test_acc: 0.7839, f1: 0.6728
model saved: ./state_dict/LSTM_restaurant_3class_acc0.7857
loss: 0.5305, acc: 0.8711, test_acc: 0.7857, f1: 0.6772
loss: 0.3146, acc: 0.8719, test_acc: 0.7688, f1: 0.6405
loss: 0.3242, acc: 0.8698, test_acc: 0.7786, f1: 0.6690
loss: 0.4808, acc: 0.8571, test_acc: 0.7616, f1: 0.6273
loss: 0.4441, acc: 0.8516, test_acc: 0.7688, f1: 0.6523
loss: 0.4525, acc: 0.8472, test_acc: 0.7839, f1: 0.6766
loss: 0.3154, acc: 0.8500, test_acc: 0.7821, f1: 0.6717
loss: 0.3845, acc: 0.8509, test_acc: 0.7759, f1: 0.6641
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
epoch: 11
loss: 0.2400, acc: 0.8750, test_acc: 0.7741, f1: 0.6663
loss: 0.3753, acc: 0.8750, test_acc: 0.7616, f1: 0.6425
loss: 0.5164, acc: 0.8646, test_acc: 0.7616, f1: 0.6319
loss: 0.3767, acc: 0.8594, test_acc: 