In [2]:
import torch
!pip install transformers
import transformers
from transformers import BertTokenizer

from ast import literal_eval
import torch.utils.data as utils

import random
import argparse
import numpy as np

from sklearn.model_selection import train_test_split

import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from transformers import BertModel, BertForSequenceClassification

import copy
from sklearn.metrics import f1_score

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m92.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m107.9 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m28.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.28.1


# Tokenizer

In [3]:
def tokenize_sentences(bert_tokenizer, sentences, aspects, maxlen):
    """ converts sentences into ids according to bert tokenizer

    Arguments:
    bert_tokenizer (Tokenizer): Pretrained BERT Tokenizer
    sentences (list): List of sentences
    aspects (list): List of Aspects
    maxlen (int): Maximum Length of a sentence

    """
    input_ids = []
    attention_masks = []
    token_type_ids = []

    for sentence, aspect in zip(sentences, aspects):

        encoded = bert_tokenizer.encode_plus(text=sentence,
                                             text_pair=aspect,
                                             truncation=True,
                                             add_special_tokens=True,
                                             max_length=maxlen,
                                             pad_to_max_length=True,
                                             return_attention_mask=True,
                                             return_token_type_ids=True,
                                             return_tensors='pt')

        input_ids.append(encoded['input_ids'])
        attention_masks.append(encoded['attention_mask'])
        token_type_ids.append(encoded['token_type_ids'])

    input_ids = torch.cat(input_ids, dim=0, out=None)
    attention_masks = torch.cat(attention_masks, dim=0, out=None)
    token_type_ids = torch.cat(token_type_ids, dim=0, out=None)

    return input_ids, attention_masks, token_type_ids

In [4]:
SEED = 1345
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
numclasses = 3
dataset = "laptop"
datapath = "Data/atsa-laptop/"

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(DEVICE)

cuda


In [7]:
def load_semeval(dataset, num_classes, data_path=None):
    """ Loads SemEval 14 datasets

    Arguments:
        dataset (str): Name of the datsets
        num_classes (int): Number of classes to consider
        data_path (str): Path to the dataset

    """
    label = {'negative': 0,
             'positive': 1,
             'neutral': 2,
             'conflict': 3}

    if(data_path is None):
        data_path = 'Data/atsa-laptop/'

    
    train_file = datapath + 'atsa_train.json'
    test_file = datapath + 'atsa_test.json'

    temp = open(train_file, 'r', encoding='latin1').read()
    train = literal_eval(temp)
    train_sentence = []
    train_aspect = []
    train_sentiment = []
    for xml in train:
        if(xml['sentiment'] == 'conflict' and num_classes == 3):
            continue
        train_sentence.append(xml['sentence'])
        train_aspect.append(xml['aspect'])
        train_sentiment.append(label[xml['sentiment']])

    temp = open(test_file, 'r', encoding='latin1').read()
    test = literal_eval(temp)
    test_sentence = []
    test_aspect = []
    test_sentiment = []
    for xml in test:
        if(xml['sentiment'] == 'conflict' and num_classes == 3):
            continue
        test_sentence.append(xml['sentence'])
        test_aspect.append(xml['aspect'])
        test_sentiment.append(label[xml['sentiment']])

    train_sen_len = [len(sentence.split()) for sentence in train_sentence]
    train_asp_len = [len(aspect.split()) for aspect in train_aspect]

    print("----------------------------------------")
    print("Maximum Training data Sentence Length: {}".format(
                                                       max(train_sen_len)))
    print("Maximum Training data Aspect Length: {}".format(max(train_asp_len)))
    print("----------------------------------------")

    return (train_sentence, train_aspect, train_sentiment, test_sentence,
            test_aspect, test_sentiment)


def get_loader(input_ids, attention_masks, token_type_ids, labels, batchsize):
    """ Converts input values into a dataloader

    Arguments:
    input_ids (Tensors): Sentences converted to input ids
    attention_masks (Tensors): Attention masks of the words in a sentence
    token_type_ids (Tensors): Token ids for pair of sentences
    labels (Tensors): Labels of the Dataset
    batchsize (int): Batch size to train

    """
    array = utils.TensorDataset(input_ids, attention_masks, token_type_ids,
                                labels)
    loader = utils.DataLoader(array, batch_size=batchsize)
    return loader

def get_loader_news(input_ids, attention_masks, token_type_ids, batchsize):
    """ Converts input values into a dataloader

    Arguments:
    input_ids (Tensors): Sentences converted to input ids
    attention_masks (Tensors): Attention masks of the words in a sentence
    token_type_ids (Tensors): Token ids for pair of sentences
    labels (Tensors): Labels of the Dataset
    batchsize (int): Batch size to train

    """
    array = utils.TensorDataset(input_ids, attention_masks, token_type_ids)
    loader = utils.DataLoader(array, batch_size=batchsize)
    return loader

In [8]:
(train_sentence, train_aspect, train_sentiment,
         test_sentence, test_aspect, test_sentiment) = load_semeval(dataset,
                                                                    numclasses,
                                                                    datapath)
(train_sentence, dev_sentence, train_aspect, dev_aspect,
 train_sentiment, dev_sentiment) = train_test_split(train_sentence,
                                                    train_aspect,
                                                    train_sentiment,
                                                    test_size=0.1,
                                                    random_state=42)

----------------------------------------
Maximum Training data Sentence Length: 69
Maximum Training data Aspect Length: 19
----------------------------------------


In [9]:
maxlen = 80
batch_size = 8

print("Training Data size: {}".format(len(train_sentence)))
print("Validation Data size: {}".format(len(dev_sentence)))
print("Test Data size: {}".format(len(test_sentence)))
print("------------------------------------------------")
# Returns Pretrained BERT Tokenizer
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                                   do_lower_case=True
                                                   )

print("Tokenizing training data")
(train_input_ids, train_attention_masks,
 train_token_type_ids) = tokenize_sentences(bert_tokenizer,
                                            train_sentence,
                                            train_aspect,
                                            maxlen)
train_labels = torch.from_numpy(np.asarray(train_sentiment, 'int32'))

print("Tokenizing validation data")
(dev_input_ids, dev_attention_masks,
 dev_token_type_ids) = tokenize_sentences(bert_tokenizer,
                                          dev_sentence,
                                          dev_aspect,
                                          maxlen)
dev_labels = torch.from_numpy(np.asarray(dev_sentiment, 'int32'))

print("Tokenizing test data")
(test_input_ids, test_attention_masks,
 test_token_type_ids) = tokenize_sentences(bert_tokenizer,
                                           test_sentence,
                                           test_aspect,
                                           maxlen)
test_labels = torch.from_numpy(np.asarray(test_sentiment, 'int32'))
print("-------------------------------------------------")
train_loader = get_loader(train_input_ids, train_attention_masks,
                          train_token_type_ids, train_labels,
                          batch_size)
dev_loader = get_loader(dev_input_ids, dev_attention_masks,
                        dev_token_type_ids, dev_labels,
                        batch_size)
test_loader = get_loader(test_input_ids, test_attention_masks,
                         test_token_type_ids, test_labels,
                         batch_size)


Training Data size: 3241
Validation Data size: 361
Test Data size: 1120
------------------------------------------------


Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Tokenizing training data


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Tokenizing validation data
Tokenizing test data
-------------------------------------------------


# Model

In [10]:
# MODELS: base architecture based on https://github.com/avinashsai/BERT-Aspect

fchidden = 256
hiddendim_lstm = 128
embeddim = 768
numlayers = 12
  
class Bert_LSTM(nn.Module):
    def __init__(self, numclasses):
        super(Bert_LSTM, self).__init__()
        print("USING BERT_LSTM")
        self.numclasses = numclasses
        self.embeddim = embeddim
        self.numlayers = numlayers
        self.hiddendim_lstm = hiddendim_lstm
        self.dropout = nn.Dropout(0.1)

        self.bert = BertModel.from_pretrained('bert-base-uncased',
                                              output_hidden_states=True,
                                              output_attentions=False)
        self.lstm = nn.LSTM(self.embeddim, self.hiddendim_lstm, batch_first=True) # noqa
        self.fc = nn.Linear(self.hiddendim_lstm, self.numclasses)

    def forward(self, inp_ids, att_mask, token_ids):
        o = self.bert(input_ids=inp_ids,
                                          attention_mask=att_mask,
                                          token_type_ids=token_ids)
        hidden_states = o["hidden_states"]
        
        hidden_states = torch.stack([hidden_states[layer_i][:, 0].squeeze()
                                     for layer_i in range(0, self.numlayers)], dim=-1) # noqa
        hidden_states = hidden_states.view(-1, self.numlayers, self.embeddim)
        out, _ = self.lstm(hidden_states, None)
        out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        return out

In [11]:
#try with post trained model
class Bert_PT_LSTM(nn.Module):
    def __init__(self, numclasses):
        super(Bert_PT_LSTM, self).__init__()
        print("USING BERT_PT_LSTM")
        self.numclasses = numclasses
        self.embeddim = embeddim
        self.numlayers = numlayers
        self.hiddendim_lstm = hiddendim_lstm
        self.dropout = nn.Dropout(0.1)

        #"drive/MyDrive/Colab Notebooks/Data/laptop_pt"
        self.bert = BertModel.from_pretrained("Data/laptop_pt",
                                              output_hidden_states=True,
                                              output_attentions=False)
        
        self.lstm = nn.LSTM(self.embeddim, self.hiddendim_lstm, batch_first=True) # noqa
        self.fc = nn.Linear(self.hiddendim_lstm, self.numclasses)

    def forward(self, inp_ids, att_mask, token_ids):
        o = self.bert(input_ids=inp_ids,
                                          attention_mask=att_mask,
                                          token_type_ids=token_ids)
        hidden_states = o["hidden_states"]
        
        hidden_states = torch.stack([hidden_states[layer_i][:, 0].squeeze()
                                     for layer_i in range(0, self.numlayers)], dim=-1) # noqa
        hidden_states = hidden_states.view(-1, self.numlayers, self.embeddim)
        out, _ = self.lstm(hidden_states, None)
        out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        return out


In [12]:
#attention models
class Bert_Attention(nn.Module):
    def __init__(self, numclasses, device):
        super(Bert_Attention, self).__init__()
        print("USING BERT_ATTENTION")
        self.numclasses = numclasses
        self.embeddim = embeddim
        self.numlayers = numlayers
        self.fchidden = fchidden
        self.dropout = nn.Dropout(0.1)

        self.bert = BertModel.from_pretrained('bert-base-uncased',
                                              output_hidden_states=True,
                                              output_attentions=False)
        print("BERT Model Loaded - Attention")

        q_t = np.random.normal(loc=0.0, scale=0.1, size=(1, self.embeddim))
        self.q = nn.Parameter(torch.from_numpy(q_t)).float().to(device)
        w_ht = np.random.normal(loc=0.0, scale=0.1, size=(self.embeddim, self.fchidden)) # noqa
        self.w_h = nn.Parameter(torch.from_numpy(w_ht)).float().to(device)

        self.fc = nn.Linear(self.fchidden, self.numclasses)

    def forward(self, inp_ids, att_mask, token_ids):
        o = self.bert(input_ids=inp_ids,
                                          attention_mask=att_mask,
                                          token_type_ids=token_ids)
        hidden_states = o["hidden_states"]
        hidden_states = torch.stack([hidden_states[layer_i][:, 0].squeeze()
                                     for layer_i in range(0, self.numlayers)], dim=-1) # noqa
        hidden_states = hidden_states.view(-1, self.numlayers, self.embeddim)
        out = self.attention(hidden_states)
        out = self.dropout(out)
        out = self.fc(out)
        return out

    def attention(self, h):
        v = torch.matmul(self.q, h.transpose(-2, -1)).squeeze(1)
        v = F.softmax(v, -1)
        v_temp = torch.matmul(v.unsqueeze(1), h).transpose(-2, -1)
        v = torch.matmul(self.w_h.transpose(1, 0), v_temp).squeeze(2)
        return v


In [13]:
class Bert_PT_Attention(nn.Module):
    def __init__(self, numclasses, device):
        super(Bert_PT_Attention, self).__init__()
        print("USING BERT_PT_ATTENTION")
        self.numclasses = numclasses
        self.embeddim = embeddim
        self.numlayers = numlayers
        self.fchidden = fchidden
        self.dropout = nn.Dropout(0.1)

        self.bert = BertModel.from_pretrained("Data/laptop_pt",
                                              output_hidden_states=True,
                                              output_attentions=False)

        q_t = np.random.normal(loc=0.0, scale=0.1, size=(1, self.embeddim))
        self.q = nn.Parameter(torch.from_numpy(q_t)).float().to(device)
        w_ht = np.random.normal(loc=0.0, scale=0.1, size=(self.embeddim, self.fchidden)) # noqa
        self.w_h = nn.Parameter(torch.from_numpy(w_ht)).float().to(device)

        self.fc = nn.Linear(self.fchidden, self.numclasses)

    def forward(self, inp_ids, att_mask, token_ids):
        o = self.bert(input_ids=inp_ids,
                                          attention_mask=att_mask,
                                          token_type_ids=token_ids)
        hidden_states = o["hidden_states"]
        hidden_states = torch.stack([hidden_states[layer_i][:, 0].squeeze()
                                     for layer_i in range(0, self.numlayers)], dim=-1) # noqa
        hidden_states = hidden_states.view(-1, self.numlayers, self.embeddim)
        out = self.attention(hidden_states)
        out = self.dropout(out)
        out = self.fc(out)
        return out

    def attention(self, h):
        v = torch.matmul(self.q, h.transpose(-2, -1)).squeeze(1)
        v = F.softmax(v, -1)
        v_temp = torch.matmul(v.unsqueeze(1), h).transpose(-2, -1)
        v = torch.matmul(self.w_h.transpose(1, 0), v_temp).squeeze(2)
        return v

# Train

In [14]:
def evaluate(loader, net, device, model_name):
    """ Evaluates a model and returns loss, accuracy

    Arguments:
    loader (DataLoader): dataloader to evaluate
    net (nn.Module): Model to evaluate
    device (torch.device): Device type
    model_name (str): Model name

    """
    net.eval()

    with torch.no_grad():
        loss = 0.0
        total = 0.0
        acc = 0.0
        y_pred = []
        y_true = []
        for input_id, attention_masks, token_ids, labels in loader:
            input_id = input_id.to(device)
            attention_masks = attention_masks.to(device)
            token_ids = token_ids.to(device)
            labels = labels.long().to(device)

            if(model_name == 'base'):
                curloss, output = net(input_id, attention_masks, token_ids, labels)# noqa
            else:
                output = net(input_id, attention_masks, token_ids)
                curloss = F.cross_entropy(output, labels, reduction='sum')
            loss += curloss.item()
            preds = torch.argmax(output, 1)
            y_pred.extend(preds.tolist())
            y_true.extend(labels.tolist())
            acc += torch.sum(preds == labels).item()
            total += input_id.size(0)

        F1 = round((f1_score(y_true, y_pred, average='macro')), 2) * 100
        return y_true, y_pred, round((loss / total), 3), round(((acc / total) * 100), 2), F1
    
    
    
def train_model(train_loader, dev_loader, news_loader, model_name,
                numclasses, numepochs, runs, device):
    """ Trains the neural network

    Arguments:
    train_loader (DataLoader): Training Data Loader
    dev_loader (DataLoader): Validation Data Loader
    test_loader (DataLoader): Test Data Loader
    model_name (str): Name of the model to train
    numclasses (int): Number of classes in the data
    numepochs (int): Number of epochs to train
    runs (int): Number of runs to report averaged results
    device (torch.device): Device type

    """
    avg_testacc = 0.0
    avg_testf1 = 0.0
    for run in range(1, runs+1):
        print("Training for run {} ".format(run))
        print("--------------------------------------------")
        if(model_name == 'lstm'):
            model = Bert_LSTM(numclasses).to(device)
        elif(model_name == 'attention'):
            model = Bert_Attention(numclasses, device).to(device)
        elif(model_name == 'pt_lstm'):
            model = Bert_PT_LSTM(numclasses).to(device)
        elif(model_name == 'pt_attention'):
            model = Bert_PT_Attention(numclasses, device).to(device)
        else:
            model = Bert_LSTM(numclasses).to(device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

        model.train()
        valbest = 0.0
        best_model_wts = copy.deepcopy(model.state_dict())

        for epoch in range(1, numepochs+1):
            model.train()
            for input_id, attention_masks, token_ids, labels in train_loader:
                input_id = input_id.to(device)
                attention_masks = attention_masks.to(device)
                token_ids = token_ids.to(device)
                labels = labels.long().to(device)

                model.zero_grad()

                output = model(input_id, attention_masks, token_ids)
                loss = F.cross_entropy(output, labels)
                
                loss.backward()
                optimizer.step()
            

            _, _, valloss, valacc, _ = evaluate(dev_loader, model, device, model_name) # noqa
            if(valacc > valbest):
                valbest = valacc
                best_model_wts = copy.deepcopy(model.state_dict())

            print("Epoch {} Val Loss {} Val Acc {} ".format(epoch,
                                                            valloss,
                                                            valacc))

        model.load_state_dict(best_model_wts)

        preds, trues, curtestloss, curtestacc, curtestf1 = evaluate(test_loader,
                                                      model, device,
                                                      model_name)

        print("Run {} Test Accuracy {} F1 Score {}".format(run,
                                                           curtestacc,
                                                           curtestf1))
        print("---------------------------------------------------")
        avg_testacc += curtestacc
        avg_testf1 += curtestf1

    print("Average Test Accuracy: {} F1: {} ".format(avg_testacc/runs,
                                                     avg_testf1/runs))
    return model

In [15]:
#train model variants 
model_name = "lstm"
numepochs = 10
runs = 1
model_lstm = train_model(train_loader, dev_loader, test_loader, model_name,
                numclasses, numepochs, runs, DEVICE)


Training for run 1 
--------------------------------------------
USING BERT_LSTM


Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1 Val Loss 0.547 Val Acc 78.67 
Epoch 2 Val Loss 0.5 Val Acc 80.33 
Epoch 3 Val Loss 0.546 Val Acc 81.72 
Epoch 4 Val Loss 0.616 Val Acc 80.61 
Epoch 5 Val Loss 0.684 Val Acc 80.61 
Epoch 6 Val Loss 0.677 Val Acc 79.5 
Epoch 7 Val Loss 0.733 Val Acc 80.06 
Epoch 8 Val Loss 0.82 Val Acc 80.61 
Epoch 9 Val Loss 0.76 Val Acc 81.99 
Epoch 10 Val Loss 0.849 Val Acc 82.83 
Run 1 Test Accuracy 82.77 F1 Score 73.0
---------------------------------------------------
Average Test Accuracy: 82.77 F1: 73.0 


In [None]:
model_name = "pt_lstm"
numepochs = 10
runs = 1
model_pt_lstm = train_model(train_loader, dev_loader, test_loader, model_name,
                numclasses, numepochs, runs, DEVICE)

In [None]:
model_name = "attention"
numepochs = 10
runs = 1
model_attention = train_model(train_loader, dev_loader, test_loader, model_name,
                numclasses, numepochs, runs, DEVICE)

Training for run 1 
--------------------------------------------


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERT Model Loaded - Attention
Epoch 1 Val Loss 0.694 Val Acc 67.67 
Epoch 2 Val Loss 0.547 Val Acc 75.0 
Epoch 3 Val Loss 0.658 Val Acc 75.0 
Epoch 4 Val Loss 0.678 Val Acc 75.43 
Epoch 5 Val Loss 0.816 Val Acc 77.59 
Epoch 6 Val Loss 0.947 Val Acc 77.16 
Epoch 7 Val Loss 0.929 Val Acc 77.59 
Epoch 8 Val Loss 0.879 Val Acc 76.72 
Epoch 9 Val Loss 1.047 Val Acc 77.59 
Epoch 10 Val Loss 0.997 Val Acc 75.86 
Run 1 Test Accuracy 73.67 F1 Score 67.0
---------------------------------------------------
Average Test Accuracy: 73.67 F1: 67.0 


In [None]:
model_name = "pt_attention"
numepochs = 10
runs = 1
model_pt_attention = train_model(train_loader, dev_loader, test_loader, model_name,
                numclasses, numepochs, runs, DEVICE)

Training for run 1 
--------------------------------------------


Some weights of the model checkpoint at drive/MyDrive/Colab Notebooks/Data/laptop_pt were not used when initializing BertModel: ['qa_outputs.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'qa_outputs.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERT Model Loaded - Attention
Epoch 1 Val Loss 0.624 Val Acc 71.55 
Epoch 2 Val Loss 0.592 Val Acc 75.43 
Epoch 3 Val Loss 0.594 Val Acc 75.0 
Epoch 4 Val Loss 0.743 Val Acc 77.16 
Epoch 5 Val Loss 0.754 Val Acc 79.31 
Epoch 6 Val Loss 0.971 Val Acc 75.86 
Epoch 7 Val Loss 0.914 Val Acc 76.72 
Epoch 8 Val Loss 0.931 Val Acc 77.16 
Epoch 9 Val Loss 0.894 Val Acc 78.02 
Epoch 10 Val Loss 0.946 Val Acc 78.88 
Run 1 Test Accuracy 75.71 F1 Score 70.0
---------------------------------------------------
Average Test Accuracy: 75.71 F1: 70.0 


In [23]:
def evalu(loader, net, device, model_name):
    """ Evaluates a model and returns loss, accuracy

    Arguments:
    loader (DataLoader): dataloader to evaluate
    net (nn.Module): Model to evaluate
    device (torch.device): Device type
    model_name (str): Model name

    """
    net.eval()

    with torch.no_grad():
        loss = 0.0
        total = 0.0
        acc = 0.0
        y_pred = []
        for input_id, attention_masks, token_ids in loader:
            input_id = input_id.to(device)
            attention_masks = attention_masks.to(device)
            token_ids = token_ids.to(device)

            output = net(input_id, attention_masks, token_ids)
            
            preds = torch.argmax(output, 1)
            y_pred.extend(preds.tolist())
            total += input_id.size(0)

        return y_pred

In [18]:
import spacy
import nltk
import pandas as pd

news = pd.read_csv('pyabsa_polarities_data.csv', usecols=['headline', 'category', 'short_description','aspects', 'polarities'])

In [None]:
news

Unnamed: 0,headline,category,short_description,aspects,polarities
0,Puerto Ricans Desperate For Water After Hurric...,WORLD NEWS,More than half a million people remained witho...,['us'],['Neutral']
1,Biden At UN To Call Russian War An Affront To ...,WORLD NEWS,White House officials say the crux of the pres...,[],[]
2,World Cup Captains Want To Wear Rainbow Armban...,WORLD NEWS,FIFA has come under pressure from several Euro...,['fifa'],['Neutral']
3,Man Sets Himself On Fire In Apparent Protest O...,WORLD NEWS,The incident underscores a growing wave of pro...,['japans'],['Negative']
4,Fiona Threatens To Become Category 4 Storm Hea...,WORLD NEWS,Hurricane Fiona lashed the Turks and Caicos Is...,[],[]
...,...,...,...,...,...
11439,Jesse Williams Set To Be Honored With Humanita...,BLACK VOICES,While Jesse Williams deserves plenty of praise...,['jackson'],['Positive']
11440,"Gabrielle Union Talks Success, Self-Love With ...",BLACK VOICES,"At the American Black Film Festival, the actre...",[],[]
11441,"They Dared To Register Blacks To Vote, And The...",BLACK VOICES,A former Klansman is serving what amounts to a...,['kkk'],['Negative']
11442,How 'Orange Is the New Black' Tackled Black Li...,BLACK VOICES,"""Orange Is the New Black"" closed its fourth se...",['ending'],['Positive']


In [20]:
news['text'] = news['headline'] + ". " + news['short_description']
news['text'] = news['text'].apply(lambda x: ' '.join([word for word in str(x).split()]))

In [21]:
news

Unnamed: 0,headline,category,short_description,aspects,polarities,text
0,Puerto Ricans Desperate For Water After Hurric...,WORLD NEWS,More than half a million people remained witho...,['us'],['Neutral'],Puerto Ricans Desperate For Water After Hurric...
1,Biden At UN To Call Russian War An Affront To ...,WORLD NEWS,White House officials say the crux of the pres...,[],[],Biden At UN To Call Russian War An Affront To ...
2,World Cup Captains Want To Wear Rainbow Armban...,WORLD NEWS,FIFA has come under pressure from several Euro...,['fifa'],['Neutral'],World Cup Captains Want To Wear Rainbow Armban...
3,Man Sets Himself On Fire In Apparent Protest O...,WORLD NEWS,The incident underscores a growing wave of pro...,['japans'],['Negative'],Man Sets Himself On Fire In Apparent Protest O...
4,Fiona Threatens To Become Category 4 Storm Hea...,WORLD NEWS,Hurricane Fiona lashed the Turks and Caicos Is...,[],[],Fiona Threatens To Become Category 4 Storm Hea...
...,...,...,...,...,...,...
11439,Jesse Williams Set To Be Honored With Humanita...,BLACK VOICES,While Jesse Williams deserves plenty of praise...,['jackson'],['Positive'],Jesse Williams Set To Be Honored With Humanita...
11440,"Gabrielle Union Talks Success, Self-Love With ...",BLACK VOICES,"At the American Black Film Festival, the actre...",[],[],"Gabrielle Union Talks Success, Self-Love With ..."
11441,"They Dared To Register Blacks To Vote, And The...",BLACK VOICES,A former Klansman is serving what amounts to a...,['kkk'],['Negative'],"They Dared To Register Blacks To Vote, And The..."
11442,How 'Orange Is the New Black' Tackled Black Li...,BLACK VOICES,"""Orange Is the New Black"" closed its fourth se...",['ending'],['Positive'],How 'Orange Is the New Black' Tackled Black Li...


In [22]:
import en_core_web_sm
nlp = en_core_web_sm.load()

In [None]:
#try with https://towardsdatascience.com/aspect-based-sentiment-analysis-using-spacy-textblob-4c8de3e0d2b9
aspects = []
row_indices_with_aspects = []
no_target = 0
for i,j in news.iterrows():
    curr_row_aspects = []
    sentence = j['text']
    doc = nlp(sentence)
    target = ''
    found_target = False
    for token in doc:
        if token.dep_ == 'nsubj' and token.pos_ == 'NOUN':
            target = token.text
            found_target = True
            curr_row_aspects.append(target)
    if(found_target):
        aspects.append(curr_row_aspects)
        row_indices_with_aspects.append(i)
    else:
        no_target += 1
print(no_target)

4312


In [None]:
aspects

[['people', 'storm'],
 ['officials', 'crux'],
 ['Man', 'incident'],
 ['trip'],
 ['government', 'storm'],
 ['storm', 'governor', 'damage'],
 ['offensive'],
 ['forces'],
 ['newspapers'],
 ['countries'],
 ['fire'],
 ['head'],
 ['finding', 'administration', 'military'],
 ['Authorities'],
 ['funeral'],
 ['authorities', 'season'],
 ['Goods', 'companies'],
 ['announcement', 'agency'],
 ['invasion'],
 ['charges'],
 ['court', 'woman'],
 ['schools', 'tampons'],
 ['Poverty'],
 ['fatwa'],
 ['Lawyers', 'Lawyers', 'sentence'],
 ['attack'],
 ['agent'],
 ['veterinarian', 'mammal'],
 ['cloud'],
 ['newspaper'],
 ['Police', 'group'],
 ['activists', 'bullfights'],
 ['tour'],
 ['units'],
 ['Soldier'],
 ['forces'],
 ['announcement', 'alliance'],
 ['steps', 'lockdown'],
 ['namesake'],
 ['son'],
 ['crews'],
 ['forces'],
 ['election', 'patriots'],
 ['world'],
 ['Leaders'],
 ['fight'],
 ['victory', 'campaign'],
 ['consortium'],
 ['judge'],
 ['authorities'],
 ['Defenders'],
 ['stabbing'],
 ['centerpiece'],
 ['of

In [None]:
#get new df with only rows with aspect found
news_aspect = news.iloc[row_indices_with_aspects]

In [None]:
news_aspect['new_aspects'] = aspects

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  news_aspect['new_aspects'] = aspects


In [None]:
def mod_news_data2(news):
    text = news['text'].values
    new_aspects = news['new_aspects'].values
    categories = news['category'].values
    
    mod_text = []
    mod_aspects = []
    mod_categories = []
    
    for i, aspect_list in enumerate(new_aspects):
        for aspect in aspect_list:
            mod_text.append(text[i])
            mod_aspects.append(aspect)
            mod_categories.append(categories[i])
            
    mod_news = pd.DataFrame()
    mod_news['mod_text'] = mod_text
    mod_news['mod_aspects'] = mod_aspects
    mod_news['mod_categories'] = mod_categories
    
    return mod_news


In [None]:
mod_news2 = mod_news_data2(news_aspect)

In [None]:
print(len(mod_news2))

9585


In [None]:
mod_news2.head(50)

Unnamed: 0,mod_text,mod_aspects,mod_categories
0,Puerto Ricans Desperate For Water After Hurric...,people,WORLD NEWS
1,Puerto Ricans Desperate For Water After Hurric...,storm,WORLD NEWS
2,Biden At UN To Call Russian War An Affront To ...,officials,WORLD NEWS
3,Biden At UN To Call Russian War An Affront To ...,crux,WORLD NEWS
4,Man Sets Himself On Fire In Apparent Protest O...,Man,WORLD NEWS
5,Man Sets Himself On Fire In Apparent Protest O...,incident,WORLD NEWS
6,Russian Cosmonaut Valery Polyakov Who Broke Re...,trip,WORLD NEWS
7,Fiona Barrels Toward Turks And Caicos Islands ...,government,WORLD NEWS
8,Fiona Barrels Toward Turks And Caicos Islands ...,storm,WORLD NEWS
9,Hurricane Fiona Bears Down On Dominican Republ...,storm,WORLD NEWS


In [None]:
(news_input_ids2, news_attention_masks2,
 news_token_type_ids2) = tokenize_sentences(bert_tokenizer,
                                            mod_news2['mod_text'],
                                            mod_news2['mod_aspects'],
                                            maxlen)
 
news_loader2 = get_loader_news(news_input_ids2, news_attention_masks2,
                         news_token_type_ids2,
                         batch_size)

In [None]:
#get predictions for difference models 
print('Running LSTM')
preds_lstm = evalu(news_loader2, model_lstm, DEVICE, 'lstm')
print('Running Attention')
preds_attention = evalu(news_loader2, model_attention, DEVICE, 'attention')
print('Running PT LSTM')
preds_pt_lstm = evalu(news_loader2, model_pt_lstm, DEVICE, 'pt_lstm')
print('Running PT Attention')
preds_pt_attention = evalu(news_loader2, model_pt_attention, DEVICE, 'pt_attention')

Running LSTM
Running Attention
Running PT LSTM
Running PT Attention


In [None]:
#add to dataframe
mod_news2['mod_preds_lstm'] = preds_lstm
mod_news2['mod_preds_attention'] = preds_attention
mod_news2['mod_preds_pt_lstm'] = preds_pt_lstm
mod_news2['mod_preds_pt_attention'] = preds_pt_attention

In [None]:
mod_news2.head(50)

Unnamed: 0,mod_text,mod_aspects,mod_categories,mod_preds_lstm,mod_preds_attention,mod_preds_pt_lstm,mod_preds_pt_attention
0,Puerto Ricans Desperate For Water After Hurric...,people,WORLD NEWS,0,0,0,0
1,Puerto Ricans Desperate For Water After Hurric...,storm,WORLD NEWS,0,0,0,0
2,Biden At UN To Call Russian War An Affront To ...,officials,WORLD NEWS,0,0,0,0
3,Biden At UN To Call Russian War An Affront To ...,crux,WORLD NEWS,0,0,0,0
4,Man Sets Himself On Fire In Apparent Protest O...,Man,WORLD NEWS,0,0,0,0
5,Man Sets Himself On Fire In Apparent Protest O...,incident,WORLD NEWS,0,0,0,0
6,Russian Cosmonaut Valery Polyakov Who Broke Re...,trip,WORLD NEWS,1,0,0,2
7,Fiona Barrels Toward Turks And Caicos Islands ...,government,WORLD NEWS,0,0,0,0
8,Fiona Barrels Toward Turks And Caicos Islands ...,storm,WORLD NEWS,0,0,0,0
9,Hurricane Fiona Bears Down On Dominican Republ...,storm,WORLD NEWS,0,0,0,0


In [None]:
# mod_news2.to_csv('mod_news2.csv')

In [None]:
# test_i = 19
# print(mod_news2.iloc[test_i]['mod_text'], mod_news2.iloc[test_i]['mod_aspects'], mod_news2.iloc[test_i]['mod_preds'])

U.S.: Russia To Buy Rockets, Artillery Shells From North Korea. The finding comes after the Biden administration confirmed that the Russian military in August took delivery of Iranian-manufactured drones for use in Ukraine. administration 2


In [None]:
# test_i = 14
# print(mod_news2.iloc[test_i]['mod_text'], mod_news2.iloc[test_i]['mod_aspects'], mod_news2.iloc[test_i]['mod_preds'])

'Our Hearts Are Broken': Historic Front Pages Mark The Queen's Death. Both British and international newspapers honor the passing of the U.K'.s longest-reigning monarch. newspapers 1


In [None]:
model_name = "pt_lstm"
numepochs = 10
runs = 1

In [None]:
model = train_model(train_loader, dev_loader, test_loader, model_name,
                numclasses, numepochs, runs, DEVICE)

Training for run 1 
--------------------------------------------


Some weights of the model checkpoint at drive/MyDrive/Colab Notebooks/Data/laptop_pt were not used when initializing BertModel: ['qa_outputs.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'qa_outputs.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERT Model Loaded
Epoch 1 Val Loss 0.613 Val Acc 74.57 
Epoch 2 Val Loss 0.559 Val Acc 76.72 
Epoch 3 Val Loss 0.543 Val Acc 79.31 
Epoch 4 Val Loss 0.645 Val Acc 79.31 
Epoch 5 Val Loss 0.858 Val Acc 75.86 
Epoch 6 Val Loss 0.867 Val Acc 78.45 
Epoch 7 Val Loss 0.914 Val Acc 77.59 
Epoch 8 Val Loss 0.863 Val Acc 78.45 
Epoch 9 Val Loss 0.794 Val Acc 80.17 
Epoch 10 Val Loss 0.846 Val Acc 78.02 
Run 1 Test Accuracy 76.02 F1 Score 71.0
---------------------------------------------------
Average Test Accuracy: 76.02 F1: 71.0 


In [None]:
preds2_pt = evalu(news_loader2, model, DEVICE, model_name)

In [None]:
# mod_news2['mod_preds_pt_lstm'] = preds2

In [None]:
# mod_news2.head(50)

Unnamed: 0,mod_text,mod_aspects,mod_categories,mod_preds_pt_lstm
0,Puerto Ricans Desperate For Water After Hurric...,people,WORLD NEWS,0
1,Puerto Ricans Desperate For Water After Hurric...,storm,WORLD NEWS,0
2,Biden At UN To Call Russian War An Affront To ...,officials,WORLD NEWS,0
3,Biden At UN To Call Russian War An Affront To ...,crux,WORLD NEWS,0
4,Man Sets Himself On Fire In Apparent Protest O...,Man,WORLD NEWS,0
5,Man Sets Himself On Fire In Apparent Protest O...,incident,WORLD NEWS,0
6,Russian Cosmonaut Valery Polyakov Who Broke Re...,trip,WORLD NEWS,1
7,Fiona Barrels Toward Turks And Caicos Islands ...,government,WORLD NEWS,0
8,Fiona Barrels Toward Turks And Caicos Islands ...,storm,WORLD NEWS,0
9,Hurricane Fiona Bears Down On Dominican Republ...,storm,WORLD NEWS,0


In [None]:
# test_i = 48
# print(mod_news2.iloc[test_i]['mod_text'], mod_news2.iloc[test_i]['mod_aspects'], mod_news2.iloc[test_i]['mod_preds_pt_lstm'])

No One Gored In First Pamplona Running Of The Bulls In 3 Years. Animal rights activists have campaigned against the slaughter of the animals, but bullfights are still a part of the San Fermín festival. activists 0


In [None]:
# test_i = 49
# print(mod_news2.iloc[test_i]['mod_text'], mod_news2.iloc[test_i]['mod_aspects'], mod_news2.iloc[test_i]['mod_preds_pt_lstm'])

No One Gored In First Pamplona Running Of The Bulls In 3 Years. Animal rights activists have campaigned against the slaughter of the animals, but bullfights are still a part of the San Fermín festival. bullfights 1
