In [None]:
# Install dependencies
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.19.2-py3-none-any.whl (4.2 MB)
[K     |████████████████████████████████| 4.2 MB 5.0 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 54.1 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.7.0-py3-none-any.whl (86 kB)
[K     |████████████████████████████████| 86 kB 6.2 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 13.0 MB/s 
Installing collected packages: pyyaml, tokenizers, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Unins

In [None]:
# Mount Google Drive at /content/drive; every dataset, checkpoint and output
# path used below lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Work from the NLI-M project folder on the mounted Drive.
%cd /content/drive/MyDrive/PIL/BERT-ABSA/Bert-pair/NLI-M

/content/drive/MyDrive/PIL/BERT-ABSA/Bert-pair/NLI-M


In [None]:
import json
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import transformers
from tqdm import tqdm
from sklearn.metrics import f1_score, accuracy_score
from transformers import BertModel, BertTokenizer

import logging
logging.basicConfig(level=logging.ERROR)

import warnings
warnings.filterwarnings("ignore")

In [None]:
class SentimentClassifier(nn.Module):
  """
  Sentiment classifier made of a pre-trained BERT encoder followed by dropout
  and one fully-connected layer that produces 3 class logits.

  Args:
    BERT_MODEL: name or path handed to `BertModel.from_pretrained`.
  """

  def __init__(self, BERT_MODEL):
    super().__init__()
    encoder = BertModel.from_pretrained(BERT_MODEL)
    self.bert = encoder
    self.drop = nn.Dropout(p=0.3)
    # One logit per output class (3 classes).
    self.out = nn.Linear(encoder.config.hidden_size, 3)

  def forward(self, ids, mask, token_type_ids):
    """Returns the class logits computed from the encoder's pooled output."""
    # With return_dict=False the encoder result is unpacked as a
    # (last_hidden_state, pooled_output) pair; only the pooled part is used.
    _, pooled = self.bert(
        ids,
        attention_mask=mask,
        token_type_ids=token_type_ids,
        return_dict=False,
    )
    logits = self.out(self.drop(pooled))
    return logits

# Inference on BERT-pair NLI-M

In [None]:
class SentiHood:
  """
  Dataset wrapper that tokenizes the input text using the pre-trained BERT
  tokenizer (wordpiece) and returns the corresponding tensors.

  Args:
    opinions_id: indexable collection of integer example ids.
    text: indexable collection of input sentences.
    auxiliary_sentence: indexable collection of auxiliary sentences, aligned
      with `text`.
    targets: indexable collection of integer class labels.
    tokenizer: object exposing `encode_plus` (e.g. a `BertTokenizer`).
    max_len: fixed length every example is padded / truncated to.
  """

  def __init__(self, opinions_id, text, auxiliary_sentence, targets, tokenizer, max_len):
    self.opinions_id = opinions_id
    self.text = text
    self.auxiliary_sentence = auxiliary_sentence
    self.tokenizer = tokenizer
    self.max_len = max_len
    self.targets = targets

  def __len__(self):
    """Number of examples (one per target label)."""
    return len(self.targets)

  def __getitem__(self, item):
    """
    Encodes example `item`.

    Returns:
      dict of `torch.long` tensors with keys "ids", "mask", "token_type_ids",
      "targets" and "opinions_id".
    """
    opinions_id = self.opinions_id[item]
    targets = self.targets[item]

    # The sentence and its auxiliary sentence are joined into ONE sequence
    # (not passed as a sentence pair).
    # NOTE(review): presumably this mirrors the preprocessing used when the
    # checkpoint was trained -- confirm before changing it.
    combined_text = str(self.text[item]) + ' ' + str(self.auxiliary_sentence[item])

    # `padding='max_length'` replaces the deprecated `pad_to_max_length=True`,
    # and `truncation=True` makes explicit the truncation that was previously
    # applied only implicitly (with a warning) because `max_length` was set.
    inputs = self.tokenizer.encode_plus(
        combined_text,
        add_special_tokens=True,
        max_length=self.max_len,
        padding='max_length',
        truncation=True,
    )

    return {
        "ids": torch.tensor(inputs["input_ids"], dtype=torch.long),
        "mask": torch.tensor(inputs["attention_mask"], dtype=torch.long),
        "token_type_ids": torch.tensor(inputs["token_type_ids"], dtype=torch.long),
        "targets": torch.tensor(targets, dtype=torch.long),
        "opinions_id": torch.tensor(opinions_id, dtype=torch.long)
    }

In [None]:
def infer_loop_function(data_loader, model, device,
                        output_path='/content/drive/MyDrive/PIL/BERT-ABSA/Bert-pair/NLI-M/PredictedValues.csv'):
  """
  Runs inference over `data_loader` and stores the predicted values.

  Args:
    data_loader: sized iterable of batches; each batch is a dict holding the
      tensors "opinions_id", "ids", "mask", "token_type_ids" and "targets".
    model: `nn.Module` called as `model(ids=..., mask=..., token_type_ids=...)`
      and returning one row of class scores per example.
    device: torch device the model and the inputs are moved to.
    output_path: CSV file the predictions are written to (defaults to the
      location used throughout this notebook).

  Returns:
    DataFrame with the columns "id", "predicted" and "actual" -- the same
    content that is written to `output_path`.
  """

  # Inputs are moved to `device` below, so the model has to live there too.
  model.to(device)
  model.eval()

  records = []

  # Inference only: no autograd graph is needed, which saves memory and time.
  with torch.no_grad():
    for d in tqdm(data_loader, total=len(data_loader), ncols=80):
      ids = d["ids"].to(device, dtype=torch.long)
      mask = d["mask"].to(device, dtype=torch.long)
      token_type_ids = d["token_type_ids"].to(device, dtype=torch.long)

      outputs = model(ids=ids, mask=mask, token_type_ids=token_type_ids)
      _, predicted = torch.max(outputs, 1)

      # Ids and targets are only recorded, never fed to the model, so they do
      # not need to be transferred to `device`.
      records.extend(zip(
          d["opinions_id"].to(dtype=torch.long).tolist(),
          predicted.cpu().tolist(),
          d["targets"].to(dtype=torch.long).tolist(),
      ))

  # Build the frame once (row-by-row `.loc` growth is quadratic) and write the
  # CSV once after the loop instead of rewriting it after every batch.
  df_pred = pd.DataFrame(records, columns=["id", "predicted", "actual"])
  df_pred.to_csv(output_path, index=False)
  return df_pred

In [None]:
def run():
  """
  Defines the necessary hyperparameters and paths, loads and tokenizes the
  testing dataset, loads the trained model and executes the inference
  procedure (which writes the predictions to disk).

  Raises:
    ValueError: if the 'sentiment' column holds a value that is not one of
      the known labels.
  """

  MAX_LEN = 160
  BATCH_SIZE = 24
  BERT_MODEL = 'bert-base-uncased'

  testing_set_path = '/content/drive/MyDrive/PIL/BERT-ABSA/Bert-pair/NLI-M/Datasets/testing_set.csv'
  # NOTE(review): the "Creating preds.jsonl" cell loads Models/0.bin while
  # this function loads Models/3.bin -- confirm which checkpoint is intended.
  model_path = '/content/drive/MyDrive/PIL/BERT-ABSA/Bert-pair/NLI-M/Models/3.bin'

  df_test = pd.read_csv(testing_set_path)
  sentiment_mapping = {
      'Positive': 0,
      'Negative': 1,
      'None': 2
  }
  df_test['sentiment'] = df_test['sentiment'].map(sentiment_mapping)
  # `.map` turns every unknown label into NaN, which would later be cast to a
  # meaningless integer target. Fail loudly instead. One possible cause: some
  # pandas versions parse the literal string "None" as a missing value.
  n_unmapped = int(df_test['sentiment'].isna().sum())
  if n_unmapped:
    raise ValueError(
        f"{n_unmapped} row(s) of {testing_set_path} have a 'sentiment' value "
        f"outside {list(sentiment_mapping)}"
    )
  df_test = df_test.reset_index(drop=True)

  tokenizer = transformers.BertTokenizer.from_pretrained(BERT_MODEL)

  test_dataset = SentiHood(
      opinions_id = df_test['id'].values,
      text = df_test['text'].values,
      auxiliary_sentence = df_test['auxiliary_sentence'].values,
      targets = df_test['sentiment'].values,
      tokenizer = tokenizer,
      max_len = MAX_LEN
  )
  print(f"Testing Set: {len(test_dataset)}")

  test_data_loader = torch.utils.data.DataLoader(
      test_dataset,
      batch_size = BATCH_SIZE,
      shuffle=False
  )

  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  print(f"Device: {device}")

  # The checkpoint is a pickled model object: `torch.load` unpickles it, so
  # only load files from a trusted source. `map_location` lets a checkpoint
  # saved on a GPU be loaded on a CPU-only runtime as well.
  model = torch.load(model_path, map_location=device)
  infer_loop_function(data_loader=test_data_loader, model=model, device=device)

if __name__ == "__main__":
  run()

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Training Set: 22548
Device: cuda:0


  0%|                                                   | 0/940 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
100%|█████████████████████████████████████████| 940/940 [03:21<00:00,  4.67it/s]


# Evaluation of BERT-pair NLI-M

In [None]:
def compute_sentiment_accuracy(df):
  """
  Computes the sentiment classification accuracy: the percentage of rows
  whose 'predicted' value equals the 'actual' value, rounded to 2 decimals.
  """

  n_hits = int((df['predicted'] == df['actual']).sum())
  n_rows = df.shape[0]
  return round(n_hits / n_rows * 100, 2)

# Reload the predictions CSV from Drive and report the sentiment accuracy.
df = pd.read_csv('/content/drive/MyDrive/PIL/BERT-ABSA/Bert-pair/NLI-M/PredictedValues.csv')
print(f'Sentiment Accuracy of BERT-pair NLI-M = {compute_sentiment_accuracy(df)}%')

Sentiment Accuracy of BERT-pair NLI-M = 96.57%


In [None]:
def compute_aspect_accuracy(df, group_size=12):
  """
  Computes the strict aspect accuracy.

  Labels are first collapsed into a binary "aspect detected" flag:
  0 => Represents that the aspect has not been detected (label 2).
  1 => Represents that the aspect has been detected (labels 0 and 1).

  Rows are then taken in consecutive groups of `group_size`; a group counts
  as correct only when the predicted flag equals the actual flag on every
  one of its rows.

  Args:
    df: DataFrame with the columns 'predicted' and 'actual'.
    group_size: number of consecutive rows forming one group (12 by default,
      matching the 12 aspects used in this notebook).

  Returns:
    Percentage of fully-correct groups, rounded to 2 decimals.

  Raises:
    ValueError: if `df` has no rows or `group_size` is smaller than 1.
  """

  if group_size < 1:
    raise ValueError("group_size must be a positive integer")
  if df.shape[0] == 0:
    raise ValueError("cannot compute the aspect accuracy of an empty DataFrame")

  # Only the two label columns are binarised; replacing over the whole frame
  # would also rewrite ids that happen to be 0, 1 or 2.
  flags = df[['predicted', 'actual']].replace([0, 1], 1).replace(2, 0)
  row_matches = (flags['predicted'] == flags['actual']).to_numpy()

  # Consecutive rows share a group id; a shorter trailing group is kept.
  group_ids = np.arange(row_matches.shape[0]) // group_size
  group_correct = pd.Series(row_matches).groupby(group_ids).all()

  accuracy = float(group_correct.mean()) * 100
  return round(accuracy, 2)

# Reload the predictions CSV from Drive and report the strict aspect accuracy.
df = pd.read_csv('/content/drive/MyDrive/PIL/BERT-ABSA/Bert-pair/NLI-M/PredictedValues.csv')
print(f'Aspect Accuracy (strict) of BERT-pair NLI-M = {compute_aspect_accuracy(df)}%')

Aspect Accuracy (strict) of BERT-pair NLI-M = 69.72%


In [None]:
def compute_aspect_f1_score(df, group_size=12):
  """
  Computes the macro F1 score of the predicted aspects.

  Labels are first collapsed into a binary "aspect detected" flag:
  0 => Represents that the aspect has not been detected (label 2).
  1 => Represents that the aspect has been detected (labels 0 and 1).

  A macro F1 score is computed separately for every group of `group_size`
  consecutive rows, and the reported value is the mean of those per-group
  scores (not a single score over all rows).

  Args:
    df: DataFrame with the columns 'predicted' and 'actual'.
    group_size: number of consecutive rows forming one group (12 by default,
      matching the 12 aspects used in this notebook).

  Returns:
    Mean per-group macro F1 as a percentage, rounded to 2 decimals.

  Raises:
    ValueError: if `df` has no rows or `group_size` is smaller than 1.
  """

  if group_size < 1:
    raise ValueError("group_size must be a positive integer")
  if df.shape[0] == 0:
    raise ValueError("cannot compute the aspect F1 score of an empty DataFrame")

  # Only the two label columns are binarised; replacing over the whole frame
  # would also rewrite ids that happen to be 0, 1 or 2.
  flags = df[['predicted', 'actual']].replace([0, 1], 1).replace(2, 0)

  group_scores = []
  for start in range(0, flags.shape[0], group_size):
    group = flags.iloc[start:start + group_size]
    # f1_score expects (y_true, y_pred): ground truth first, predictions second.
    group_scores.append(f1_score(group['actual'], group['predicted'], average="macro"))

  score = float(sum(group_scores)) / float(len(group_scores)) * 100
  return round(score, 2)

# Reload the predictions CSV from Drive and report the aspect F1 score.
df = pd.read_csv('/content/drive/MyDrive/PIL/BERT-ABSA/Bert-pair/NLI-M/PredictedValues.csv')
print(f"Aspect F1 score: {compute_aspect_f1_score(df)}")

Aspect F1 score: 89.53


# Prediction Result Analysis

This section analyses the predicted results to find the aspects and sentiments that are most and least accurate.

*Note*: This analysis relies on the first 1491×12 rows of the loaded `df` belonging to `location-1` and the remaining rows belonging to `location-2`.

In [None]:
# Predictions CSV; the analysis cells below read the global `df`.
df = pd.read_csv('/content/drive/MyDrive/PIL/BERT-ABSA/Bert-pair/NLI-M/PredictedValues.csv')

In [None]:
"""
Computes the positive correct, positive total, negative correct, negative total, 
none correct, none total corresponding to all the aspects of LOCATION1.
"""

aspects = ['dining', 'general', 'green-nature', 'live', 'multicultural', 'nightlife',
           'price', 'quiet', 'safety','shopping', 'touristy', 'transit-location']
n_aspects = len(aspects)

# aspect -> [[correct, total] for actual label 0, label 1, label 2]
location1_aspects_result_analysis = {
    aspect: [[0, 0], [0, 0], [0, 0]] for aspect in aspects
}

# The LOCATION1 rows are the first (number of unique ids) x 12 rows of `df`.
n_location1_rows = df['id'].unique().shape[0] * n_aspects

# Plain arrays avoid a slow chained `df.loc[row][column]` lookup per cell.
actual_labels = df['actual'].to_numpy()
predicted_labels = df['predicted'].to_numpy()

# The range now ends at `n_location1_rows`; the previous bound
# (`n_location1_rows - 12`) silently skipped the last LOCATION1 group.
for i in tqdm(range(0, n_location1_rows, n_aspects), ncols=80):
  for j, aspect in enumerate(aspects):
    actual_label = int(actual_labels[i + j])
    counts = location1_aspects_result_analysis[aspect][actual_label]

    if actual_labels[i + j] == predicted_labels[i + j]:
      counts[0] += 1

    counts[1] += 1

100%|██████████████████████████████████████| 1490/1490 [00:05<00:00, 282.01it/s]


In [None]:
# Summary table: one row per (location, aspect) holding the correct / total
# counts for each actual sentiment label. The LOCATION1 rows are filled in
# here; `ii` keeps pointing at the next free row for the LOCATION2 cell.
summary_columns = ["location", "aspect", "positive correct", "positive total",
                   "negative correct", "negative total", "none correct", "none total"]
df_location_aspect = pd.DataFrame({column: [] for column in summary_columns})

ii = 0
for aspect_name, (positive, negative, none_label) in location1_aspects_result_analysis.items():
  df_location_aspect.loc[ii] = [
      'LOCATION1', f"{aspect_name}",
      positive[0], positive[1],
      negative[0], negative[1],
      none_label[0], none_label[1],
  ]
  ii += 1

In [None]:
"""
Computes the positive correct, positive total, negative correct, negative total, 
none correct, none total corresponding to all the aspects of LOCATION2.
"""

aspects = ['dining', 'general', 'green-nature', 'live', 'multicultural', 'nightlife',
           'price', 'quiet', 'safety','shopping', 'touristy', 'transit-location']
n_aspects = len(aspects)

# aspect -> [[correct, total] for actual label 0, label 1, label 2]
location2_aspects_result_analysis = {
    aspect: [[0, 0], [0, 0], [0, 0]] for aspect in aspects
}

# The LOCATION2 rows start right after the (number of unique ids) x 12
# LOCATION1 rows and run to the end of `df`.
location2_start = df['id'].unique().shape[0] * n_aspects

# Plain arrays avoid a slow chained `df.loc[row][column]` lookup per cell.
actual_labels = df['actual'].to_numpy()
predicted_labels = df['predicted'].to_numpy()

# Every complete group of 12 rows is visited, including the last one; the
# previous bound (`df.shape[0] - 12`) silently skipped the final group.
for i in tqdm(range(location2_start, df.shape[0] - n_aspects + 1, n_aspects), ncols=80):
  for j, aspect in enumerate(aspects):
    actual_label = int(actual_labels[i + j])
    counts = location2_aspects_result_analysis[aspect][actual_label]

    if actual_labels[i + j] == predicted_labels[i + j]:
      counts[0] += 1

    counts[1] += 1

100%|████████████████████████████████████████| 387/387 [00:01<00:00, 282.41it/s]


In [None]:
# Append the LOCATION2 rows to the summary table, continuing at row `ii`
# where the LOCATION1 cell stopped.
for aspect_name, (positive, negative, none_label) in location2_aspects_result_analysis.items():
  df_location_aspect.loc[ii] = [
      'LOCATION2', f"{aspect_name}",
      positive[0], positive[1],
      negative[0], negative[1],
      none_label[0], none_label[1],
  ]
  ii += 1

In [None]:
# Accuracy (in %) per actual sentiment label for every (location, aspect) row.
for sentiment in ('positive', 'negative', 'none'):
  n_correct = df_location_aspect[f'{sentiment} correct']
  n_total = df_location_aspect[f'{sentiment} total']
  df_location_aspect[f'{sentiment} percentage'] = (n_correct / n_total * 100).round(2)

# Overall accuracy (in %) of the row across the three sentiment labels.
all_correct = (df_location_aspect['positive correct']
               + df_location_aspect['negative correct']
               + df_location_aspect['none correct'])
all_total = (df_location_aspect['positive total']
             + df_location_aspect['negative total']
             + df_location_aspect['none total'])
df_location_aspect['total percentage'] = (all_correct / all_total * 100).round(2)

In [None]:
# Display the per-location, per-aspect result table.
df_location_aspect

Unnamed: 0,location,aspect,positive correct,positive total,negative correct,negative total,none correct,none total,positive percentage,negative percentage,none percentage,total percentage
0,LOCATION1,dining,30.0,30.0,0.0,2.0,1449.0,1458.0,100.0,0.0,99.38,99.26
1,LOCATION1,general,311.0,359.0,91.0,113.0,920.0,1018.0,86.63,80.53,90.37,88.72
2,LOCATION1,green-nature,35.0,40.0,0.0,0.0,1440.0,1450.0,87.5,,99.31,98.99
3,LOCATION1,live,52.0,63.0,19.0,23.0,1334.0,1404.0,82.54,82.61,95.01,94.3
4,LOCATION1,multicultural,34.0,39.0,3.0,3.0,1437.0,1448.0,87.18,100.0,99.24,98.93
5,LOCATION1,nightlife,59.0,62.0,1.0,2.0,1402.0,1426.0,95.16,50.0,98.32,98.12
6,LOCATION1,price,70.0,81.0,110.0,116.0,1250.0,1293.0,86.42,94.83,96.67,95.97
7,LOCATION1,quiet,13.0,14.0,13.0,15.0,1456.0,1461.0,92.86,86.67,99.66,99.46
8,LOCATION1,safety,55.0,61.0,56.0,66.0,1332.0,1363.0,90.16,84.85,97.73,96.85
9,LOCATION1,shopping,62.0,62.0,1.0,1.0,1410.0,1427.0,100.0,100.0,98.81,98.86


# Creating preds.jsonl

This section constructs the `preds.jsonl` file, which contains the model predictions together with the original annotations in the following JSON format.


```
{
  "opinions": [
    {
      "sentiment": "Positive",
      "aspect": "safety",
      "target_entity": "LOCATION1"
    }
  ],
  "id": 153,
  "text": " LOCATION1 is in Greater London and is a very safe place",
  "model_pred": [
    {
      "sentiment": ...,
      "aspect": ...,
      "target_entity":...
    },...
  ]
}
```

In [None]:
# Load the SentiHood test set JSON; the cells below iterate over it as a
# collection of examples and read each example's 'id' and 'text'.
with open('/content/drive/MyDrive/PIL/BERT-ABSA/SentiHood Dataset/sentihood-test.json', 'r') as fp:
  testing_set = json.load(fp)

In [None]:
# Integer class label -> sentiment name. This is the inverse of the
# `sentiment_mapping` defined in `run()`; keep the two in sync.
labels_to_sentiment_dict = {
    0: 'Positive',
    1: 'Negative',
    2: 'None'
}

In [None]:
BERT_MODEL = 'bert-base-uncased'
MAX_LEN = 160
locations = ['LOCATION1', 'LOCATION2']
aspects = ['dining', 'general', 'green-nature', 'live', 'multicultural', 'nightlife',
           'price', 'quiet', 'safety','shopping', 'touristy', 'transit-location']

tokenizer = transformers.BertTokenizer.from_pretrained(BERT_MODEL)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

# NOTE(review): this cell loads Models/0.bin while `run()` evaluates
# Models/3.bin -- confirm which checkpoint preds.jsonl should be built from.
# The checkpoint is a pickled model object, so only load trusted files;
# `map_location` also allows loading on a CPU-only runtime.
model = torch.load('/content/drive/MyDrive/PIL/BERT-ABSA/Bert-pair/NLI-M/Models/0.bin',
                   map_location=device)
# A freshly loaded model keeps whatever train/eval flag it was saved with.
# Without eval() dropout may stay active and make predictions random.
model.eval()

# Inference only: no autograd graph is needed.
with torch.no_grad():
  for each_example in tqdm(testing_set, ncols=80):
    text = each_example['text'].strip()

    # Reset on every run so that re-executing the cell does not duplicate
    # predictions.
    each_example['model_pred'] = []

    for count_location, location in enumerate(locations, start=1):
      if location not in text:
        continue

      # "location" is present in the text, so utilize the trained model to
      # predict the aspects and their corresponding sentiment of the text.
      # The replacement is cumulative: when LOCATION2 is processed, LOCATION1
      # has already been rewritten in `text`.
      text = text.replace(location, 'location - ' + str(count_location))

      for aspect in aspects:
        auxiliary_sentence = f'location - {str(count_location)} - {aspect}'
        combined_text = text + ' ' + auxiliary_sentence

        # `padding='max_length'` replaces the deprecated
        # `pad_to_max_length=True`; `truncation=True` makes explicit the
        # truncation that used to be applied implicitly (with a warning).
        inputs = tokenizer.encode_plus(
            combined_text,
            add_special_tokens = True,
            max_length = MAX_LEN,
            padding = 'max_length',
            truncation = True
        )
        # unsqueeze(0) adds the batch dimension (a batch of one example).
        ids = torch.tensor(inputs["input_ids"], dtype=torch.long).unsqueeze(0).to(device)
        mask = torch.tensor(inputs["attention_mask"], dtype=torch.long).unsqueeze(0).to(device)
        token_type_ids = torch.tensor(inputs["token_type_ids"], dtype=torch.long).unsqueeze(0).to(device)

        outputs = model(ids=ids, mask=mask, token_type_ids=token_type_ids)
        _, predicted = torch.max(outputs, 1)
        predicted_label = int(predicted[0].item())

        # If predicted sentiment is not None (label 2), then add it to the
        # example's predictions.
        if predicted_label != 2:
          result = {
              "sentiment": labels_to_sentiment_dict[predicted_label],
              "aspect": aspect,
              "target_entity": location
          }
          each_example['model_pred'].append(result)

Device: cuda:0


  0%|                                                  | 0/1491 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
100%|███████████████████████████████████████| 1491/1491 [03:47<00:00,  6.57it/s]


In [None]:
# Write the annotated test set as JSON Lines: one example per line, with
# non-ASCII characters kept as-is.
# NOTE(review): the markdown above calls this file `preds.jsonl`, but the
# path written here is `pred.jsonl` -- confirm which name consumers expect.
with open('/content/drive/MyDrive/PIL/BERT-ABSA/Bert-pair/NLI-M/pred.jsonl', mode='w', encoding='utf-8') as fp:
  fp.writelines(
      json.dumps(each, ensure_ascii=False) + '\n'
      for each in testing_set
  )