<a href="https://colab.research.google.com/github/zen030/CourseProject/blob/main/Model_Evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **This notebook was implemented and tested in the Google Colab Pro environment**

# Evaluation

In [None]:
!pip install transformers
!pip install PyDrive

In [1]:
# Import the required modules.

# Evaluation.
import pandas as pd
import json
from transformers import BertTokenizer
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader, SequentialSampler
import torch.nn.functional as F 
import torch
from transformers import BertForSequenceClassification
import numpy as np

# To manage dataset.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [2]:
# Publicly shared Google Drive artifacts used by this notebook:
#   model file: https://drive.google.com/file/d/1sn3QT-GlFvgk7XHv-144WC6gf9gHCAnE/view?usp=sharing
#   test.jsonl: https://drive.google.com/file/d/1vA3uyqy1TZmahgZ0PeNRFx67LuYeAkoW/view?usp=sharing

# Model pre-trained on the training dataset:
# (Google Drive file name, Google Drive unique file ID).
model_file, model_file_id = (
    'lr_2e-5_1e-8_17_4.model',
    '1sn3QT-GlFvgk7XHv-144WC6gf9gHCAnE',
)

# Evaluation/testing dataset:
# (Google Drive file name, Google Drive unique file ID).
evaluation_file, test_jsonl_file_id = (
    'test.jsonl',
    '1vA3uyqy1TZmahgZ0PeNRFx67LuYeAkoW',
)

In [None]:
# The files are shared publicly, but a Google Account login is still needed
# to proceed: follow the prompt and copy-paste the code when asked.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Fetch each Drive file by its ID and save it under its local file name
# (evaluation dataset first, then the model).
for drive_file_id, local_name in (
    (test_jsonl_file_id, evaluation_file),
    (model_file_id, model_file),
):
    downloaded = drive.CreateFile({'id': drive_file_id})
    downloaded.GetContentFile(local_name)

In [4]:
# Read the JSON Lines file: one JSON document per line.
# The encoding is passed explicitly so the non-ASCII characters in the tweets
# are decoded the same way regardless of the platform's default encoding.
evaluation_data_file = evaluation_file
with open(evaluation_data_file, encoding='utf-8') as f:
    lines = f.read().splitlines()
print(f'Number of lines in file: {len(lines)}')

# Parse every non-blank line (json.loads raises on an empty string, so a
# stray blank line would otherwise abort the load), then normalize the
# resulting records into DataFrame columns.
records = [json.loads(line) for line in lines if line.strip()]
df = pd.json_normalize(records)
print(f'Number of records in Pandas DataFrame: {len(df)}')

# Lowercase the response text.
# The BERT tokenizer can lowercase by itself, but lowercasing here keeps a
# uniform text format in case the text needs to be modified for other reasons.
df['response'] = df['response'].str.lower()

# Preview the data.
df

Number of lines in file: 1800
Number of records in Pandas DataFrame: 1800


Unnamed: 0,id,response,context
0,twitter_1,"@user @user @user my 3 year old , that just fi...","[Well now that ’ s problematic AF <URL>, @USER..."
1,twitter_2,@user @user how many verifiable lies has he to...,[Last week the Fake News said that a section o...
2,twitter_3,@user @user @user maybe docs just a scrub of a...,[@USER Let ’ s Aplaud Brett When he deserves i...
3,twitter_4,@user @user is just a cover up for the real ha...,[Women generally hate this president . What's ...
4,twitter_5,@user @user @user the irony being that he even...,"[Dear media Remoaners , you excitedly sharing ..."
...,...,...,...
1795,twitter_1796,@user @user @user is definitely the best out t...,[I have been a business customer of MWeb @USER...
1796,twitter_1797,@user @user ye let her out run wild and infect...,[A woman refuses to have her temperature taken...
1797,twitter_1798,"@user @user @user thanks for that , i would ha...",[The reason big government wants @USER out is ...
1798,twitter_1799,@user @user @user yes also #found this on #new...,[Happy #musicmonday and #thanks for #all your ...


In [5]:
# Longest 'response' measured in characters; used below as a stand-in for
# the padded sequence length. Cast to a plain int because pandas returns a
# numpy scalar (a float when the column contains missing values).
max_response_chars = int(df.response.str.len().max())
print(f"Maximum character length of 'response': {max_response_chars}")

# BERT checkpoints such as bert-large-uncased provide 512 position
# embeddings, so a longer padded sequence cannot be fed to the model.
BERT_MAX_POSITIONS = 512

# Add 5 extra positions in case special tokens are needed by the model,
# but never exceed what the model can accept.
max_length = min(max_response_chars + 5, BERT_MAX_POSITIONS)

# Review the data
df

Maximum character length of 'response': 310


Unnamed: 0,id,response,context
0,twitter_1,"@user @user @user my 3 year old , that just fi...","[Well now that ’ s problematic AF <URL>, @USER..."
1,twitter_2,@user @user how many verifiable lies has he to...,[Last week the Fake News said that a section o...
2,twitter_3,@user @user @user maybe docs just a scrub of a...,[@USER Let ’ s Aplaud Brett When he deserves i...
3,twitter_4,@user @user is just a cover up for the real ha...,[Women generally hate this president . What's ...
4,twitter_5,@user @user @user the irony being that he even...,"[Dear media Remoaners , you excitedly sharing ..."
...,...,...,...
1795,twitter_1796,@user @user @user is definitely the best out t...,[I have been a business customer of MWeb @USER...
1796,twitter_1797,@user @user ye let her out run wild and infect...,[A woman refuses to have her temperature taken...
1797,twitter_1798,"@user @user @user thanks for that , i would ha...",[The reason big government wants @USER out is ...
1798,twitter_1799,@user @user @user yes also #found this on #new...,[Happy #musicmonday and #thanks for #all your ...


In [6]:
bert_model = 'bert-large-uncased'
batch_size = 5

# Tokenizer matching the uncased checkpoint; it lowercases its input.
tokenizer = BertTokenizer.from_pretrained(bert_model, do_lower_case=True)

# Encode every response and pad each one to exactly `max_length` tokens.
# truncation=True is required together with max_length: without it an
# over-long sequence is left untruncated, and rows of unequal length cannot
# be stacked into a single tensor by return_tensors='pt'.
encoded_data_evaluation = tokenizer.batch_encode_plus(
    df.response.tolist(),  # plain list of str rather than a numpy array
    add_special_tokens=True,
    return_attention_mask=True,
    max_length=max_length,
    padding='max_length',
    truncation=True,
    return_tensors='pt'
)

input_ids_evaluation = encoded_data_evaluation['input_ids']
attention_masks_evaluation = encoded_data_evaluation['attention_mask']

# Each dataset item is an (input_ids, attention_mask) pair.
dataset_evaluation = TensorDataset(input_ids_evaluation, attention_masks_evaluation)

# SequentialSampler keeps the batches in DataFrame row order.
dataloader_eval = DataLoader(
    dataset_evaluation,
    sampler=SequentialSampler(dataset_evaluation),
    batch_size=batch_size,
)

In [7]:
# Pick the device: use the GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

# Build the classifier architecture (one output per label), then overwrite
# its weights with the ones stored in the downloaded model file.
label_dict = {'SARCASM': 0, 'NOT_SARCASM': 1}
bert_model = 'bert-large-uncased'
model = BertForSequenceClassification.from_pretrained(bert_model,
                                                      num_labels=len(label_dict),
                                                      output_attentions=False,
                                                      output_hidden_states=False)
model.to(device)
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code. Only load model files that come from a trusted source.
model.load_state_dict(torch.load(model_file, map_location=device))

# Put the model into evaluation (inference) mode, not training mode.
model.eval()
predictions = []

for batch in dataloader_eval:
    batch = tuple(b.to(device) for b in batch)
    inputs = {'input_ids': batch[0], 'attention_mask': batch[1]}

    # No gradients are needed for inference.
    with torch.no_grad():
        output = model(**inputs)

    # The first element of the output holds the logits because no labels
    # are passed to the model.
    logits = output[0].detach().cpu().numpy()
    predictions.append(logits)

# Stack the per-batch logits and take the highest-scoring class per sample.
predictions = np.concatenate(predictions, axis=0)
preds_flat = np.argmax(predictions, axis=1)


print('######################')
print('# Evaluation is done #')
print('######################')

No GPU available, using the CPU instead.


Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint a

RuntimeError: ignored

In [None]:
# Print the predictions in the answer.txt format: one "twitter_<n>,<label>"
# line per prediction, with n counting from 1.
# Class index 0 is reported as SARCASM; any other index as NOT_SARCASM.
for i, pred in enumerate(preds_flat, start=1):
    text = 'SARCASM' if pred == 0 else 'NOT_SARCASM'
    print('twitter_{0},{1}'.format(i, text))

# Summary

- f1 = 0.757905138339921
- recall = 0.8522222222222222
- precision = 0.6823843416370107

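The baseline score for each metric (F1, recall, and precision) is 0.723. The model above exceeds the baseline on F1 and recall but falls below it on precision.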