<a href="https://colab.research.google.com/github/poffertje/TextMining/blob/master/code/sentiment_analysis/siebert_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Evaluating SieBERT
This code is derived from the notebook provided by the creator of SieBERT (https://colab.research.google.com/github/chrsiebert/sentiment-roberta-large-english/blob/main/sentiment_roberta_pipeline.ipynb) and has been adapted to evaluate the fine-tuned model on our own datasets.

In [None]:
# Mount Google Drive at /content/drive; the dataset CSVs loaded later in this
# notebook are read from paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Install the transformers library
!pip install transformers

In [None]:
# Import required packages
import torch
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer

# Create class for data preparation
class SimpleDataset:
    """Minimal index-addressable dataset over a tokenizer's output.

    Wraps a mapping of field name -> per-example sequence (it must contain an
    ``"input_ids"`` entry) so that single examples can be fetched by position.
    """

    def __init__(self, tokenized_texts):
        # Keep a reference to the tokenizer output; nothing is copied.
        self.tokenized_texts = tokenized_texts

    def __len__(self):
        """Return the number of examples, taken from the ``input_ids`` field."""
        input_ids = self.tokenized_texts["input_ids"]
        return len(input_ids)

    def __getitem__(self, idx):
        """Return a dict holding the ``idx``-th entry of every field."""
        example = {}
        for field, values in self.tokenized_texts.items():
            example[field] = values[idx]
        return example

In [None]:
# Select the compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Only ask for the GPU name when a GPU was actually selected; calling
# torch.cuda.get_device_name on a CPU-only runtime raises and would stop a
# "Run all" at this cell.
if device.type == "cuda":
    print(torch.cuda.get_device_name(0))

cuda


'Tesla P100-PCIE-16GB'

**Import Datasets**

In [None]:
# Load the two evaluation sets from the mounted shared drive:
#   prod_test_set - the production set
#   test_set_24   - the development test sample
# Later cells read the 'review' and 'sentiment label' columns of both frames.
prod_test_set = pd.read_csv('/content/drive/Shareddrives/Minecraft/Datasets/8April_production_set.csv')
test_set_24 = pd.read_csv('/content/drive/Shareddrives/Minecraft/Datasets/test_sample24.csv')

15000


**Load SieBERT model**

In [None]:
# Load the pretrained SieBERT tokenizer and sequence-classification model by name,
# then wrap the model in a Trainer. The Trainer is created without any training
# arguments and is only used further down, via trainer.predict(), to run inference.
model_name = "siebert/sentiment-roberta-large-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
trainer = Trainer(model=model)

Downloading:   0%|          | 0.00/256 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/687 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/780k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.32G [00:00<?, ?B/s]

In [None]:
# Move the model onto the device selected earlier (GPU when available, otherwise CPU).
model.to(device)

**Create lists of texts for prediction**

In [None]:
# Extract the review column of each evaluation set as a plain Python list of texts
# (any other source, e.g. .csv or .xls, works as long as it ends up as a list).
pred_texts = test_set_24['review'].tolist()
prod_pred_texts = prod_test_set['review'].tolist()

24000


**Development Test Set Evaluation: Tokenize, predict labels and generate classification report**

In [None]:
# Tokenize the development texts with truncation and padding enabled, then wrap
# the tokenizer output so that single examples can be fetched by index.
tokenized_texts = tokenizer(
    pred_texts,
    padding=True,
    truncation=True,
)
pred_dataset = SimpleDataset(tokenized_texts)

In [None]:
# Run the model over the whole development set. The result's `.predictions`
# attribute holds the raw per-class outputs that the next cell converts into
# class ids, label names and confidence scores.
predictions = trainer.predict(pred_dataset)

***** Running Prediction *****
  Num examples = 24000
  Batch size = 8


In [None]:
# Transform predictions to class ids, label names and confidence scores.
# Raw per-class model outputs, one row per text.
logits = predictions.predictions
preds = logits.argmax(-1)
labels = pd.Series(preds).map(model.config.id2label)

# Score = largest softmax probability in each row. The row maximum is subtracted
# before exponentiating: this leaves the softmax value unchanged but keeps
# np.exp from overflowing on large logits, and the exponentials are computed once.
exp_logits = np.exp(logits - logits.max(-1, keepdims=True))
scores = (exp_logits / exp_logits.sum(-1, keepdims=True)).max(-1)

In [None]:
# Score the predicted class ids against the gold labels of the development set.
true_labels = test_set_24['sentiment label']
dev_report = classification_report(true_labels, preds)
print(dev_report)

              precision    recall  f1-score   support

           0       0.91      0.68      0.78      5217
           1       0.92      0.98      0.95     18783

    accuracy                           0.92     24000
   macro avg       0.92      0.83      0.87     24000
weighted avg       0.92      0.92      0.91     24000



In [None]:
# Create DataFrame with texts, predictions, labels, and scores.
# The frame is built from a column mapping instead of zipping rows: zip() silently
# truncates to the shortest input, whereas this form raises if the columns do not
# all have the same length, and it avoids materialising one tuple per row.
df = pd.DataFrame({
    'text': pred_texts,
    'pred': preds,
    'label': labels,
    'score': scores,
})
df.head()

Unnamed: 0,text,pred,label,score
0,"Well, this is absolutely a great addition to t...",1,POSITIVE,0.998928
1,I love this place. I have been going for year...,1,POSITIVE,0.998925
2,Enjoyed dinner with a group of friends. Our se...,1,POSITIVE,0.998923
3,Yummy! Breakfast burrito for breakfast and ve...,1,POSITIVE,0.998885
4,We came at 8 and the wait was about 10 minutes...,1,POSITIVE,0.998928


**Production Test Set Evaluation: Tokenize, predict labels and generate classification report**

In [None]:
# Review texts of the production set as a plain Python list. The same list is
# already built in the earlier "Create lists of texts" cell; it is rebuilt here
# so that this section can be read on its own.
prod_pred_texts = prod_test_set['review'].tolist()

In [None]:
# Tokenize the production texts with truncation and padding enabled, then wrap
# the tokenizer output so that single examples can be fetched by index.
prod_tokenized_texts = tokenizer(
    prod_pred_texts,
    padding=True,
    truncation=True,
)
prod_pred_dataset = SimpleDataset(prod_tokenized_texts)

In [None]:
# Run the model over the whole production set; `.predictions` of the result holds
# the raw per-class outputs that the next cell turns into class ids and labels.
prod_predictions = trainer.predict(prod_pred_dataset)

***** Running Prediction *****
  Num examples = 2153
  Batch size = 8


In [None]:
# Transform production-set predictions to class ids, label names and confidence scores.
prod_logits = prod_predictions.predictions
prod_preds = prod_logits.argmax(-1)
prod_labels = pd.Series(prod_preds).map(model.config.id2label)

# Score = largest softmax probability in each row, computed from the production
# outputs. The line that used to be commented out here read `predictions[0]`,
# i.e. the development-set outputs, so enabling it as written would have paired
# development scores with production rows.
# The row maximum is subtracted first so that np.exp cannot overflow.
prod_exp = np.exp(prod_logits - prod_logits.max(-1, keepdims=True))
prod_scores = (prod_exp / prod_exp.sum(-1, keepdims=True)).max(-1)

In [None]:
# Score the predicted class ids against the gold labels of the production set.
# classification_report is imported once in the notebook's import cell, so it is
# not imported again here.
prod_true_labels = prod_test_set['sentiment label']
print(classification_report(prod_true_labels, prod_preds))

              precision    recall  f1-score   support

           0       0.92      0.70      0.80       650
           1       0.88      0.97      0.93      1503

    accuracy                           0.89      2153
   macro avg       0.90      0.84      0.86      2153
weighted avg       0.89      0.89      0.89      2153

