# RoBERTa Fine Tunning For Helpfulness Prediction

# Load Datasets

## Import Needed Libraries

In [1]:
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import re

## Read CSV Files

In [2]:
train_df = pd.read_csv("train.csv")
train_df

Unnamed: 0.1,Unnamed: 0,_id,text,label,helpful,unhelpful
0,0,657855651a6d2c7052a63c2e,The selection here is okay if you're making ga...,helpful,1,0
1,1,657859921a6d2c7052a64d48,"Now THIS is a restaurant. Small, tidy, excell...",unhelpful,0,1
2,2,657855651a6d2c7052a63de3,In the last five minutes I had two men knock o...,helpful,1,0
3,3,657859921a6d2c7052a6529f,I love Potbelly's. I've eaten there several t...,unhelpful,0,1
4,4,657855641a6d2c7052a63332,"This is the best thai cuisine around, the curr...",helpful,1,0
...,...,...,...,...,...,...
7995,7995,657859921a6d2c7052a64d6e,Service was good and friendly. Food was good ...,unhelpful,0,1
7996,7996,657855651a6d2c7052a6440c,This place is a complete joke. I bought 4 tire...,helpful,1,0
7997,7997,657855651a6d2c7052a64353,I waited a week to post my review of White Dog...,helpful,1,0
7998,7998,657855641a6d2c7052a633cd,"We had a Groupon for Euphoria, so my husband a...",helpful,1,0


In [3]:
train_df["label"].value_counts()

label
helpful      4000
unhelpful    4000
Name: count, dtype: int64

In [4]:
test_df = pd.read_csv("test.csv")
test_df

Unnamed: 0.1,Unnamed: 0,_id,text,label,helpful,unhelpful
0,0,657859921a6d2c7052a64801,I was out in St. Louis on a business trip and ...,unhelpful,0,1
1,1,657859921a6d2c7052a647aa,Worst omelet I have ever eaten. With the upcha...,unhelpful,0,1
2,2,657855651a6d2c7052a63749,I love this place! It is my favorite go to pla...,helpful,1,0
3,3,657855651a6d2c7052a6408a,Came to Tamarind for lunch on Sunday afternoon...,helpful,1,0
4,4,657859921a6d2c7052a6554d,"Easter Sunday, over crowded at 6:00 PM, staff ...",unhelpful,0,1
...,...,...,...,...,...,...
1995,1995,657859921a6d2c7052a65259,Love this dealership. Bought my CRV there a fe...,unhelpful,0,1
1996,1996,657855651a6d2c7052a63ce4,We stopped at Tako due to the positive reviews...,helpful,1,0
1997,1997,657855651a6d2c7052a63d89,I had a quick get together with a couple of fr...,helpful,1,0
1998,1998,657859921a6d2c7052a64b23,"After a long week, a girl friend and I decided...",unhelpful,0,1


In [5]:
test_df["label"].value_counts()

label
unhelpful    1000
helpful      1000
Name: count, dtype: int64

### Clean Text

In [6]:
def clean(text, newline=True, quote=True, bullet_point=True, 
          link=True, strikethrough=True, spoiler=True,
          code=True, superscript=True, table=True, heading=True):
    
    text = re.sub("[^a-zA-Z]",  " ", str(text))
    
    if newline:
        text = re.sub(r'\n+', ' ', text)
        text = text.strip()
        text = re.sub(r'\s\s+', ' ', text)

    if quote:
        text = re.sub(r'\"?\\?&?gt;?', '', text)

    if bullet_point:
        text = re.sub(r'\*', '', text)
        text = re.sub('&amp;#x200B;', '', text)

    if link:
        text = re.sub(r'\[.*?\]\(.*?\)', '', text)

    if strikethrough:
        text = re.sub('~', '', text)

    if spoiler:
        text = re.sub('&lt;', '', text)
        text = re.sub(r'!(.*?)!', r'\1', text)

    if code:
        text = re.sub('`', '', text)

    if superscript:
        text = re.sub(r'\^\((.*?)\)', r'\1', text)

    if table:
        text = re.sub(r'\|', ' ', text)
        text = re.sub(':-', '', text)

    if heading:
        text = re.sub('#', '', text)
    return text

In [7]:
train_df['text'] = train_df['text'].apply(lambda x: clean(x))
train_df

Unnamed: 0.1,Unnamed: 0,_id,text,label,helpful,unhelpful
0,0,657855651a6d2c7052a63c2e,The selection here is okay if you re making ga...,helpful,1,0
1,1,657859921a6d2c7052a64d48,Now THIS is a restaurant Small tidy excellent ...,unhelpful,0,1
2,2,657855651a6d2c7052a63de3,In the last five minutes I had two men knock o...,helpful,1,0
3,3,657859921a6d2c7052a6529f,I love Potbelly s I ve eaten there several tim...,unhelpful,0,1
4,4,657855641a6d2c7052a63332,This is the best thai cuisine around the curry...,helpful,1,0
...,...,...,...,...,...,...
7995,7995,657859921a6d2c7052a64d6e,Service was good and friendly Food was good wi...,unhelpful,0,1
7996,7996,657855651a6d2c7052a6440c,This place is a complete joke I bought tires o...,helpful,1,0
7997,7997,657855651a6d2c7052a64353,I waited a week to post my review of White Dog...,helpful,1,0
7998,7998,657855641a6d2c7052a633cd,We had a Groupon for Euphoria so my husband an...,helpful,1,0


In [8]:
test_df['text'] = test_df['text'].apply(lambda x: clean(x))
test_df

Unnamed: 0.1,Unnamed: 0,_id,text,label,helpful,unhelpful
0,0,657859921a6d2c7052a64801,I was out in St Louis on a business trip and w...,unhelpful,0,1
1,1,657859921a6d2c7052a647aa,Worst omelet I have ever eaten With the upchar...,unhelpful,0,1
2,2,657855651a6d2c7052a63749,I love this place It is my favorite go to plac...,helpful,1,0
3,3,657855651a6d2c7052a6408a,Came to Tamarind for lunch on Sunday afternoon...,helpful,1,0
4,4,657859921a6d2c7052a6554d,Easter Sunday over crowded at PM staff lacked ...,unhelpful,0,1
...,...,...,...,...,...,...
1995,1995,657859921a6d2c7052a65259,Love this dealership Bought my CRV there a few...,unhelpful,0,1
1996,1996,657855651a6d2c7052a63ce4,We stopped at Tako due to the positive reviews...,helpful,1,0
1997,1997,657855651a6d2c7052a63d89,I had a quick get together with a couple of fr...,helpful,1,0
1998,1998,657859921a6d2c7052a64b23,After a long week a girl friend and I decided ...,unhelpful,0,1


## Build PyTorch Dataset Class

In [9]:
from torch.utils.data import Dataset
from transformers import AutoTokenizer
import torch

In [10]:
class Helpfulness_Dataset(Dataset):
    
    def __init__(self, data, tokenizer, attributes, max_token_len):
        self.data = data
        self.tokenizer = tokenizer
        self.attributes = attributes
        self.max_token_len = max_token_len
        
    def __len__(self):
        return len(self.data)
        
    def __getitem__(self, index):
        item = self.data.iloc[index]
        text = item.text
        attributes = torch.FloatTensor(item[self.attributes])
        tokens = self.tokenizer.encode_plus(text,
                                            add_special_tokens=True,
                                            return_tensors="pt",
                                            truncation=True,
                                            max_length=self.max_token_len,
                                            padding="max_length",
                                            return_attention_mask=True)
        return {
            "input_ids": tokens.input_ids.flatten(),
            "attention_mask": tokens.attention_mask.flatten(),
            "labels": attributes
        }

## Create Train And Test PyTorch Datasets

In [11]:
attributes = ["helpful", "unhelpful"]
model_name = "roberta-base"
max_token_length = 512
tokenizer = AutoTokenizer.from_pretrained(model_name)
hfs_ds_train = Helpfulness_Dataset(train_df, tokenizer, attributes, max_token_length)
hfs_ds_val = Helpfulness_Dataset(test_df, tokenizer, attributes, max_token_length)

## Tokization Example

### Helpful Review

In [12]:
hfs_ds_train.data.iloc[0].text

'The selection here is okay if you re making garments quilting even rag is a no go ever Fleece selection and price is great and one of the reasons to go here Warm and natural batting is more expensive than Hancock or anywhere else except high end quilt shops Price is okay on fabric for garments and there s a nice selection of silk rayon syn but the quality of some fabrics reflects the low everyday price My opinion is that these are nd and rd cut fabrics not top quality If you use a coupon or go for a sale at Hancock you can get better quality fabric at close to the same price Plus their notions are about non existant just try to match thread to fabric there good luck The real reason for a low rating is the customer service I agree with the woman from San Jose who said it was bad unless they know you That was my experience also I took my daughter here and after she left me to go to the bathroom she came back freaked out She said one of the ladies who worked there grabbed her and hugged 

In [13]:
hfs_ds_train.__getitem__(0)

{'input_ids': tensor([    0,   133,  4230,   259,    16,  8578,   114,    47,   769,   442,
         30625,  2677,   718,  2577,   190, 31179,    16,    10,   117,   213,
           655,   274,  7445,  1755,  4230,     8,   425,    16,   372,     8,
            65,     9,     5,  2188,     7,   213,   259, 19516,     8,  1632,
          8032,    16,    55,  3214,    87, 19632,    50,  4558,  1493,  4682,
           239,   253,  2677, 10325,  6464,  3655,    16,  8578,    15, 10199,
            13, 30625,     8,    89,   579,    10,  2579,  4230,     9, 22288,
         33803,   261, 17796,    53,     5,  1318,     9,   103, 26348,  6771,
             5,   614,  7476,   425,  1308,  2979,    16,    14,   209,    32,
           295,   417,     8,   910,   417,   847, 26348,    45,   299,  1318,
           318,    47,   304,    10, 22939,    50,   213,    13,    10,  1392,
            23, 19632,    47,    64,   120,   357,  1318, 10199,    23,   593,
             7,     5,   276,   425,  4

### Unhelpful Review

In [14]:
hfs_ds_val.data.iloc[0].text

'I was out in St Louis on a business trip and was looking forward to trying Pappy s due to all the hype from yelp and the travel channel I ordered a full rack of ribs with deep fried corn and green beans The deep fried corn was awesome but I was pretty dissapointed in the ribs I m not sure if it was because I came later in the day around pm shouldn t matter but my ribs were pretty dry I ve had my fair share of dry rub ribs and expect them to still be tender and moist My travel partners were also not too impressed with the ribs from Pappy s Maybe it was an off day for them but this is definitely not close to the best ribs that I ve had'

In [15]:
hfs_ds_val.__getitem__(0)

{'input_ids': tensor([    0,   100,    21,    66,    11,   312,  3217,    15,    10,   265,
          1805,     8,    21,   546,   556,     7,   667,   221, 31953,   579,
           528,     7,    70,     5, 14761,    31,  1423,   523,   642,     8,
             5,  1504,  4238,    38,  2740,    10,   455, 20004,     9, 21443,
            19,  1844, 16708,  7636,     8,  2272, 13095,    20,  1844, 16708,
          7636,    21,  6344,    53,    38,    21,  1256, 14863,  1115, 26427,
            11,     5, 21443,    38,   475,    45,   686,   114,    24,    21,
           142,    38,   376,   423,    11,     5,   183,   198,  4751,  4395,
           326,   948,    53,   127, 21443,    58,  1256,  3841,    38,  5030,
            56,   127,  2105,   458,     9,  3841, 14204, 21443,     8,  1057,
           106,     7,   202,    28,  8780,     8, 34257,  1308,  1504,  2567,
            58,    67,    45,   350,  6889,    19,     5, 21443,    31,   221,
         31953,   579,  5359,    24,   

# 2. Data Module

## Import Needed Libraries

In [16]:
import pytorch_lightning as pl
from torch.utils.data import DataLoader

## Creating PyTorch Data Module

In [17]:
class Helpfulness_Data_Model(pl.LightningDataModule):
    
    def __init__(self, attributes, batch_size, max_token_length, model_name):
        super().__init__()
        self.attributes = attributes
        self.batch_size = batch_size
        self.max_token_length = max_token_length
        self.model_name = model_name
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        
    def setup(self, stage = None):
        if stage in (None, "fit"):
            self.train_dataset = Helpfulness_Dataset(train_df, tokenizer, attributes, 512)
            self.val_dataset = Helpfulness_Dataset(test_df, tokenizer, attributes, 512)
        if stage == "predict":
            self.val_dataset = Helpfulness_Dataset(test_df, tokenizer, attributes, 512)
    
    def train_dataloader(self):
        return DataLoader(self.train_dataset,
                          batch_size=self.batch_size,
                          num_workers=4,
                          shuffle=True)
    
    def val_dataloader(self):
        return DataLoader(self.val_dataset,
                          batch_size=self.batch_size,
                          num_workers=4,
                          shuffle=False)
    
    def predict_dataloader(self):
        return DataLoader(self.val_dataset,
                          batch_size=self.batch_size,
                          num_workers=4,
                          shuffle=False)

## Create Instance Of Our Data Module And Set It Up

In [18]:
attributes = ["helpful", "unhelpful"]
model_name = "roberta-base"
batch_size = 8
max_token_length = 512
hfs_data_module = Helpfulness_Data_Model(attributes, batch_size, max_token_length, model_name)
hfs_data_module.setup()
dl = hfs_data_module.train_dataloader()

## Number Of Batches

In [19]:
len(dl)

1000

# 3. Model

## Import Needed Libraries

In [20]:
from transformers import AutoModel, AdamW, get_cosine_schedule_with_warmup
import torch.nn as nn
import math
from torchmetrics.functional.classification import auroc
import torch.nn.functional as F

2023-12-20 20:33:16.662555: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-20 20:33:16.690421: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Helpfulness Classifier Class

In [21]:
class Helpfulness_Classifier(pl.LightningModule):
    
    def __init__(self, config: dict):
        super().__init__()
        self.config = config
        self.pretrained_model = AutoModel.from_pretrained(config["model_name"], return_dict=True)
        self.hidden= nn.Linear(self.pretrained_model.config.hidden_size, self.pretrained_model.config.hidden_size)
        self.classification = nn.Linear(self.pretrained_model.config.hidden_size, self.config["n_labels"])
        torch.nn.init.xavier_uniform_(self.hidden.weight)
        torch.nn.init.xavier_uniform_(self.classification.weight)
        self.loss_fun = nn.BCEWithLogitsLoss(reduction="mean")
        self.dropout = nn.Dropout()
        
    def forward(self, input_ids, attention_mask, labels=None):
        output = self.pretrained_model(input_ids = input_ids, attention_mask = attention_mask)
        pooled_output = torch.mean(output.last_hidden_state, 1)
        pooled_output = self.hidden(pooled_output)
        pooled_output = self.dropout(pooled_output)
        pooled_output = F.relu(pooled_output)
        logits = self.classification(pooled_output)
        loss = 0 
        if labels is not None:
            loss = self.loss_fun(logits.view(-1, self.config["n_labels"]), labels.view(-1, self.config["n_labels"]))
        return loss, logits
    
    def training_step(self, batch, batch_index):
        loss, logits = self(**batch)
        self.log("train loss", loss, prog_bar=True, logger=True)
        return {"loss": loss, "predictions": logits, "labels": batch["labels"]}
    
    def validations_step(self, batch, batch_index):
        loss, logits = self(**batch)
        self.log("validation loss", loss, prog_bar=True, logger=True)
        return {"val_loss": loss, "predictions": logits, "labels": batch["labels"]}
    
    def predict_step(self, batch, batch_index):
        _, logits = self(**batch)
        return logits
    
    def configure_optimizers(self):
        optimizer = AdamW(self.parameters(), lr=self.config["lr"], weight_decay=self.config["w_decay"])
        total_steps = self.config["train_size"] / self.config["bs"]
        warmup_steps = math.floor(total_steps * self.config["warmup"])
        scheduler = get_cosine_schedule_with_warmup(optimizer, warmup_steps, total_steps)
        return [optimizer], [scheduler]
    

In [22]:
config = {
    "model_name": "roberta-base",
    "n_labels": len(attributes),
    "bs": 8,
    "lr": 2e-5,
    "warmup": 0.2,
    "w_decay": 0.001,
    "train_size": len(hfs_data_module.train_dataloader()),
    "n_epochs": 4
}

model = Helpfulness_Classifier(config)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [23]:
idx = 0

input_ids = hfs_ds_train.__getitem__(idx)["input_ids"]
am = hfs_ds_train.__getitem__(idx)["attention_mask"]
labels = hfs_ds_train.__getitem__(idx)["labels"]

loss, output = model(input_ids.unsqueeze(dim=0), am.unsqueeze(dim=0), labels.unsqueeze(dim=0))

In [24]:
loss, output

(tensor(0.8006, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>),
 tensor([[-0.2596,  0.1481]], grad_fn=<AddmmBackward0>))

## Train

In [25]:
torch.set_float32_matmul_precision('medium')
hfs_data_module = Helpfulness_Data_Model(attributes, config["bs"], max_token_length, model_name)
hfs_data_module.setup()

model = Helpfulness_Classifier(config)

trainer = pl.Trainer(max_epochs=config["n_epochs"], num_sanity_val_steps=50)
trainer.fit(model, hfs_data_module)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name             | Type              | Params
-------------------------------------------------------
0 | pretrained_model | RobertaModel      | 124 M 
1 | hidden           | Linear            | 590 K 
2 | classification   | Linear            | 1.5 K 
3 | loss_fun         | BCEWithLogitsLoss | 0     
4 | dropout          | Dropout           | 0     
-------------------------------------------------------
125 M     Trainable params
0         Non-trainable params
125 M     Total params
500.951   Total estim

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Training: |                                                              | 0/? [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

`Trainer.fit` stopped: `max_epochs=4` reached.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


## Predict / Eval

In [26]:
def classify_reviews(model, dm):
    preictions = trainer.predict(model, datamodule=dm)
    flattened_prediction = np.stack([torch.sigmoid(torch.Tensor(p)) for batch in preictions for p in batch])
    return flattened_prediction

In [27]:
hfs_data_module.val_dataset.data[["helpful", "helpful"]]

Unnamed: 0,helpful,helpful.1
0,0,0
1,0,0
2,1,1
3,1,1
4,0,0
...,...,...
1995,0,0
1996,1,1
1997,1,1
1998,0,0


In [28]:
predictions = classify_reviews(model, hfs_data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Predicting: |                                                            | 0/? [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [29]:
helpful_array = hfs_data_module.val_dataset.data[["helpful", "unhelpful"]]
helpful_array

Unnamed: 0,helpful,unhelpful
0,0,1
1,0,1
2,1,0
3,1,0
4,0,1
...,...,...
1995,0,1
1996,1,0
1997,1,0
1998,0,1


In [30]:
pd.DataFrame(predictions)

Unnamed: 0,0,1
0,0.255624,0.725359
1,0.103792,0.892647
2,0.082891,0.913614
3,0.193389,0.799366
4,0.331427,0.662039
...,...,...
1995,0.091183,0.903223
1996,0.257866,0.761975
1997,0.436694,0.577040
1998,0.170887,0.835405


In [31]:
def generate_confusion_matrix(predictions, ground_truth_labels):
    
    tp, fp, tn, fn = 0, 0, 0, 0
    
    for i in range(len(predictions)):
        if (predictions.iloc[i][0] > predictions.iloc[i][1]):
            helpful = 1
            unhelpful = 0
        else:
            helpful = 0
            unhelpful = 1
        if helpful == 1 and helpful == ground_truth_labels.iloc[i]["helpful"]:
            tp += 1 
        elif helpful == 1 and helpful != ground_truth_labels.iloc[i]["helpful"]:
            fp += 1
        elif unhelpful == 1 and unhelpful == ground_truth_labels.iloc[i]["unhelpful"]:
            tn += 1
        elif unhelpful == 1 and unhelpful != ground_truth_labels.iloc[i]["unhelpful"]:
            fn += 1
    return tp, fp, tn, fn

In [32]:
tp, fp, tn, fn = generate_confusion_matrix(pd.DataFrame(predictions), hfs_data_module.val_dataset.data[["helpful", "unhelpful"]])
print(f"True Positive = {tp}")
print(f"False Positive = {fp}")
print(f"True negative = {tn}")
print(f"False negative = {fn}")
print(f"Total = {tp + fp + tn + fn}")
with open("Results/RoBERTa-Base-Met-Clean.txt", "w") as f:
    f.write(f"True Positive = {tp}\n")
    f.write(f"False Positive = {fp}\n")
    f.write(f"True negative = {tn}\n")
    f.write(f"False negative = {fn}\n")
    f.write(f"Total = {tp + fp + tn + fn}")

True Positive = 531
False Positive = 91
True negative = 909
False negative = 469
Total = 2000


In [33]:
def calculate_metrics(tp, fp, tn, fn):
    accuracy = (tp + tn) / (tp + fp + tn + fn) if (tp + fp + tn + fn) != 0 else 0.0
    precision = tp / (tp + fp) if (tp + fp) != 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) != 0 else 0.0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0.0
    specificity = tn / (tn + fp) if (tn + fp) != 0 else 0.0
    sensitivity = recall  # Sensitivity is another name for Recall
    false_positive_rate = fp / (fp + tn) if (fp + tn) != 0 else 0.0
    
    return accuracy, precision, recall, f1_score, specificity, sensitivity, false_positive_rate

In [34]:
accuracy, precision, recall, f1_score, specificity, sensitivity, false_positive_rate = calculate_metrics(tp, fp, tn, fn)
print(f"Accuracy = {accuracy}")
print(f"Precision = {precision}")
print(f"Recall = {recall}")
print(f"F1 Score = {f1_score}")
print(f"Specificity = {specificity}")
print(f"Sensitivity = {sensitivity}")
print(f"False Positive Rate = {false_positive_rate}")
with open("Results/RoBERTa-Base-Results-Clean.txt", "w") as f:
    f.write(f"Accuracy = {accuracy}\n")
    f.write(f"Precision = {precision}\n")
    f.write(f"Recall = {recall}\n")
    f.write(f"F1 Score = {f1_score}\n")
    f.write(f"Specificity = {specificity}\n")
    f.write(f"Sensitivity = {sensitivity}\n")
    f.write(f"False Positive Rate = {false_positive_rate}\n")

Accuracy = 0.72
Precision = 0.8536977491961415
Recall = 0.531
F1 Score = 0.6547472256473489
Specificity = 0.909
Sensitivity = 0.531
False Positive Rate = 0.091
