<a href="https://colab.research.google.com/github/sgbyteninja/sentiment_analysis_customer_reviews/blob/main/RoBERTa_training_sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports


In [1]:
# Install necessary packages
!pip install datasets
!pip install huggingface_hub

# Import libraries
import pandas as pd
from datasets import Dataset
import torch
from transformers import (
    RobertaTokenizer,
    RobertaForSequenceClassification,
    AutoConfig,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback,
    AutoModelForSequenceClassification,
    AutoTokenizer
)
from torch.utils.data import DataLoader
from google.colab import drive
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from huggingface_hub import login, Repository



In [2]:
# Login to Hugging Face
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Tokenisation

In [7]:
# URLs of the CSV files from my Github repository
train_url = (
    'https://raw.githubusercontent.com/sgbyteninja/sentiment_analysis_customer_reviews/'
    'refs/heads/main/train_data.csv'
)
test_url = (
    'https://raw.githubusercontent.com/sgbyteninja/sentiment_analysis_customer_reviews/'
    'refs/heads/main/test_data.csv'
)
val_url = (
    'https://raw.githubusercontent.com/sgbyteninja/sentiment_analysis_customer_reviews/'
    'refs/heads/main/val_data.csv'
)

# Loading the CSV files
train_df = pd.read_csv(train_url)
test_df = pd.read_csv(test_url)
val_df = pd.read_csv(val_url)



In [10]:
# Converting the train_df into Hugging Face dataset format
train_df = Dataset.from_pandas(train_df)
val_df = Dataset.from_pandas(val_df)
test_df = Dataset.from_pandas(test_df)

# Load the RoBERTa tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')


In [11]:
# Tokenize the review column
def tokenize_function(examples):
    """
    Tokenizes the 'review' column in the input examples.

    This function takes a batch of examples containing the 'review' column (text data)
    and tokenizes the text using a pre-defined tokenizer. The tokenized output is
    padded to a maximum length and truncated where necessary to ensure consistency in the input length.

    Args:
        examples (dict): A dictionary containing the input data, including a 'review' key
                         with text that needs to be tokenized.

    Returns:
        dict: A dictionary containing tokenized inputs, with keys 'input_ids' and 'attention_mask'.
    """
    return tokenizer(
        examples['review'],
        padding="max_length",
        truncation=True
    )

In [12]:
# Tokenize the datasets
train_df = train_df.map(tokenize_function, batched=True)
val_df = val_df.map(tokenize_function, batched=True)
test_df = test_df.map(tokenize_function, batched=True)

Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [None]:
# Check if the tokenization worked properly
print(train_df[0])
print(val_df[0])
print(test_df[0])

{'review': "A very nice pizza.  I haven't made up my mind which is more authentic, NYPD or Ray's.  Both, have the thin NY style crust, but NYPD makes me feel like I'm waiting for an E train to Queens; whereas Ray's feels more upper east side-ish.", 'label': 0, 'input_ids': [0, 250, 182, 2579, 9366, 4, 1437, 38, 2220, 75, 156, 62, 127, 1508, 61, 16, 55, 12757, 6, 20283, 50, 4622, 18, 4, 1437, 1868, 6, 33, 5, 7174, 5300, 2496, 22196, 6, 53, 20283, 817, 162, 619, 101, 38, 437, 2445, 13, 41, 381, 2341, 7, 12446, 131, 9641, 4622, 18, 2653, 55, 2853, 3017, 526, 12, 1173, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [None]:
# Assuring the compatibility with PyTorch
train_df.set_format("torch", columns=["input_ids", "attention_mask", "label"])
val_df.set_format("torch", columns=["input_ids", "attention_mask", "label"])
test_df.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# Model and Configuration Setup


In [None]:
# Extracting the number of names and classes
num_labels = len(set(train_df["label"]))
id2label = {i: str(i) for i in range(num_labels)}

In [None]:
# Configuration of the RoBERTa-model
config = AutoConfig.from_pretrained("roberta-base")
config.update({"id2label": id2label, "num_labels": num_labels})

In [None]:
# Loading the RoBERTa model
model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base", config=config
)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Defining the Metrics for the Model Evaluation


In [23]:
# Defining the Metrics for Model Evaluation
# For Classification it makes sense to use the metrics accuracy, precision, recall and f1 (confusion matrix)
def compute_metrics(p):
    """
    Computes classification metrics (accuracy, precision, recall, F1-score)
    for the given predictions and labels.

    This function calculates several evaluation metrics for classification tasks,
    including precision, recall, F1-score, and accuracy, using the predicted labels
    and true labels. It uses a weighted average for precision, recall, and F1-score.

    Args:
        p (tuple): A tuple containing two elements:
            - predictions (array-like): The predicted labels (probabilities or logits).
            - labels (array-like): The true labels for the data.

    Returns:
        dict: A dictionary containing the following metrics:
            - 'accuracy' (float): Accuracy score of the model.
            - 'precision' (float): Weighted precision score.
            - 'recall' (float): Weighted recall score.
            - 'f1' (float): Weighted F1-score.
    """
    predictions, labels = p
    predictions = predictions.argmax(axis=-1)

    # Compute precision, recall, and f1-score for each class
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='weighted'
    )

    accuracy = accuracy_score(labels, predictions)

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

# Define the Parameters for Training


In [24]:
# Defining the Training parameters
training_args = TrainingArguments(
    output_dir="./roberta_sentiment",
    num_train_epochs=20,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    logging_dir="./logs",
    logging_steps=10,
    learning_rate=5e-5,
    weight_decay=0.01,
    warmup_steps=500,
    save_strategy="epoch",
    load_best_model_at_end=True,
    save_total_limit=2,
    report_to="none",
    metric_for_best_model="eval_loss",
    greater_is_better=False
)

In [25]:
# Add EarlyStoppingCallback
early_stopping_callback = EarlyStoppingCallback(
    early_stopping_patience=2
)

# Setup the Trainer and train the Model


In [26]:
# Define the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_df,
    eval_dataset=val_df,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping_callback]
)


In [None]:
# Train the model
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5229,0.516763,0.767,0.824393,0.767,0.786999
2,0.5282,0.609901,0.817,0.802743,0.817,0.796193
3,0.5612,0.511255,0.832,0.820588,0.832,0.8253
4,0.1716,0.648121,0.825,0.819718,0.825,0.816432
5,0.4532,0.765491,0.827,0.842037,0.827,0.830053


TrainOutput(global_step=5000, training_loss=0.6295077795743942, metrics={'train_runtime': 3983.0661, 'train_samples_per_second': 40.17, 'train_steps_per_second': 5.021, 'total_flos': 1.128021098496e+16, 'train_loss': 0.6295077795743942, 'epoch': 5.0})

# Safe the trained Model to Hugging-Face-Hub


In [None]:
# Create the repository object and clone
repo = Repository(local_dir="/content/roberta_sentiment", clone_from="sgbyteninja/sentiment_analysis_with_roBERTa")

For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
Cloning https://huggingface.co/sgbyteninja/sentiment_analysis_with_roBERTa into local empty directory.


Download file model.safetensors:   0%|          | 8.00k/499M [00:00<?, ?B/s]

Clean file model.safetensors:   0%|          | 1.00k/499M [00:00<?, ?B/s]

In [None]:
# Saving the model
model.push_to_hub("sgbyteninja/sentiment_analysis_with_roBERTa")
tokenizer.push_to_hub("sgbyteninja/sentiment_analysis_with_roBERTa")

README.md:   0%|          | 0.00/31.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/523M [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/sgbyteninja/sentiment_analysis_with_roBERTa/commit/37b49e6923d7d8409f3900a93242cece17c99264', commit_message='Upload tokenizer', commit_description='', oid='37b49e6923d7d8409f3900a93242cece17c99264', pr_url=None, repo_url=RepoUrl('https://huggingface.co/sgbyteninja/sentiment_analysis_with_roBERTa', endpoint='https://huggingface.co', repo_type='model', repo_id='sgbyteninja/sentiment_analysis_with_roBERTa'), pr_revision=None, pr_num=None)

# Evaluation of the Model


In [27]:
# Evaluate the model
results = trainer.evaluate(test_df)

# Print evaluation results
print(results)

{'eval_loss': 0.49890825152397156, 'eval_model_preparation_time': 0.0032, 'eval_accuracy': 0.835, 'eval_precision': 0.8245538648411269, 'eval_recall': 0.835, 'eval_f1': 0.8272529318440921, 'eval_runtime': 27.1453, 'eval_samples_per_second': 36.839, 'eval_steps_per_second': 4.605}


In [28]:
# Display the results of the model evaluation
results

{'eval_loss': 0.49890825152397156,
 'eval_model_preparation_time': 0.0032,
 'eval_accuracy': 0.835,
 'eval_precision': 0.8245538648411269,
 'eval_recall': 0.835,
 'eval_f1': 0.8272529318440921,
 'eval_runtime': 27.1453,
 'eval_samples_per_second': 36.839,
 'eval_steps_per_second': 4.605}

# Testing the Model

In [18]:
# Load the fine-tuned model and tokenizer from Hugging Face
model = AutoModelForSequenceClassification.from_pretrained(
    "sgbyteninja/sentiment_analysis_with_roBERTa"
)
tokenizer = AutoTokenizer.from_pretrained(
    "sgbyteninja/sentiment_analysis_with_roBERTa"
)

# List of example texts, including ambiguity
texts = [
    "The movie was fantastic, I couldn't stop laughing from start to finish.",  # Positive
    "The food was decent, but the portions were way too small for the price.",  # Neutral
    "I loved the concept of the new app, but it crashes every time I try to open it. What a waste of money...",  # Negative
    "The concert was incredible, I will definitely attend again next year.",  # Positive
    "The weather was nice today, perfect for a walk in the park.",  # Positive
    "I can't believe I spent money on this. What a waste.",  # Negative
    "It’s okay, I guess. Not great, but not terrible either.",  # Neutral

    # Irony and sarcasm examples
    "Oh, what a surprise, the service is slow again. I really wasn't expecting that! I will definitely never come here again!",  # Sarcastic (Negative)
    "Sure, I’d love to wait another hour for my coffee. Who doesn't enjoy a good delay? I am more than annoyed. Can't recommend this cafe.",  # Sarcastic (Negative)

    # Ambiguous statements
    "Wow, that was something...",  # Could be positive, neutral, or sarcastic
    "I have never seen anything like this before!",  # Could be positive (amazing) or negative (shocking)
    "This is an experience I won’t forget anytime soon.",  # Could be positive or negative
    "The product works as expected.",  # Neutral, but could imply disappointment if expectations were low
    "The team really pulled through in the end.",  # Could be positive or negative, depending on context
    "Not bad at all.",  # Technically positive, but mildly so
    "I wish I had known this before.",  # Could imply regret or satisfaction
]

# Corresponding labels (2 for negative, 1 for positive, 0 for neutral, None for ambiguous)
labels = [
    1, 0, 2, 1, 1, 2, 0,  # Original examples
    2, 2,  # Sarcasm
    None, None, None, None, None, None, None  # Ambiguous labels
]

# Tokenizing the texts
inputs = tokenizer(
    texts, padding=True, truncation=True, return_tensors="pt", max_length=512
)

# Perform inference and collect predictions
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits

# Calculate probabilities for each class
probabilities = torch.nn.functional.softmax(logits, dim=-1)

# Get predicted classes (the index of the class with the highest probability)
predicted_classes = torch.argmax(probabilities, dim=-1)

# Output the predictions and compare with the true labels
for i, text in enumerate(texts):
    true_label = labels[i] if labels[i] is not None else "?"
    predicted_label = predicted_classes[i].item()
    prob = probabilities[i].tolist()

    print(f"Text: {text}")
    print(f"True label: {true_label} - Predicted label: {predicted_label}")
    print(f"Probabilities: Negative: {prob[2]:.4f}, Neutral: {prob[0]:.4f}, Positive: {prob[1]:.4f}")
    print("-" * 80)

config.json:   0%|          | 0.00/159k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/523M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.22k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/999k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/958 [00:00<?, ?B/s]

Text: The movie was fantastic, I couldn't stop laughing from start to finish.
True label: 1 - Predicted label: 1
Probabilities: Negative: 0.0004, Neutral: 0.0056, Positive: 0.9938
--------------------------------------------------------------------------------
Text: The food was decent, but the portions were way too small for the price.
True label: 0 - Predicted label: 0
Probabilities: Negative: 0.3641, Neutral: 0.5290, Positive: 0.1060
--------------------------------------------------------------------------------
Text: I loved the concept of the new app, but it crashes every time I try to open it. What a waste of money...
True label: 2 - Predicted label: 2
Probabilities: Negative: 0.9542, Neutral: 0.0380, Positive: 0.0071
--------------------------------------------------------------------------------
Text: The concert was incredible, I will definitely attend again next year.
True label: 1 - Predicted label: 1
Probabilities: Negative: 0.0004, Neutral: 0.0060, Positive: 0.9934
------