This notebook tests fine-tuning a pretrained HuggingFace model.

This cell tests fine-tuning the Twitter Roberta Base Sentiment Model (https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment). It fine tunes with the kaggle social media analysis dataset (https://www.kaggle.com/datasets/kashishparmar02/social-media-sentiments-analysis-dataset).

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
from evaluate import load
import pandas as pd

# 1. Load Dataset
def load_dataset_from_csv(file_path):
    df = pd.read_csv(file_path).drop(columns=["Unnamed: 0"], errors="ignore")
    return Dataset.from_pandas(df)

train_dataset = load_dataset_from_csv("../data/SocialMediaSentimentsAnalysisDataset.csv")
test_dataset = load_dataset_from_csv("../data/SocialMediaSentimentsAnalysisDataset.csv")

# 2. Map Sentiment Labels to Integers (Ignore unknown labels)
sentiment_mapping = {
    "Positive": 2,
    "Neutral": 1,
    "Negative": 0
}

def map_labels(example):
    sentiment = example["Sentiment"].strip()
    if sentiment in sentiment_mapping:
        example["labels"] = sentiment_mapping[sentiment]
        return example
    else:
        return None  # Skipping the unknown labels

# Apply mapping and remove invalid examples
train_dataset = train_dataset.filter(lambda example: example["Sentiment"].strip() in sentiment_mapping)
test_dataset = test_dataset.filter(lambda example: example["Sentiment"].strip() in sentiment_mapping)

train_dataset = train_dataset.map(map_labels)
test_dataset = test_dataset.map(map_labels)

# 3. Load Tokenizer and Model
MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL, num_labels=3)  # 3 labels: negative, neutral, positive

# 4. Tokenize Dataset
def tokenize_function(example):
    return tokenizer(example["Text"], truncation=True, padding="max_length", max_length=128)

train_dataset = train_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

# 5. Remove Unnecessary Columns (Hugging Face Trainer needs tensors only)
train_dataset = train_dataset.remove_columns(["Text", "Sentiment", "Timestamp", "User", "Platform", "Hashtags", "Retweets", "Likes", "Country", "Year", "Month", "Day", "Hour"])
test_dataset = test_dataset.remove_columns(["Text", "Sentiment", "Timestamp", "User", "Platform", "Hashtags", "Retweets", "Likes", "Country", "Year", "Month", "Day", "Hour"])

# 6. Set Up Trainer and Training Arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
)

# 7. Define Accuracy Metric
accuracy = load("accuracy")

def compute_metrics(eval_pred):
    import numpy as np
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return accuracy.compute(predictions=predictions, references=labels)

# 8. Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

# 9. Fine-Tune the Model
trainer.train()

# 10. Evaluate and Save the Fine-Tuned Model
trainer.evaluate()
model.save_pretrained("./fine_tuned_twitter_roberta")
tokenizer.save_pretrained("./fine_tuned_twitter_roberta")

print("Fine-tuning complete and model saved!")

# 11. Test the Fine-Tuned Model
from transformers import pipeline

fine_tuned_pipeline = pipeline("text-classification", model="./fine_tuned_twitter_roberta", tokenizer="./fine_tuned_twitter_roberta")
result = fine_tuned_pipeline("I love this song so much 😍")
print("Test:", result)


Filter:   0%|          | 0/732 [00:00<?, ? examples/s]

Filter:   0%|          | 0/732 [00:00<?, ? examples/s]

Map:   0%|          | 0/67 [00:00<?, ? examples/s]

Map:   0%|          | 0/67 [00:00<?, ? examples/s]

Map:   0%|          | 0/67 [00:00<?, ? examples/s]

Map:   0%|          | 0/67 [00:00<?, ? examples/s]

This cell tests on the Multilingual Uncased Sentiment Model (good for Yelp) (https://huggingface.co/nlptown/bert-base-multilingual-uncased-sentiment). It fine-tunes on the Yelp Business dataset (https://business.yelp.com/data/resources/open-dataset/).

In [1]:
# Import necessary libraries
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, pipeline
from datasets import Dataset
from evaluate import load
import pandas as pd
import numpy as np
import json

# 1. Load Dataset from JSON with limit
def load_dataset_from_json(file_path, limit):  # Limit to 10,000 reviews by default
    data = []
    with open(file_path, "r", encoding="utf-8") as f:
        for i, line in enumerate(f):
            if i >= limit:
                break
            data.append(json.loads(line))
    df = pd.DataFrame(data)
    return Dataset.from_pandas(df)

# Adjust paths and set limit (e.g., 10,000 reviews)
train_dataset = load_dataset_from_json("../data/yelp_dataset/yelp_academic_dataset_review.json", limit=100)
test_dataset = load_dataset_from_json("../data/yelp_dataset/yelp_academic_dataset_review.json", limit=50)

# 2. Load Tokenizer and Model
MODEL = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL, num_labels=5)  # Yelp uses 1-5 stars

# 3. Map Sentiment Labels to Integers (1 to 5 stars)
def map_labels(example):
    try:
        example["labels"] = int(example["stars"]) - 1  # Labels go from 0 to 4 (for 5 stars)
        return example
    except ValueError:
        return None  # Ignore invalid labels

# Apply mapping and filter out invalid labels
train_dataset = train_dataset.filter(lambda example: "stars" in example and isinstance(example["stars"], (int, float)))
test_dataset = test_dataset.filter(lambda example: "stars" in example and isinstance(example["stars"], (int, float)))

train_dataset = train_dataset.map(map_labels)
test_dataset = test_dataset.map(map_labels)

# 4. Tokenize Dataset
def tokenize_function(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=128)

train_dataset = train_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

# Remove original text and stars columns (Hugging Face Trainer needs tensors only)
train_dataset = train_dataset.remove_columns(["text", "stars"])
test_dataset = test_dataset.remove_columns(["text", "stars"])

# 5. Set Up Trainer and Training Arguments
training_args = TrainingArguments(
    output_dir="./yelp_results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
)

# Define accuracy metric
accuracy = load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return accuracy.compute(predictions=predictions, references=labels)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

# 6. Fine-Tune the Model
trainer.train()

# 7. Evaluate and Save the Fine-Tuned Model
trainer.evaluate()
model.save_pretrained("./fine_tuned_yelp_bert")
tokenizer.save_pretrained("./fine_tuned_yelp_bert")

print("Fine-tuning complete and model saved!")

# 8. Test the Fine-Tuned Model (Optional)
fine_tuned_pipeline = pipeline("sentiment-analysis", model="./fine_tuned_yelp_bert", tokenizer="./fine_tuned_yelp_bert")
test_cases = [
    "The food was fantastic, but the service was slow.",
    "Absolutely awful. Never coming back.",
    "Great food, great service. Highly recommend!"
]

for i, text in enumerate(test_cases, 1):
    result = fine_tuned_pipeline(text)
    print(f"Test Case {i}: {text} Classification: {result}")


Filter:   0%|          | 0/100 [00:00<?, ? examples/s]

Filter:   0%|          | 0/50 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.429813,0.94
2,No log,0.188837,0.96
3,No log,0.117968,0.96


Fine-tuning complete and model saved!


Device set to use cpu


Test Case 1: The food was fantastic, but the service was slow. Classification: [{'label': '4 stars', 'score': 0.7007597088813782}]
Test Case 2: Absolutely awful. Never coming back. Classification: [{'label': '1 star', 'score': 0.9797160625457764}]
Test Case 3: Great food, great service. Highly recommend! Classification: [{'label': '5 stars', 'score': 0.9886147975921631}]
