In [1]:
import pandas as pd
from pysentimiento import create_analyzer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import os
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from huggingface_hub import login

  from .autonotebook import tqdm as notebook_tqdm


#### Fine-Tune the Base Model

In [None]:
# Load the train/test splits from local CSV files.
# NOTE(review): the tokenizer below reads a 'text' column; sequence-classification
# training presumably also needs an integer 'label' column — confirm the CSV schema.
dataset = load_dataset('csv', data_files={'train': 'train.csv', 'test': 'test.csv'})

tokenizer = AutoTokenizer.from_pretrained('vinai/bertweet-base')

def preprocess_function(examples):
    """Tokenize one batch of rows for ``Dataset.map(batched=True)``.

    Args:
        examples: Batch of rows; its ``'text'`` entry is passed to the tokenizer.

    Returns:
        The tokenizer output for the batch, truncated to the tokenizer's
        maximum length and left unpadded.
    """
    # No padding here: padding inside a batched map pads every row of a map
    # batch to that batch's longest sequence and stores the pad tokens.
    # The Trainer is given the tokenizer, so its default collator pads each
    # training/eval batch dynamically instead.
    return tokenizer(examples['text'], truncation=True)

encoded_dataset = dataset.map(preprocess_function, batched=True)

# Authenticate with the Hugging Face Hub BEFORE building the Trainer: with
# push_to_hub=True the Trainer needs Hub access to set up the target repository,
# so logging in only after training is too late.
# The token is read from the HF_TOKEN environment variable; when it is unset,
# login() falls back to its interactive prompt.
# SECURITY: an access token used to be hardcoded in this cell. Treat it as
# leaked and revoke it in the Hugging Face account settings.
login(token=os.environ.get("HF_TOKEN"))

# Load the base checkpoint with a freshly initialised 3-class classification head.
# NOTE(review): the three classes presumably correspond to negative / neutral /
# positive — confirm against the label encoding of train.csv.
model = AutoModelForSequenceClassification.from_pretrained('vinai/bertweet-base', num_labels=3)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",  # run evaluation at the end of every epoch
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=True,
    # Name of the Hub repository. Without this the repository name is derived
    # from output_dir rather than the intended model name.
    hub_model_id="bertweet-sentiment-analysis",
)

# Create Trainer. Passing the tokenizer lets the Trainer pad batches and save
# the tokenizer alongside the model.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset['train'],
    eval_dataset=encoded_dataset['test'],
    tokenizer=tokenizer,
)

# Train the model
trainer.train()

# Push the final model to the Hub repository named by hub_model_id.
# The first positional parameter of push_to_hub is the commit message, not the
# repository name, so it is passed explicitly by keyword here.
trainer.push_to_hub(commit_message="End of training")


In [3]:
# Read in data
test_df = pd.read_csv("../EDA/multi_sampled.csv")

# Randomly sample N_EVAL_SAMPLES entries; the fixed seed keeps the subset
# identical across re-runs.
N_EVAL_SAMPLES = 200
test_df = test_df.sample(n=N_EVAL_SAMPLES, random_state=1)

# Change labels from numbers to their meaning
label_dict = {0:'NEG', 1:'NEU', 2:'POS'}
test_df['label'] = test_df['label'].map(label_dict)

# Series.map turns every value missing from label_dict into NaN without any
# warning; fail here with a clear message instead of inside the metric functions.
if test_df['label'].isna().any():
    raise ValueError(
        f"'label' column contains values outside {sorted(label_dict)}; cannot map them to sentiment names"
    )

# Plain Python lists: inputs for the analyzer and ground truth for the metrics.
texts = test_df["text"].tolist()
true_labels = test_df["label"].tolist()

In [4]:
# Build the English sentiment analyzer.
analyzer = create_analyzer(task="sentiment", lang="en")

# Classify every text one at a time and keep only the predicted label
# (the `.output` attribute of each prediction), preserving input order.
predicted_labels = [analyzer.predict(tweet).output for tweet in texts]

In [5]:
# Score the predictions against the ground-truth labels.
# Precision, recall and F1 all use the same 'weighted' averaging, so the
# shared keyword argument is defined once.
averaging = {"average": "weighted"}

accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, **averaging)
recall = recall_score(true_labels, predicted_labels, **averaging)
f1 = f1_score(true_labels, predicted_labels, **averaging)

# Report one metric per line, in the same order as they were computed.
report = (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
)
for metric_name, metric_value in report:
    print(f"{metric_name}: {metric_value}")

Accuracy: 0.71
Precision: 0.720187908496732
Recall: 0.71
F1 Score: 0.6948421052631579
