In [None]:
!pip install transformers datasets torch scikit-learn

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset, DatasetDict
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd

In [None]:
dataset = load_dataset("glue", "sst2")
tokenizer = AutoTokenizer.from_pretrained("roberta-base")  # Use RoBERTa tokenizer

In [4]:
def tokenize_function(examples):
    return tokenizer(examples["sentence"], padding="max_length", truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

In [5]:
dataset["train"][0]["label"]

0

In [6]:
model = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=2)

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
training_args = TrainingArguments(
    output_dir="./sentiment_results",  # Save RoBERTa results separately
    per_device_train_batch_size=16,  # Adjust batch size as needed
    per_device_eval_batch_size=64,
    num_train_epochs=3,             # Adjust number of epochs
    learning_rate=2e-5,            # Adjust learning rate
    eval_strategy="epoch",
    save_strategy="epoch",         # Save checkpoints after each epoch
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    push_to_hub=False,
    # Add other hyperparameters to tune here (e.g., weight_decay, warmup_steps)
)

In [8]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average="binary")  # Use 'binary' for SST-2
    return {"accuracy": accuracy, "f1": f1}

In [9]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    compute_metrics=compute_metrics,
)

In [10]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.208,0.255575,0.925459,0.926719
2,0.1669,0.290202,0.934633,0.937017
3,0.1101,0.28912,0.941514,0.942889


TrainOutput(global_step=12630, training_loss=0.1789536261690673, metrics={'train_runtime': 4082.5002, 'train_samples_per_second': 49.491, 'train_steps_per_second': 3.094, 'total_flos': 5.316079940232192e+16, 'train_loss': 0.1789536261690673, 'epoch': 3.0})

In [11]:
best_model_checkpoint = trainer.state.best_model_checkpoint
best_model = AutoModelForSequenceClassification.from_pretrained(best_model_checkpoint)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
best_model.to(device)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [12]:
def predict_in_batches(model, dataset, batch_size=16):
    all_predictions = []
    for i in range(0, len(dataset), batch_size):
        batch = dataset[i : i + batch_size]
        input_ids = torch.tensor(batch["input_ids"]).to(device)
        attention_mask = torch.tensor(batch["attention_mask"]).to(device)

        batch_input = {"input_ids": input_ids, "attention_mask": attention_mask}
        with torch.no_grad():
            outputs = model(**batch_input)
            logits = outputs.logits.to("cpu")  # Move to CPU to avoid errors
            predictions = logits.detach().numpy()
        all_predictions.append(predictions)

    return np.concatenate(all_predictions, axis=0)

In [16]:
predictions = predict_in_batches(best_model, tokenized_datasets["validation"]) # You can change this to validation.
pred_labels = np.argmax(predictions, axis=-1)

true_labels = tokenized_datasets["validation"]["label"] # You can change this to validation.

test_metrics = compute_metrics((predictions, true_labels)) # Compute the metrics.
print(f"Test metrics: {test_metrics}")

Test metrics: {'accuracy': 0.9415137614678899, 'f1': 0.9428891377379619}


In [None]:
# save_metrics_to_csv(test_metrics, filename="sst2_test_metrics.csv")

Metrics saved to validation_metrics.csv
Validation set metrics: {'accuracy': 0.8341323106423778, 'f1': 0.8865573770491804, 'mcc': 0.5936351080219947}


In [None]:
best_model.save_pretrained("./sst_best_roberta_model")

In [None]:
torch.save(best_model.state_dict(), "./sentiment_model.pt") # Download the model

In [19]:
def predict_single_sentence(sentence, model, tokenizer):
    inputs = tokenizer(sentence, padding="max_length", truncation=True, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class = logits.argmax().item()
    return "Positive" if predicted_class == 1 else "Negative"

example_sentence = "This was an bad movie!"
prediction = predict_single_sentence(example_sentence, best_model, tokenizer)
print(f"'{example_sentence}' has sentiment: {prediction}")

'This was an bad movie!' has sentiment: Negative
