In [None]:
!pip install transformers
!pip install datasets
!pip install transformers[torch]

In [None]:
from datasets import load_dataset

# Hub identifier of the dataset used for fine-tuning.
dataset_name = "vamossyd/finance_emotions"

# Carve both subsets out of the "train" split: the first 90% of the rows is
# used for training and the remaining 10% is held out for evaluation.
train_dataset = load_dataset(path=dataset_name, split="train[:90%]")
test_dataset = load_dataset(path=dataset_name, split="train[90%:]")

In [None]:
from transformers import AutoTokenizer

# Tokenizer for the same "bert-base-cased" checkpoint that the classifier is
# loaded from later in the notebook.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="bert-base-cased",
)

# Display one raw training example as the cell output.
train_dataset[100]


In [None]:
str_to_int = {
    "neutral": 0,
    "sad": 1,
    "anger": 2,
    "disgust": 3,
    "surprise": 4,
    "fear": 5,
    "happy": 6,
}

# Display raw training examples 100 through 102 (the slice end is exclusive).
train_dataset[100:103]




In [None]:
def tokenize_function(batch):
    """Tokenize a batch of examples and encode their labels as integers.

    Args:
        batch: Mapping with a 'cleaned_text' entry (the texts to tokenize) and
            a 'label' entry (emotion names that are keys of ``str_to_int``).

    Returns:
        The tokenizer output for the texts, padded with ``padding="max_length"``
        and truncated, with its 'label' entry set to the integer ids looked up
        in ``str_to_int``.

    Raises:
        KeyError: If a label is not a key of ``str_to_int``.
    """
    encoding = tokenizer(
        batch['cleaned_text'],
        truncation=True,
        padding="max_length",
    )
    # Replace the emotion names with their integer ids, preserving order.
    encoding['label'] = list(map(str_to_int.__getitem__, batch['label']))
    return encoding

In [None]:
# Apply tokenize_function to both splits; batched=True hands the function a
# batch of examples per call instead of a single example.
t_data = train_dataset.map(function=tokenize_function, batched=True)
test_data = test_dataset.map(function=tokenize_function, batched=True)


In [None]:
#importing the model

from transformers import AutoModelForSequenceClassification
import torch

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Size the classification head from str_to_int instead of a hard-coded 7 so it
# always matches the label encoding applied in tokenize_function, and store the
# label names in the model config so a saved/reloaded model reports emotion
# names rather than generic LABEL_<n> placeholders.
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-cased",
    num_labels=len(str_to_int),
    id2label={idx: name for name, idx in str_to_int.items()},
    label2id=dict(str_to_int),
).to(device)

from transformers import TrainingArguments

# NOTE(review): `train_arg` is not referenced anywhere else in the visible
# notebook; the Trainer is built from `training_args` instead. Confirm whether
# this assignment is still needed.
train_arg = TrainingArguments(output_dir="first_model")

In [None]:
import numpy as np
!pip install evaluate
import evaluate

In [None]:

# Accuracy metric object used by compute_accuracy below.
metric = evaluate.load("accuracy")


def compute_accuracy(eval_pred):
    """Compute accuracy for one evaluation pass.

    Args:
        eval_pred: A pair ``(logits, labels)``; the predicted class of each
            row is the index of its largest logit along axis 1.

    Returns:
        The result of ``metric.compute`` for the predicted classes against
        the reference labels.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=1)
    return metric.compute(
        predictions=predicted_classes,
        references=references,
    )


# Evaluate on the eval dataset at the end of every epoch. `eval_strategy` is
# the current name of this option; the older `evaluation_strategy` spelling is
# deprecated and rejected by recent transformers releases, which is what the
# unpinned install at the top of the notebook pulls in.
# NOTE(review): `training_args` is reassigned in a later cell before the
# Trainer is created, so this configuration is not the one actually used.
training_args = TrainingArguments(output_dir="my_first_model", eval_strategy="epoch")

from transformers import Trainer


In [None]:
# training_args = TrainingArguments(
#     output_dir="my_first_model",
#     evaluation_strategy="epoch",
#     per_device_train_batch_size=8,  # Adjust batch size to avoid OOM errors
#     per_device_eval_batch_size=8,
#     fp16=True,  # Enables mixed precision training (faster and lower memory usage)
#     weight_decay=0.05
# )

# Default training configuration; checkpoints are written to "my_first_model".
# NOTE(review): this reassignment replaces the `training_args` created in the
# previous cell, so the per-epoch evaluation requested there is not in effect;
# metrics are only computed when trainer.evaluate() is called explicitly.
training_args = TrainingArguments(output_dir="my_first_model")

# Wire the model, the tokenized splits and the accuracy callback together.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_accuracy,
    train_dataset=t_data,
    eval_dataset=test_data,
)


In [None]:
# Fine-tune the model on t_data with the Trainer configured above.
trainer.train()

In [None]:
# Score the fine-tuned model on the Trainer's eval_dataset (test_data); the
# returned metrics are displayed as the cell output.
trainer.evaluate()


In [None]:
text = "Stock prices are dropping rapidly!"

# Put the inputs on whatever device the model currently lives on instead of
# assuming CUDA, so this cell also works on a CPU-only runtime.
inputs = tokenizer(text, return_tensors="pt").to(model.device)

# Inference only: disable dropout and skip gradient tracking.
model.eval()
with torch.no_grad():
    outputs = model(**inputs)

# Index of the highest-scoring class for the single input sentence.
predicted_label = torch.argmax(outputs.logits, dim=-1).item()

# str_to_int keys are in id order, so position == label id.
print(f"Predicted Emotion: {list(str_to_int.keys())[predicted_label]}")




In [None]:
# Save the fine-tuned weights locally and push them to a private Hub repo.
model.save_pretrained("my_finetuned_bert", push_to_hub=True, private=True)

# Push the tokenizer to the same private repo as well; without it the Hub
# repository holds only the model and cannot be loaded as a complete checkpoint.
tokenizer.save_pretrained("my_finetuned_bert", push_to_hub=True, private=True)
