In [5]:
import tensorflow as tf

In [6]:
# Show every physical device TensorFlow can see (no device-type filter).
visible_devices = tf.config.list_physical_devices()
print(visible_devices)

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


In [7]:

# Detect GPUs and switch each of them to on-demand memory allocation.
# `set_memory_growth` returns None, so its result cannot tell us whether a
# GPU exists; the logical device list is queried separately instead.
physical_devices = tf.config.list_physical_devices('GPU')
logical_gpus = []
if physical_devices:
    try:
        # Memory growth must be configured before the GPUs are initialized,
        # and must be the same for every GPU.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as error:
        # Raised when the GPUs were already initialized (e.g. the cell is re-run)
        print(error)
    logical_gpus = tf.config.list_logical_devices('GPU')

if logical_gpus:
    print("GPU is available!")
    # `tf.device` returns a context manager scoping TensorFlow ops to the first GPU
    device = tf.device('/device:GPU:0')
else:
    print("GPU is not available. Training on CPU.")
    # Fallback: context manager scoping TensorFlow ops to the CPU
    device = tf.device('/device:CPU:0')

GPU is not available. Training on CPU.


In [None]:
from datasets import load_dataset

# Dataset identifier handed to `load_dataset` when the splits are loaded.
dataset_name = "vamossyd/finance_emotions"

In [None]:
# The first 90% of the published "train" split is used for fitting and the
# remaining 10% is held out for evaluation.
train_split, holdout_split = "train[:90%]", "train[90%:]"
train_dataset = load_dataset(dataset_name, split=train_split)
test_dataset = load_dataset(dataset_name, split=holdout_split)

In [None]:
from transformers import AutoTokenizer

# Tokenizer loaded from the "bert-base-cased" checkpoint; `tokenize_batch` relies on it.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

In [None]:
# Emotion name -> integer class id; the id is the position in this tuple.
str_to_int = {
    emotion: class_id
    for class_id, emotion in enumerate(
        ("neutral", "sad", "anger", "disgust", "surprise", "fear", "happy")
    )
}

In [None]:
def tokenize_batch(batch):
    """Tokenize a batch of rows and swap its string labels for integer ids.

    Reads the `cleaned_text` and `label` columns of `batch`. The text goes
    through the module-level `tokenizer` with max-length padding and
    truncation enabled; every label is looked up in `str_to_int`.

    Returns the tokenizer output with a `label` entry holding the integer
    ids. A label that is missing from `str_to_int` raises `KeyError`.
    """
    encoded = tokenizer(batch['cleaned_text'], padding="max_length", truncation=True)
    encoded['label'] = list(map(str_to_int.__getitem__, batch['label']))
    return encoded

In [None]:
# `Dataset.map` returns a `datasets.Dataset`, which has no `.to()` method, so
# the tokenized data stays on the host here. `Trainer` moves each batch to its
# training device when it builds the model inputs.
tokenized_train_data = train_dataset.map(tokenize_batch, batched=True)
tokenized_test_data = test_dataset.map(tokenize_batch, batched=True)


In [None]:
# Spot-check one tokenized training example (row index 99).
sample_row = tokenized_train_data[99]
print(sample_row)

In [None]:
from transformers import AutoModelForSequenceClassification


In [None]:
# Classification head sized to the label vocabulary defined in `str_to_int`.
# There is deliberately no manual `.to(device)`: `device` is a TensorFlow
# device scope, which this model's `.to()` cannot accept, and `Trainer`
# places the model on its own training device.
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-cased",
    num_labels=len(str_to_int),
)

In [None]:
import numpy as np
import evaluate

In [None]:
# Accuracy metric loaded through `evaluate`; `compute_accuracy_metric` calls its `.compute()`.
metric = evaluate.load("accuracy")

In [None]:
def compute_accuracy_metric(eval_pred):
    """Score predictions with the module-level accuracy `metric`.

    `eval_pred` unpacks into `(logits, labels)`. The predicted class of each
    row is the index of its largest logit along the last axis.

    Returns whatever `metric.compute` returns for those predictions and labels.
    """
    model_outputs, references = eval_pred
    predicted_classes = np.argmax(model_outputs, axis=-1)
    return metric.compute(predictions=predicted_classes, references=references)

In [None]:
from transformers import TrainingArguments
# Outputs go to "reTrained_Model" and evaluation runs once per epoch.
# `TrainingArguments` accepts no `device` constructor argument (its `device`
# is a property resolved automatically), so none is passed.
# NOTE(review): newer transformers releases spell this option `eval_strategy` —
# confirm against the installed version.
training_args = TrainingArguments(
    output_dir="reTrained_Model",
    evaluation_strategy="epoch",
)

In [None]:


def save_if_above_goal_accuracy(trainer, eval_result, goal_accuracy=0.95):
    """Save the trainer's model when evaluation accuracy beats the goal.

    Args:
        trainer: Object exposing `save_model()` and `args.output_dir`
            (a `transformers.Trainer` does).
        eval_result: Mapping of evaluation metrics; accuracy is read from
            its "eval_accuracy" key.
        goal_accuracy: Threshold the accuracy must strictly exceed.

    Returns:
        None. The outcome is reported with `print`.
    """
    accuracy = eval_result.get("eval_accuracy")

    # A missing metric cannot be formatted with `:.4f`; report it explicitly
    # instead of raising TypeError.
    if accuracy is None:
        print("Failed: Model accuracy: not available (no 'eval_accuracy' in results)")
        return

    if accuracy > goal_accuracy:
        trainer.save_model()
        # Report the directory the trainer itself is configured to write to
        print(f"Model saved to {trainer.args.output_dir} with accuracy: {accuracy:.4f}")
    else:
        print(f"Failed: Model accuracy: {accuracy:.4f}")


In [None]:
from transformers import Trainer

In [None]:
from transformers import TrainerCallback


class SaveIfAboveGoalAccuracyCallback(TrainerCallback):
    """Forward evaluation metrics to `save_if_above_goal_accuracy`.

    `Trainer` only accepts `TrainerCallback` classes or instances in
    `callbacks`, so the plain function is wrapped here. The owning trainer is
    attached after construction because it does not exist yet when the
    callback is created.
    """

    def __init__(self):
        self.trainer = None

    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        # Skip until a trainer is attached and metrics were actually produced
        if self.trainer is not None and metrics is not None:
            save_if_above_goal_accuracy(self.trainer, metrics)


save_callback = SaveIfAboveGoalAccuracyCallback()

# `Trainer` has no `device` argument; it uses the device resolved by
# `training_args`.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_data,
    eval_dataset=tokenized_test_data,
    compute_metrics=compute_accuracy_metric,
    callbacks=[save_callback],
)
save_callback.trainer = trainer

In [None]:
# Run the training loop on the `trainer` configured above.
trainer.train()