In [1]:
from datasets import load_dataset

# Load financial sentiment dataset
dataset = load_dataset('Jean-Baptiste/financial_news_sentiment_mixte_with_phrasebank_75')

# Use the columns 'summary_detail_with_title' for text and 'labels' for labels
dataset = dataset.rename_column("summary_detail_with_title", "text")
dataset = dataset.rename_column("labels", "label")

# Split the dataset into train and test
train_dataset = dataset['train']
test_dataset = dataset['test']


In [4]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from itertools import product

# Load pre-trained tokenizer and model
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

# Tokenize the dataset with padding to max length
def tokenize(batch):
    return tokenizer(batch['text'], padding='max_length', truncation=True, max_length=128)

train_dataset = train_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.map(tokenize, batched=True)

# Set format for PyTorch
train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])

# Define metrics
def compute_metrics(p):
    preds = p.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(p.label_ids, preds, average='macro')
    acc = accuracy_score(p.label_ids, preds)
    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}

# Hyperparameter tuning
learning_rates = [2e-5, 3e-5]
num_epochs = [3, 4]
best_accuracy = 0
best_params = {}

for lr, epoch in product(learning_rates, num_epochs):
    training_args = TrainingArguments(
        output_dir='./results',  
        evaluation_strategy="epoch",
        learning_rate=lr,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=epoch,
        weight_decay=0.01
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    trainer.train()
    eval_result = trainer.evaluate()
    
    if eval_result['eval_accuracy'] > best_accuracy:
        best_accuracy = eval_result['eval_accuracy']
        best_params = {"learning_rate": lr, "num_train_epochs": epoch}

print(f"Best params: {best_params}, Best accuracy: {best_accuracy}")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4446 [00:00<?, ? examples/s]

Map:   0%|          | 0/785 [00:00<?, ? examples/s]

  0%|          | 0/834 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.2985990643501282, 'eval_accuracy': 0.8968152866242038, 'eval_f1': 0.8621164021164022, 'eval_precision': 0.8958780706525377, 'eval_recall': 0.8375281610724649, 'eval_runtime': 51.3086, 'eval_samples_per_second': 15.3, 'eval_steps_per_second': 0.974, 'epoch': 1.0}
{'loss': 0.3644, 'grad_norm': 13.122971534729004, 'learning_rate': 8.00959232613909e-06, 'epoch': 1.8}


  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.24950481951236725, 'eval_accuracy': 0.9159235668789809, 'eval_f1': 0.8926650338758898, 'eval_precision': 0.902553535616302, 'eval_recall': 0.8848077288583617, 'eval_runtime': 51.1886, 'eval_samples_per_second': 15.335, 'eval_steps_per_second': 0.977, 'epoch': 2.0}


  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.2916184067726135, 'eval_accuracy': 0.9146496815286624, 'eval_f1': 0.8949417705913413, 'eval_precision': 0.8985332821123411, 'eval_recall': 0.8927445069217222, 'eval_runtime': 51.2396, 'eval_samples_per_second': 15.32, 'eval_steps_per_second': 0.976, 'epoch': 3.0}
{'train_runtime': 2914.7569, 'train_samples_per_second': 4.576, 'train_steps_per_second': 0.286, 'train_loss': 0.27664396802870206, 'epoch': 3.0}


  0%|          | 0/50 [00:00<?, ?it/s]



  0%|          | 0/1112 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.3380712568759918, 'eval_accuracy': 0.9159235668789809, 'eval_f1': 0.8904395125025512, 'eval_precision': 0.9125043697657805, 'eval_recall': 0.8742285030892626, 'eval_runtime': 54.7761, 'eval_samples_per_second': 14.331, 'eval_steps_per_second': 0.913, 'epoch': 1.0}
{'loss': 0.1136, 'grad_norm': 0.08014512807130814, 'learning_rate': 1.1007194244604318e-05, 'epoch': 1.8}


  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.3575266897678375, 'eval_accuracy': 0.9210191082802548, 'eval_f1': 0.9003016748304432, 'eval_precision': 0.9198336804227406, 'eval_recall': 0.8836373276879607, 'eval_runtime': 51.8557, 'eval_samples_per_second': 15.138, 'eval_steps_per_second': 0.964, 'epoch': 2.0}


  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.4142042100429535, 'eval_accuracy': 0.9210191082802548, 'eval_f1': 0.8998463875713746, 'eval_precision': 0.9098976305498044, 'eval_recall': 0.8911061402200643, 'eval_runtime': 53.3866, 'eval_samples_per_second': 14.704, 'eval_steps_per_second': 0.937, 'epoch': 3.0}
{'loss': 0.0431, 'grad_norm': 0.04857219010591507, 'learning_rate': 2.0143884892086333e-06, 'epoch': 3.6}


  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.44439440965652466, 'eval_accuracy': 0.9210191082802548, 'eval_f1': 0.9022904206191805, 'eval_precision': 0.9062181073291283, 'eval_recall': 0.8989474381879444, 'eval_runtime': 52.2358, 'eval_samples_per_second': 15.028, 'eval_steps_per_second': 0.957, 'epoch': 4.0}
{'train_runtime': 3950.1624, 'train_samples_per_second': 4.502, 'train_steps_per_second': 0.282, 'train_loss': 0.0732142935982711, 'epoch': 4.0}


  0%|          | 0/50 [00:00<?, ?it/s]



  0%|          | 0/834 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.48238256573677063, 'eval_accuracy': 0.9235668789808917, 'eval_f1': 0.8995256818323659, 'eval_precision': 0.9113434451842073, 'eval_recall': 0.8889222230994384, 'eval_runtime': 52.3094, 'eval_samples_per_second': 15.007, 'eval_steps_per_second': 0.956, 'epoch': 1.0}
{'loss': 0.0543, 'grad_norm': 18.985328674316406, 'learning_rate': 1.2014388489208634e-05, 'epoch': 1.8}


  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.5084881782531738, 'eval_accuracy': 0.9248407643312102, 'eval_f1': 0.9098095136438236, 'eval_precision': 0.9285768287414468, 'eval_recall': 0.8955505846645088, 'eval_runtime': 50.9185, 'eval_samples_per_second': 15.417, 'eval_steps_per_second': 0.982, 'epoch': 2.0}


  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.4929242432117462, 'eval_accuracy': 0.9248407643312102, 'eval_f1': 0.9077022402471124, 'eval_precision': 0.9177022921844794, 'eval_recall': 0.8998654740426892, 'eval_runtime': 51.1396, 'eval_samples_per_second': 15.35, 'eval_steps_per_second': 0.978, 'epoch': 3.0}
{'train_runtime': 2940.2345, 'train_samples_per_second': 4.536, 'train_steps_per_second': 0.284, 'train_loss': 0.03961176494900271, 'epoch': 3.0}


  0%|          | 0/50 [00:00<?, ?it/s]



  0%|          | 0/1112 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.6284554600715637, 'eval_accuracy': 0.9121019108280255, 'eval_f1': 0.8795214822330853, 'eval_precision': 0.9275635721249103, 'eval_recall': 0.8458441693884732, 'eval_runtime': 51.8357, 'eval_samples_per_second': 15.144, 'eval_steps_per_second': 0.965, 'epoch': 1.0}
{'loss': 0.0344, 'grad_norm': 0.908665657043457, 'learning_rate': 1.6510791366906477e-05, 'epoch': 1.8}


  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.5701509118080139, 'eval_accuracy': 0.9210191082802548, 'eval_f1': 0.9086191358991912, 'eval_precision': 0.9204272433276057, 'eval_recall': 0.8997359189764254, 'eval_runtime': 51.9017, 'eval_samples_per_second': 15.125, 'eval_steps_per_second': 0.963, 'epoch': 2.0}


  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.5947762727737427, 'eval_accuracy': 0.9222929936305733, 'eval_f1': 0.9076999897710191, 'eval_precision': 0.9185891654206259, 'eval_recall': 0.8988519580924644, 'eval_runtime': 51.7967, 'eval_samples_per_second': 15.155, 'eval_steps_per_second': 0.965, 'epoch': 3.0}
{'loss': 0.0109, 'grad_norm': 0.23762013018131256, 'learning_rate': 3.0215827338129496e-06, 'epoch': 3.6}


  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.5921365022659302, 'eval_accuracy': 0.9273885350318471, 'eval_f1': 0.9088065798592114, 'eval_precision': 0.9170096841329718, 'eval_recall': 0.9016239608644673, 'eval_runtime': 51.8497, 'eval_samples_per_second': 15.14, 'eval_steps_per_second': 0.964, 'epoch': 4.0}
{'train_runtime': 3877.8262, 'train_samples_per_second': 4.586, 'train_steps_per_second': 0.287, 'train_loss': 0.020876711381853912, 'epoch': 4.0}


  0%|          | 0/50 [00:00<?, ?it/s]

Best params: {'learning_rate': 3e-05, 'num_train_epochs': 4}, Best accuracy: 0.9273885350318471


In [5]:
# Retrain with best params
training_args = TrainingArguments(
    output_dir='./results',  # This is required by TrainingArguments
    evaluation_strategy="epoch",
    learning_rate=best_params["learning_rate"],
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=best_params["num_train_epochs"],
    weight_decay=0.01
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)

trainer.train()



  0%|          | 0/1112 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.7002525329589844, 'eval_accuracy': 0.9197452229299363, 'eval_f1': 0.8993005539491791, 'eval_precision': 0.8976980555568401, 'eval_recall': 0.9009923652961628, 'eval_runtime': 51.0385, 'eval_samples_per_second': 15.381, 'eval_steps_per_second': 0.98, 'epoch': 1.0}
{'loss': 0.0273, 'grad_norm': 0.020316896960139275, 'learning_rate': 1.6510791366906477e-05, 'epoch': 1.8}


  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.6287859082221985, 'eval_accuracy': 0.9171974522292994, 'eval_f1': 0.9012993700875972, 'eval_precision': 0.901883844829675, 'eval_recall': 0.9007673301344189, 'eval_runtime': 52.5599, 'eval_samples_per_second': 14.935, 'eval_steps_per_second': 0.951, 'epoch': 2.0}


  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.6593212485313416, 'eval_accuracy': 0.9286624203821656, 'eval_f1': 0.9070949405446073, 'eval_precision': 0.9251331666008512, 'eval_recall': 0.8913217404356645, 'eval_runtime': 52.2248, 'eval_samples_per_second': 15.031, 'eval_steps_per_second': 0.957, 'epoch': 3.0}
{'loss': 0.0122, 'grad_norm': 0.0023053139448165894, 'learning_rate': 3.0215827338129496e-06, 'epoch': 3.6}


  0%|          | 0/50 [00:00<?, ?it/s]

{'eval_loss': 0.6130384802818298, 'eval_accuracy': 0.9248407643312102, 'eval_f1': 0.9023609708662198, 'eval_precision': 0.9118815925538614, 'eval_recall': 0.8935576277348428, 'eval_runtime': 52.2878, 'eval_samples_per_second': 15.013, 'eval_steps_per_second': 0.956, 'epoch': 4.0}
{'train_runtime': 3958.6091, 'train_samples_per_second': 4.492, 'train_steps_per_second': 0.281, 'train_loss': 0.01782830698053828, 'epoch': 4.0}


TrainOutput(global_step=1112, training_loss=0.01782830698053828, metrics={'train_runtime': 3958.6091, 'train_samples_per_second': 4.492, 'train_steps_per_second': 0.281, 'total_flos': 588960557512704.0, 'train_loss': 0.01782830698053828, 'epoch': 4.0})

In [6]:
# Save the best model
model.save_pretrained('./group2_fin_model')
tokenizer.save_pretrained('./group2_fin_model')

('./group2_fin_model\\tokenizer_config.json',
 './group2_fin_model\\special_tokens_map.json',
 './group2_fin_model\\vocab.txt',
 './group2_fin_model\\added_tokens.json',
 './group2_fin_model\\tokenizer.json')