In [None]:
import sys
import os

# The `toolbox` package is imported later in this notebook; register the
# parent directory on the import path (once) so that import can succeed.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
    print(f"Added {module_path} to sys.path")

In [1]:
# Name of the fine-tuned model: base checkpoint name + fine-tuning suffix.
# It is reused for the training output directory, the local save folder and
# the Hub repository id.
model_name = (
    'bert-base-multilingual-uncased'
    '-finetuned-financial-news-sentiment-analysis-european'
)

In [2]:
import wandb

# Authenticate with Weights & Biases; the training arguments below use
# report_to="wandb", so the run is logged there.
wandb.login()

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: nestorojedagonzalez (nojeda) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


True

In [3]:
from datasets import load_dataset
# Load the multilingual Financial PhraseBank dataset (presumably a Hub repo id);
# its 'train' and 'test' splits are used further down.
ds = load_dataset(path="nojedag/financial_phrasebank_multilingual")

In [None]:
from toolbox.utils import transform_labels
# Apply the project's label transformation to every example of every split.
# NOTE(review): the exact mapping lives in toolbox.utils.transform_labels —
# presumably it yields the 3 integer class ids the model is configured for.
dataset = ds.map(function=transform_labels)

In [5]:
# Tokenizer for the multilingual BERT checkpoint that is fine-tuned below
# (the tokenizer must match the model checkpoint).

from transformers import AutoTokenizer
# `num_labels` is a model-config option and has no meaning for a tokenizer,
# so it is not passed here.
tokenizer = AutoTokenizer.from_pretrained('google-bert/bert-base-multilingual-uncased')

def tokenize_data(example):
    """Tokenize the ``sentence`` field so the model can consume it.

    Used with ``dataset.map(..., batched=True)``, so ``example['sentence']``
    is a list of strings (a single string also works).

    Every sequence is padded to the tokenizer's maximum length, and sequences
    longer than that are truncated so that an unusually long sentence cannot
    exceed the model's maximum input size.

    Returns:
        The tokenizer output for the given sentence(s).
    """
    return tokenizer(example['sentence'], padding='max_length', truncation=True)

In [6]:
# Tokenize every split in batches; the tokenizer outputs are added as new
# columns next to the existing ones.
dataset = dataset.map(batched=True, function=tokenize_data)

In [7]:
from transformers import AutoModelForSequenceClassification

# Start from the pretrained multilingual BERT weights and attach a
# classification head sized for the 3 labels of our dataset; that head is
# what gets fine-tuned.
model = AutoModelForSequenceClassification.from_pretrained(
    "google-bert/bert-base-multilingual-uncased",
    num_labels=3,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
# Batch size per device (this is also the TrainingArguments default)
batch_size = 8

# Number of passes over the training set
number_of_epochs = 2

# Steps per epoch, rounded UP so the final partial batch is counted.
# Floor division would under-count by one step and would yield 0 (an invalid
# logging interval) for a training set smaller than a single batch.
# NOTE(review): assumes a single device and no gradient accumulation.
steps_per_epoch = max(1, (len(dataset['train']) + batch_size - 1) // batch_size)

# Log the training loss once per epoch (not once per batch)
logging_steps = steps_per_epoch

# Total number of steps for the whole run; the learning rate is warmed up
# over the first 20% of them.
steps = steps_per_epoch * number_of_epochs
warmup_steps = int(0.2 * steps)

In [9]:
import torch
from transformers import Trainer

class CustomTrainer(Trainer):
    """Trainer that optimises a class-weighted cross-entropy loss.

    The weights in ``class_weights`` are indexed by label id, so errors on
    label 2 cost three times as much as errors on label 0. Override the
    attribute on a subclass to use different weights.
    """

    # One weight per label id; must contain `model.config.num_labels` entries.
    class_weights = (1.0, 2.0, 3.0)

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the weighted cross-entropy loss for one batch.

        Args:
            model: The model to run. It may be a wrapped version of
                ``self.model``, so no attribute of the wrapper is relied on.
            inputs: Batch dictionary; ``"labels"`` is removed from it before
                the forward pass so the model does not compute its own loss.
            return_outputs: When true, return ``(loss, outputs)``.
            num_items_in_batch: Accepted for signature compatibility with
                ``Trainer.compute_loss``; not used.

        Returns:
            The scalar loss, or ``(loss, outputs)`` if ``return_outputs``.
        """
        labels = inputs.pop("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Create the weights next to the logits (same device and dtype).
        # `model.device` is not available on wrappers such as nn.DataParallel,
        # whereas the logits always know where they live.
        weights = torch.tensor(
            self.class_weights, device=logits.device, dtype=logits.dtype
        )
        loss = torch.nn.functional.cross_entropy(
            logits.view(-1, self.model.config.num_labels),
            labels.view(-1),
            weight=weights,
        )
        return (loss, outputs) if return_outputs else loss




In [None]:
from transformers import TrainingArguments
from toolbox.utils import get_output_dir
# Training configuration.
# The batch size is passed explicitly: `logging_steps` and `warmup_steps` were
# derived from `batch_size`, so the Trainer must use the same value, otherwise
# changing `batch_size` would silently desynchronise the schedule.
training_args = TrainingArguments(
    output_dir=get_output_dir(model_name),
    num_train_epochs=number_of_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    learning_rate=2e-5,
    warmup_steps=warmup_steps,
    logging_steps=logging_steps,
    # Evaluate every 500 steps and checkpoint every 1000 steps.
    # NOTE(review): with load_best_model_at_end the save interval should stay
    # a multiple of the eval interval — confirm against the library docs
    # before changing either value.
    eval_strategy='steps',
    eval_steps=500,
    save_strategy='steps',
    save_steps=1000,
    load_best_model_at_end=True,
    report_to="wandb",
)

In [11]:
# Shuffle the 'train' and 'test' splits with the same fixed seed so the
# ordering is reproducible; the shuffled 'test' split is used for evaluation.
train_dataset, eval_dataset = (
    dataset[split_name].shuffle(seed=10) for split_name in ('train', 'test')
)

In [12]:
from transformers import Trainer

# Trainer used for fine-tuning; CustomTrainer overrides the loss computation.
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

In [13]:
# Launch the learning process: training.
# No `resume_from_checkpoint` argument is passed, so this call does not ask
# the Trainer to resume from a previously saved checkpoint.
trainer.train()



Step,Training Loss,Validation Loss
500,No log,0.573704
1000,No log,0.536138
1500,No log,0.491532
2000,0.607400,0.560012
2500,0.607400,0.480471
3000,0.607400,0.453642


TrainOutput(global_step=3068, training_loss=0.48701986762776367, metrics={'train_runtime': 987.9597, 'train_samples_per_second': 24.835, 'train_steps_per_second': 3.105, 'total_flos': 6455750817374208.0, 'train_loss': 0.48701986762776367, 'epoch': 2.0})

In [14]:
import numpy as np
import evaluate

# Accuracy metric loaded through the `evaluate` library.
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Turn an evaluation result into an accuracy score.

    ``eval_pred`` unpacks into ``(logits, labels)``. The predicted class of
    each row is the index of its largest logit along axis 1, and those
    predictions are compared with the reference labels by ``metric``.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=1)
    return metric.compute(references=references, predictions=predicted_classes)

In [15]:

# Second trainer, used only to score the fine-tuned model with accuracy.
# It is a plain `Trainer`, not `CustomTrainer`, so it does not go through the
# overridden compute_loss; its eval loss is therefore not directly comparable
# with the validation loss reported during training.
trainer_eval = Trainer(
    compute_metrics=compute_metrics,
    eval_dataset=eval_dataset,
    train_dataset=train_dataset,
    args=training_args,
    model=model,
)

In [16]:
# Run evaluation on eval_dataset; because trainer_eval was given
# compute_metrics, the returned dict also contains the accuracy.
trainer_eval.evaluate()

{'eval_loss': 0.5353044271469116,
 'eval_model_preparation_time': 0.0032,
 'eval_accuracy': 0.8103841764929631,
 'eval_runtime': 43.8828,
 'eval_samples_per_second': 119.819,
 'eval_steps_per_second': 14.994}

In [17]:
# Save the fine-tuned weights AND the tokenizer side by side: a checkpoint
# without its tokenizer files cannot be loaded for inference by name.
model.save_pretrained(model_name)
tokenizer.save_pretrained(model_name)

# Publish both to the same Hub repository. The model push stays last so the
# cell still displays the model's CommitInfo.
tokenizer.push_to_hub(f'nojedag/{model_name}')
model.push_to_hub(f'nojedag/{model_name}')

model.safetensors:   0%|          | 0.00/669M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/nojedag/bert-base-multilingual-uncased-finetuned-financial-news-sentiment-analysis-european/commit/68bd8cd10ed730cfcc8d9bb8a2c1cdd6d0742db8', commit_message='Upload BertForSequenceClassification', commit_description='', oid='68bd8cd10ed730cfcc8d9bb8a2c1cdd6d0742db8', pr_url=None, repo_url=RepoUrl('https://huggingface.co/nojedag/bert-base-multilingual-uncased-finetuned-financial-news-sentiment-analysis-european', endpoint='https://huggingface.co', repo_type='model', repo_id='nojedag/bert-base-multilingual-uncased-finetuned-financial-news-sentiment-analysis-european'), pr_revision=None, pr_num=None)

In [18]:
# Push from both trainers; both were built with the same `training_args`.
# NOTE(review): the second push presumably targets the same Hub repo as the
# first — confirm that both uploads are needed.
trainer.push_to_hub()
trainer_eval.push_to_hub()

training_args.bin:   0%|          | 0.00/5.50k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/nojedag/bert-base-multilingual-uncased-finetuned-financial-news-sentiment-analysis-european/commit/c532cfa31009c1f395522dc5ead449b0be42b991', commit_message='End of training', commit_description='', oid='c532cfa31009c1f395522dc5ead449b0be42b991', pr_url=None, repo_url=RepoUrl('https://huggingface.co/nojedag/bert-base-multilingual-uncased-finetuned-financial-news-sentiment-analysis-european', endpoint='https://huggingface.co', repo_type='model', repo_id='nojedag/bert-base-multilingual-uncased-finetuned-financial-news-sentiment-analysis-european'), pr_revision=None, pr_num=None)

In [19]:
# Close the Weights & Biases run now that training, evaluation and uploads are done.
wandb.finish()

0,1
eval/accuracy,▁
eval/loss,█▆▃▇▃▁▆
eval/model_preparation_time,▁
eval/runtime,█▄▃▃▁▁▂
eval/samples_per_second,▁▅▆▆██▇
eval/steps_per_second,▁▅▆▆██▇
train/epoch,▁▂▄▄▅▆███
train/global_step,▂▃▄▄▆▇███▁
train/grad_norm,▁█
train/learning_rate,█▁

0,1
eval/accuracy,0.81038
eval/loss,0.5353
eval/model_preparation_time,0.0032
eval/runtime,43.8828
eval/samples_per_second,119.819
eval/steps_per_second,14.994
total_flos,6455750817374208.0
train/epoch,2.0
train/global_step,0.0
train/grad_norm,16.67884
