# Deep Learning
<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/marcinsawinski/UEP_KIE_DL_CODE2024/blob/main/dl07_fientune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
  </td>
</table>

In [1]:
%pip install -U huggingface_hub datasets huggingface_hub[hf_xet]



In [2]:
import os
os.environ["HF_DATASETS_CACHE"] = "/content/hf_cache"
os.environ["HF_HOME"] = "/content/hf_cache"
from datasets import load_dataset, DatasetDict

# load dataset

In [3]:
full_dataset = load_dataset("stanfordnlp/sst2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [4]:
dataset = DatasetDict({
    "train": full_dataset["train"].shuffle(seed=42).select(range(4000)),
    "validation": full_dataset["validation"],
    "test": full_dataset["test"],
})

In [5]:
dataset

DatasetDict({
    train: Dataset({
        features: ['idx', 'sentence', 'label'],
        num_rows: 4000
    })
    validation: Dataset({
        features: ['idx', 'sentence', 'label'],
        num_rows: 872
    })
    test: Dataset({
        features: ['idx', 'sentence', 'label'],
        num_rows: 1821
    })
})

# model setup

In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

In [7]:
model_name= "distilbert-base-uncased"

In [8]:
# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [9]:
def tokenize(batch):
    return tokenizer(batch["sentence"], padding="max_length", truncation=True)

In [10]:
tokenized = dataset.map(tokenize, batched=True)
tokenized.set_format("torch", columns=["input_ids", "attention_mask", "label"])

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

In [11]:
# Model
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
# Metrics
def compute_metrics(pred):
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds)
    }

In [13]:
run_name = "distilbert-3000-steps-es"

In [14]:
# Training args
# args = TrainingArguments(
#     output_dir="./results",
#     eval_strategy="epoch",
#     save_strategy="no",
#     logging_strategy="epoch",
#     learning_rate=2e-5,
#     per_device_train_batch_size=16,
#     per_device_eval_batch_size=16,
#     num_train_epochs=2,
#     weight_decay=0.01,
#     report_to="wandb",  # W&B logging
#     run_name = run_name
# )

args = TrainingArguments(
    output_dir="./results",
    eval_strategy="steps",
    eval_steps=20,
    save_strategy="best",
    logging_strategy="steps",
    logging_steps=20,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    report_to="wandb",
    load_best_model_at_end=True,
    metric_for_best_model="f1",        # 👈 use F1 for tracking best model
    greater_is_better=True,             # 👈 higher F1 is better
    run_name = run_name
)

In [15]:
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmarcinsawinski[0m ([33mFactUE[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [16]:
# Trainer
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["validation"],
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]
)

run = wandb.init(
    project="dl_finetune_huggingface",   # 👈 your W&B project name
    entity="uep-kie-dl25", # 👈 your W&B username or team
    name="distilbert-3000-steps",# 👈 optional run name
)
trainer.train()
run.finish()

Step,Training Loss,Validation Loss,Accuracy,F1
20,0.6755,0.63009,0.774083,0.785169
40,0.5345,0.476085,0.799312,0.771838
60,0.3663,0.459515,0.795872,0.760753
80,0.3447,0.383858,0.833716,0.841183
100,0.3598,0.377965,0.840596,0.828607
120,0.315,0.346573,0.844037,0.844749
140,0.383,0.339424,0.860092,0.865342
160,0.3234,0.329065,0.858945,0.854093
180,0.3544,0.322345,0.862385,0.865772
200,0.3116,0.363032,0.840596,0.854145


0,1
eval/accuracy,▁▃▃▅▆▆▇▇▇▆▆▇██▇█████████
eval/f1,▂▂▁▆▅▆▇▇▇▇▇▇▇█▇████▇████
eval/loss,█▄▄▂▂▂▁▁▁▂▂▁▁▁▂▂▂▃▂▂▃▃▃▃
eval/runtime,▁▄█▄▇▆▅█▅▅▆▅▅▅▆▆▅▆▆▇▆▄▆▆
eval/samples_per_second,█▄▁▄▂▃▃▁▃▄▃▃▄▃▃▃▄▂▃▂▃▄▃▃
eval/steps_per_second,█▄▁▄▂▃▃▁▃▄▃▃▄▃▃▃▄▂▃▂▃▄▃▃
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train/grad_norm,▁▂▂▂▂▃▂▂▂▃▃▃▂▂▂▁▇▂▆▆▅▄▁█
train/learning_rate,██▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▁▁

0,1
eval/accuracy,0.86583
eval/f1,0.86927
eval/loss,0.39901
eval/runtime,13.9008
eval/samples_per_second,62.73
eval/steps_per_second,3.957
total_flos,1017349621678080.0
train/epoch,1.92
train/global_step,480.0
train/grad_norm,39.77822


In [17]:
trained_model_path = "./bert-sst2-ft-demo"

In [18]:
# model.cpu()
model.save_pretrained(trained_model_path)
tokenizer.save_pretrained(trained_model_path)

('./bert-sst2-ft-demo/tokenizer_config.json',
 './bert-sst2-ft-demo/special_tokens_map.json',
 './bert-sst2-ft-demo/vocab.txt',
 './bert-sst2-ft-demo/added_tokens.json',
 './bert-sst2-ft-demo/tokenizer.json')

# infer with trainer

In [19]:
# Run inference on the test dataset with trainer
run = wandb.init(
    project="dl_finetune_huggingface",   # 👈 your W&B project name
    entity="uep-kie-dl25", # 👈 your W&B username or team
    name="predict",# 👈 optional run name
)
predictions = trainer.predict(tokenized["validation"])
run.finish()

0,1
test/accuracy,▁
test/f1,▁
test/loss,▁
test/runtime,▁
test/samples_per_second,▁
test/steps_per_second,▁

0,1
test/accuracy,0.86583
test/f1,0.86927
test/loss,0.39901
test/runtime,13.6535
test/samples_per_second,63.866
test/steps_per_second,4.028


# load model and make inference with a pipeline

In [20]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification,pipeline

ft_model = AutoModelForSequenceClassification.from_pretrained(trained_model_path)
ft_tokenizer = AutoTokenizer.from_pretrained(trained_model_path)

In [21]:
# Sentiment pipeline for binary classification
classifier = pipeline("text-classification", model=ft_model, tokenizer=ft_tokenizer)

# Run inference
text = "I really hate this movie!"
result = classifier(text)
print(result)

Device set to use cuda:0


[{'label': 'LABEL_0', 'score': 0.9690461158752441}]


In [22]:
label_map = {0: "Negative", 1: "Positive"}
label_idx = int(result[0]["label"].split("_")[-1])
print(f"Predicted: {label_map[label_idx]} (score={result[0]['score']:.3f})")

Predicted: Negative (score=0.969)
