# Tema 5: Fine-tuning

## Ejercicio 1
Hacer fine-tuning de BERT para análisis de sentimientos sobre opiniones de películas con el dataset rotten_tomatoes de HuggingFace.

### Apartado a
Consultar los metadatos del dataset (descripción, características y particiones disponibles).

In [None]:
from datasets import load_dataset_builder, get_dataset_split_names

# Instantiate the dataset builder; this cell only reads its `info` metadata.
ds = load_dataset_builder("rotten_tomatoes")
dataset_info = ds.info

print("Descripción del dataset: ", dataset_info.description)
print("Características del dataset: ", dataset_info.features)

In [None]:
# Query the split names of the dataset; as the last expression of the cell,
# the returned value is shown as the cell output.
get_dataset_split_names("rotten_tomatoes")

### Apartado b
Cargar el dataset.

In [None]:
from datasets import load_dataset

# Load the dataset; the result is indexed by split name in the cells below.
dataset = load_dataset("rotten_tomatoes")

# The class names come from the `label` feature of the train split. Their
# count is later passed as `num_labels` when the classifier is created.
label_feature = dataset['train'].features['label']
labels = label_feature.names
NUM_LABELS = len(labels)
print('Labels: ', labels, ', número de labels: ', NUM_LABELS)

### Apartado c
Tokenización con el tokenizer de BERT.

In [None]:
from transformers import AutoTokenizer

# Checkpoint identifier used to load the tokenizer for Exercise 1.
model_id = "bert-base-uncased"

# Tokenizer matching that checkpoint; reused for dataset encoding and inference.
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
# Length (in tokens) of the longest tokenized train example; used below as the
# fixed padding length. All train texts are tokenized in ONE batched call
# rather than one tokenizer call per example, and `max` consumes a generator
# so no intermediate list of lengths is built.
MAX_LENGTH = max(
    len(ids) for ids in tokenizer(list(dataset['train']['text']))["input_ids"]
)
print("Tamaño máximo en el train: ", MAX_LENGTH)

In [None]:
def tokenize(batch):
    """Tokenize the ``text`` column of a batch to exactly ``MAX_LENGTH`` tokens.

    Args:
        batch: Mapping with a ``"text"`` entry holding the texts of the batch
            (this function is applied with ``batched=True``).

    Returns:
        The tokenizer output for the batch, padded and truncated to
        ``MAX_LENGTH``.
    """
    # MAX_LENGTH is measured on the train split only. Without truncation a
    # longer validation/test text would keep its full length and the rows
    # would no longer share one size, so truncate explicitly (the inference
    # code in this notebook already tokenizes with truncation=True).
    return tokenizer(
        batch["text"],
        padding="max_length",
        truncation=True,
        max_length=MAX_LENGTH,
    )


# Encode every split; the bare expression displays the result in the notebook.
encoded_data = dataset.map(tokenize, batched=True)
encoded_data

In [None]:
# Complete tokenized splits.
full_train_dataset = encoded_data["train"]
full_validation_dataset = encoded_data["validation"]
full_test_dataset = encoded_data["test"]

# Reduced subsets: each split is shuffled with a fixed seed (42) and cut to
# its first 1000 (train) or 500 (validation/test) rows.
small_train_dataset = full_train_dataset.shuffle(seed=42).select(range(1000))
small_validation_dataset = full_validation_dataset.shuffle(seed=42).select(range(500))
small_test_dataset = full_test_dataset.shuffle(seed=42).select(range(500))

In [None]:
import random

# Spot-check ten random rows of the reduced train subset and print the length
# of their `input_ids`.
# randrange(n) draws from 0..n-1. The previous randint(0, n) also returned n
# itself, which indexes one past the last row.
for _ in range(10):
    index = random.randrange(small_train_dataset.num_rows)
    print('text:', index, ' len:', len(small_train_dataset[index]['input_ids']))

### Apartado d
Cargar modelo preentrenado y configurar hiperparámetros.

In [None]:
from transformers import AutoModelForSequenceClassification

# Load the classifier from `model_id`, the same checkpoint the tokenizer was
# loaded from, instead of repeating the literal name: changing the checkpoint
# in one place can then no longer leave tokenizer and model mismatched.
# NUM_LABELS sets the number of output classes.
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=NUM_LABELS)

In [None]:
from transformers import TrainingArguments

# Training configuration: only the output directory and `report_to` are set
# here; every other hyperparameter is left at the TrainingArguments default.
args = TrainingArguments(output_dir="./outputs", report_to="none")

### Apartado e
Definir las métricas, entrenar con el subconjunto reducido de train y evaluar sobre el subconjunto reducido de validación.

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support


def compute_metrics(pred):
    """Return accuracy and macro-averaged precision, recall and F1.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores; the arg-max over the last
            axis is taken as the predicted class).

    Returns:
        dict: keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    references = pred.label_ids
    predicted = pred.predictions.argmax(-1)

    acc = accuracy_score(references, predicted)
    macro_precision, macro_recall, macro_f1, _support = precision_recall_fscore_support(
        references, predicted, average='macro'
    )

    return dict(
        accuracy=acc,
        f1=macro_f1,
        precision=macro_precision,
        recall=macro_recall,
    )

In [None]:
from transformers import Trainer

# Wire the model, the training configuration, the reduced train/validation
# subsets and the metric callback into a Trainer.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=small_train_dataset,
    eval_dataset=small_validation_dataset,
    compute_metrics=compute_metrics,
)

In [None]:
# Fine-tune the model on the Trainer's train_dataset; the call's return value
# is shown as the cell output.
trainer.train()

In [None]:
# Evaluate on the Trainer's eval_dataset (the reduced validation subset); the
# returned metrics are shown as the cell output.
trainer.evaluate()

### Apartado f
Evaluar el modelo sobre el subconjunto reducido de test (500 ejemplos).

In [None]:
def get_prediction(text):
    """Return the index of the class the model predicts for *text*.

    Args:
        text: A single input string.

    Returns:
        int: Position of the highest logit; the notebook uses it as an index
        into ``labels``.
    """
    # Local import: torch is not imported in the visible notebook cells.
    import torch

    inputs = tokenizer(
        text,
        padding="max_length",
        max_length=MAX_LENGTH,
        truncation=True,
        return_tensors="pt",
    ).to(model.device)

    # Inference needs dropout disabled and no autograd graph. Only switch
    # modes when necessary so repeated calls stay cheap.
    if model.training:
        model.eval()
    with torch.no_grad():
        logits = model(**inputs).logits

    # softmax is monotonic, so the arg-max of the logits already identifies
    # the predicted class.
    return logits.argmax().item()

In [None]:
from sklearn.metrics import classification_report

# Reference labels and one-at-a-time predictions for the reduced test subset.
y_true = small_test_dataset['label']
y_pred = list(map(get_prediction, small_test_dataset['text']))

# Per-class precision/recall/F1 report, with the dataset's label names as
# row headers.
print(classification_report(y_true=y_true, y_pred=y_pred, target_names=labels))

## Ejercicio 2
Utilizar el modelo entrenado para inferir sobre textos nuevos no presentes en el dataset.

In [None]:
import torch

# Hand-written texts that are not part of the dataset.
test_texts = [
    'i hate you too much',
    'I did not like the film at all',
    'I loved the movie'
]

# Make sure dropout is disabled even if this cell runs right after training.
model.eval()

for text in test_texts:
    inputs = tokenizer(text, padding="max_length", max_length=MAX_LENGTH, truncation=True, return_tensors="pt").to(model.device)

    # No gradients are needed for inference; this also keeps the printed
    # probabilities free of autograd metadata.
    with torch.no_grad():
        outputs = model(**inputs)
    probs = outputs.logits.softmax(1)

    print(f"Texto: '{text}'")
    print(f"Probabilidades: {probs}")
    print(f"Predicción: {labels[probs.argmax().item()]}")
    print()

## Ejercicio 3
Comparar distintos modelos preentrenados para la misma tarea de análisis de sentimientos.

### Apartado a
Definir lista de modelos a comparar y función de entrenamiento.

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from datasets import load_dataset
import numpy as np

# Checkpoints to compare; each one is fine-tuned and scored in turn.
model_ids = [
    "bert-base-cased",
    "distilbert-base-uncased",
    "albert-base-v2",
    "xlm-roberta-base",
]

# Reload the raw (untokenized) dataset and derive the class names and their
# count from the `label` feature of the train split.
dataset = load_dataset("rotten_tomatoes")
label_feature = dataset['train'].features['label']
labels = label_feature.names
NUM_LABELS = len(labels)

In [None]:
def compute_metrics(pred):
    """Score predictions with accuracy and macro precision/recall/F1.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores; the arg-max over the last
            axis is taken as the predicted class).

    Returns:
        dict: keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    gold = pred.label_ids
    guess = pred.predictions.argmax(-1)

    metrics = {'accuracy': accuracy_score(gold, guess)}

    # Macro averaging: per-class scores are averaged with equal weight.
    prec, rec, f1_macro, _ = precision_recall_fscore_support(gold, guess, average='macro')
    metrics['f1'] = f1_macro
    metrics['precision'] = prec
    metrics['recall'] = rec
    return metrics


def train_and_evaluate_model(model_id):
    """Fine-tune one checkpoint on the reduced train subset and score it on test.

    The function tokenizes the dataset with the checkpoint's own tokenizer,
    trains on 1000 shuffled train examples (with 500 validation examples
    attached to the Trainer), then predicts 500 shuffled test examples one at
    a time and prints a classification report.

    Args:
        model_id: Checkpoint identifier passed to ``from_pretrained`` for both
            the tokenizer and the sequence-classification model.

    Returns:
        tuple: ``(model_id, accuracy)``, with accuracy measured on the
        500-example test subset.
    """
    # Local import: torch is not imported in the visible notebook cells.
    import torch

    print(f"\n{'='*60}")
    print(f"Modelo: {model_id}")
    print(f"{'='*60}")

    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Padding length = longest tokenized train example. One batched tokenizer
    # call replaces a separate call per training text.
    train_ids = tokenizer(list(dataset['train']['text']))["input_ids"]
    max_len = max(len(ids) for ids in train_ids)

    def tokenize(batch):
        # truncation=True caps validation/test rows that are longer than the
        # train maximum, so every encoded row has exactly max_len tokens.
        return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=max_len)

    encoded_data = dataset.map(tokenize, batched=True)

    small_train = encoded_data["train"].shuffle(seed=42).select(range(1000))
    small_val = encoded_data["validation"].shuffle(seed=42).select(range(500))
    small_test = encoded_data["test"].shuffle(seed=42).select(range(500))

    model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=NUM_LABELS)

    args = TrainingArguments(output_dir=f"./outputs_{model_id.replace('/', '_')}", report_to="none")

    trainer = Trainer(
        model=model,
        train_dataset=small_train,
        eval_dataset=small_val,
        args=args,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    # No evaluation pass follows trainer.train() here, so the model may still
    # be in training mode; switch to eval so dropout does not perturb the
    # test predictions.
    model.eval()

    def get_prediction(text):
        inputs = tokenizer(text, padding="max_length", max_length=max_len, truncation=True, return_tensors="pt").to(model.device)
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            logits = model(**inputs).logits
        # arg-max of the logits equals arg-max of their softmax.
        return logits.argmax().item()

    y_pred = [get_prediction(text) for text in small_test['text']]
    y_true = small_test['label']

    print(classification_report(y_true=y_true, y_pred=y_pred, target_names=labels))

    return model_id, accuracy_score(y_true, y_pred)

### Apartado b
Entrenar y evaluar todos los modelos.

In [None]:
# Train and score every candidate checkpoint, collecting the
# (model name, accuracy) tuples returned by train_and_evaluate_model.
# The loop variable is deliberately NOT called `model_id`: that name is also
# the notebook-level checkpoint used to load the Exercise 1 tokenizer, and
# rebinding it here would silently change it for any cell re-run afterwards.
results = []
for candidate_id in model_ids:
    results.append(train_and_evaluate_model(candidate_id))

print("\n" + "="*60)
print("RESUMEN DE RESULTADOS")
print("="*60)
for model_name, accuracy in results:
    print(f"{model_name}: {accuracy:.4f}")