In [4]:
!pip install evaluate datasets transformers scikit-learn

import os
import numpy as np
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    get_cosine_schedule_with_warmup
)
import evaluate

# --- 1. Disable Weights & Biases (avoids the interactive credentials prompt) ---
# Set before any Trainer is constructed further down in this file.
os.environ["WANDB_DISABLED"] = "true"

# --- Initial configuration ---
# Checkpoint name shared by the tokenizer and by every model_init_* function.
MODEL_CHECKPOINT = "distilbert-base-uncased"
# GLUE task name; used for both the dataset and the metric below.
TASK = "sst2"

# Load the dataset, the matching metric, and the checkpoint's tokenizer
print("Carregando dataset e métricas...")
dataset = load_dataset("glue", TASK)
metric = evaluate.load("glue", TASK)
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)

# Pre-processing
def preprocess_function(examples):
    """Tokenize the ``sentence`` field of a batch of examples.

    Args:
        examples: Batch mapping that contains a ``'sentence'`` entry, as passed
            by ``dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output for the batch, truncated to at most 128 tokens.
    """
    # No padding here: padding inside ``map`` would pad every example to the
    # longest sentence of its whole map batch and store those pad tokens in the
    # dataset. Padding is left to the Trainer's collator, which pads each
    # training batch only to its own longest sequence (the Trainer is given the
    # tokenizer via ``processing_class`` in run_experiment).
    return tokenizer(examples['sentence'], truncation=True, max_length=128)

# Tokenize the dataset; batched=True hands preprocess_function a batch of
# examples per call instead of a single example.
encoded_dataset = dataset.map(preprocess_function, batched=True)

def compute_metrics(eval_pred):
    """Score a ``(predictions, labels)`` pair with the loaded metric.

    The predicted class of each row is the index of its largest score
    along axis 1.
    """
    logits, references = eval_pred
    predicted_classes = np.argmax(logits, axis=1)
    return metric.compute(predictions=predicted_classes, references=references)

# --- Base training function ---
def run_experiment(exp_name, model_init_fn, training_args_updates=None):
    """Train and evaluate one experiment configuration.

    Args:
        exp_name: Label used in the console messages and in the names of the
            output and logging directories.
        model_init_fn: Zero-argument callable that returns the model to train.
        training_args_updates: Optional mapping of ``TrainingArguments``
            keyword overrides applied on top of the defaults defined below.
            ``None`` (the default) applies no overrides.

    Returns:
        The metrics returned by ``trainer.evaluate()`` on the validation split.
    """
    print(f"\n\n>>> Iniciando Experimento: {exp_name} <<<")

    # Default arguments
    args_dict = {
        "output_dir": f"./results_{exp_name}",
        "eval_strategy": "epoch",  # Name used by newer transformers versions
        "save_strategy": "epoch",
        "learning_rate": 2e-5,
        "per_device_train_batch_size": 16,
        "per_device_eval_batch_size": 16,
        "num_train_epochs": 3,
        "weight_decay": 0.01,
        "load_best_model_at_end": True,
        "metric_for_best_model": "accuracy",
        "push_to_hub": False,
        "logging_dir": f"./logs_{exp_name}",
        "optim": "adamw_torch",
        "report_to": "none"  # Makes sure W&B is not used
    }

    # Apply experiment-specific overrides. The parameter defaults to None
    # rather than a shared ``{}`` so no dict object is reused across calls.
    if training_args_updates:
        args_dict.update(training_args_updates)

    args = TrainingArguments(**args_dict)

    model = model_init_fn()

    # Trainer
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=encoded_dataset["train"],
        eval_dataset=encoded_dataset["validation"],
        processing_class=tokenizer,  # Replaces the older 'tokenizer' argument
        compute_metrics=compute_metrics,
    )

    # Train, then evaluate on the validation split. Because
    # load_best_model_at_end is set, this final evaluation presumably scores
    # the best checkpoint rather than the last epoch.
    trainer.train()
    metrics = trainer.evaluate()

    print(f"Resultados ({exp_name}): {metrics}")
    return metrics

# ==========================================
# 1. Original reproduction (Baseline TPF-02)
# ==========================================
def model_init_original():
    """Return the unmodified two-label classifier loaded from MODEL_CHECKPOINT."""
    baseline_model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_CHECKPOINT,
        num_labels=2,
    )
    return baseline_model

# ==========================================
# 2. Modification 1: Layer Freezing (architecture)
# ==========================================
def model_init_frozen():
    """Return the two-label classifier with part of its backbone frozen.

    The embeddings and encoder layers 0, 1 and 2 have ``requires_grad``
    switched off on every parameter; all other parameters are left as loaded.
    """
    classifier = AutoModelForSequenceClassification.from_pretrained(MODEL_CHECKPOINT, num_labels=2)
    backbone = classifier.distilbert

    # Embeddings first, then the first three encoder layers, in that order.
    frozen_modules = [backbone.embeddings]
    frozen_modules.extend(backbone.transformer.layer[idx] for idx in range(3))

    for module in frozen_modules:
        for weight in module.parameters():
            weight.requires_grad = False

    print("Camadas congeladas: Embeddings + Encoder Layers 0-2.")
    return classifier

# ==========================================
# 3. Modification 2: Hyperparameters (cosine scheduler + weight decay)
# ==========================================
def model_init_hyper():
    """Return the two-label classifier used by the hyperparameter experiment.

    The model is loaded exactly as in the baseline; this experiment differs
    only in the training arguments passed alongside it.
    """
    tuned_run_model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_CHECKPOINT,
        num_labels=2,
    )
    return tuned_run_model

# TrainingArguments overrides for the hyperparameter experiment; passed to
# run_experiment, where they replace or extend the default arguments.
training_args_mod2 = {
    "learning_rate": 3e-5,  # run_experiment default is 2e-5
    "weight_decay": 0.05,  # run_experiment default is 0.01
    "lr_scheduler_type": "cosine",  # not set in the run_experiment defaults
    "warmup_ratio": 0.1  # not set in the run_experiment defaults
}

# --- Main execution ---
# Runs the three experiments one after another and prints their accuracies.
if __name__ == "__main__":
    # Baseline: unmodified model, default training arguments.
    print("Rodando Baseline...")
    res_orig = run_experiment("original", model_init_original)

    # Modification 1: partially frozen model, default training arguments.
    print("Rodando Modificação 1 (Congelamento)...")
    res_mod1 = run_experiment("layer_freezing", model_init_frozen)

    # Modification 2: unmodified model, overridden training arguments.
    print("Rodando Modificação 2 (Hiperparâmetros)...")
    res_mod2 = run_experiment("hyperparams_opt", model_init_hyper, training_args_mod2)

    # Final summary: validation accuracy reported by each run.
    print("\n\n=== RESUMO FINAL ===")
    print(f"Original Acurácia: {res_orig['eval_accuracy']:.4f}")
    print(f"Mod 1 (Freezing) Acurácia: {res_mod1['eval_accuracy']:.4f}")
    print(f"Mod 2 (HyperOpt) Acurácia: {res_mod2['eval_accuracy']:.4f}")

Carregando dataset e métricas...


Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

Rodando Baseline...


>>> Iniciando Experimento: original <<<


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1835,0.325518,0.90711
2,0.1324,0.382383,0.897936
3,0.0796,0.416519,0.90711


Resultados (original): {'eval_loss': 0.32551810145378113, 'eval_accuracy': 0.9071100917431193, 'eval_runtime': 1.4342, 'eval_samples_per_second': 608.025, 'eval_steps_per_second': 38.35, 'epoch': 3.0}
Rodando Modificação 1 (Congelamento)...


>>> Iniciando Experimento: layer_freezing <<<


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Camadas congeladas: Embeddings + Encoder Layers 0-2.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2298,0.279847,0.899083
2,0.1777,0.333017,0.900229
3,0.1371,0.377994,0.894495


Resultados (layer_freezing): {'eval_loss': 0.33301690220832825, 'eval_accuracy': 0.9002293577981652, 'eval_runtime': 1.4334, 'eval_samples_per_second': 608.35, 'eval_steps_per_second': 38.371, 'epoch': 3.0}
Rodando Modificação 2 (Hiperparâmetros)...


>>> Iniciando Experimento: hyperparams_opt <<<


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1958,0.289576,0.900229
2,0.1306,0.316504,0.904817
3,0.0685,0.380381,0.905963


Resultados (hyperparams_opt): {'eval_loss': 0.3803805112838745, 'eval_accuracy': 0.9059633027522935, 'eval_runtime': 1.4312, 'eval_samples_per_second': 609.27, 'eval_steps_per_second': 38.429, 'epoch': 3.0}


=== RESUMO FINAL ===
Original Acurácia: 0.9071
Mod 1 (Freezing) Acurácia: 0.9002
Mod 2 (HyperOpt) Acurácia: 0.9060
