In [None]:
#!/usr/bin/env python
"""
Fine-Tuning RoBERTa with Multiple LoRA Hyperparameter Combinations
for AG News text classification, then saving ALL run results and the best model.

Author: [Your Name]
Date: [Today's Date]
"""

import os
import torch
import evaluate
import pandas as pd
from tqdm import tqdm
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from sklearn.metrics import accuracy_score
from transformers import (
    RobertaTokenizer,
    RobertaForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    EarlyStoppingCallback
)


# ==============================
#  1) 加载数据集 & 分词器
# ==============================
base_model = 'roberta-base'
print("Loading AGNEWS dataset...")
dataset = load_dataset('ag_news', split='train')
tokenizer = RobertaTokenizer.from_pretrained(base_model)

def preprocess(examples):
    return tokenizer(examples['text'], truncation=True, padding=True)

print("Tokenizing dataset...")
tokenized_dataset = dataset.map(preprocess, batched=True, remove_columns=["text"])
tokenized_dataset = tokenized_dataset.rename_column("label", "labels")

# 提取标签信息
num_labels = dataset.features['label'].num_classes
class_names = dataset.features["label"].names
print(f"Number of labels: {num_labels}")
print(f"Label names: {class_names}")
id2label = {i: label for i, label in enumerate(class_names)}

# 定义 DataCollator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")


# ==============================
#  2) 切分训练集 / 验证集
# ==============================
print("Splitting dataset into training and evaluation sets...")
split_datasets = tokenized_dataset.train_test_split(test_size=640, seed=42)
train_dataset = split_datasets['train']
eval_dataset = split_datasets['test']


# ==============================
#  3) 定义 LoRA 注入的模块
# ==============================
# 下面示例仅针对顶层若干模块，可根据实际需要进行调整
target_modules = [
    "roberta.encoder.layer.7.attention.self.query",
    "roberta.encoder.layer.7.attention.self.value",
    "roberta.encoder.layer.8.attention.self.query",
    "roberta.encoder.layer.8.attention.self.value",
    "roberta.encoder.layer.9.attention.self.query",
    "roberta.encoder.layer.9.attention.self.value",
    "roberta.encoder.layer.10.intermediate.dense",
    "roberta.encoder.layer.10.output.dense",
    "roberta.encoder.layer.10.attention.self.query",
    "roberta.encoder.layer.10.attention.self.value",
    "roberta.encoder.layer.11.intermediate.dense",
    "roberta.encoder.layer.11.output.dense",
    "roberta.encoder.layer.11.attention.self.query",
    "roberta.encoder.layer.11.attention.self.value"
]


# ==============================
#  4) 训练与评估的通用设置
# ==============================
output_dir = "results"
training_args = TrainingArguments(
    output_dir=output_dir,
    report_to="tensorboard",
    logging_steps=500,
    logging_dir="logs",
    eval_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    learning_rate=3e-6,
    num_train_epochs=0.1,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    gradient_accumulation_steps=2,
    optim="adamw_torch",
    weight_decay=0.01,
    label_smoothing_factor=0.05,
    warmup_steps=300,
    lr_scheduler_type="cosine",
    fp16=True,
    dataloader_num_workers=8,
)

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    return {'accuracy': acc}

def get_trainer(model):
    return Trainer(
        model=model,
        args=training_args,
        compute_metrics=compute_metrics,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        data_collator=data_collator,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]
    )

def evaluate_model(inference_model, dataset, labelled=True, batch_size=8, data_collator=None):
    """
    Evaluate a model on a dataset. 
    Returns (metrics, all_predictions) if labelled=True, else only predictions.
    """
    eval_dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, collate_fn=data_collator
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    inference_model.to(device)
    inference_model.eval()
    all_predictions = []
    if labelled:
        metric = evaluate.load('accuracy')

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = inference_model(**batch)
        predictions = outputs.logits.argmax(dim=-1)
        all_predictions.append(predictions.cpu())
        if labelled:
            references = batch["labels"]
            metric.add_batch(
                predictions=predictions.cpu().numpy(),
                references=references.cpu().numpy()
            )

    all_predictions = torch.cat(all_predictions, dim=0)
    if labelled:
        eval_metric = metric.compute()
        return eval_metric, all_predictions
    else:
        return all_predictions


# ==============================
#  5) 多组合 LoRA 超参搜索
# ==============================
# 这是你给出的组合
lora_combinations = [
    (13, 13, 0.1),
    # (13, 13, 0.2),
    # (13, 13, 0.3),
    # (13, 26, 0.2),
    # (13, 26, 0.3),
    # (13, 39, 0.1),
    # (13, 39, 0.2),
    # (13, 39, 0.3),
]

# 用于记录每次 (r, alpha, dropout) 对应的评估结果
all_results = []

best_accuracy = -1
best_config = None
best_model_state = None  # 用于存储最佳模型权重

for (r, alpha, dropout) in lora_combinations:
    print("=" * 60)
    print(f"Running training with LoRA r={r}, alpha={alpha}, dropout={dropout}")
    print("=" * 60)

    # 1) 重新加载 base model
    base_roberta = RobertaForSequenceClassification.from_pretrained(
        base_model,
        id2label=id2label,
        num_labels=num_labels
    )

    # 2) 构造对应的 LoRA 配置
    peft_config = LoraConfig(
        r=r,
        lora_alpha=alpha,
        lora_dropout=dropout,
        bias='none',
        target_modules=target_modules,
        task_type="SEQ_CLS"
    )

    # 3) 将基础模型包装为 LoRA 模型
    peft_model = get_peft_model(base_roberta, peft_config)
    peft_model.print_trainable_parameters()

    # 4) 实例化 Trainer
    trainer = get_trainer(peft_model)

    # 5) 开始训练
    print("Starting training...")
    train_result = trainer.train()
    print("Finished training!")

    # 6) 评估
    eval_result = trainer.evaluate()
    current_accuracy = eval_result["eval_accuracy"]
    print(f"--> Current eval_accuracy: {current_accuracy:.4f}")

    # 将该组合的结果记录下来
    all_results.append({
        "r": r,
        "lora_alpha": alpha,
        "lora_dropout": dropout,
        "eval_accuracy": current_accuracy
    })

    # 若本组合优于先前最佳，则更新最优模型
    if current_accuracy > best_accuracy:
        best_accuracy = current_accuracy
        best_config = (r, alpha, dropout)
        print(f"** Best so far! Saving current best model (accuracy={best_accuracy:.4f})")
        trainer.save_model("temp_best_model")  
        best_model_state = peft_model.state_dict()


# ==============================
#  6) 保存所有组合的结果 (CSV)
# ==============================
df_results = pd.DataFrame(all_results)
df_results.sort_values(by="eval_accuracy", ascending=False, inplace=True)
csv_path = "all_lora_results.csv"
df_results.to_csv(csv_path, index=False)
print(f"\nSaved all runs' results to {csv_path}")
print(df_results)


# ==============================
#  7) 加载最佳模型 & 最终验证
# ==============================
print("\n" + "=" * 60)
print("All runs finished!")
print(f"Best accuracy={best_accuracy:.4f} with LoRA config: r={best_config[0]}, alpha={best_config[1]}, dropout={best_config[2]}")
print("=" * 60)

print("Reloading best model from base + LoRA config, and verifying...")

# 1) 重新加载 base
final_base = RobertaForSequenceClassification.from_pretrained(
    base_model,
    id2label=id2label,
    num_labels=num_labels
)

# 2) 重新构造最佳 LoRA 配置
final_peft_config = LoraConfig(
    r=best_config[0],
    lora_alpha=best_config[1],
    lora_dropout=best_config[2],
    bias='none',
    target_modules=target_modules,
    task_type="SEQ_CLS"
)
final_peft_model = get_peft_model(final_base, final_peft_config)

# 3) 加载最佳权重
final_peft_model.load_state_dict(best_model_state)

# 4) 最终验证
metrics, _ = evaluate_model(
    final_peft_model,
    eval_dataset,
    labelled=True,
    batch_size=64,
    data_collator=data_collator
)
print("Double-check final metrics:", metrics)

# 5) 保存最终最佳模型
final_save_path = "best_lora_model"
final_peft_model.save_pretrained(final_save_path)
tokenizer.save_pretrained(final_save_path)
print(f"Final best model saved to {final_save_path}")


Loading AGNEWS dataset...
Tokenizing dataset...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Number of labels: 4
Label names: ['World', 'Sports', 'Business', 'Sci/Tech']
Splitting dataset into training and evaluation sets...
Running training with LoRA r=13, alpha=13, dropout=0.1
trainable params: 993,028 || all params: 125,641,736 || trainable%: 0.7904
Starting training...


Step,Training Loss,Validation Loss


Finished training!


--> Current eval_accuracy: 0.2437
** Best so far! Saving current best model (accuracy=0.2437)

Saved all runs' results to all_lora_results.csv
    r  lora_alpha  lora_dropout  eval_accuracy
0  13          13           0.1        0.24375

All runs finished!
Best accuracy=0.2437 with LoRA config: r=13, alpha=13, dropout=0.1
Reloading best model from base + LoRA config, and verifying...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Evaluating: 100%|██████████| 10/10 [00:01<00:00,  5.07it/s]


Double-check final metrics: {'accuracy': 0.24375}
Final best model saved to best_lora_model


In [4]:
# -------------------------------
# Optional: Inference on an Unlabelled Dataset
# -------------------------------
# Load an unlabelled dataset (example using pickle), tokenize, and run inference.
unlabelled_dataset = pd.read_pickle("test_unlabelled.pkl")
test_dataset = unlabelled_dataset.map(preprocess, batched=True, remove_columns=["text"])



from transformers import AutoModelForSequenceClassification, AutoTokenizer
from peft import PeftModel, PeftConfig

# 加载 adapter 的 config
config = PeftConfig.from_pretrained("best_adapter")

# 加载 base model
base_model = AutoModelForSequenceClassification.from_pretrained(
    config.base_model_name_or_path,
    num_labels=4  # ✅ 必须与训练时一致
)
# 加载最佳 adapter 权重
best_model = PeftModel.from_pretrained(base_model, "best_adapter")

# 推理时使用这个模型
preds = evaluate_model(best_model, test_dataset, labelled=False, batch_size=32, data_collator=data_collator)

# preds = evaluate_model(peft_model, test_dataset, labelled=False, batch_size=32, data_collator=data_collator)
# Save predictions to CSV.
df_output = pd.DataFrame({
    'ID': range(len(preds)),
    'Label': preds.numpy()  # or preds.tolist()
})
df_output.to_csv(os.path.join(output_dir, "inference_output_B.csv"), index=False)
print("Inference complete. Predictions saved to inference_output.csv")

if __name__ == "__main__":
    print("Finished all steps. You can now run inference or further analysis as needed.")

Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Evaluating: 100%|██████████| 250/250 [00:15<00:00, 16.07it/s]

Inference complete. Predictions saved to inference_output.csv
Finished all steps. You can now run inference or further analysis as needed.



