In [None]:
# Mount Google Drive at /content/drive; later cells read the parquet
# splits from paths underneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install -q transformers datasets accelerate einops
!pip install -q trl peft
!pip install -q huggingface_hub

In [None]:
from datasets import load_dataset

# Locations of the train / validation parquet files on the mounted Drive.
train_path = "/content/drive/MyDrive/VT_Intro2AI/intro2AI_sft/train.parquet"
val_path = "/content/drive/MyDrive/VT_Intro2AI/intro2AI_sft/val.parquet"

# Load both files through the "parquet" builder, keyed by split name
# ("train" and "val").
dataset = load_dataset(
    "parquet",
    data_files=dict(train=train_path, val=val_path),
)

# Bare expression so the notebook displays the loaded splits.
dataset

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hub id of the base checkpoint; reused later when the trainer is constructed.
model_name = "Qwen/Qwen3-8B"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Only fall back to EOS for padding when the tokenizer ships without a pad
# token. Overwriting an existing pad token with EOS makes the two ids
# identical, and collators that mask label positions equal to the pad id
# would then also mask genuine end-of-sequence tokens out of the loss.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
from peft import LoraConfig

# LoRA adapter settings handed to the trainer via `peft_config`.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules="all-linear",  # adapt every linear layer PEFT selects for this shortcut
    r=32,
    lora_alpha=16,
    lora_dropout=0.05,
)

In [None]:
import pandas as pd

# Sanity-check the raw training parquet with pandas: print the columns, the
# first row, and the two fields the formatting function later depends on
# (the first message's "content" under "prompt", and "answer" under
# "extra_info").
df = pd.read_parquet("/content/drive/MyDrive/VT_Intro2AI/intro2AI_sft/train.parquet")
print("Columns:", df.columns.tolist())
print("\nFirst Row (raw):")
print(df.iloc[0])

print("\nParsed Prompt and Answer:")
first = df.iloc[0]
prompt_list = first["prompt"]

# Parquet list columns are materialised by pandas as numpy arrays rather than
# Python lists, so a strict `isinstance(..., list)` check always falls through
# to the "unexpected format" branch. Accept any sized, non-string, non-mapping
# container whose first element is a dict instead.
is_message_seq = (
    hasattr(prompt_list, "__len__")
    and not isinstance(prompt_list, (str, bytes, dict))
    and len(prompt_list) > 0
)
if is_message_seq and isinstance(prompt_list[0], dict):
    print("Prompt content:", prompt_list[0].get("content", None))
else:
    print("Prompt content: <unexpected format>", prompt_list)

extra_info = first["extra_info"]
# Mirror the prompt handling: report an unexpected shape instead of raising
# AttributeError when extra_info is not a mapping (e.g. a missing value).
if isinstance(extra_info, dict):
    print("Answer:", extra_info.get("answer", None))
else:
    print("Answer: <unexpected format>", extra_info)

In [None]:
from transformers import TrainingArguments

# Trainer hyper-parameters for the LoRA fine-tune.
training_args = TrainingArguments(
    output_dir="qwen3_sft_lora",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=1,
    learning_rate=1e-4,
    num_train_epochs=10,
    logging_steps=10,
    save_steps=200,
    # `eval_steps` only takes effect when the evaluation strategy is "steps";
    # with the default ("no") the eval dataset passed to the trainer would
    # never be evaluated during training.
    eval_strategy="steps",
    eval_steps=200,
    bf16=True,
    fp16=False,
    report_to="none",  # disable external experiment trackers
)

In [None]:
# Rename the "prompt" column to "user_prompt" in both splits. The guard makes
# the cell safe to re-run: a second execution would otherwise raise because
# the "prompt" column no longer exists after the first rename.
for split_name in ("train", "val"):
    if "prompt" in dataset[split_name].column_names:
        dataset[split_name] = dataset[split_name].rename_column("prompt", "user_prompt")

def formatting_fn(example):
    """Build the training text for a single dataset example.

    Args:
        example: Mapping with a "user_prompt" entry (a sequence whose first
            item is a mapping with a "content" key) and an "extra_info"
            mapping containing an "answer" key.

    Returns:
        The first prompt message's content, a newline, then the answer.
    """
    first_message = example["user_prompt"][0]
    question = first_message["content"]
    target = example["extra_info"]["answer"]
    return "\n".join((question, target))

In [None]:
from trl import SFTTrainer

# Assemble the supervised fine-tuning trainer. The model is passed as the hub
# id string (`model_name`), not as an already-loaded model object.
# NOTE(review): loading dtype / memory footprint is therefore left to the
# trainer's defaults — confirm this fits the target GPU.
trainer = SFTTrainer(
    model=model_name,
    args=training_args,
    peft_config=peft_config,
    processing_class=tokenizer,
    formatting_func=formatting_fn,
    train_dataset=dataset["train"],
    eval_dataset=dataset["val"],
)

# Run the fine-tuning loop.
trainer.train()

In [None]:
# Save the trained model through the trainer into "qwen3_sft_lora" (the same
# directory name used as the training `output_dir`).
# NOTE(review): this is a relative path, presumably on the runtime's local
# disk rather than the mounted Drive — confirm if the result must persist.
trainer.save_model("qwen3_sft_lora")