In [None]:
!pip install transformers datasets peft trl bitsandbytes wandb

Collecting trl
  Downloading trl-0.17.0-py3-none-any.whl.metadata (12 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.13.0->peft)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.13.0->peft)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.13.0->peft)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch>=1.13.0->peft)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvi

In [None]:
import wandb
from kaggle_secrets import UserSecretsClient
# Kaggle secrets client; `user_secrets` is reused by the training cell to fetch HF_TOKEN.
user_secrets = UserSecretsClient()
# Read the Weights & Biases API key from the notebook's secrets (never hard-code it).
secret_value_0 = user_secrets.get_secret("WANDB_API_KEY")
# Authenticate this session with W&B so the trainer can report metrics.
wandb.login(key=secret_value_0)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmudit-jain2303[0m ([33mmudit-jain2303-mait[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
# Imports for the DPO fine-tuning run (packages are installed in the first cell)

import os
import gc
import torch

import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, PeftModel
from trl import DPOTrainer,DPOConfig
import bitsandbytes as bnb
# from google.colab import userdata
import wandb

# Hugging Face token, read from Kaggle secrets through the `user_secrets` client
# created in the W&B login cell (that cell must run first).
hf_token = user_secrets.get_secret("HF_TOKEN")   # requires an "HF_TOKEN" entry in the notebook's secrets
# W&B authentication already happened in the login cell, so no separate wandb token is read here.

# Model configuration
base_model_name = "Qwen/Qwen2.5-7B-Instruct"  # base checkpoint that gets fine-tuned
new_model_name = "Socratic-Qwen2.5-7B-v2"  # used as trainer output_dir, merged-model directory and Hub repo name

# Load the preference dataset; only its 'train' split is used and it is re-split further below
dataset = load_dataset("mudit23/class7-socratic-dpo", token=hf_token)['train']
print(f"Dataset loaded with {len(dataset)} examples")
print(f"Sample columns: {dataset.column_names}")



# Load the tokenizer that matches the base model
tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=hf_token, trust_remote_code=True)
if tokenizer.pad_token is None:
    # Fall back to EOS for padding when the tokenizer defines no pad token.
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Function to format your dataset entries according to Qwen2.5's chat template
def format_dpo_dataset(
    example,
    system_prompt="You are a helpful, harmless, and honest assistant.",
    assistant_role="teacher",
):
    """
    Convert one raw record into the prompt / chosen / rejected strings used for DPO.

    Expected fields of ``example``:
    - 'question': string, becomes the user turn of the prompt
    - 'chosen': list of {'role': ..., 'content': ...} dialogue turns
    - 'rejected': string (less preferred assistant answer)

    Args:
        example: a single dataset row (mapping) with the fields above.
        system_prompt: content of the system turn placed before the question.
            Defaults to the prompt this notebook has always used.
        assistant_role: value of 'role' that marks the assistant's turns inside
            'chosen'. Only those turns are kept; all other turns are dropped.

    Returns:
        dict with string values under "prompt", "chosen" and "rejected".
        "prompt" is rendered with the module-level ``tokenizer``'s chat template
        and ends with the generation prompt; "chosen" is the kept turns joined
        by newlines. Both responses end with the tokenizer's EOS token when the
        tokenizer defines one.
    """
    # A tokenizer without an EOS token reports None; str.endswith(None) would
    # raise TypeError, so treat "no EOS" as "nothing to append".
    eos = tokenizer.eos_token or ""

    def _with_eos(text):
        # Append EOS exactly once; leave text alone if it is already terminated.
        if eos and not text.endswith(eos):
            return text + eos
        return text

    # 1. Render system + user turns with the tokenizer's chat template.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": example['question'].strip()},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # 2. The preferred answer is the concatenation of the assistant-side turns only.
    chosen_turns = [
        turn['content'].strip()
        for turn in example['chosen']
        if turn['role'] == assistant_role
    ]
    chosen_response = "\n".join(chosen_turns)

    # 3. Rejected is already a plain assistant reply.
    rejected_response = example['rejected'].strip()

    # 4. Terminate both responses with EOS.
    # NOTE(review): some TRL releases append eos_token_id to chosen/rejected
    # during tokenization, which would duplicate the EOS added here - confirm
    # against the installed TRL version before changing this.
    return {
        "prompt": prompt,
        "chosen": _with_eos(chosen_response),
        "rejected": _with_eos(rejected_response),
    }

# Show one untouched record so the raw schema is visible in the cell output.
print("\nRaw dataset example:", dataset[0], sep="\n")

# Map every record to prompt/chosen/rejected strings and drop the raw columns.
original_columns = dataset.column_names
formatted_dataset = dataset.map(format_dpo_dataset, remove_columns=original_columns)

# Hold out 10% for evaluation; the fixed seed keeps the split reproducible.
split_dataset = formatted_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset, eval_dataset = split_dataset['train'], split_dataset['test']
print(len(train_dataset), len(eval_dataset), sep="\n")

# Show the first formatted record (taken before the split).
print("\nFormatted dataset example:", formatted_dataset[0], sep="\n")

# Quantization settings used to load the base model for training
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # load the weights in 4-bit precision
    bnb_4bit_use_double_quant=True,  # also quantize the quantization constants
    bnb_4bit_quant_type="nf4",  # NF4 4-bit data type
    # NOTE(review): presumably only matters if device_map="auto" places some
    # modules on the CPU - confirm whether this flag is needed for this run.
    llm_int8_enable_fp32_cpu_offload=True,
    bnb_4bit_compute_dtype=torch.float16  # matmuls run in fp16, matching fp16=True in the training arguments
)

# Load the quantized base model; device placement is delegated to device_map="auto"
print("\nLoading Qwen2.5 model. This may take a few minutes...")
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    token=hf_token,
    # attn_implementation="flash_attention_2"  # left disabled; the default attention implementation is used
)
# Disable the KV cache for training (gradient checkpointing is enabled in the training arguments).
model.config.use_cache = False

# LoRA adapter configuration.
# Adapters are attached to every attention projection (q/k/v/o) and every MLP
# projection (gate/up/down) of the decoder layers. The former "w1"/"w2"/"w3"
# entries were removed: they are not module names in the Qwen2.5 architecture,
# so they never matched anything and only made the list misleading.
peft_config = LoraConfig(
    r=16,  # rank of the low-rank update matrices
    lora_alpha=32,  # scaling numerator; effective scale is lora_alpha / r = 2
    lora_dropout=0.05,  # dropout applied on the LoRA branch
    bias="none",  # bias terms are not trained
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ]
)

# Training arguments for the DPO run (the train split printed above has 660 examples)
training_args = DPOConfig(
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,  # one example per device per forward pass to limit memory use
    gradient_accumulation_steps=16,  # 1 x 16 = 16 examples per optimizer step per device
    gradient_checkpointing=True,  # recompute activations in the backward pass to save memory
    learning_rate=5e-6,
    lr_scheduler_type="cosine",
    max_steps=200,  # step budget; with 16 examples per step this is several passes over the train split
    save_strategy="steps",
    save_steps=50,  # same interval as eval_steps, so every checkpoint has an eval_loss
    save_total_limit=1,  # cap on retained checkpoints (1, not 2)
    logging_steps=10,
    output_dir=new_model_name,  # checkpoints are written here; the merged model is later saved to the same directory
    optim="paged_adamw_8bit",  # 8-bit paged AdamW to reduce optimizer memory
    warmup_steps=50,  # learning-rate warmup over the first 50 of the 200 steps
    bf16=False,  # bf16 is disabled; mixed precision uses fp16 (next line)
    fp16=True,
    eval_strategy="steps",
    eval_steps=50,
    report_to="wandb",
    load_best_model_at_end=True,  # reload the checkpoint with the best metric when training finishes
    metric_for_best_model="eval_loss",
    greater_is_better=False,  # lower eval_loss is better
    model_init_kwargs=None,  # the model is passed to the trainer already instantiated
    max_prompt_length=768,  # token limit for the prompt
    max_length=1536,  # token limit for prompt + response
    beta=0.2,  # DPO beta hyperparameter
)

# Build the DPO trainer from the quantized model, the train/eval splits and the LoRA config.
# No ref_model is passed.
# NOTE(review): presumably TRL then uses the base model with adapters disabled
# as the reference policy - confirm for the installed TRL version.
print("\nInitializing DPO trainer for Qwen2.5...")
dpo_trainer = DPOTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    processing_class=tokenizer,  # tokenizer used by the trainer to tokenize prompt/chosen/rejected
    peft_config=peft_config,  # the trainer wraps `model` with these LoRA adapters
)

# Run training; metrics are reported to W&B (report_to="wandb" in training_args)
print("\nStarting DPO training for Qwen2.5...")
dpo_trainer.train()

# Save the trained adapter and tokenizer; "final_checkpoint" is reloaded below
# with PeftModel.from_pretrained for merging.
print("\nSaving model...")
dpo_trainer.model.save_pretrained("final_checkpoint")
tokenizer.save_pretrained("final_checkpoint")

# Drop the trainer and the quantized model, then release cached GPU memory
# before the base model is loaded again for merging.
del dpo_trainer, model
gc.collect()
torch.cuda.empty_cache()

# Reload the base model unquantized in FP16 (instead of NF4) so the adapter can be merged into it.
# Unlike the training load, no quantization_config or device_map is passed here.
print("\nReloading Qwen2.5 base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    token=hf_token
)

# Attach the saved adapter, then fold it into the base weights.
# `model` is rebound: first to the PEFT-wrapped model, then to the merged model.
print("\nMerging with LoRA weights...")
model = PeftModel.from_pretrained(base_model, "final_checkpoint")
model = model.merge_and_unload()

# Save the merged model and tokenizer. new_model_name is also the trainer's
# output_dir, so this writes into the same directory as the training checkpoints.
print("\nSaving merged model...")
model.save_pretrained(new_model_name)
tokenizer.save_pretrained(new_model_name)

# Upload the merged model, then the tokenizer, to the Hub repo named
# new_model_name. Skipped entirely when no Hugging Face token is available.
if hf_token:
    print("\nPushing model to Hugging Face Hub...")
    for _artifact in (model, tokenizer):
        _artifact.push_to_hub(new_model_name, use_temp_dir=False, token=hf_token)

2025-05-05 17:37:22.363269: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746466642.555809      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746466642.612869      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


README.md:   0%|          | 0.00/463 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/776k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/734 [00:00<?, ? examples/s]

Dataset loaded with 734 examples
Sample columns: ['question', 'rejected', 'chosen']


tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]


Raw dataset example:
{'question': 'What early studies were inspired by simple observations like paper planes?', 'rejected': 'According to NCERT Class 7 Science, the early studies inspired by simple observations like paper planes are the studies of aerodynamics and the work of Leonardo da Vinci.', 'chosen': [{'content': 'What do you think is the connection between paper planes and early studies in science?', 'role': 'teacher'}, {'content': "Hmm, I'm not sure. Maybe it's because paper planes fly in the air?", 'role': 'student'}, {'content': "That's a good start! Flight is definitely related to paper planes. Can you think of someone who might be interested in studying how things fly?", 'role': 'teacher'}, {'content': 'Like pilots or people who build airplanes?', 'role': 'student'}, {'content': 'Exactly! Now, can you think of someone who lived a long time ago, before airplanes were invented, but might still be interested in how things fly?', 'role': 'teacher'}, {'content': 'Maybe someone 

Map:   0%|          | 0/734 [00:00<?, ? examples/s]

660
74

Formatted dataset example:
{'rejected': 'According to NCERT Class 7 Science, the early studies inspired by simple observations like paper planes are the studies of aerodynamics and the work of Leonardo da Vinci.<|im_end|>', 'chosen': "What do you think is the connection between paper planes and early studies in science?\nThat's a good start! Flight is definitely related to paper planes. Can you think of someone who might be interested in studying how things fly?\nExactly! Now, can you think of someone who lived a long time ago, before airplanes were invented, but might still be interested in how things fly?\nThat's correct! Leonardo da Vinci made many drawings of flying machines, including birds in flight. His studies on bird flight and wing structure actually laid the foundation for modern aerodynamics. And it all started with simple observations, much like throwing a paper plane.\nSo, to summarize, simple observations like paper planes can lead to early studies in science, ju

config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/27.8k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.56G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/243 [00:00<?, ?B/s]


Initializing DPO trainer for Qwen2.5...


Extracting prompt in train dataset:   0%|          | 0/660 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/660 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/660 [00:00<?, ? examples/s]

Extracting prompt in eval dataset:   0%|          | 0/74 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/74 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/74 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.



Starting DPO training for Qwen2.5...




Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
50,0.0035,0.001412,5.996588,-1.601741,1.0,7.598329,-369.52002,-224.416672,0.31236,-0.676932
100,0.0002,0.000261,7.650548,-2.222262,1.0,9.872809,-361.250214,-227.519272,0.345647,-0.645607
150,0.0002,0.000222,7.803051,-2.286029,1.0,10.08908,-360.487671,-227.838135,0.34786,-0.64434
200,0.0002,0.000216,7.824438,-2.303201,1.0,10.12764,-360.380737,-227.923981,0.348026,-0.644479





Saving model...

Reloading Qwen2.5 base model...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]


Merging with LoRA weights...

Saving merged model...

Pushing model to Hugging Face Hub...


model-00001-of-00004.safetensors:   0%|          | 0.00/4.88G [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.09G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.33G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]