In [1]:
# Install the fine-tuning stack with uv (shell escape from the notebook).
# NOTE(review): `trl` and `pandas` are imported further down but are not listed
# here -- presumably they arrive as transitive dependencies; confirm.
# NOTE(review): versions are unpinned, so a fresh environment may resolve
# different releases than the ones this notebook was developed against.
!uv pip install unsloth transformers torch bitsandbytes peft accelerate datasets

[2mUsing Python 3.11.13 environment at: /home/ardjano/.pyenv/versions/unsloth-env[0m
[2mAudited [1m7 packages[0m [2min 9ms[0m[0m


In [2]:
# ipywidgets is a notebook UI dependency; scikit-learn supplies the
# `train_test_split` helper used when the data is split below.
!uv pip install ipywidgets scikit-learn

[2mUsing Python 3.11.13 environment at: /home/ardjano/.pyenv/versions/unsloth-env[0m
[2mAudited [1m2 packages[0m [2min 4ms[0m[0m


In [3]:
# Plotting libraries used by the loss-curve cell near the end of the notebook.
!uv pip install matplotlib seaborn

[2mUsing Python 3.11.13 environment at: /home/ardjano/.pyenv/versions/unsloth-env[0m
[2mAudited [1m2 packages[0m [2min 3ms[0m[0m


In [4]:
# Using pip
!pip install -U ipywidgets

# This installs the JupyterLab extension
!pip install jupyterlab-widgets


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [5]:
from unsloth import FastModel
import torch
import pandas as pd
import numpy as np
from datasets import Dataset
from trl import SFTTrainer, SFTConfig
from unsloth.chat_templates import train_on_responses_only
from unsloth.chat_templates import get_chat_template

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [6]:
# Load the pre-quantized 4-bit Qwen3-32B checkpoint together with its tokenizer.
# NOTE(review): the recorded output of this cell is a ValueError ("Some modules
# are dispatched on the CPU or the disk") on a single RTX 3090, i.e. the
# quantized model did not fit in GPU memory. Every later cell depends on
# `model`/`tokenizer`, so this must be resolved (smaller checkpoint, more VRAM,
# or an explicit offload configuration) before the notebook can run end to end.
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen3-32B-unsloth-bnb-4bit", # pre-quantized (bnb 4-bit) Unsloth checkpoint
    max_seq_length = 2048,  # sequence-length limit handed to Unsloth
    load_in_4bit = True,
)



==((====))==  Unsloth 2025.7.11: Fast Qwen3 patching. Transformers: 4.54.1.
   \\   /|    NVIDIA GeForce RTX 3090. Num GPUs = 1. Max memory: 23.683 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu126. CUDA: 8.6. CUDA Toolkit: 12.6. Triton: 3.3.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.31.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [None]:
# Projection layers that receive LoRA adapters: the four attention projections
# followed by the three MLP projections.
LORA_TARGET_MODULES = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the base model with LoRA adapters (rank 16, alpha 16, no dropout, no
# bias terms), using Unsloth's gradient-checkpointing mode and a fixed seed.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = LORA_TARGET_MODULES,
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 42,
)

In [None]:
from sklearn.model_selection import train_test_split

# Read the conversation CSV and keep only the prompt/reply columns that the
# chat-formatting step consumes.
df = pd.read_csv("../data/humanandllm.csv")[['Context', 'Response']]

# Hold out 5.7% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
trainset, testset = train_test_split(
    df,
    test_size=0.057,
    random_state=42,
)

In [None]:
# New code for Qwen3-32B
# No need for `get_chat_template`, Unsloth handles it automatically for Qwen3.

def format_chat_template(row):
    """Render one Context/Response pair as a single chat-formatted string.

    Builds a two-turn conversation (user = ``row["Context"]``, assistant =
    ``row["Response"]``), passes it through the module-level ``tokenizer``'s
    chat template without tokenizing and without a trailing generation
    prompt, and stores the rendered string under ``row["text"]``.

    The row is modified in place and the same object is returned, which is
    the shape ``DataFrame.apply(..., axis=1)`` expects.
    """
    conversation = [
        {"role" : role, "content" : row[column]}
        for role, column in (("user", "Context"), ("assistant", "Response"))
    ]
    # `enable_thinking=False` is forwarded to the template as-is.
    # NOTE(review): confirm it has an effect when no generation prompt is added.
    rendered = tokenizer.apply_chat_template(
        conversation,
        tokenize = False,
        add_generation_prompt = False,
        enable_thinking = False,
    )
    row["text"] = rendered
    return row




In [None]:
# Apply the formatting function to each split DataFrame
formatted_trainset = trainset.apply(format_chat_template, axis=1)
formatted_testset = testset.apply(format_chat_template, axis=1)

# Convert each pandas DataFrame to a Hugging Face Dataset
hf_train = Dataset.from_pandas(formatted_trainset)
hf_test = Dataset.from_pandas(formatted_testset)

hf_train = hf_train.remove_columns(["Context", "Response"])
hf_test = hf_test.remove_columns(["Context", "Response"])

In [None]:
# Training hyper-parameters, kept in their own object so they can be inspected
# or tweaked without touching the trainer construction.
training_args = SFTConfig(
    dataset_text_field          = "text",

    # Optimisation: 2 samples per device x 4 accumulation steps per update.
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,
    warmup_steps                = 5,
    num_train_epochs            = 1,  # maybe look at it later (alternative: max_steps = 30)
    learning_rate               = 2e-4,
    optim                       = "adamw_8bit",
    weight_decay                = 0.01,
    lr_scheduler_type           = "linear",

    # Logging, evaluation and checkpointing share a step-based cadence;
    # eval and save both fire every 10 steps and at most 2 checkpoints are kept.
    logging_steps               = 5,
    eval_strategy               = "steps",
    eval_steps                  = 10,
    save_strategy               = "steps",
    save_steps                  = 10,
    save_total_limit            = 2,
    load_best_model_at_end      = True,

    # Reproducibility / housekeeping.
    seed                        = 42,
    report_to                   = "none",
    dataset_num_proc            = 2,
)

# Supervised fine-tuning trainer over the chat-formatted train/eval datasets.
trainer = SFTTrainer(
    model         = model,
    tokenizer     = tokenizer,
    train_dataset = hf_train,
    eval_dataset  = hf_test,
    args          = training_args,
)

In [None]:
# Restrict the training loss to the assistant replies.
# The markers must match the turn headers the tokenizer's chat template
# actually emits. Qwen3 uses ChatML-style headers ("<|im_start|>role\n"); the
# previous "<start_of_turn>user/model" markers are Gemma's and never occur in
# Qwen3-formatted text, so the response spans could not be located.
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|im_start|>user\n",
    response_part = "<|im_start|>assistant\n",
)

In [None]:
# Evaluate on the trainer's eval dataset *before* any training step so the
# untuned model's metrics are available as a reference point.
print("--- Getting Baseline Performance ---")
baseline_metrics = trainer.evaluate()
print(baseline_metrics)


In [None]:
# Set TorchDynamo's `cache_size_limit` config value to 64.
# NOTE(review): presumably raised to avoid hitting the recompilation limit
# during training -- confirm it is still needed with the current torch version.
torch._dynamo.config.cache_size_limit = 64

In [None]:
# Run fine-tuning; the value returned by `train()` is kept in `trainer_stats`.
trainer_stats = trainer.train()

In [None]:
# Display the trainer's state object as this cell's output.
trainer.state

In [None]:
import pickle
from pathlib import Path

# Location of the pickled training result. Nothing in this notebook writes the
# file, so it has to come from an earlier session.
TRAINER_STATS_PATH = Path("trainer_stats.pkl")


def load_trainer_stats(path=TRAINER_STATS_PATH):
    """Load a pickled training-result object from ``path``.

    Args:
        path: File to read; defaults to ``TRAINER_STATS_PATH``.

    Returns:
        The unpickled object, or ``None`` when the file does not exist, so a
        fresh Restart-and-Run-All does not abort on a missing artefact.
    """
    stats_path = Path(path)
    if not stats_path.is_file():
        return None
    # SECURITY: pickle.load can execute arbitrary code -- only load files that
    # were produced by this project.
    with stats_path.open("rb") as f:
        return pickle.load(f)


loaded_stats = load_trainer_stats()

if loaded_stats is None:
    print(f"{TRAINER_STATS_PATH} not found; pickle `trainer_stats` after training to create it.")
else:
    print(loaded_stats)
    print(f"Total training loss: {loaded_stats.training_loss}")
    # The value returned by `trainer.train()` carries global_step,
    # training_loss and metrics; the per-step log history lives on
    # `trainer.state`, so only print it when the loaded object has one.
    stats_log_history = getattr(loaded_stats, "log_history", None)
    if stats_log_history is not None:
        print(f"Log history: {stats_log_history}")
    else:
        print(f"Metrics: {getattr(loaded_stats, 'metrics', None)}")

In [None]:
# Use the baseline actually measured by `trainer.evaluate()` in this run rather
# than a hard-coded number (previously 3.4750258922576904), which goes stale as
# soon as the model, data split or loss masking changes.
baseline_eval_loss_at_step_0 = baseline_metrics["eval_loss"]

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Paper-style seaborn theme for the figure.
sns.set_theme(style="whitegrid", context="paper")

# Split the trainer's log into training-loss and eval-loss records.
log_history = trainer.state.log_history
train_logs = [entry for entry in log_history if 'loss' in entry]
eval_logs = [entry for entry in log_history if 'eval_loss' in entry]

train_steps = [entry['step'] for entry in train_logs]
train_losses = [entry['loss'] for entry in train_logs]

# Prepend the pre-training baseline as the step-0 validation point.
eval_steps = [0] + [entry['step'] for entry in eval_logs]
eval_losses = [baseline_eval_loss_at_step_0] + [entry['eval_loss'] for entry in eval_logs]

# Draw both curves on one explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(x=train_steps, y=train_losses, label='Training Loss', ax=ax)
sns.lineplot(
    x=eval_steps,
    y=eval_losses,
    label='Validation Loss',
    marker='o',
    linestyle='--',
    ax=ax,
)

ax.set_title('Training vs. Validation Loss')
ax.set_xlabel('Training Steps')
ax.set_ylabel('Loss')
ax.legend()

# Save a PDF copy before showing the figure.
fig.savefig('qwen32b_loss1.pdf')
plt.show()

In [None]:
# Persist the trainer's model to disk.
# NOTE(review): with a PEFT-wrapped model this presumably writes only the LoRA
# adapter files rather than full weights -- confirm.
trainer.save_model("../trained_models/qwen32")

In [None]:
# Export the merged 16-bit model into its own directory.
# The previous path was identical to the `trainer.save_model` directory
# ("../trained_models/qwen32"), so merged weights and trainer output ended up
# mixed in one folder, despite the comment promising a new folder name.
model.save_pretrained_merged(
    "../trained_models/qwen32_merged",  # separate folder for the final merged model
    tokenizer,
    save_method = "merged_16bit",
)