# Step 1: Mounting Google Drive and Importing Libraries


In [None]:
from google.colab import drive
drive.mount("/content/drive")
%cd /content/drive/MyDrive/grpo-verified-reasoner
!ls

In [None]:
# Install UV (Faster pip)
!pip install --upgrade -qqq uv

In [None]:
!pip install -q unsloth

In [2]:
import os
import torch
import safetensors.torch
from unsloth import FastLanguageModel

ðŸ¦¥ Unsloth: Will patch your computer to enable 2x faster free finetuning.
ðŸ¦¥ Unsloth Zoo will now patch everything to make training faster!


# Step 2: Loading the Base Model and the LoRA Adapter

In [3]:
BASE_MODEL_PATH = "unsloth/Qwen3-4B-Base"
CHECKPOINT_PATH = "outputs/checkpoint-188"
MERGED_PATH = "models/qwen3-4b-grpo-final-2-merged"

In [4]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = CHECKPOINT_PATH,
    max_seq_length = 3072,
    load_in_4bit = False,    # Must be False for merging
    dtype = torch.float16,   # Standard 16-bit precision
)

==((====))==  Unsloth 2026.1.2: Fast Qwen3 patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Unsloth 2026.1.2 patched 36 layers with 36 QKV layers, 36 O layers and 36 MLP layers.


In [5]:
# This physically modifies the weights: W_new = W_base + (A * B)
model.merge_and_unload()

Qwen3ForCausalLM(
  (model): Qwen3Model(
    (embed_tokens): Embedding(151936, 2560, padding_idx=151654)
    (layers): ModuleList(
      (0-35): 36 x Qwen3DecoderLayer(
        (self_attn): Qwen3Attention(
          (q_proj): Linear(in_features=2560, out_features=4096, bias=False)
          (k_proj): Linear(in_features=2560, out_features=1024, bias=False)
          (v_proj): Linear(in_features=2560, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=2560, bias=False)
          (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
          (k_norm): Qwen3RMSNorm((128,), eps=1e-06)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): Qwen3MLP(
          (gate_proj): Linear(in_features=2560, out_features=9728, bias=False)
          (up_proj): Linear(in_features=2560, out_features=9728, bias=False)
          (down_proj): Linear(in_features=9728, out_features=2560, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layerno

In [6]:
# Saves as a standard model (no adapters folder, just model.safetensors)
model.save_pretrained_merged(
    MERGED_PATH,
    tokenizer,
    save_method = "merged_16bit",
)

Found HuggingFace hub cache directory: /root/.cache/huggingface/hub
Checking cache directory for required files...


Unsloth: Copying 2 files from cache to `models/qwen3-4b-grpo-final-2-merged`: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:22<00:00, 11.33s/it]


Successfully copied all 2 files from cache to `models/qwen3-4b-grpo-final-2-merged`
Checking cache directory for required files...
Cache check failed: tokenizer.model not found in local cache.
Not all required files found in cache. Will proceed with downloading.


Unsloth: Preparing safetensor model files: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:00<00:00, 20410.24it/s]
Unsloth: Merging weights into 16bit: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:46<00:00, 23.37s/it]


Unsloth: Merge process complete. Saved to `/content/drive/MyDrive/grpo-verified-reasoner/models/qwen3-4b-grpo-final-2-merged`


In [9]:
sft = safetensors.torch.load_file("models/qwen3-4b-sft/adapter_model.safetensors")
grpo = safetensors.torch.load_file("outputs/checkpoint-188/adapter_model.safetensors")

# Pick any key
k = list(sft.keys())[0]
torch.norm(sft[k] - grpo[k])

tensor(0.0285)

In [11]:
sum(torch.norm(sft[k] - grpo[k]) for k in sft.keys()) / sum(torch.norm(sft[k]) for k in sft.keys())

tensor(0.0100)