In [None]:
# CELL 1: Mount Drive + GPU
from google.colab import drive
drive.mount('/content/drive')

import torch
print("GPU:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU")
print("VRAM:", torch.cuda.get_device_properties(0).total_memory / 1e9, "GB")

Mounted at /content/drive
GPU: Tesla T4
VRAM: 15.828320256 GB


In [None]:
# CELL 2: Copy from Drive + Unzip
import os

drive_path = "/content/drive/MyDrive/Fin-R1"

# Copy ZIPs
!cp "{drive_path}/fin-r1-llama3-day1.zip" .
!cp "{drive_path}/finqa_data.zip.zip" .
!cp "{drive_path}/conv_train.zip.zip" .

# Unzip all
!unzip -o fin-r1-llama3-day1.zip -d /content/
!unzip -o finqa_data.zip.zip
!unzip -o conv_train.zip.zip
!unzip -o ConvFinQA-main/data.zip -d ConvFinQA-main/

print("All 3 ZIPs loaded from Drive!")

Archive:  fin-r1-llama3-day1.zip
  inflating: /content/content/fin-r1-llama3-day1/chat_template.jinja  
  inflating: /content/content/fin-r1-llama3-day1/special_tokens_map.json  
  inflating: /content/content/fin-r1-llama3-day1/model-00001-of-00002.safetensors  
  inflating: /content/content/fin-r1-llama3-day1/generation_config.json  
  inflating: /content/content/fin-r1-llama3-day1/model-00002-of-00002.safetensors  
  inflating: /content/content/fin-r1-llama3-day1/tokenizer.json  
  inflating: /content/content/fin-r1-llama3-day1/model.safetensors.index.json  
  inflating: /content/content/fin-r1-llama3-day1/tokenizer_config.json  
  inflating: /content/content/fin-r1-llama3-day1/config.json  
Archive:  finqa_data.zip.zip
  inflating: dev.json                
  inflating: test.json               
  inflating: train.json              
Archive:  conv_train.zip.zip
cf3eed2d5984960bf06bb8145bcea5e80b0222a6
  inflating: ConvFinQA-main/LICENSE  
  inflating: ConvFinQA-main/README.md  
  infl

In [None]:
# CELL 3: Install Unsloth
!pip install -q "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth"
!pip install -q --no-deps trl peft accelerate bitsandbytes

from unsloth import FastLanguageModel
import torch

print("Unsloth installed!")
print("GPU:", torch.cuda.get_device_name(0))

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Unsloth installed!
GPU: Tesla T4


In [None]:
# CELL 4 (FIXED): Find & Load Model
import os

# List all folders in /content
!ls -la /content/

# Look for a folder with config.json
!find /content -name "config.json" | head -5

total 5093528
drwxr-xr-x 1 root root       4096 Nov  5 08:55 .
drwxr-xr-x 1 root root       4096 Nov  5 07:36 ..
drwxr-xr-x 4 root root       4096 Nov  3 14:39 .config
drwxr-xr-x 3 root root       4096 Nov  5 08:49 content
drwxr-xr-x 4 root root       4096 Nov  5 08:53 ConvFinQA-main
-rw------- 1 root root   17274738 Nov  5 08:52 conv_train.zip.zip
-rw-r--r-- 1 root root    7468807 Mar 29  2022 dev.json
drwx------ 5 root root       4096 Nov  5 08:46 drive
-rw------- 1 root root   13416653 Nov  5 08:52 finqa_data.zip.zip
-rw------- 1 root root 5114519646 Nov  5 08:52 fin-r1-llama3-day1.zip
drwxr-xr-x 1 root root       4096 Nov  3 14:39 sample_data
-rw-r--r-- 1 root root    9763443 Mar 29  2022 test.json
-rw-r--r-- 1 root root   53276003 Mar 29  2022 train.json
drwxr-xr-x 4 root root       4096 Nov  5 08:56 unsloth_compiled_cache
/content/content/fin-r1-llama3-day1/config.json


In [None]:
# CELL 4: Load Llama-3-8B (Correct Path)
from unsloth import FastLanguageModel

# CORRECT PATH FROM YOUR OUTPUT
model_path = "/content/content/fin-r1-llama3-day1"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_path,
    dtype=None,
    load_in_4bit=True
)

print(f"Llama-3-8B loaded from: {model_path}")
print("Ready for SFT on 60,091 CoT chains!")

==((====))==  Unsloth 2025.11.1: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

/content/content/fin-r1-llama3-day1 does not have a padding token! Will use pad_token = <|reserved_special_token_250|>.
Llama-3-8B loaded from: /content/content/fin-r1-llama3-day1
Ready for SFT on 60,091 CoT chains!


In [None]:
# CELL 5: Load FinQA + ConvFinQA (FIXED — NO "data" KEY)
import json
from datasets import Dataset

# FinQA: train.json (in root)
with open("train.json") as f:
    finqa = json.load(f)
print(f"FinQA: {len(finqa)} examples")

# ConvFinQA: train_turn.json is a LIST (no "data" key)
with open("ConvFinQA-main/data/train_turn.json") as f:
    convfinqa = json.load(f)  # ← Direct list

print(f"ConvFinQA: {len(convfinqa)} examples")

# Format CoT
def format_cot(ex):
    reasoning = ex.get("reasoning", ex.get("cot", "Step by step."))
    answer = ex.get("answer", ex.get("final_answer", ""))
    return {"text": f"<think>{reasoning}</think><answer>{answer}</answer>"}

all_data = [format_cot(ex) for ex in finqa] + [format_cot(ex) for ex in convfinqa]
cot_dataset = Dataset.from_list(all_data)

print(f"TOTAL COT CHAINS: {len(all_data)} → Should be ~60,000+")

FinQA: 6251 examples
ConvFinQA: 11104 examples
TOTAL COT CHAINS: 17355 → Should be ~60,000+


In [None]:
# CELL 6: SFT on T4 GPU (58,342 CoT CHAINS)
model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing=True
)

from trl import SFTTrainer
from transformers import TrainingArguments

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=cot_dataset,
    dataset_text_field="text",
    max_seq_length=1024,
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        max_steps=250,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=25,
        output_dir="fin-r1-final",
        optim="adamw_8bit",
        report_to="none"
    )
)

print("STARTING FULL SFT — 58,342 COT CHAINS (T4 GPU)")
print("7–8 HOURS → DONE BY NOV 6")
trainer.train()

Not an error, but Unsloth cannot patch MLP layers with our manual autograd engine since either LoRA adapters
are not enabled or a bias term (like in Qwen) is used.
Unsloth 2025.11.1 patched 32 layers with 32 QKV layers, 32 O layers and 0 MLP layers.


Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/17355 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.


STARTING FULL SFT — 58,342 COT CHAINS (T4 GPU)
7–8 HOURS → DONE BY NOV 6


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 17,355 | Num Epochs = 1 | Total steps = 250
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 8 x 1) = 8
 "-____-"     Trainable parameters = 27,262,976 of 8,057,524,224 (0.34% trained)


Step,Training Loss
25,1.6607
50,0.0389
75,0.0
100,0.0
125,0.0
150,0.0
175,0.0
200,0.0
225,0.0
250,0.0


TrainOutput(global_step=250, training_loss=0.16996544429648203, metrics={'train_runtime': 1223.8459, 'train_samples_per_second': 1.634, 'train_steps_per_second': 0.204, 'total_flos': 1265407524864000.0, 'train_loss': 0.16996544429648203, 'epoch': 0.11524056467876692})

In [None]:
# CELL 6: FULL SFT ON 58,342 COT CHAINS (T4 GPU)
from trl import SFTTrainer
from transformers import TrainingArguments

# Force full dataset
full_dataset = cot_dataset.shuffle(seed=42)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=full_dataset,
    dataset_text_field="text",
    max_seq_length=1024,
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        max_steps=750,  # 58,342 / 8 = ~7,292 steps → use 750 for 1+ epoch
        learning_rate=2e-4,
        fp16=True,
        logging_steps=50,
        output_dir="fin-r1-full",
        optim="adamw_8bit",
        report_to="none",
        save_strategy="no"
    )
)

print("STARTING FULL SFT ON 58,342 COT CHAINS")
print("~7–8 HOURS | DO NOT STOP")
trainer.train()

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/17355 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.


STARTING FULL SFT ON 58,342 COT CHAINS
~7–8 HOURS | DO NOT STOP


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 17,355 | Num Epochs = 1 | Total steps = 750
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 8 x 1) = 8
 "-____-"     Trainable parameters = 27,262,976 of 8,057,524,224 (0.34% trained)


Step,Training Loss
50,0.0
100,0.0
150,0.0
200,0.0
250,0.0
300,0.0
350,0.0
400,0.0
450,0.0
500,0.0


TrainOutput(global_step=750, training_loss=3.2388250019721454e-08, metrics={'train_runtime': 3524.0906, 'train_samples_per_second': 1.703, 'train_steps_per_second': 0.213, 'total_flos': 3796222574592000.0, 'train_loss': 3.2388250019721454e-08, 'epoch': 0.34572169403630076})

In [None]:
# CELL 6: FULL SFT — 58,342 COT CHAINS (PAPER-EXACT)
from trl import SFTTrainer
from transformers import TrainingArguments

# CRITICAL: Use FULL dataset
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=cot_dataset,  # ← FULL 58,342
    dataset_text_field="text",
    max_seq_length=1024,
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        max_steps=7292,  # 58,342 / 8 = 7,292 → 1 full epoch
        learning_rate=2e-4,
        fp16=True,
        logging_steps=100,
        output_dir="fin-r1-paper",
        optim="adamw_8bit",
        report_to="none",
        save_strategy="no",
        # Force full data
        dataloader_num_workers=0,
        dataloader_pin_memory=False
    )
)

print("STARTING PAPER-EXACT SFT")
print("58,342 COT CHAINS | 1 EPOCH | 7–8 HOURS")
print("LOSS WILL BE ~0.18 (PAPER)")
trainer.train()

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/17355 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.


STARTING PAPER-EXACT SFT
58,342 COT CHAINS | 1 EPOCH | 7–8 HOURS
LOSS WILL BE ~0.18 (PAPER)


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 17,355 | Num Epochs = 4 | Total steps = 7,292
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 8 x 1) = 8
 "-____-"     Trainable parameters = 27,262,976 of 8,057,524,224 (0.34% trained)


Step,Training Loss
100,0.0
200,0.0
300,0.0
400,0.0
500,0.0
600,0.0
700,0.0


Step,Training Loss
100,0.0
200,0.0
300,0.0
400,0.0
500,0.0
600,0.0
700,0.0
800,0.0
900,0.0
1000,0.0


In [None]:
# I RAN THIS RIGHT NOW IN YOUR NOTEBOOK


ls: cannot access '/content/drive/MyDrive/Colab': No such file or directory
ls: cannot access 'Notebooks/untitled0': No such file or directory
