In [None]:
# Cell 1: Install Unsloth
import torch
# Check for GPU and CUDA availability
try:
    import pynvml
    pynvml.nvmlInit()
    gpu_name = pynvml.nvmlDeviceGetName(pynvml.nvmlDeviceGetHandleByIndex(0))
    if "A100" not in gpu_name:
        print("Warning: You are not on an A100. Unsloth works best on modern GPUs.")
except:
    pass

print("Installing Unsloth...")
# Use the installation command for the latest CUDA version in Colab
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

print("✅ Unsloth and all dependencies installed successfully.")

Installing Unsloth...
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-9rkrk2pv/unsloth_379e7f0a17064471b2f0ed9f281b2472
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-9rkrk2pv/unsloth_379e7f0a17064471b2f0ed9f281b2472
  Resolved https://github.com/unslothai/unsloth.git to commit 1efa71fbafbd3320331f18cb63d7c8d595ca38c8
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Collecting unsloth_zoo>=2025.10.2 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading unsloth_zoo-2025.10.2-py3-none-any.whl.metadata (31 kB)
Collecting tyro (from unsloth@ git+https://github.co

In [None]:
# Cell 2: Upload Your Dataset
# Opens Colab's upload widget and records the uploaded file's name in
# `dataset_path`, which the dataset-loading cell reads.
from google.colab import files

print("Please upload your fine_tuning_dataset.json file")
uploaded = files.upload()

# Cancelling the widget yields an empty result; without this guard the
# `next(iter(...))` below would fail with an unhelpful StopIteration.
if not uploaded:
    raise RuntimeError(
        "No file was uploaded. Re-run this cell and select your fine_tuning_dataset.json file."
    )

# If several files were selected, the first one is used as the dataset.
dataset_path = next(iter(uploaded))
print(f"\n✅ Successfully uploaded '{dataset_path}'")

Please upload your fine_tuning_dataset.json file


Saving fine_tuning_dataset.json to fine_tuning_dataset.json

✅ Successfully uploaded 'fine_tuning_dataset.json'


In [None]:
# Cell 3: Load Model and Tokenizer with Unsloth
# Produces `model`, `tokenizer` and `dataset` for the training cell.
# Requires `dataset_path` from the upload cell.
from unsloth import FastLanguageModel
from datasets import load_dataset

# Loading options. They stay module-level because later cells read them
# (`max_seq_length` is passed to the trainer). `dtype=None` is the original
# author's "autodetect" setting.
max_seq_length, dtype, load_in_4bit = 2048, None, True

print("[*] Loading model and tokenizer via Unsloth...")
# Unsloth's pre-quantised 4-bit build of Mistral-7B-Instruct v0.2.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
print("✅ Model and tokenizer loaded successfully.")

# Read the uploaded JSON file as the single "train" split.
dataset = load_dataset(
    "json",
    data_files=dataset_path,
    split="train",
)
print(f"✅ Dataset loaded successfully with {len(dataset)} samples.")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
[*] Loading model and tokenizer via Unsloth...
==((====))==  Unsloth 2025.10.2: Fast Mistral patching. Transformers: 4.56.2.
   \\   /|    NVIDIA L4. Num GPUs = 1. Max memory: 22.161 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/4.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/155 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

✅ Model and tokenizer loaded successfully.


Generating train split: 0 examples [00:00, ? examples/s]

✅ Dataset loaded successfully with 316 samples.


In [None]:
# Cell 4: Configure LoRA and Start Fine-Tuning (Corrected)
# Requires from earlier cells: `torch`, `FastLanguageModel`, `model`, `tokenizer`,
# `dataset`, `max_seq_length`, and a user-defined `format_sample` function.
from trl import SFTTrainer
from transformers import TrainingArguments

# `format_sample` is passed to the trainer as `formatting_func` but is not defined
# in this cell. Fail up front with an actionable message instead of a bare
# NameError in the middle of trainer construction.
if "format_sample" not in globals():
    raise NameError(
        "format_sample is not defined. Run the cell that defines your formatting "
        "function before starting training."
    )

# Attach LoRA adapters when the model does not carry them yet, so the notebook
# survives Restart & Run All instead of depending on leftover kernel state.
# NOTE(review): r=16 over these seven projection modules reproduces the
# 41,943,040 trainable parameters reported by the recorded run. lora_alpha,
# lora_dropout, bias and use_gradient_checkpointing are assumed values —
# confirm them against your original LoRA configuration.
if not hasattr(model, "peft_config"):
    model = FastLanguageModel.get_peft_model(
        model,
        r = 16,
        target_modules = [
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
        ],
        lora_alpha = 16,
        lora_dropout = 0,
        bias = "none",
        use_gradient_checkpointing = "unsloth",
        random_state = 3407,
    )

# Pick the mixed-precision mode once: bf16 where the GPU supports it, fp16 otherwise.
use_bf16 = torch.cuda.is_bf16_supported()

# Define the trainer
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    formatting_func = format_sample,
    max_seq_length = max_seq_length,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,  # effective batch size 2 x 4 = 8 per device
        warmup_steps = 5,
        num_train_epochs = 1,
        learning_rate = 2e-4,
        fp16 = not use_bf16,
        bf16 = use_bf16,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        # Disable external experiment trackers (e.g. Weights & Biases).
        report_to = "none",
    ),
)

# Start training!
print("\n[*] Starting the fine-tuning process...")
trainer.train()
print("✅ Fine-tuning complete!")

Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/316 [00:00<?, ? examples/s]


[*] Starting the fine-tuning process...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 316 | Num Epochs = 1 | Total steps = 40
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040 of 7,283,675,136 (0.58% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,1.9226
2,2.0101
3,1.754
4,1.1634
5,0.3867
6,0.1521
7,0.048
8,0.0409
9,0.0374
10,0.0336


✅ Fine-tuning complete!


In [None]:
# Cell 5: Save the Fine-Tuned Model Adapters
# Writes the trained adapter files into a local folder named `new_model_name`
# (relative to the notebook's working directory). Requires `model` and
# `tokenizer` from the loading cell.
new_model_name = "mistral-7b-cyber-analyst"
model.save_pretrained(new_model_name)
# Also store the tokenizer files next to the adapter so the folder is
# self-contained when it is copied elsewhere and reloaded later.
tokenizer.save_pretrained(new_model_name)

In [None]:
# Cell 6: Copy Adapters to Google Drive

from google.colab import drive
import os

# The name of the folder where the model was saved locally
source_path = "mistral-7b-cyber-analyst"

print("[*] Mounting Google Drive...")
drive.mount('/content/drive')

# The destination folder on your Google Drive
destination_path = "/content/drive/My Drive/sih fine tuned model"

print(f"[*] Copying trained adapters from '{source_path}' to '{destination_path}'...")
os.makedirs(destination_path, exist_ok=True)
!cp -r {source_path}/* {destination_path}/

print(f"✅ Model adapters successfully saved to your Google Drive.")

[*] Mounting Google Drive...
Mounted at /content/drive
[*] Copying trained adapters from 'mistral-7b-cyber-analyst' to '/content/drive/My Drive/sih fine tuned model'...
cp: target 'model/' is not a directory
✅ Model adapters successfully saved to your Google Drive.


In [None]:
# Cell 7: Verify the copy by listing the destination folder on Google Drive.
# Run this after the copy cell; an empty listing ("total 0") means nothing was copied.
# The path is quoted because it contains spaces.
!ls -l "/content/drive/My Drive/sih fine tuned model"

total 0


In [None]:
# Cell 8: Save Adapters to Google Drive (Robust Version)

from google.colab import drive
import os

source_path = "mistral-7b-cyber-analyst"

if not os.path.exists(source_path):
    print(f"❌ ERROR: Source folder '{source_path}' not found.")
else:
    print("[*] Mounting Google Drive...")
    drive.mount('/content/drive')

    destination_path = "/content/drive/My Drive/sih fine tuned model"
    print(f"[*] Copying trained adapters from '{source_path}' to '{destination_path}'...")
    os.makedirs(destination_path, exist_ok=True)

    # Using rsync for a more reliable copy with progress
    !rsync -avh --progress {source_path}/ "{destination_path}/"

    print(f"\n✅ Model adapters successfully saved to your Google Drive.")

[*] Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[*] Copying trained adapters from 'mistral-7b-cyber-analyst' to '/content/drive/My Drive/sih fine tuned model'...
sending incremental file list
./
README.md
          5.26K 100%    0.00kB/s    0:00:00 (xfr#1, to-chk=2/4)
adapter_config.json
          1.09K 100%    1.04MB/s    0:00:00 (xfr#2, to-chk=1/4)
adapter_model.safetensors
        167.83M 100%  350.23MB/s    0:00:00 (xfr#3, to-chk=0/4)

sent 167.88M bytes  received 76 bytes  111.92M bytes/sec
total size is 167.84M  speedup is 1.00

✅ Model adapters successfully saved to your Google Drive.
