In [None]:
from datasets import load_dataset, concatenate_datasets
import os

all_datasets = []

for file in os.listdir(dataset_path):
    if file.endswith(".json"):
        file_path = os.path.join(dataset_path, file)
        print("Loading:", file)

        ds = load_dataset(
            "json",
            data_files=file_path,
            split="train"
        )

        # Remove extra columns like "id" if present
        extra_cols = [c for c in ds.column_names if c not in ["question", "answer"]]
        if extra_cols:
            ds = ds.remove_columns(extra_cols)

        all_datasets.append(ds)

# Merge all datasets
dataset = concatenate_datasets(all_datasets)

print("✅ Total samples:", len(dataset))
print(dataset[0])


Loading: constitution_qa.json


Generating train split: 0 examples [00:00, ? examples/s]

Loading: crpc_qa.json


Generating train split: 0 examples [00:00, ? examples/s]

Loading: ipc_qa.json


Generating train split: 0 examples [00:00, ? examples/s]

✅ Total samples: 14543
{'question': 'What is India according to the Union and its Territory?', 'answer': 'India, that is Bharat, shall be a Union of States.'}


In [None]:
# =========================================================
# STEP 0: GPU CHECK (Run on Google Colab with GPU enabled)
# Runtime → Change runtime type → GPU
# =========================================================

import torch
assert torch.cuda.is_available(), "❌ GPU not enabled"
print("✅ GPU:", torch.cuda.get_device_name(0))


# =========================================================
# STEP 1: INSTALL DEPENDENCIES (REAL TRITON)
# =========================================================

!pip uninstall -y torch torchvision torchaudio transformers trl unsloth bitsandbytes -q
!pip install -U pip -q

# PyTorch with CUDA
!pip install torch --index-url https://download.pytorch.org/whl/cu121 -q

# Core libraries
!pip install transformers trl datasets accelerate peft bitsandbytes kagglehub -q

# Triton (Linux only → works on Colab)
!pip install triton -q

# Unsloth
!pip install unsloth -q

print("✅ Libraries installed")


# =========================================================
# STEP 2: RESTART RUNTIME (MANDATORY IN COLAB)
# =========================================================

import os, sys
print("🔄 Restarting runtime now...")
os._exit(0)


✅ GPU: Tesla T4
[0m[31mERROR: Operation cancelled by user[0m[31m
[0m[31mERROR: Operation cancelled by user[0m[31m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
fastai 2.8.6 requires torch<2.10,>=1.10, but you have torch 2.10.0 which is incompatible.[0m[31m
[0m

In [None]:
# =========================================================
# STEP 3: VERIFY INSTALLATION
# =========================================================

import torch, triton
from unsloth import FastLanguageModel

print("Torch:", torch.__version__)
print("CUDA:", torch.cuda.is_available())
print("Triton:", triton.__version__)
print("✅ Unsloth imported successfully")


# =========================================================
# STEP 4: DOWNLOAD KAGGLE DATASET
# =========================================================

import kagglehub

dataset_path = kagglehub.dataset_download(
    "akshatgupta7/llm-fine-tuning-dataset-of-indian-legal-texts"
)

print("📂 Dataset path:", dataset_path)


# =========================================================
# STEP 5: LOAD DATASET (FIXED – NO SCHEMA ERROR)
# =========================================================

from datasets import load_dataset, concatenate_datasets
import os

all_datasets = []

for file in os.listdir(dataset_path):
    if file.endswith(".json"):
        file_path = os.path.join(dataset_path, file)
        print(f"Loading {file}")

        ds = load_dataset(
            "json",
            data_files=file_path,
            split="train"
        )

        # Keep only required columns
        keep_cols = ["question", "answer"]
        remove_cols = [c for c in ds.column_names if c not in keep_cols]
        if remove_cols:
            ds = ds.remove_columns(remove_cols)

        all_datasets.append(ds)

dataset = concatenate_datasets(all_datasets)

print("Total samples:", len(dataset))
print(dataset[0])


# =========================================================
# STEP 6: FORMAT PROMPTS (EDITED & SAFE)
# =========================================================

def format_prompt(example):
    instruction = example.get("question", "").strip()
    output = example.get("answer", "").strip()

    return {
        "text": f"""### Instruction:
{instruction}

### Response:
{output}"""
    }

dataset = dataset.map(
    format_prompt,
    remove_columns=dataset.column_names
)

print(dataset[0]["text"])


# =========================================================
# STEP 7: LOAD MODEL (4-BIT)
# =========================================================

MODEL_NAME = "unsloth/llama-3-8b-bnb-4bit"
MAX_SEQ_LENGTH = 2048

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = MODEL_NAME,
    max_seq_length = MAX_SEQ_LENGTH,
    dtype = torch.float16,
    load_in_4bit = True,
)

print("✅ Model loaded")


# =========================================================
# STEP 8: APPLY LORA
# =========================================================

model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = [
        "q_proj","k_proj","v_proj","o_proj",
        "gate_proj","up_proj","down_proj"
    ],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = True,
    random_state = 42,
)

print("✅ LoRA applied")


# =========================================================
# STEP 9: TRAIN
# =========================================================

from trl import SFTTrainer
from transformers import TrainingArguments

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = MAX_SEQ_LENGTH,
    args = TrainingArguments(
        output_dir = "./indian_legal_llama",
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        num_train_epochs = 3,
        learning_rate = 2e-4,
        fp16 = True,
        logging_steps = 10,
        save_steps = 500,
        save_total_limit = 2,
        optim = "adamw_8bit",
        report_to = "none",
    ),
)

trainer.train()


# =========================================================
# STEP 10: SAVE MODEL
# =========================================================

trainer.save_model("./indian_legal_llama")
tokenizer.save_pretrained("./indian_legal_llama")

print("🎉 FINE-TUNING COMPLETE!")


Torch: 2.10.0+cu128
CUDA: True
Triton: 3.6.0
✅ Unsloth imported successfully
Using Colab cache for faster access to the 'llm-fine-tuning-dataset-of-indian-legal-texts' dataset.
📂 Dataset path: /kaggle/input/llm-fine-tuning-dataset-of-indian-legal-texts
Loading constitution_qa.json


Generating train split: 0 examples [00:00, ? examples/s]

Loading ipc_qa.json


Generating train split: 0 examples [00:00, ? examples/s]

Loading crpc_qa.json


Generating train split: 0 examples [00:00, ? examples/s]

Total samples: 14543
{'question': 'What is India according to the Union and its Territory?', 'answer': 'India, that is Bharat, shall be a Union of States.'}


Map:   0%|          | 0/14543 [00:00<?, ? examples/s]

### Instruction:
What is India according to the Union and its Territory?

### Response:
India, that is Bharat, shall be a Union of States.
==((====))==  Unsloth 2026.1.4: Fast Llama patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.10.0+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.6.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.34. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/198 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

✅ Model loaded


Unsloth 2026.1.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


✅ LoRA applied


Unsloth: Tokenizing ["text"] (num_proc=4):   0%|          | 0/14543 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 14,543 | Num Epochs = 3 | Total steps = 5,454
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Step,Training Loss
10,2.0891
20,1.4841
30,1.4985
40,1.4586
50,1.4174
60,1.3485
70,1.3882
80,1.3885
90,1.4139
100,1.3693


Step,Training Loss
10,2.0891
20,1.4841
30,1.4985
40,1.4586
50,1.4174
60,1.3485
70,1.3882
80,1.3885
90,1.4139
100,1.3693


🎉 FINE-TUNING COMPLETE!


In [6]:
import torch
from unsloth import FastLanguageModel

MODEL_DIR = "./indian_legal_llama"   # your saved model
MAX_SEQ_LENGTH = 2048

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = MODEL_DIR,
    max_seq_length = MAX_SEQ_LENGTH,
    dtype = torch.float16,
    load_in_4bit = True,
)

FastLanguageModel.for_inference(model)

print("✅ Model loaded for inference")


==((====))==  Unsloth 2026.1.4: Fast Llama patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.10.0+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.6.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.34. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
✅ Model loaded for inference


In [8]:
def ask_legal_bot(question, max_new_tokens=256):
    prompt = f"""### Instruction:
{question}

### Response:
"""

    inputs = tokenizer(
        prompt,
        return_tensors="pt"
    ).to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.3,
            top_p=0.9,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("### Response:")[-1].strip()


In [9]:
print(ask_legal_bot(
    "What is the difference between cognizable and non-cognizable offences under CrPC?"
))


Cognizable offences are serious in nature, whereas non-cognizable offences are less serious. In a cognizable offence, the police can make an arrest without a warrant, while in a non-cognizable offence, the police can only make an arrest with a warrant. Additionally, in a cognizable offence, the police can investigate the case without the order of a magistrate, while in a non-cognizable offence, the police can only investigate the case with the order of a magistrate. Under CrPC, 42, 43 and 44 deal with cognizable offences, while 45, 46 and 47 deal with non-cognizable offences. However, under CrPC, 41 deals with both cognizable and non-cognizable offences. This means that the police can make an arrest in both cases, regardless of whether it's a cognizable or non-cognizable offence. The text does not provide specific details on what these sections under CrPC entail. It's possible that these sections relate to the procedure for making an arrest or the rights of arrested persons. However, t