In [None]:
%%capture
%pip install accelerate peft bitsandbytes transformers trl

In [None]:
import os
from transformers import (
    BitsAndBytesConfig,
    pipeline,
    logging,
)
from peft import LoraConfig
from trl import SFTTrainer

In [None]:
from datasets import load_dataset, Dataset
import json

# Load the training records from disk. JSON text is UTF-8, so the encoding is
# stated explicitly instead of relying on the platform's default codec.
with open("train.json", encoding="utf-8") as f:
    data = json.load(f)

# Wrap the records in a Hugging Face Dataset. This step is required: the
# `.map(...)` calls further down operate on `dataset`.
# Expected shape: a list of objects that each carry the "prompt" and
# "response" keys read by format_prompt.
dataset = Dataset.from_list(data)

# Build the single training string for one example
def format_prompt(example):
    """Join an example's prompt and response into one training string.

    Args:
        example: mapping that provides the "prompt" and "response" entries.

    Returns:
        A dict with one key, "text", holding the prompt and the response
        separated by a single space.
    """
    prompt, response = example["prompt"], example["response"]
    return {"text": f"{prompt} {response}"}

# Run format_prompt over every example; the "text" value it returns is what
# the tokenization step reads later.
dataset = dataset.map(format_prompt)


Map:   0%|          | 0/21 [00:00<?, ? examples/s]

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "NousResearch/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Request 4-bit loading through an explicit BitsAndBytesConfig. Passing
# `load_in_4bit=True` directly to `from_pretrained` is deprecated (the library
# warns about it) and is slated for removal. Only `load_in_4bit` is set, so the
# remaining quantization options keep their defaults as before.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
)


tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

In [None]:
# Mount Google Drive at /content/drive (the `google.colab` package is only
# available inside the Colab runtime).
# NOTE(review): no cell visible in this notebook reads from or writes to
# /content/drive - confirm the mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Apply LoRA for efficient fine-tuning
from peft import PeftModel

# Low-rank adapters on the attention query/value projections only.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# `model` is rebound to its own wrapped version, so executing this cell twice
# would wrap an already-wrapped model. The guard keeps the cell idempotent.
# To apply a *changed* lora_config, reload the base model first.
if not isinstance(model, PeftModel):
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, lora_config)

# Tokenize
def tokenize_function(example):
    """Tokenize the "text" entry of a batch with the notebook-level tokenizer.

    Every sequence is truncated and padded to exactly 512 tokens.

    Args:
        example: mapping whose "text" entry holds the string(s) to encode.

    Returns:
        Whatever the tokenizer call returns for that text.
    """
    encode_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 512,
    }
    texts = example["text"]
    return tokenizer(texts, **encode_options)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Data collator (mlm=False selects causal-LM style batches rather than masked-LM)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Training arguments
# NOTE(review): the recorded run ended at global_step=33, which is below
# save_steps=100, so no step checkpoint is written to ./results. Call
# `trainer.save_model(...)` after training if the adapter must be persisted.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    num_train_epochs=3,
    logging_steps=10,
    save_steps=100,
    save_total_limit=1,
    learning_rate=2e-4,
    fp16=True,
    report_to="none",
    # The PEFT wrapper hides the base model's forward signature, so Trainer
    # cannot infer the label column on its own; name it explicitly.
    label_names=["labels"],
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    # `processing_class` supersedes the deprecated `tokenizer=` argument.
    processing_class=tokenizer,
    data_collator=data_collator,
)

trainer.train()


Map:   0%|          | 0/21 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kw

Step,Training Loss
10,1.7022
20,1.4447
30,1.3674


  return fn(*args, **kwargs)


TrainOutput(global_step=33, training_loss=1.4934617533828274, metrics={'train_runtime': 107.4895, 'train_samples_per_second': 0.586, 'train_steps_per_second': 0.307, 'total_flos': 1279570602885120.0, 'train_loss': 1.4934617533828274, 'epoch': 3.0})

In [None]:
def classify_case(case_text):
    """Ask the fine-tuned model for a short legal issue-area label.

    Relies on the notebook-level `model` and `tokenizer`.

    Args:
        case_text: description of the case to classify.

    Returns:
        The text generated after the prompt (at most 10 new tokens), stripped
        of surrounding whitespace.
    """
    input_text = (
        "Classify the legal issue area of the following SCOTUS case:\n"
        f"{case_text}\n"
        "Answer with a short label like 'Criminal Law', 'Civil Procedure', 'Commerce Clause', etc.\n"
        "Answer:"
    )

    # Training leaves the model in train mode, where the LoRA dropout is still
    # active; switch to eval mode so greedy decoding is actually deterministic.
    model.eval()

    # Follow the model's own device instead of hard-coding "cuda".
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    output = model.generate(
        **inputs,
        max_new_tokens=10,
        do_sample=False,  # greedy decoding; `temperature` is only used when sampling
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens. Decode only what was
    # generated, so an "Answer:" inside case_text cannot confuse the extraction.
    generated_tokens = output[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()


In [None]:
# Sample input used for a quick manual check of classify_case.
case = "In this SCOTUS opinion, what if petitioner caught someone using narcotics?"

In [None]:
# Print the label the model predicts for the sample case.
print(classify_case(case))

Criminal Law
