### 1. Install Dependencies

In [None]:

%pip install unsloth accelerate transformers datasets bitsandbytes safetensors -q

### 2. Load Model + Tokenizer (4-bit quantized)

In [None]:
from unsloth import FastLanguageModel
from transformers import AutoTokenizer

# Base checkpoint to fine-tune; swap in any other small language model here.
model_name = "unsloth/tinyllama-1.1b-chat"

# Loader options: 4-bit quantization (keeps VRAM use low for training) and
# the maximum sequence length handed to the loader.
load_options = dict(load_in_4bit=True, max_seq_length=2048)

# from_pretrained returns the model and its tokenizer together.
model, tokenizer = FastLanguageModel.from_pretrained(model_name, **load_options)

### 3. Enable LoRA Fine-Tuning

In [None]:
# Names of the modules that receive LoRA adapters.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Wrap the loaded model for LoRA fine-tuning; r and lora_alpha are passed
# straight through as the adapter hyperparameters.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=32,
    target_modules=lora_target_modules,
)

### 4. Load + Preprocess Dataset

In [None]:
from datasets import load_dataset

# The JSON file must provide two fields per record: instruction and output.
data_path = "data.json"
dataset = load_dataset("json", data_files=data_path)

# Render each record with the instruction-tuning template.
def formatting(example):
    """Build the training prompt for one record.

    Args:
        example: Mapping with an ``instruction`` and an ``output`` entry.

    Returns:
        A dict with a single ``text`` key holding the instruction and the
        response laid out under their ``###`` headers.
    """
    sections = (
        "### Instruction:",
        f"{example['instruction']}",
        "",
        "### Response:",
        f"{example['output']}",
    )
    return {"text": "\n".join(sections)}

# Add the rendered prompt as a "text" column on every record.
dataset = dataset.map(formatting)


def tokenize(example):
    """Tokenize one formatted record, terminated with the EOS token.

    Args:
        example: Mapping with a ``text`` entry (the rendered prompt).

    Returns:
        The tokenizer output for the text, truncated to 2048 tokens.
    """
    # Without a trailing EOS the model is never shown where a response ends
    # and tends to run on past the answer at inference time.
    # NOTE(review): assumes the tokenizer does not append EOS on its own.
    eos = tokenizer.eos_token or ""
    text = example["text"]
    if eos and not text.endswith(eos):
        text += eos
    return tokenizer(
        text,
        truncation=True,
        max_length=2048,
    )


tokenized = dataset["train"].map(tokenize)

### 5. Training Configuration

In [None]:
from transformers import TrainingArguments, Trainer
import torch

# Choose mixed precision from what the hardware supports: bf16 only on CUDA
# devices that report bf16 support, fp16 on other CUDA devices, and neither
# when no GPU is present (enabling either flag there is rejected).
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
use_fp16 = torch.cuda.is_available() and not use_bf16

training_args = TrainingArguments(
    output_dir="./outputs",
    per_device_train_batch_size=2,
    # 2 samples x 8 accumulation steps = 16 samples per optimizer step per device.
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    warmup_ratio=0.03,
    num_train_epochs=2,
    logging_steps=10,
    bf16=use_bf16,
    fp16=use_fp16,
)

### 6. Train the Model

In [None]:
# Imported here so this cell carries everything it needs.
from transformers import DataCollatorForLanguageModeling

# Batch padding needs a pad token; fall back to EOS if the tokenizer has none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    # The tokenized rows have different lengths and carry no labels. This
    # collator pads each batch and, with mlm=False, derives the causal-LM
    # labels from input_ids so the model can return a loss.
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
)

trainer.train()

### 7. Save Full Model + Tokenizer

In [None]:
# `model` is a LoRA-wrapped model, so a plain save_pretrained() would write
# only the adapter weights. Merge the adapter into the base weights and
# export a standalone 16-bit checkpoint that loads without PEFT.
model.save_pretrained_merged("finetuned-slm", tokenizer, save_method="merged_16bit")
tokenizer.save_pretrained("finetuned-slm")

### 8. Export Only the LoRA Weights (Optional)

In [None]:
# Write the model's save_pretrained() output into its own directory.
adapter_dir = "lora-only"
model.save_pretrained(adapter_dir)

### 9. Convert to Safetensors Format

In [None]:
import torch
from safetensors.torch import save_file
import os

# Convert the LoRA adapter to safetensors, but only if a legacy .bin exists.
# Recent PEFT releases already write adapter_model.safetensors, in which case
# there is no .bin file and nothing to convert.
bin_path = "lora-only/adapter_model.bin"
safetensors_path = "lora-only/adapter_model.safetensors"

if os.path.exists(bin_path):
    # Load on CPU so no GPU is needed; weights_only=True restricts
    # unpickling to tensor data.
    adapter_dict = torch.load(bin_path, map_location="cpu", weights_only=True)
    # save_file requires contiguous tensors.
    adapter_dict = {key: value.contiguous() for key, value in adapter_dict.items()}
    save_file(adapter_dict, safetensors_path)

    # Remove the old .bin file only after the safetensors copy was written.
    os.remove(bin_path)

### 10. Run Inference to Test Fine-Tuned Model

In [None]:
from transformers import AutoModelForCausalLM, pipeline

# Reload the tokenizer and the model from the "finetuned-slm" directory.
ft_tokenizer = AutoTokenizer.from_pretrained("finetuned-slm")
ft_model = AutoModelForCausalLM.from_pretrained(
    "finetuned-slm",
    device_map="auto",
    torch_dtype="auto",
)

# Text-generation pipeline capped at 150 newly generated tokens per call.
pipe = pipeline(
    task="text-generation",
    model=ft_model,
    tokenizer=ft_tokenizer,
    max_new_tokens=150,
)

# Same template as the training data, with the response left open.
prompt = (
    "### Instruction:\n"
    "How does a smart meter detect abnormal spikes in household energy use?\n"
    "\n"
    "### Response:\n"
)

result = pipe(prompt)
first_generation = result[0]
print(first_generation["generated_text"])


### 11. Optional – Batch Evaluation on Multiple Prompts

In [None]:
# Questions to run through the fine-tuned model in one pass.
test_prompts = [
    "Explain load imbalance detection in smart meters.",
    "How do smart meters handle voltage fluctuations?",
    "Describe methods to detect reverse energy flow.",
]

for p in test_prompts:
    # Wrap the question in the template used for the training data.
    formatted = f"### Instruction:\n{p}\n\n### Response:\n"
    # return_full_text=False drops the echoed prompt, so "Answer:" shows only
    # the generated completion instead of repeating the whole template.
    out = pipe(formatted, return_full_text=False)[0]["generated_text"]
    print("Prompt:", p)
    print("Answer:", out)
    print("-" * 60)