# 🧠 Environmental LLM Fine-tuning Demo

This notebook demonstrates how to fine-tune a language model for environmental domain tasks using LoRA/QLoRA.

## Contents
1. Setup and Installation
2. Data Preparation
3. Model Loading
4. LoRA Configuration
5. Training
6. Inference

## 1. Setup and Installation

In [None]:
# Install dependencies (uncomment if needed; %pip installs into the running kernel's environment)
# %pip install torch transformers peft accelerate bitsandbytes datasets trl

In [None]:
import sys
sys.path.insert(0, '..')

import torch
import json
from pathlib import Path

# Report the PyTorch build and, when a CUDA device is visible, its name and total memory
print(f"PyTorch version: {torch.__version__}")
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")
if cuda_available:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    total_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"Memory: {total_memory_gb:.1f} GB")

## 2. Data Preparation

In [None]:
# Read the sample Q&A records from the repository's raw-data folder
data_path = Path('../data/raw/climate_qa_comprehensive.json')
data = json.loads(data_path.read_text(encoding='utf-8'))

# Show how many records were read, then pretty-print the first one
print(f"Loaded {len(data)} examples")
print("\nSample example:")
first_example_json = json.dumps(data[0], indent=2, ensure_ascii=False)
print(first_example_json)

In [None]:
from src.dataset import print_dataset_stats

# Print summary statistics for the loaded examples.
# NOTE(review): which statistics are reported is decided inside
# src/dataset.py and is not visible from this notebook.
print_dataset_stats(data)

In [None]:
from src.config import format_instruction

# Render the first record through the 'alpaca' prompt template.
# 'input' is optional in a record, so it defaults to an empty string.
example = data[0]
formatted = format_instruction(
    template_name='alpaca',
    instruction=example['instruction'],
    input_text=example.get('input', ''),
    output=example['output'],
)

print("Formatted example:", formatted, sep="\n")

## 3. Model Loading

In [None]:
from src.model_utils import load_model, load_tokenizer

# Small model so the demo stays manageable
MODEL_NAME = "microsoft/phi-2"  # 2.7B parameters

# Tokenizer for MODEL_NAME; report how many entries it holds
tokenizer = load_tokenizer(MODEL_NAME)
print("Tokenizer vocab size:", len(tokenizer))

In [None]:
# Toggle: 4-bit QLoRA loading (intended for GPUs with about 8GB VRAM)
# versus loading the model in float16.
USE_QLORA = True

if not USE_QLORA:
    # Half-precision load without quantization
    model = load_model(model_name=MODEL_NAME, torch_dtype="float16")
else:
    from src.config import QuantizationConfig

    # 4-bit NF4 quantization with double quantization and float16 compute
    quant_config = QuantizationConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype="float16",
    )
    model = load_model(model_name=MODEL_NAME, quantization_config=quant_config)

## 4. LoRA Configuration

In [None]:
from src.config import LoRAConfig
from src.model_utils import apply_lora

# LoRA configuration
lora_config = LoRAConfig(
    r=16,              # Rank
    lora_alpha=32,     # Scaling factor
    lora_dropout=0.05, # Dropout
    target_modules=[   # Attention projections to adapt
        "q_proj",
        "k_proj",
        "v_proj",
        # Output projection: Llama/Mistral-style models call it `o_proj`,
        # while the native Transformers Phi implementation (used for
        # microsoft/phi-2) calls it `dense`. Listing only `o_proj` would
        # leave Phi's output projection without an adapter.
        # NOTE(review): assumes apply_lora forwards these names to PEFT,
        # which skips names that do not occur in the model - confirm.
        "o_proj",
        "dense",
    ],
)

# Wrap the model with LoRA adapters and report how many parameters will train
model = apply_lora(model, lora_config)
model.print_trainable_parameters()

## 5. Training

In [None]:
from src.dataset import create_hf_dataset

# Build the train/eval splits from the raw records (90% train).
# max_length is kept at 512 to keep the demo short.
datasets = create_hf_dataset(
    data=data,
    tokenizer=tokenizer,
    template_name="alpaca",
    max_length=512,
    train_split=0.9,
)

# Report the size of each split
for split_label, split_key in (("Train", "train"), ("Eval", "eval")):
    print(f"{split_label} samples: {len(datasets[split_key])}")

In [None]:
from src.config import TrainingConfig
from src.trainer import EnvironmentalTrainer

# Demo-sized run: a single epoch with small batches
training_config = TrainingConfig(
    # Where checkpoints go and how long to train
    output_dir="../models/demo_output",
    num_train_epochs=1,
    # Optimisation settings
    learning_rate=2e-4,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    fp16=True,
    # Logging / checkpoint cadence
    logging_steps=5,
    save_steps=50,
    report_to="tensorboard",
)

# Bundle the model, tokenizer, data splits and config into the project trainer
trainer = EnvironmentalTrainer(
    model=model,
    tokenizer=tokenizer,
    training_config=training_config,
    train_dataset=datasets['train'],
    eval_dataset=datasets['eval'],
)

In [None]:
# Train (uncomment to run)
# result = trainer.train()
# print(f"Training loss: {result['training_loss']:.4f}")

## 6. Inference

In [None]:
# Smoke-test generation with the current `model`. The LoRA adapters are
# already attached at this point; they are untrained unless the training
# cell above was run.
model.eval()

def generate_response(instruction, max_tokens=128):
    """Generate a sampled answer for a single instruction.

    Args:
        instruction: Question or task text. It is wrapped in the 'alpaca'
            prompt template before being passed to the model.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The decoded completion only (prompt tokens are sliced off), with
        special tokens skipped and surrounding whitespace stripped.
    """
    # Re-assert eval mode on every call: if training was (re)run after this
    # cell was executed, the model would otherwise still be in train mode
    # and dropout would perturb the generated text.
    model.eval()

    prompt = format_instruction(
        instruction=instruction,
        template_name="alpaca"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs['input_ids'].shape[1]

    # Some causal-LM tokenizers define no pad token; fall back to EOS so
    # generate() always receives an explicit, valid pad id.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=0.7,
            do_sample=True,
            pad_token_id=pad_token_id,
        )

    # Keep only the tokens produced after the prompt
    completion_ids = outputs[0][prompt_length:]
    response = tokenizer.decode(completion_ids, skip_special_tokens=True)
    return response.strip()

In [None]:
# Prompts for a quick qualitative look at the model's answers
test_questions = [
    "What is climate change?",
    "How do electric vehicles help the environment?",
    "What is ESG investing?",
]

divider = "-" * 50
for question in test_questions:
    print(f"Q: {question}")
    response = generate_response(question)
    # A blank line follows each answer, then a divider
    print(f"A: {response}", end="\n\n")
    print(divider)

## Next Steps

1. **Full Training**: Run complete training with more epochs
2. **Evaluation**: Test on benchmark questions
3. **Merge Weights**: Merge LoRA into base model
4. **Deploy**: Export to GGUF or serve via API

See `scripts/train.py` for full training workflow.