#### 00. Install dependencies

In [14]:
%%capture

# Core training libraries
!pip install -q \
    transformers==4.44.2 \
    datasets==2.20.0 \
    tokenizers==0.19.1 \
    accelerate==0.34.2 \
    peft==0.12.0 \
    trl==0.9.6 \
    bitsandbytes==0.43.1 \
    evaluate==0.4.2

# Utilities
!pip install -q \
    numpy \
    pandas \
    scikit-learn \
    rich \
    pyyaml \
    python-dotenv \
    tqdm

# Evaluation (requires pydantic v2)
!pip install -q --upgrade pydantic
!pip install -q google-genai rouge-score

print(" Installation complete!")
print(" All dependencies compatible (pydantic v2 + google-genai)")


## 1. Setting Up Environment Variables (Secrets)

In [15]:

# Optional: create a .env file holding the secrets (GOOGLE_API_KEY, HF_TOKEN).
# The writer below is left commented out, so running this cell as-is only
# imports `os` and does NOT create any file. Presumably it is disabled to avoid
# committing real keys with the notebook; to use it, uncomment the block,
# replace the <api_key_here> placeholders, run it once, and remove the values
# again before sharing the notebook.
import os

# Template for writing the .env file (disabled by default)
# with open('.env', 'w') as f:
#     # Add the secrets if needed
#     f.write('GOOGLE_API_KEY=<api_key_here>\n')
#     f.write('HF_TOKEN=<api_key_here>\n')

# print(" .env file created")

In [16]:
# Load the variables from .env into os.environ, then list which keys the file
# defines so the reader can confirm the expected secrets are present.
from dotenv import load_dotenv
load_dotenv()

# Show only key names for security. The values are never echoed -- not even a
# prefix -- because cell outputs are saved inside the notebook and are easily
# shared or committed together with it.
try:
    with open('.env', 'r', encoding='utf-8') as f:
        print(" Keys in .env file:")
        print("="*60)
        for line in f:
            line = line.strip()
            # Skip blank lines and comment lines.
            if not line or line.startswith('#'):
                continue
            # partition() splits on the first '=' only, so a value that itself
            # contains '=' (e.g. base64 padding) is not truncated.
            key, sep, value = line.partition('=')
            value_preview = "<set>" if sep and value.strip() else "<empty>"
            print(f"  {key.strip()} = {value_preview}")
        print("="*60)
except FileNotFoundError:
    # A missing .env file is not an error here; the keys may come from the
    # process environment instead.
    print(" .env file not found")

 .env file not found


## 2. Environment & GPU Check

In [17]:
import sys
import torch

# Report header.
print("=" * 60, "ENVIRONMENT CHECK", "=" * 60, sep="\n")

# Interpreter / framework versions and whether a CUDA device is usable.
print(
    f"Python version: {sys.version}",
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)

if not torch.cuda.is_available():
    # No GPU: the notebook still runs, but the reader is warned about speed.
    print(" WARNING: CUDA not available. Training will be VERY slow on CPU.")
else:
    # Details of device 0: toolkit version, name, compute capability and
    # total memory converted from bytes to GiB.
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Device name: {torch.cuda.get_device_name(0)}")
    print(f"Device capability: {torch.cuda.get_device_capability(0)}")
    print(f"Total VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")

# Report footer.
print("=" * 60)

ENVIRONMENT CHECK
Python version: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
PyTorch version: 2.9.0+cpu
CUDA available: False


## 3. Seeds & Determinism

Setting up random seeds for reproducibility.

In [None]:
import os
import random
import numpy as np
import torch

# Single seed shared by every random number generator used in the notebook.
SEED = 42

# Export the hash seed through the environment.
# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this assignment
# does not change hashing in the already-running kernel; it is only inherited
# by processes launched from it.
os.environ['PYTHONHASHSEED'] = f"{SEED}"

# Seed PyTorch, NumPy's legacy global generator and the stdlib generator.
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

if torch.cuda.is_available():
    # Prefer repeatable cuDNN kernels over auto-tuned ones; this can cost speed.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # Seed every visible CUDA device as well as the current one.
    torch.cuda.manual_seed_all(SEED)
    torch.cuda.manual_seed(SEED)

print(f" Seeds set to {SEED} for reproducibility")

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

## 4. Hugging Face Login

If you want to push your fine-tuned adapter to the Hugging Face Hub, make sure the `HF_TOKEN` environment variable is set (for example via the `.env` file from section 1) and run the login cell below.

You'll need a Hugging Face token with write permissions. Get one at: https://huggingface.co/settings/tokens

In [None]:
os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")
!hf auth login --token $HF_TOKEN

print("â„¹ Hugging Face login skipped. Uncomment login() to push models to Hub.")