# Granite-8B Fine-Tuning: Iterative Teacher-Student Distillation (Colab Native)

**Target**: Embedded Automotive Code Generation (AVB/TSN)  
**Model**: IBM Granite-8B-Code-Instruct-128K  
**Teacher**: Claude Sonnet 4.5 via Amazon Bedrock  
**Compute**: Google Colab A100 GPU (High RAM)  
**Data**: AWS S3  
**Storage**: AWS S3 (checkpoints & model output)

## Pipeline Stages
1. Environment Setup & GPU Optimization
2. Credentials (HuggingFace + AWS) & Configuration
3. Data Preparation (S3 → Training JSONL)
4. Teacher Output Generation (Bedrock Claude)
5. QLoRA Fine-Tuning with Iterative Distillation
6. Evaluation & Quality Metrics
7. Model Export to S3

## 1. Environment Setup

In [10]:
# Install dependencies (S3 + Bedrock via boto3, no SageMaker)
# NOTE(review): no package versions are pinned on this command, so each fresh
# runtime resolves whatever pip currently selects — confirm this is intended
# rather than installing from the repo's requirements.txt.
!pip install -q torch transformers datasets accelerate peft trl bitsandbytes \
    sentencepiece protobuf boto3 python-dotenv pyyaml tqdm \
    rouge-score sacrebleu evaluate scipy huggingface_hub

In [None]:
# Project setup, credentials loading, and S3 output configuration
import os, sys

# Pull credentials from the first .env file that exists (VS Code / local development)
try:
    from dotenv import load_dotenv
except ImportError:
    print("python-dotenv not installed — will prompt for credentials interactively")
else:
    # Candidates in priority order: repo root, current directory, parent directory
    for env_path in ('/content/fine_tuning_IBM_8B_v2/.env', '.env', '../.env'):
        if not os.path.exists(env_path):
            continue
        load_dotenv(env_path, override=True)
        print(f"Loaded credentials from {env_path}")
        break
    else:
        # Loop finished without a break: none of the candidates exist
        print("No .env file found — will prompt for credentials interactively")

# Helper: get secret from env var, .env, or interactive prompt
def get_secret(key, default=None):
    """Resolve a credential by name.

    Lookup order: a non-empty environment variable (which includes anything
    loaded from .env), then ``default`` when one is supplied, and finally a
    hidden interactive prompt. A value typed at the prompt is written back to
    ``os.environ`` so it is only requested once per session.

    Returns the resolved value, or ``default`` when the prompt is left empty.
    """
    from_env = os.environ.get(key)
    if from_env:
        return from_env
    if default is not None:
        return default

    # Nothing configured — ask the user (works in VS Code + Colab extension)
    import getpass
    typed = getpass.getpass(f"Enter {key}: ")
    if not typed:
        return default
    os.environ[key] = typed  # Cache for rest of session
    return typed

# S3 bucket for training outputs (models, checkpoints, summaries)
OUTPUT_BUCKET = 'granite-8b-training-outputs'
S3_OUTPUT_PREFIX = 'runs'

# Clone private repo using GitHub token
GITHUB_TOKEN = get_secret('GITHUB_TOKEN')
if not GITHUB_TOKEN:
    raise RuntimeError("GITHUB_TOKEN not set. Add it to .env, env var, or enter when prompted.")
REPO_URL = f'https://{GITHUB_TOKEN}@github.com/sriramach2000/fine_tuning_IBM_8B_v2.git'
PROJECT_ROOT = '/content/fine_tuning_IBM_8B_v2'

if not os.path.exists(PROJECT_ROOT):
    !git clone {REPO_URL} {PROJECT_ROOT}
else:
    print(f"Repo already cloned at {PROJECT_ROOT}")

os.chdir(PROJECT_ROOT)
sys.path.insert(0, PROJECT_ROOT)

# Re-load .env from cloned repo if it exists
try:
    from dotenv import load_dotenv
    repo_env = os.path.join(PROJECT_ROOT, '.env')
    if os.path.exists(repo_env):
        load_dotenv(repo_env, override=True)
        print(f"Loaded credentials from {repo_env}")
except Exception:
    pass

# Create local data/output directories
for d in ['data/raw', 'data/processed', 'data/splits', 'data/teacher_outputs',
          'data/eval', 'output/notebook_run', 'models/notebook_output', 'checkpoints']:
    os.makedirs(os.path.join(PROJECT_ROOT, d), exist_ok=True)

print(f"Project root: {PROJECT_ROOT}")
print(f"Output S3 bucket: s3://{OUTPUT_BUCKET}/{S3_OUTPUT_PREFIX}/")
print(f"Contents: {os.listdir(PROJECT_ROOT)}")

No .env file found — will prompt for credentials interactively
Repo already cloned at /content/fine_tuning_IBM_8B_v2
Project root: /content/fine_tuning_IBM_8B_v2
Output S3 bucket: s3://granite-8b-training-outputs/runs/
Contents: ['README.md', 'docker', 'PLAN.md', 'training', 'evaluation', 'pytest.ini', 'scripts', '.gitignore', 'context_db', '.git', 'checkpoints', 'data', 'tests', 'config.yaml', 'aws', 'requirements.txt', 'models', 'PIPELINE_STATE_MACHINE.md', 'output', 'run_gpu_tests.py']


In [9]:
# ── All Imports ──────────────────────────────────────────────────────
# Standard library
import gc
import json
import os
import shutil
import sys
from pathlib import Path

# Core ML / DL
import torch
import yaml

# HuggingFace ecosystem
from datasets import load_dataset
from huggingface_hub import HfApi, login
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer

# AWS
import boto3

# Project-local imports (requires sys.path from cell above)
from evaluation.code_quality_metrics import CodeQualityEvaluator
from training.iterative_distillation import (
    IterativeDistillationTrainer,
    DistillationConfig,
)
from training.train_granite_qlora import (
    NaNInfDetectionCallback,
    CustomEarlyStoppingCallback,
    format_chat_template,
)
from scripts.generate_teacher_outputs import (
    BedrockTeacherGenerator,
    create_automotive_system_prompt,
    create_sample_prompts,
)
from scripts.run_iterative_pipeline import create_sample_eval_prompts
from scripts.prepare_automotive_data import AutomotiveDataPipeline

In [None]:
# ── Validate All API Keys & Credentials ────────────────────────────────
print("=" * 60)
print("CREDENTIAL VALIDATION")
print("=" * 60)

# Ensure all credentials are loaded (prompts if not in env/.env)
for key in ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_REGION',
            'AMAZON_BEDROCK_MODEL_API_KEY', 'HF_TOKEN', 'GITHUB_TOKEN']:
    val = get_secret(key, default=('us-east-1' if key == 'AWS_REGION' else None))
    if val:
        os.environ[key] = val

# Resolve the region once so every AWS client below targets the same one
aws_region = os.environ.get('AWS_REGION', 'us-east-1')

# Each check prints [OK]/[FAIL] and flips all_ok on failure; nothing raises,
# so one run reports every broken credential.
all_ok = True

# 1. AWS Identity
try:
    sts = boto3.client('sts', region_name=aws_region)
    identity = sts.get_caller_identity()
    print(f"[OK] AWS STS — Account {identity['Account']}")
except Exception as e:
    print(f"[FAIL] AWS STS — {e}")
    all_ok = False

# 2. S3 data bucket
s3_test = None  # stays None if client creation fails; check 3 then retries it
try:
    s3_test = boto3.client('s3', region_name=aws_region)
    s3_test.head_bucket(Bucket='granite-8b-unified-automotive-data')
    print("[OK] S3 data bucket (granite-8b-unified-automotive-data)")
except Exception as e:
    print(f"[FAIL] S3 data bucket — {e}")
    all_ok = False

# 3. S3 output bucket
try:
    if s3_test is None:
        # Retry so the failure message shows the real client error
        s3_test = boto3.client('s3', region_name=aws_region)
    s3_test.head_bucket(Bucket=OUTPUT_BUCKET)
    print(f"[OK] S3 output bucket ({OUTPUT_BUCKET})")
except Exception as e:
    print(f"[FAIL] S3 output bucket ({OUTPUT_BUCKET}) — {e}")
    print("       Create it: aws s3 mb s3://" + OUTPUT_BUCKET)
    all_ok = False

# 4. Bedrock
try:
    bedrock_test = boto3.client('bedrock-runtime', region_name=aws_region)
    # Minimal request: a tiny max_tokens keeps the probe cheap
    test_body = json.dumps({
        'anthropic_version': 'bedrock-2023-05-31',
        'max_tokens': 16,
        'messages': [{'role': 'user', 'content': 'Say OK'}]
    })
    resp = bedrock_test.invoke_model(
        modelId='us.anthropic.claude-sonnet-4-5-20250929-v1:0',
        body=test_body, contentType='application/json', accept='application/json',
    )
    print("[OK] Bedrock (Claude Sonnet 4.5)")
except Exception as e:
    print(f"[FAIL] Bedrock — {e}")
    all_ok = False

# 5. HuggingFace
try:
    hf_token = os.environ.get('HF_TOKEN')
    api = HfApi(token=hf_token)
    hf_info = api.whoami()
    print(f"[OK] HuggingFace — user \"{hf_info['name']}\"")
except Exception as e:
    print(f"[FAIL] HuggingFace — {e}")
    all_ok = False

# 6. GitHub (repo access)
try:
    import urllib.request
    gh_token = os.environ.get('GITHUB_TOKEN')
    req = urllib.request.Request(
        'https://api.github.com/repos/sriramach2000/fine_tuning_IBM_8B_v2',
        headers={'Authorization': f'token {gh_token}', 'User-Agent': 'colab'}
    )
    # Close the response deterministically and bound the wait so an
    # unreachable network cannot hang the cell.
    with urllib.request.urlopen(req, timeout=15):
        pass
    print("[OK] GitHub — repo accessible")
except Exception as e:
    print(f"[FAIL] GitHub — {e}")
    all_ok = False

print("=" * 60)
if all_ok:
    print("All credentials valid. Safe to proceed.")
else:
    print("WARNING: Some credentials failed. Fix before continuing.")
print("=" * 60)

CREDENTIAL VALIDATION
[OK] AWS STS — Account 122634724608
[OK] S3 data bucket (granite-8b-unified-automotive-data)
[OK] S3 output bucket (granite-8b-training-outputs)
[OK] Bedrock (Claude Sonnet 4.5)
[OK] HuggingFace — user "sriramach"
[OK] GitHub — repo accessible
All credentials valid. Safe to proceed.


In [None]:
# Confirm a CUDA GPU is attached (A100 + High RAM runtime expected)
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

if not torch.cuda.is_available():
    raise RuntimeError(
        "No GPU detected! Go to Runtime > Change runtime type > "
        "Hardware accelerator: A100 GPU, Runtime shape: High RAM"
    )

gpu_name = torch.cuda.get_device_name(0)
props = torch.cuda.get_device_properties(0)
# Accept either attribute spelling for total device memory, falling back to 0
fallback_bytes = getattr(props, 'total_mem', 0)
vram_gb = getattr(props, 'total_memory', fallback_bytes) / 1e9
print(f"GPU: {gpu_name}")
print(f"VRAM: {vram_gb:.1f} GB")

# A different GPU only triggers a warning; the cell still continues
if 'A100' not in gpu_name:
    print(f"\n⚠ WARNING: Expected A100 but got {gpu_name}.")
    print("Go to Runtime > Change runtime type > A100 GPU, High RAM")

# --- A100 GPU Optimization ---
print("\n--- GPU Optimization ---")
for backend in (torch.backends.cuda.matmul, torch.backends.cudnn):
    backend.allow_tf32 = True
print("TF32 enabled for matmul and cuDNN")

torch.backends.cudnn.benchmark = True
print("cuDNN benchmark mode enabled")

os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
print("CUDA memory: expandable_segments enabled")

# Drop Python garbage and cached CUDA blocks before reporting free memory
gc.collect()
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()

free_mem, total_mem = torch.cuda.mem_get_info()
print(f"\nAvailable VRAM: {free_mem / 1e9:.1f} / {total_mem / 1e9:.1f} GB")

PyTorch version: 2.9.0+cu126
CUDA available: True
GPU: NVIDIA A100-SXM4-80GB
VRAM: 85.2 GB

--- GPU Optimization ---
TF32 enabled for matmul and cuDNN
cuDNN benchmark mode enabled
CUDA memory: expandable_segments enabled

Available VRAM: 84.7 / 85.2 GB


## 2. Credentials (HuggingFace + AWS) & Configuration

In [None]:
# Authenticate with HuggingFace + AWS
import getpass

# --- HuggingFace ---
try:
    HF_TOKEN = get_secret('HF_TOKEN')
    if not HF_TOKEN:
        raise ValueError("HF_TOKEN is empty")
    login(token=HF_TOKEN)
except Exception as exc:
    # Token missing or rejected: ask again with a hidden prompt so the
    # secret is never echoed into the notebook output.
    print(f"HF login with stored token failed ({type(exc).__name__}); please re-enter it.")
    HF_TOKEN = getpass.getpass("Enter your HuggingFace token: ")
    login(token=HF_TOKEN)
os.environ['HF_TOKEN'] = HF_TOKEN
print("HF: Logged in")

# --- AWS (S3 + Bedrock) ---
for key in ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_REGION', 'AMAZON_BEDROCK_MODEL_API_KEY']:
    # AWS_REGION has a sensible default, so never prompt for it
    key_default = 'us-east-1' if key == 'AWS_REGION' else None
    try:
        val = get_secret(key, default=key_default)
    except Exception as exc:
        # Best effort: the summary below reports the key as NOT SET
        print(f"Could not read {key}: {exc}")
        continue
    if val:
        os.environ[key] = val

# Fallback: set AWS_REGION default
if not os.environ.get('AWS_REGION'):
    os.environ['AWS_REGION'] = 'us-east-1'

# Verify (only presence is printed, never the secret values)
print(f"\nHF_TOKEN: {'set' if os.environ.get('HF_TOKEN') else 'NOT SET'}")
print(f"AWS_ACCESS_KEY_ID: {'set' if os.environ.get('AWS_ACCESS_KEY_ID') else 'NOT SET'}")
print(f"AWS_SECRET_ACCESS_KEY: {'set' if os.environ.get('AWS_SECRET_ACCESS_KEY') else 'NOT SET'}")
print(f"AWS_REGION: {os.environ.get('AWS_REGION')}")
print(f"BEDROCK_API_KEY: {'set' if os.environ.get('AMAZON_BEDROCK_MODEL_API_KEY') else 'NOT SET'}")

# Quick S3 connectivity check
try:
    s3 = boto3.client('s3', region_name=os.environ['AWS_REGION'])
    s3.head_bucket(Bucket='granite-8b-unified-automotive-data')
    print("\nS3 bucket accessible!")
except Exception as e:
    print(f"\nS3 access error: {e}")

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


HF: Logged in

HF_TOKEN: set
AWS_ACCESS_KEY_ID: set
AWS_SECRET_ACCESS_KEY: set
AWS_REGION: us-east-1
BEDROCK_API_KEY: set

S3 bucket accessible!


In [None]:
# Read config.yaml from the checkout, then point every path at the Colab workspace
config_path = os.path.join(PROJECT_ROOT, 'config.yaml')
with open(config_path, 'r') as f:
    config = yaml.safe_load(f)

# Sub-directory of data/ for each data path key
data_subdirs = {
    'processed_dir': 'processed',
    'splits_dir': 'splits',
    'teacher_outputs_dir': 'teacher_outputs',
    'distillation_dir': 'distillation',
}
# Path components (relative to the project root) for each training path key
training_subdirs = {
    'output_dir': ('output', 'notebook_run'),
    'checkpoint_dir': ('checkpoints',),
    'logs_dir': ('logs',),
}

# Replace the whole 'paths' section with Colab-local locations
config['paths'] = {
    'data': {
        path_key: os.path.join(PROJECT_ROOT, 'data', sub)
        for path_key, sub in data_subdirs.items()
    },
    'training': {
        path_key: os.path.join(PROJECT_ROOT, *parts)
        for path_key, parts in training_subdirs.items()
    },
}

splits_dir = config['paths']['data']['splits_dir']

print(f"Project: {config['project']['name']}")
print(f"Model: {config['model']['name']}")
print(f"Checkpoints: {config['paths']['training']['checkpoint_dir']}")
print(f"Output bucket: s3://{OUTPUT_BUCKET}/{S3_OUTPUT_PREFIX}/")

Project: granite-8b-avb-tsn-finetuning
Model: ibm-granite/granite-8b-code-instruct-128k
Checkpoints: /content/fine_tuning_IBM_8B_v2/checkpoints
Output bucket: s3://granite-8b-training-outputs/runs/


## 3. Data Preparation (S3 → Training JSONL)

In [None]:
# Download from S3 and process into training JSONL
# All processing runs locally on Colab (no SageMaker)
import time

pipeline = AutomotiveDataPipeline(
    s3_bucket=config['aws']['s3']['bucket_name'],
    region=os.environ.get('AWS_REGION', 'us-east-1'),
    local_data_dir=os.path.join(PROJECT_ROOT, 'data', 'raw'),
    processed_dir=os.path.join(PROJECT_ROOT, 'data', 'processed'),
    splits_dir=splits_dir,
)

# Download from S3 and process on Colab (full dataset)
start_time = time.time()
train_examples, val_examples = pipeline.run_pipeline(
    download_data=True,
    max_files_per_type=0,  # 0 = no limit (process all files)
    train_ratio=0.9,
)
elapsed = time.time() - start_time

print(f"\nTrain examples: {len(train_examples)}")
print(f"Val examples: {len(val_examples)}")
print(f"Processing time: {elapsed/60:.1f} minutes")

# Upload processed splits to S3 for persistence across runtime resets
s3_splits = boto3.client('s3', region_name=os.environ.get('AWS_REGION', 'us-east-1'))
splits_prefix = f"{S3_OUTPUT_PREFIX}/data/splits"

uploaded_splits = []
missing_splits = []
for split_name in ['train.jsonl', 'val.jsonl']:
    local_path = os.path.join(splits_dir, split_name)
    if not os.path.exists(local_path):
        # Remember the gap so the summary below does not claim success
        missing_splits.append(split_name)
        continue
    s3_key = f"{splits_prefix}/{split_name}"
    size_mb = os.path.getsize(local_path) / 1e6
    print(f"Uploading {split_name} ({size_mb:.1f} MB) -> s3://{OUTPUT_BUCKET}/{s3_key}")
    s3_splits.upload_file(local_path, OUTPUT_BUCKET, s3_key)
    uploaded_splits.append(split_name)

if missing_splits:
    print(f"WARNING: split file(s) not found in {splits_dir}, not uploaded: {', '.join(missing_splits)}")
if uploaded_splits:
    print(f"Splits uploaded to s3://{OUTPUT_BUCKET}/{splits_prefix}/")

In [None]:
# Peek at the first record of the training split
train_file = Path(splits_dir) / 'train.jsonl'

if not train_file.exists():
    print(f"No training file at {train_file}")
else:
    # Only the first line is read; the rest of the file is left untouched
    with open(train_file, 'r') as f:
        first_line = f.readline()
    first_example = json.loads(first_line)
    print("Sample training example:")
    # Cap the pretty-printed preview at 500 characters
    print(json.dumps(first_example, indent=2)[:500])

Sample training example:
{
  "messages": [
    {
      "role": "user",
      "content": "Implement the function 'if' for automotive embedded systems. Context: IEEE 802.1Qbv Time-Aware Shaper"
    },
    {
      "role": "assistant",
      "content": "else if (si.instance_state == DDS::NOT_ALIVE_NO_WRITERS_INSTANCE_STATE) {\n        ACE_DEBUG((LM_DEBUG, ACE_TEXT(\"%N:%l: INFO: instance is unregistered\\n\")));\n\n      }"
    }
  ]
}


## 4. Teacher Output Generation (Bedrock Claude)

In [None]:
# Build the Bedrock-backed teacher generator from config values
teacher = BedrockTeacherGenerator(
    model_id=config['distillation']['teacher_model'],
    region=os.environ.get('AWS_REGION', 'us-east-1'),
    max_tokens=config['distillation']['max_teacher_tokens'],
    temperature=config['aws']['bedrock']['temperature'],
)

# Smoke test: one small generation to prove the teacher responds
smoke_prompt = "Generate a C struct for a TSN gate control list entry with priority, gate state, and time interval fields."
test_result = teacher.generate_response(
    prompt=smoke_prompt,
    system_prompt=create_automotive_system_prompt(),
)

if not test_result['success']:
    print(f"Error: {test_result['error']}")
else:
    print("Bedrock teacher model connected!")
    # Only the first 300 characters of the response are shown
    print(f"\nResponse preview:\n{test_result['response'][:300]}...")

In [None]:
# Run the teacher over the sample prompts and write the responses to JSONL
sample_prompts = create_sample_prompts()
system_prompt = create_automotive_system_prompt()

teacher_out_dir = os.path.join(PROJECT_ROOT, 'data', 'teacher_outputs')
output_file = os.path.join(teacher_out_dir, 'bedrock_outputs.jsonl')
os.makedirs(os.path.dirname(output_file), exist_ok=True)

batch_kwargs = dict(
    prompts=sample_prompts,
    system_prompt=system_prompt,
    output_file=output_file,
    checkpoint_interval=5,
    max_workers=5,
)
results = teacher.generate_batch(**batch_kwargs)

# Count results whose 'success' flag is truthy
successful = len([r for r in results if r['success']])
print(f"\nGenerated {successful}/{len(results)} teacher outputs")

## 5. QLoRA Fine-Tuning with Iterative Distillation

In [None]:
# Read the train/val JSONL splits into HuggingFace datasets
train_jsonl, val_jsonl = (
    os.path.join(splits_dir, split_file)
    for split_file in ('train.jsonl', 'val.jsonl')
)

# The same loader call is used for both files, including split='train'
train_dataset = load_dataset('json', data_files=train_jsonl, split='train')
val_dataset = load_dataset('json', data_files=val_jsonl, split='train')

print(f"Train dataset: {len(train_dataset)} examples")
print(f"Val dataset: {len(val_dataset)} examples")
print(f"Columns: {train_dataset.column_names}")

In [None]:
# Load the base model in 4-bit and wrap it with LoRA adapters
MODEL_NAME = config['model']['name']
print(f"Loading {MODEL_NAME}...")

# 4-bit quantization settings; quant type and double-quant come from config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type=config['qlora']['bnb_4bit_quant_type'],
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=config['qlora']['bnb_4bit_use_double_quant'],
)

model_kwargs = dict(
    quantization_config=bnb_config,
    device_map='auto',
    torch_dtype=torch.bfloat16,
    attn_implementation='eager',
    token=os.environ.get('HF_TOKEN'),
)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_kwargs)
model = prepare_model_for_kbit_training(model)

# LoRA hyper-parameters are all read from the 'qlora' config section
lora_config = LoraConfig(
    r=config['qlora']['lora_r'],
    lora_alpha=config['qlora']['lora_alpha'],
    lora_dropout=config['qlora']['lora_dropout'],
    target_modules=config['qlora']['lora_target_modules'],
    bias='none',
    task_type='CAUSAL_LM',
)
model = get_peft_model(model, lora_config)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=os.environ.get('HF_TOKEN'))
# Reuse EOS as the padding token when the tokenizer defines none
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Tally parameters in a single pass over the model
trainable = 0
total = 0
for p in model.parameters():
    n_elems = p.numel()
    total += n_elems
    if p.requires_grad:
        trainable += n_elems
print(f"\nTrainable: {trainable:,} / {total:,} ({100*trainable/total:.2f}%)")

if torch.cuda.is_available():
    print(f"VRAM used: {torch.cuda.memory_allocated()/1e9:.2f} GB")

In [None]:
# Set up SFTTrainer
import inspect

output_dir = os.path.join(PROJECT_ROOT, 'output', 'notebook_run')
os.makedirs(output_dir, exist_ok=True)

# transformers renamed `evaluation_strategy` to `eval_strategy`. Dependencies
# are installed unpinned, so use whichever keyword the installed
# TrainingArguments accepts.
_ta_params = inspect.signature(TrainingArguments.__init__).parameters
eval_strategy_key = 'eval_strategy' if 'eval_strategy' in _ta_params else 'evaluation_strategy'

training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=config['training']['num_epochs'],
    per_device_train_batch_size=config['training']['per_device_train_batch_size'],
    # Evaluation deliberately reuses the training batch size
    per_device_eval_batch_size=config['training']['per_device_train_batch_size'],
    gradient_accumulation_steps=config['training']['gradient_accumulation_steps'],
    learning_rate=config['training']['learning_rate'],
    lr_scheduler_type=config['training']['lr_scheduler_type'],
    warmup_ratio=config['training']['warmup_ratio'],
    weight_decay=config['training']['weight_decay'],
    max_grad_norm=config['training']['max_grad_norm'],
    optim=config['training']['optim'],
    fp16=False,
    bf16=True,
    gradient_checkpointing=True,
    logging_dir=os.path.join(output_dir, 'logs'),
    logging_steps=config['training']['logging_steps'],
    logging_strategy='steps',
    eval_steps=config['training']['eval_steps'],
    save_strategy='steps',
    save_steps=config['training']['save_steps'],
    save_total_limit=2,
    load_best_model_at_end=False,
    report_to=[],
    **{eval_strategy_key: 'steps'},
)

def formatting_func(example):
    """Format one dataset example with the project's chat-template helper."""
    return format_chat_template(example, tokenizer)

# NOTE(review): newer TRL releases presumably expect `max_seq_length` on the
# SFT config object rather than on SFTTrainer — confirm against the
# installed TRL version.
sft_trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    formatting_func=formatting_func,
    max_seq_length=config['model']['max_seq_length'],
    callbacks=[
        NaNInfDetectionCallback(),
        CustomEarlyStoppingCallback(
            patience=config['training']['early_stopping_patience'],
        ),
    ],
)

print("SFTTrainer ready.")

In [None]:
# Wire up the evaluator, distillation config and iterative trainer
evaluator = CodeQualityEvaluator(
    strict_mode=True,
    quality_threshold=config['distillation']['min_score_threshold'],
)

# Thresholds and teacher model come from config; limits below are fixed here
distillation_settings = dict(
    quality_threshold=config['distillation']['min_score_threshold'],
    convergence_threshold=config['distillation']['convergence_threshold'],
    convergence_patience=3,
    teacher_model=config['distillation']['teacher_model'],
    max_corrections_per_epoch=500,
    max_parallel_teacher_calls=5,
    eval_samples_per_epoch=200,
    output_dir=output_dir,
    model_dir=os.path.join(PROJECT_ROOT, 'models', 'notebook_output'),
    corrections_dir=os.path.join(output_dir, 'corrections'),
)
distillation_config = DistillationConfig(**distillation_settings)

distillation_trainer = IterativeDistillationTrainer(
    student_model=model,
    student_tokenizer=tokenizer,
    teacher_generator=teacher,  # BedrockTeacherGenerator
    quality_evaluator=evaluator,
    config=distillation_config,
    trainer=sft_trainer,
)

print("Iterative distillation trainer ready.")

In [None]:
# Load eval prompts from data/eval/eval_prompts.jsonl, else fall back to samples
eval_prompts_file = Path(PROJECT_ROOT) / 'data' / 'eval' / 'eval_prompts.jsonl'
if eval_prompts_file.exists():
    with open(eval_prompts_file, 'r') as f:
        # Skip blank lines (e.g. a trailing newline), which json.loads rejects
        eval_prompts = [json.loads(line) for line in f if line.strip()]
    print(f"Loaded {len(eval_prompts)} eval prompts from file")
else:
    eval_prompts = create_sample_eval_prompts()
    print(f"Using {len(eval_prompts)} sample eval prompts")

In [None]:
# Drive the distillation epochs until convergence or the configured maximum
MAX_EPOCHS = config['distillation']['max_iterations']
EVAL_SAMPLES = min(len(eval_prompts), 200)  # never evaluate more than 200 prompts

banner = "=" * 70
print(banner)
print("ITERATIVE TEACHER-STUDENT DISTILLATION")
print(banner)
print(f"Max epochs: {MAX_EPOCHS}")
print(f"Eval samples/epoch: {EVAL_SAMPLES}")
print(f"Quality threshold: {distillation_config.quality_threshold}")
print(f"Convergence target: {distillation_config.convergence_threshold}")
print(banner)

epoch = 1
while epoch <= MAX_EPOCHS:
    # Every epoch evaluates the same leading slice of the prompt list
    epoch_eval = eval_prompts[:EVAL_SAMPLES]

    metrics = distillation_trainer.train_epoch(
        train_dataset=train_dataset,
        eval_prompts=epoch_eval,
        epoch_num=epoch,
    )

    print(f"\n[Epoch {epoch}] Loss: {metrics.train_loss:.4f} | "
          f"Avg Score: {metrics.avg_student_score:.2f}/10 | "
          f"Corrections: {metrics.num_corrections} | "
          f"Rate: {metrics.correction_rate:.1%}")

    converged, reason = distillation_trainer.check_convergence()
    if converged:
        print(f"\n[CONVERGED] {reason}")
        break
    epoch += 1

summary = distillation_trainer.get_training_summary()
print("\n" + banner)
print("TRAINING COMPLETE")
print(banner)
for k in summary:
    v = summary[k]
    print(f"  {k}: {v}")

## 6. Evaluation & Quality Metrics

In [None]:
# Generate from the fine-tuned model for a few fixed prompts and score each output
model.eval()

test_prompts = [
    "Generate C code for a TSN Time-Aware Shaper (802.1Qbv) gate control list entry struct.",
    "Generate C code for AVB Stream Reservation Protocol talker advertisement.",
    "Generate C code for IEEE 802.1AS PTP timestamp comparison function.",
]

print("=" * 70)
print("POST-TRAINING EVALUATION")
print("=" * 70)

for i, prompt in enumerate(test_prompts):
    messages = [{"role": "user", "content": prompt}]

    # Use the tokenizer's chat template when it has one; otherwise hand-build the prompt
    if not (hasattr(tokenizer, 'apply_chat_template') and tokenizer.chat_template):
        formatted = f"<|user|>\n{prompt}\n<|assistant|>\n"
    else:
        formatted = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    encoded = tokenizer(formatted, return_tensors='pt', truncation=True, max_length=4096)
    inputs = {}
    for k, v in encoded.items():
        inputs[k] = v.to(model.device)

    with torch.no_grad():
        output_ids = model.generate(
            **inputs, max_new_tokens=512, do_sample=True,
            temperature=0.7, top_p=0.95,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Strip the prompt tokens so only the newly generated part is decoded
    prompt_len = inputs['input_ids'].shape[1]
    new_token_ids = output_ids[0][prompt_len:]
    generated = tokenizer.decode(new_token_ids, skip_special_tokens=True)

    quality = evaluator.evaluate(generated, prompt)

    print(f"\n--- Prompt {i+1} ---")
    print(f"Prompt: {prompt[:80]}...")
    print(f"Score: {quality}")
    print(f"Output preview: {generated[:200]}...")

## 7. Model Export to S3

In [None]:
# Write the model and tokenizer to disk, then mirror the files to the output bucket
model_output_dir = os.path.join(PROJECT_ROOT, 'models', 'notebook_output')
os.makedirs(model_output_dir, exist_ok=True)

for artifact in (model, tokenizer):
    artifact.save_pretrained(model_output_dir)
print(f"Model saved locally: {model_output_dir}")

# Upload to S3
s3_client = boto3.client('s3', region_name=os.environ.get('AWS_REGION', 'us-east-1'))
s3_model_prefix = f"{S3_OUTPUT_PREFIX}/models/notebook_output"

print(f"\nUploading model to s3://{OUTPUT_BUCKET}/{s3_model_prefix}/")
# Only regular files directly inside the directory are uploaded; sub-directories are skipped
for f in Path(model_output_dir).glob('*'):
    if not f.is_file():
        continue
    s3_key = f"{s3_model_prefix}/{f.name}"
    print(f"  {f.name} ({f.stat().st_size / 1e6:.1f} MB)")
    s3_client.upload_file(str(f), OUTPUT_BUCKET, s3_key)

print(f"Model uploaded to s3://{OUTPUT_BUCKET}/{s3_model_prefix}/")

In [None]:
# Persist the training summary as JSON on disk, then copy it to the output bucket
summary_file = os.path.join(output_dir, 'training_summary.json')
with open(summary_file, 'w') as summary_fh:
    json.dump(summary, summary_fh, indent=2)

# Upload to S3
s3_key = "/".join([S3_OUTPUT_PREFIX, "summaries", "training_summary.json"])
s3_client.upload_file(summary_file, OUTPUT_BUCKET, s3_key)

summary_uri = f"s3://{OUTPUT_BUCKET}/{s3_key}"
print(f"Summary saved to: {summary_file}")
print(f"Summary uploaded to: {summary_uri}")
print(json.dumps(summary, indent=2))

In [11]:
# Upload model to data S3 bucket (for downstream consumption)
# This cell reuses objects created by earlier cells. When it is run out of
# order (e.g. after a runtime reset), fail with an actionable message
# instead of a bare NameError on the first missing name.
_required_names = ('config', 'model_output_dir', 'output_dir', 's3_client')
_missing_names = [name for name in _required_names if name not in globals()]
if _missing_names:
    raise NameError(
        "Run the configuration, training and save cells first; "
        f"undefined: {', '.join(_missing_names)}"
    )

bucket = config['aws']['s3']['bucket_name']
# 'model_prefix' is optional in config; default to 'models'
s3_prefix = config['aws']['s3'].get('model_prefix', 'models') + '/notebook-finetuned'

print(f"Uploading model to s3://{bucket}/{s3_prefix}/")
# Only regular files directly inside the model directory are uploaded
for file_path in Path(model_output_dir).glob('*'):
    if file_path.is_file():
        s3_key = f"{s3_prefix}/{file_path.name}"
        print(f"  {file_path.name} ({file_path.stat().st_size / 1e6:.1f} MB) -> s3://{bucket}/{s3_key}")
        s3_client.upload_file(str(file_path), bucket, s3_key)

# Ship the training summary next to the model files
s3_client.upload_file(
    os.path.join(output_dir, 'training_summary.json'),
    bucket,
    f"{s3_prefix}/training_summary.json",
)

print(f"\nModel uploaded to s3://{bucket}/{s3_prefix}/")

# Optional: Push to HuggingFace Hub
PUSH_TO_HUB = False
HUB_REPO_NAME = "your-username/granite-8b-avb-tsn-finetuned"

if PUSH_TO_HUB:
    # Use the same explicit token for repo creation as for the pushes
    hub_token = os.environ.get('HF_TOKEN')
    api = HfApi(token=hub_token)
    api.create_repo(HUB_REPO_NAME, exist_ok=True, private=True)
    model.push_to_hub(HUB_REPO_NAME, token=hub_token)
    tokenizer.push_to_hub(HUB_REPO_NAME, token=hub_token)
    print(f"Model pushed to https://huggingface.co/{HUB_REPO_NAME}")

NameError: name 'config' is not defined