# 01 - Environment Setup

**Thesis Section Reference:** Chapter 3.4 - Experimental Setup

This notebook sets up the environment for knowledge distillation experiments:
1. Load environment variables from `.env`
2. Verify Python packages and versions
3. Check device availability (MPS/CPU)
4. Create directory structure
5. Test basic model loading

## Important Notes for MPS (Apple Silicon)
- Use `device="mps"` when available, fallback to CPU
- Avoid bf16 (not supported on MPS)
- Use fp32 for stability (fp16 can cause issues)
- Clear MPS cache periodically with `torch.mps.empty_cache()`

In [1]:
# Load environment variables FIRST
import os
import sys
from pathlib import Path

# Add src to path
ROOT_DIR = Path.cwd().parent if Path.cwd().name == "notebooks" else Path.cwd()
sys.path.insert(0, str(ROOT_DIR / "src"))

# Load .env file
from dotenv import load_dotenv
env_path = ROOT_DIR / ".env"
if env_path.exists():
    load_dotenv(env_path)
    print(f"✓ Loaded environment from {env_path}")
else:
    print(f"⚠ Warning: .env file not found at {env_path}")
    print("  Please copy .env.example to .env and fill in your values")

✓ Loaded environment from /Users/pjere/Workshop/thesis-exp/.env


In [2]:
# Verify required packages
import importlib

REQUIRED_PACKAGES = [
    ("torch", "2.0.0"),
    ("transformers", "4.30.0"),
    ("datasets", "2.0.0"),
    ("evaluate", "0.4.0"),
    ("accelerate", "0.20.0"),
    ("peft", "0.4.0"),
    ("numpy", "1.20.0"),
    ("pandas", "1.3.0"),
    ("matplotlib", "3.4.0"),
    ("scipy", "1.7.0"),
    ("psutil", "5.8.0"),
    ("pyyaml", "5.4.0"),
    ("python-dotenv", "0.19.0"),
    ("tqdm", "4.60.0"),
]

print("Checking required packages...\n")
all_ok = True

for package, min_version in REQUIRED_PACKAGES:
    try:
        # Handle package name differences
        import_name = package.replace("-", "_")
        if import_name == "python_dotenv":
            import_name = "dotenv"
        if import_name == "pyyaml":
            import_name = "yaml"
            
        mod = importlib.import_module(import_name)
        version = getattr(mod, "__version__", "unknown")
        print(f"  ✓ {package}: {version}")
    except ImportError:
        print(f"  ✗ {package}: NOT INSTALLED")
        all_ok = False

if all_ok:
    print("\n✓ All required packages are installed")
else:
    print("\n⚠ Some packages are missing. Install with:")
    print("  pip install torch transformers datasets evaluate accelerate peft")
    print("  pip install numpy pandas matplotlib scipy psutil pyyaml python-dotenv tqdm")

Checking required packages...

  ✓ torch: 2.10.0
  ✓ transformers: 5.0.0
  ✓ datasets: 4.5.0
  ✓ evaluate: 0.4.6
  ✓ accelerate: 1.12.0
  ✓ peft: 0.18.1
  ✓ numpy: 2.4.2
  ✓ pandas: 3.0.0
  ✓ matplotlib: 3.10.8
  ✓ scipy: 1.17.0
  ✓ psutil: 7.2.2
  ✓ pyyaml: 6.0.3
  ✓ python-dotenv: unknown
  ✓ tqdm: 4.67.2

✓ All required packages are installed


In [3]:
# Check device availability
import torch

print("=" * 50)
print("Device Availability Check")
print("=" * 50)

# MPS (Apple Silicon)
mps_available = torch.backends.mps.is_available()
mps_built = torch.backends.mps.is_built()
print(f"\nMPS (Apple Silicon):")
print(f"  Built: {mps_built}")
print(f"  Available: {mps_available}")

# CUDA
cuda_available = torch.cuda.is_available()
print(f"\nCUDA:")
print(f"  Available: {cuda_available}")
if cuda_available:
    print(f"  Device count: {torch.cuda.device_count()}")
    print(f"  Current device: {torch.cuda.current_device()}")

# Determine best device
if mps_available:
    DEVICE = "mps"
elif cuda_available:
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Override from environment
env_device = os.getenv("DEVICE", "").lower()
if env_device in ["mps", "cuda", "cpu"]:
    if env_device == "mps" and not mps_available:
        print(f"\n⚠ Requested MPS but not available, using {DEVICE}")
    elif env_device == "cuda" and not cuda_available:
        print(f"\n⚠ Requested CUDA but not available, using {DEVICE}")
    else:
        DEVICE = env_device

print(f"\n→ Selected device: {DEVICE}")

# Test device
try:
    x = torch.randn(10, 10).to(DEVICE)
    y = x @ x.T
    print(f"  ✓ Device test passed")
    del x, y
    if DEVICE == "mps":
        torch.mps.empty_cache()
except Exception as e:
    print(f"  ✗ Device test failed: {e}")
    DEVICE = "cpu"
    print(f"  → Falling back to CPU")

Device Availability Check

MPS (Apple Silicon):
  Built: True
  Available: True

CUDA:
  Available: False

→ Selected device: mps
  ✓ Device test passed


In [4]:
# Check system memory
import psutil

print("=" * 50)
print("System Memory")
print("=" * 50)

mem = psutil.virtual_memory()
print(f"\nTotal RAM: {mem.total / (1024**3):.1f} GB")
print(f"Available RAM: {mem.available / (1024**3):.1f} GB")
print(f"Used RAM: {mem.used / (1024**3):.1f} GB ({mem.percent}%)")

# Recommendations based on memory
available_gb = mem.available / (1024**3)
print(f"\nRecommendations based on {available_gb:.1f} GB available:")

if available_gb < 8:
    print("  ⚠ Low memory - use FAST_MODE=true and smaller models")
    print("  ⚠ Consider using teacher_model_local_fallback")
elif available_gb < 16:
    print("  ✓ Moderate memory - FAST_MODE recommended")
    print("  ✓ Can run 3B teacher models")
elif available_gb < 32:
    print("  ✓ Good memory - Can run most experiments")
    print("  ✓ May need gradient checkpointing for 7B models")
else:
    print("  ✓ Excellent memory - Full experiments possible")

System Memory

Total RAM: 36.0 GB
Available RAM: 14.3 GB
Used RAM: 16.5 GB (60.3%)

Recommendations based on 14.3 GB available:
  ✓ Moderate memory - FAST_MODE recommended
  ✓ Can run 3B teacher models


In [5]:
# Load experiment configuration
from config import load_config

try:
    config = load_config(str(ROOT_DIR / "configs" / "experiment.yaml"))
    print("✓ Configuration loaded successfully")
    print(f"\nExperiment: {config.experiment_name}")
    print(f"Fast mode: {config.fast_mode}")
    print(f"Device: {config.get_device()}")
    print(f"Precision: {config.get_precision()}")
    print(f"\nModels:")
    print(f"  Teacher (primary): {config.teacher.primary}")
    print(f"  Teacher (fallback): {config.teacher.local_fallback}")
    print(f"  Student S1: {config.student_s1.name}")
    print(f"  Student S2: {config.student_s2.name}")
    print(f"\nTraining:")
    print(f"  Epochs: {config.get_epochs()}")
    print(f"  Batch size: {config.get_batch_size()}")
    print(f"  Seeds: {config.get_seeds()}")
except Exception as e:
    print(f"✗ Failed to load configuration: {e}")
    raise

✓ Configuration loaded successfully

Experiment: kd_thesis_experiments
Fast mode: True
Device: mps
Precision: fp32

Models:
  Teacher (primary): meta-llama/Llama-3.2-8B-Instruct
  Teacher (fallback): Qwen/Qwen2.5-3B-Instruct
  Student S1: TinyLlama/TinyLlama-1.1B-Chat-v1.0
  Student S2: quantized-proxy

Training:
  Epochs: 1
  Batch size: 2
  Seeds: [42]


In [6]:
# Create directory structure
directories = [
    ROOT_DIR / "results" / "raw_runs",
    ROOT_DIR / "results" / "summary",
    ROOT_DIR / "results" / "figures",
    ROOT_DIR / "results" / "models",
    ROOT_DIR / "results" / "teacher_cache",
    ROOT_DIR / "hf_cache",
]

print("Creating directory structure...")
for dir_path in directories:
    dir_path.mkdir(parents=True, exist_ok=True)
    print(f"  ✓ {dir_path.relative_to(ROOT_DIR)}")

# Create .gitkeep files
for dir_path in directories:
    gitkeep = dir_path / ".gitkeep"
    gitkeep.touch(exist_ok=True)

print("\n✓ Directory structure created")

Creating directory structure...
  ✓ results/raw_runs
  ✓ results/summary
  ✓ results/figures
  ✓ results/models
  ✓ results/teacher_cache
  ✓ hf_cache

✓ Directory structure created


In [7]:
# Test Hugging Face authentication
from huggingface_hub import HfApi, login

hf_token = os.getenv("HF_TOKEN", "")

if hf_token and hf_token != "your_huggingface_token_here":
    try:
        # Try to login
        login(token=hf_token, add_to_git_credential=False)
        api = HfApi()
        user_info = api.whoami()
        print(f"✓ Logged in to Hugging Face as: {user_info.get('name', 'Unknown')}")
    except Exception as e:
        print(f"⚠ HF authentication warning: {e}")
        print("  You may not be able to access gated models")
else:
    print("⚠ No HF_TOKEN found in .env")
    print("  Some models may require authentication")
    print("  Get your token from: https://huggingface.co/settings/tokens")

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


✓ Logged in to Hugging Face as: pauljere97


In [8]:
# Test model loading (with fallback)
from transformers import AutoTokenizer, AutoModelForCausalLM
import gc

print("Testing model loading...\n")

# Try student model first (smaller)
student_model_name = os.getenv("STUDENT_S1", "TinyLlama/TinyLlama-1.1B-Chat-v1.0")

try:
    print(f"Loading tokenizer: {student_model_name}")
    tokenizer = AutoTokenizer.from_pretrained(
        student_model_name,
        trust_remote_code=True,
        cache_dir=str(ROOT_DIR / "hf_cache")
    )
    
    # Ensure pad token
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    
    print(f"  ✓ Tokenizer loaded (vocab size: {tokenizer.vocab_size})")
    
    print(f"\nLoading model: {student_model_name}")
    model = AutoModelForCausalLM.from_pretrained(
        student_model_name,
        trust_remote_code=True,
        torch_dtype=torch.float32,  # Use fp32 for MPS stability
        cache_dir=str(ROOT_DIR / "hf_cache")
    )
    
    # Count parameters
    total_params = sum(p.numel() for p in model.parameters())
    print(f"  ✓ Model loaded ({total_params / 1e9:.2f}B parameters)")
    
    # Test inference
    print(f"\nTesting inference on {DEVICE}...")
    model = model.to(DEVICE)
    
    inputs = tokenizer("Hello, how are you?", return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)
    
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"  ✓ Generation test passed")
    print(f"  Response: {response[:100]}...")
    
    # Cleanup
    del model, tokenizer, inputs, outputs
    gc.collect()
    if DEVICE == "mps":
        torch.mps.empty_cache()
    
    print("\n✓ Model loading test passed")
    
except Exception as e:
    print(f"\n✗ Model loading failed: {e}")
    print("\nTroubleshooting:")
    print("  1. Check your internet connection")
    print("  2. Verify HF_TOKEN if using gated models")
    print("  3. Try a different model in .env")
    raise

Testing model loading...

Loading tokenizer: TinyLlama/TinyLlama-1.1B-Chat-v1.0


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

  ✓ Tokenizer loaded (vocab size: 32000)

Loading model: TinyLlama/TinyLlama-1.1B-Chat-v1.0


`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

  ✓ Model loaded (1.10B parameters)

Testing inference on mps...
  ✓ Generation test passed
  Response: Hello, how are you?
I am fine, thank you.
How...

✓ Model loading test passed


In [9]:
# Environment Summary
print("=" * 60)
print("ENVIRONMENT SETUP COMPLETE")
print("=" * 60)
print(f"""
Configuration:
  Fast Mode: {os.getenv('FAST_MODE', 'true')}
  Device: {DEVICE}
  Precision: fp32 (recommended for MPS stability)

Models (from .env):
  Teacher Primary: {os.getenv('TEACHER_MODEL_PRIMARY', 'not set')}
  Teacher Fallback: {os.getenv('TEACHER_MODEL_FALLBACK', 'not set')}
  Student S1: {os.getenv('STUDENT_S1', 'not set')}
  Student S2: {os.getenv('STUDENT_S2', 'not set')}

Directories:
  Results: {ROOT_DIR / 'results'}
  HF Cache: {ROOT_DIR / 'hf_cache'}

Next Steps:
  1. Run 02_data_prep_sst2.ipynb to prepare SST-2 data
  2. Run 03_data_prep_squad.ipynb to prepare SQuAD data
  3. Run 04_teacher_cache_outputs.ipynb to cache teacher outputs
""")

ENVIRONMENT SETUP COMPLETE

Configuration:
  Fast Mode: true
  Device: mps
  Precision: fp32 (recommended for MPS stability)

Models (from .env):
  Teacher Primary: meta-llama/Llama-3.2-8B-Instruct
  Teacher Fallback: Qwen/Qwen2.5-3B-Instruct
  Student S1: TinyLlama/TinyLlama-1.1B-Chat-v1.0
  Student S2: quantized-proxy

Directories:
  Results: /Users/pjere/Workshop/thesis-exp/results
  HF Cache: /Users/pjere/Workshop/thesis-exp/hf_cache

Next Steps:
  1. Run 02_data_prep_sst2.ipynb to prepare SST-2 data
  2. Run 03_data_prep_squad.ipynb to prepare SQuAD data
  3. Run 04_teacher_cache_outputs.ipynb to cache teacher outputs

