# 🧪 Transformer Builder - Advanced Testing Lab

Welcome! This notebook provides comprehensive testing and training capabilities for your custom transformer architecture.

**What's included:**
- ✅ **Tier 1:** Critical validation (shape, gradients, numerical stability)
- 🔬 **Tier 2:** Advanced analysis (attention patterns, robustness, profiling)
- 🚀 **Tier 3:** Training utilities (fine-tuning, hyperparameter sweeps, benchmarks)

**Quick Start:**
1. Click "Run all" (Runtime → Run all)
2. Review Tier 1 results (should complete in ~1 minute)
3. Explore Tier 2/3 sections as needed

**Source:** Generated from [Transformer Builder](https://transformer-builder.com)

---

## ✅ Notebook Version Check

**Current Version**: v3.1.0 (2025-01-13)  
**Changelog**: Surgical dependency install with --no-deps to prevent numpy corruption

**⚠️ IMPORTANT**: This version uses --no-deps for pytorch-lightning!  
**✅ CORRECT**: The install cell should show "Step 3/3: Installing pytorch-lightning (without dependency resolution)..."

---

## Setup: Install Dependencies

This may take 20-40 seconds.

In [None]:
# ==============================================================================
# VERSION VERIFICATION - Run this cell first to confirm correct notebook version
# ==============================================================================

# The banner is purely static text, so it is kept as data and emitted in a
# single pass. An empty entry yields a blank line, exactly like a bare print().
_version_rule = "=" * 70
_version_banner = (
    _version_rule,
    "üîç NOTEBOOK VERSION VERIFICATION",
    _version_rule,
    "",
    "üìå Expected Version: v3.1.0 (2025-01-13)",
    "üìå Key Strategy:",
    "   ‚Ä¢ Use --no-deps for pytorch-lightning",
    "   ‚Ä¢ Prevents pip from corrupting Colab's numpy",
    "   ‚Ä¢ 3-step installation process",
    "",
    _version_rule,
    "‚úÖ If you see '3 steps' with --no-deps, CORRECT version!",
    "‚ùå If you see different steps, clear browser cache!",
    _version_rule,
    "",
    "Next: Run the cell below to install dependencies.",
)

for _banner_line in _version_banner:
    print(_banner_line)

In [None]:
# ==============================================================================
# DEPENDENCY INSTALLATION - Surgical approach to prevent numpy corruption
# ==============================================================================
# Lines starting with `!` are IPython shell escapes, so this cell only runs
# inside a notebook kernel (e.g. Colab), not as a plain Python script.
# NOTE(review): every "‚úì ..." message below is printed unconditionally; the
# exit status of the preceding shell command is never checked.

print("üì¶ Installing dependencies (surgical install to prevent numpy corruption)...")
print()

# Step 1: Upgrade pip
print("Step 1/3: Upgrading pip...")
!pip install --upgrade pip -q
print("‚úì pip upgraded\n")

# Step 2: Install safe dependencies (no numpy conflicts)
# The requirements file is downloaded from the templates repository at run
# time and then handed to pip.
print("Step 2/3: Installing safe dependencies...")
!wget -q https://raw.githubusercontent.com/matt-hans/transformer-builder-colab-templates/main/requirements-colab.txt -O requirements-colab.txt
!pip install -q -r requirements-colab.txt
print("‚úì Safe dependencies installed\n")

# Step 3: Install pytorch-lightning with --no-deps to prevent numpy corruption
print("Step 3/3: Installing pytorch-lightning (without dependency resolution)...")
print("  (Using --no-deps to prevent pip from corrupting numpy)")

# Install ONLY the specific packages we need, without letting pip resolve deps.
# Because --no-deps disables dependency resolution, torchmetrics and
# lightning-utilities are installed explicitly alongside pytorch-lightning.
!pip install -q --no-deps 'pytorch-lightning>=2.4.0,<2.6.0'
!pip install -q --no-deps 'torchmetrics>=1.3.0,<2.0.0'
!pip install -q --no-deps 'lightning-utilities>=0.10.0'

print("‚úì pytorch-lightning installed\n")

# Verify critical imports (using Colab's pre-installed numpy and torch).
# Any ImportError is reported together with recovery steps and then re-raised,
# so "Run all" stops here instead of failing later with a less obvious error.
print("=" * 60)
print("VERIFICATION")
print("=" * 60)

try:
    import numpy as np
    import torch
    import pytorch_lightning as pl
    from transformers import AutoTokenizer

    print(f"‚úÖ numpy: {np.__version__} (Colab pre-installed)")
    print(f"‚úÖ torch: {torch.__version__} (Colab pre-installed)")
    print(f"‚úÖ pytorch-lightning: {pl.__version__} (installed with --no-deps)")
    print("‚úÖ transformers: (Colab pre-installed)")

    # Verify numpy is not corrupted by importing a pure-Python submodule and
    # the compiled ufunc module.
    try:
        from numpy import rec
        try:
            # Location of the compiled core since the NumPy 2.0 rename.
            from numpy._core import umath
        except ImportError:
            # Older NumPy 1.x releases predate the `numpy._core` package; a
            # missing private alias there must not be reported as corruption.
            from numpy.core import umath
        print("‚úÖ numpy C extensions: intact")
    except ImportError as e:
        print(f"‚ùå numpy C extensions: corrupted ({e})")
        raise

    # Check for GPU
    if torch.cuda.is_available():
        print(f"‚úÖ GPU: {torch.cuda.get_device_name(0)}")
        print(f"‚úÖ CUDA: {torch.version.cuda}")
    else:
        print("‚ö†Ô∏è  GPU: Not available (CPU mode)")

    print("\n‚úÖ All dependencies verified and ready!")
    print("\nüí° Strategy: Installed pytorch-lightning with --no-deps")
    print("   to prevent pip from corrupting Colab's numpy")

except ImportError as e:
    print(f"‚ùå Import error: {e}")
    print("\n‚ö†Ô∏è  If you see numpy corruption errors:")
    print("   1. Runtime ‚Üí Factory reset runtime")
    print("   2. Rerun all cells")
    print("   3. Report issue if persists")
    raise

In [None]:
# ==============================================================================
# DOWNLOAD UTILS PACKAGE
# ==============================================================================

print("üì¶ Downloading test utilities package...")

# Remove old utils directory if exists
!rm -rf utils/

# Download complete utils package from GitHub
!git clone --depth 1 --branch main https://github.com/matt-hans/transformer-builder-colab-templates.git temp_repo 2>/dev/null

# Copy utils directory
!cp -r temp_repo/utils ./

# Cleanup
!rm -rf temp_repo

# Verify package structure
import sys
import os

# Add current directory to Python path
if './' not in sys.path:
    sys.path.insert(0, './')

# Verify utils package is importable
try:
    import utils
    print(f"‚úÖ Utils package loaded (version {utils.__version__})")
    
    # Verify package structure
    utils_path = os.path.join(os.getcwd(), 'utils')
    subdirs = ['adapters', 'tokenization', 'training', 'ui']
    
    for subdir in subdirs:
        subdir_path = os.path.join(utils_path, subdir)
        if os.path.exists(subdir_path):
            print(f"‚úÖ {subdir}/ directory found")
        else:
            print(f"‚ö†Ô∏è  {subdir}/ directory missing")
    
    # Test importing test functions (backward compatibility)
    from utils import (
        test_shape_robustness,
        test_gradient_flow,
        test_output_stability,
        run_all_tier1_tests
    )
    print("‚úÖ Test functions importable")
    
    print("\n‚úÖ Utils package ready!")
    
except ImportError as e:
    print(f"‚ùå Failed to import utils package: {e}")
    print("Falling back to direct file download...")
    # Fallback: download test_functions.py directly
    !wget -q https://raw.githubusercontent.com/matt-hans/transformer-builder-colab-templates/main/utils/test_functions.py

## Load Custom Model from URL

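This cell reads the `gist_id` and `name` parameters from the URL fragment (passed from Transformer Builder) and downloads `model.py` and `config.json` from that GitHub Gist. If no valid gist is available, a small example model is loaded instead.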

In [None]:
# Load Custom Model from URL (robust)
import os
import re
import json
import urllib.request
import urllib.error
from google.colab import output

#@title Load Custom Model from Gist (fallback form)
# Colab form fields: the `#@param` markers render these two assignments as
# editable inputs. gist_id_form is only consulted further down when no gist id
# was obtained from another source.
gist_id_form = ""  #@param {type:"string"}
model_name_form = "CustomTransformer"  #@param {type:"string"}

def _try_eval_js_for_params():
    js = r"""
(() => {
  try {
    // Try top frame (can throw cross-origin), then fall back to referrer/baseURI.
    let raw = null;
    try { raw = (window.parent && window.parent.location && window.parent.location.href) || null; } catch (e) {}
    if (!raw || typeof raw !== 'string' || raw === 'about:blank') {
      raw = document.referrer || document.baseURI || '';
    }
    const url = new URL(raw, window.location.origin);
    const fragment = url.hash.slice(1); // Remove leading '#'
    const sp = new URLSearchParams(fragment);
    return {
      ok: true,
      gist_id: sp.get('gist_id'),
      name: sp.get('name'),
      href: url.href
    };
  } catch (err) {
    return { ok: false, error: String(err) };
  }
})();
"""
    try:
        return output.eval_js(js)
    except Exception as e:
        return {"ok": False, "error": f"JS eval failed: {type(e).__name__}: {e}"}

def _validate_gist_id(gid: str):
    # Allow typical gist IDs (hex or alnum). Relax if needed.
    return bool(re.fullmatch(r"[A-Za-z0-9]+", gid or ""))

def _fetch_gist(gist_id: str) -> dict:
    """Fetch a gist from the GitHub REST API and return the decoded JSON body.

    Args:
        gist_id: Identifier interpolated into the `/gists/{id}` API path.

    Returns:
        The response body parsed with `json.loads`.

    Raises:
        RuntimeError: On an HTTP error status (the message carries the status
            code and a rate-limit hint when the error body mentions one), or
            on any other failure while requesting or decoding the response.
            The original exception is chained as the cause.
    """
    request_headers = {
        "Accept": "application/vnd.github+json",
        "User-Agent": "transformer-builder-colab",
    }
    api_request = urllib.request.Request(
        f"https://api.github.com/gists/{gist_id}",
        headers=request_headers,
    )
    try:
        with urllib.request.urlopen(api_request, timeout=20) as response:
            raw_body = response.read()
            return json.loads(raw_body.decode("utf-8"))
    except urllib.error.HTTPError as http_err:
        status_text = f"HTTP {http_err.code}"
        try:
            error_body = http_err.read().decode("utf-8")
            if "rate limit" in error_body.lower():
                status_text = status_text + " (GitHub API rate limit; try later or authenticate)"
        except Exception:
            # The error body is only used for a friendlier hint; if it cannot
            # be read or decoded the plain status code is reported.
            pass
        raise RuntimeError(f"GitHub API error for gist {gist_id}: {status_text}") from http_err
    except Exception as other_err:
        raise RuntimeError(f"Network error fetching gist {gist_id}: {other_err}") from other_err

def _write(path: str, text: str):
    with open(path, "w") as f:
        f.write(text)

# 1) Primary: JS from page URL (parent/referrer)
js_result = _try_eval_js_for_params()

gist_id = None
model_name = "CustomTransformer"
# Name supplied through the URL fragment, if any. Tracked separately so the
# form default below cannot silently replace it.
url_model_name = None

if isinstance(js_result, dict) and js_result.get("ok"):
    gist_id = (js_result.get("gist_id") or "").strip() or None
    url_model_name = (js_result.get("name") or "").strip() or None
    model_name = url_model_name or model_name
elif isinstance(js_result, dict):
    print(f"‚ö†Ô∏è Could not read URL params via JS: {js_result.get('error')}")

# 2) Override with environment variable if present
env_gid = (os.environ.get("GIST_ID") or "").strip()
if env_gid:
    gist_id = env_gid

# 3) Fallback: user inputs (Colab form)
# Both form fields are used only when the corresponding value is still
# missing. The name field ships with a non-empty default, so applying it
# unconditionally would always discard the name read from the URL.
form_gist_id = gist_id_form.strip()
if not gist_id and form_gist_id:
    gist_id = form_gist_id
form_model_name = model_name_form.strip()
if not url_model_name and form_model_name:
    model_name = form_model_name

# 4) Validate gist_id
if gist_id and not _validate_gist_id(gist_id):
    print(f"‚ö†Ô∏è Invalid gist_id format: {gist_id!r}. Ignoring it.")
    gist_id = None

# Shared with later cells, which look the model class up by params["name"].
params = {"name": model_name}

# 5) Load from gist or fall back to example
# Any failure while fetching or validating the gist is reported and then
# handled by clearing gist_id, which routes execution into the example-model
# branch below.
if gist_id:
    print(f"üì• Loading model from GitHub Gist: {gist_id} (name={model_name})")
    try:
        gist_data = _fetch_gist(gist_id)
        files = gist_data.get("files") or {}
        if "model.py" not in files or "config.json" not in files:
            raise RuntimeError("Gist missing required files: model.py and/or config.json")

        # The gist API inlines only a limited amount of each file and flags
        # the rest with `truncated`; running or parsing a cut-off file would
        # fail in a confusing way later, so reject it here.
        for required_name in ("model.py", "config.json"):
            if files[required_name].get("truncated"):
                raise RuntimeError(
                    f"{required_name} was truncated by the GitHub API (file too large)"
                )

        model_code = files["model.py"].get("content", "")
        config_json = files["config.json"].get("content", "")
        if not model_code or not config_json:
            raise RuntimeError("Empty content in model.py or config.json")

        # Validate the config before anything is written: later cells call
        # json.load on it and expand it as keyword arguments, so it must be a
        # JSON object.
        try:
            parsed_config = json.loads(config_json)
        except ValueError as json_err:
            raise RuntimeError(f"config.json is not valid JSON: {json_err}") from json_err
        if not isinstance(parsed_config, dict):
            raise RuntimeError("config.json must contain a JSON object")

        _write("custom_transformer.py", model_code)
        _write("config.json", config_json)

        print("‚úÖ Model code loaded successfully")
        print(f"‚úÖ Gist URL: {gist_data.get('html_url', 'N/A')}")
        print(f"‚úÖ Code size: {len(model_code):,} bytes")
        print(f"‚úÖ Config size: {len(config_json):,} bytes")
    except Exception as e:
        print(f"‚ùå Failed to load model from Gist: {e}")
        print("‚ö†Ô∏è Falling back to example model...")
        gist_id = None

if not gist_id:
    print("‚ö†Ô∏è No valid gist_id found. Loading example model for demonstration...")
    example_code = """import torch
import torch.nn as nn

class ExampleTransformer(nn.Module):
    def __init__(self, vocab_size=50257, d_model=512, nhead=8, num_layers=6):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.output_projection = nn.Linear(d_model, vocab_size)
    
    def forward(self, input_ids):
        x = self.embedding(input_ids)
        x = self.transformer(x)
        return self.output_projection(x)
"""
    _write("custom_transformer.py", example_code)
    # Keys mirror the ExampleTransformer constructor parameters above.
    _write("config.json", json.dumps({
        "vocab_size": 50257,
        "d_model": 512,
        "nhead": 8,
        "num_layers": 6,
    }))
    params["name"] = "ExampleTransformer"
    print("‚úÖ Example model loaded")

## 📄 View Loaded Model Code

This cell displays the Python code that was loaded from your Transformer Builder export. You can review the architecture before running tests.

In [None]:
# Show the files written by the loading cell so the architecture can be
# reviewed before anything is executed.
_view_rule = "=" * 80


def _print_view_header(title):
    """Print *title* framed by two rules and followed by a blank line."""
    for header_line in (_view_rule, title, _view_rule, ""):
        print(header_line)


_print_view_header("üìÑ LOADED MODEL CODE (custom_transformer.py)")

with open('custom_transformer.py', 'r') as f:
    model_code_display = f.read()

# Render the source with syntax highlighting
from IPython.display import Code
highlighted_source = Code(model_code_display, language='python')
display(highlighted_source)

print()
_print_view_header("üìã MODEL CONFIGURATION (config.json)")

with open('config.json', 'r') as f:
    config_display = json.load(f)

# Pretty-print the parsed configuration
pretty_config = json.dumps(config_display, indent=2)
print(pretty_config)
print()
print("‚úÖ You can now proceed to run the model instantiation and tests below!")

## Dynamic Dependency Detection

Automatically detect and install any custom dependencies your model needs.

In [None]:
import ast
import importlib
import importlib.util
import subprocess
import sys

# Parse imports from generated code
with open('custom_transformer.py', 'r', encoding='utf-8') as f:
    source_code = f.read()
tree = ast.parse(source_code)

# Extract the top-level package of every absolute import.
# Relative imports (level > 0) point inside the model's own package and are
# never pip-installable, so they are skipped.
imports = set()
for node in ast.walk(tree):
    if isinstance(node, ast.Import):
        imports.update(alias.name.split('.')[0] for alias in node.names)
    elif isinstance(node, ast.ImportFrom) and node.module and node.level == 0:
        imports.add(node.module.split('.')[0])

print(f"Detected imports: {', '.join(sorted(imports))}")

# Standard library modules (don't need pip install).
# The hand-written list is kept as a floor and extended with the
# interpreter's own inventory where available (sys.stdlib_module_names exists
# on Python 3.10+), so modules such as itertools or random are not handed to
# pip.
stdlib_modules = {
    'abc', 'collections', 'dataclasses', 'functools', 'json', 'math',
    'typing', 'warnings', 'os', 'sys', 're', 'time', 'copy'
}
stdlib_modules |= set(getattr(sys, 'stdlib_module_names', ()))
stdlib_modules |= set(sys.builtin_module_names)

# Already installed
installed_modules = {
    'torch', 'transformers', 'numpy', 'scipy', 'matplotlib',
    'pandas', 'seaborn', 'tqdm', 'torchinfo', 'captum', 'optuna'
}


def _is_importable(module_name):
    """Return True when *module_name* can already be imported in this runtime."""
    if module_name in sys.modules:
        return True
    try:
        return importlib.util.find_spec(module_name) is not None
    except (ImportError, ValueError):
        return False


# Find missing packages: not stdlib, not known-installed, and not already
# importable (avoids reinstalling packages the runtime already provides).
missing = {
    name for name in imports - stdlib_modules - installed_modules
    if not _is_importable(name)
}

if missing:
    # NOTE(review): the names come from downloaded model code and are passed
    # to pip as-is. An import name is not necessarily the PyPI distribution
    # name, so this may install an unrelated package; review the list printed
    # below when loading gists you do not trust.
    print(f"\nInstalling additional dependencies: {', '.join(sorted(missing))}")
    for package in sorted(missing):
        try:
            subprocess.check_call(
                [sys.executable, '-m', 'pip', 'install', '-q', package],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL
            )
            print(f"  ‚úÖ Installed {package}")
        except subprocess.CalledProcessError:
            print(f"  ‚ö†Ô∏è Failed to install {package} (may not be a pip package)")
    # Make packages installed during this session visible to the import system.
    importlib.invalidate_caches()
else:
    print("\n‚úÖ All dependencies already installed")

## Import and Instantiate Model

Load your custom transformer and prepare for testing.

In [None]:
import torch
import torch.nn as nn
from torchinfo import summary

# Import the custom model by executing its source in the notebook namespace.
# NOTE(review): this runs whatever code the loading cell wrote to disk (from a
# gist or the built-in example). Only load gists you trust.
# The file is read through a context manager so the handle is closed, and the
# source is compiled with its real filename so tracebacks point at
# custom_transformer.py instead of "<string>".
with open('custom_transformer.py', encoding='utf-8') as f:
    _model_source = f.read()
exec(compile(_model_source, 'custom_transformer.py', 'exec'), globals())

# Load config
with open('config.json', encoding='utf-8') as f:
    config_dict = json.load(f)


def _is_model_class(obj):
    """Return True for proper subclasses of nn.Module (not nn.Module itself)."""
    return isinstance(obj, type) and issubclass(obj, nn.Module) and obj is not nn.Module


# Find the model class: first by the expected global name.
_model_candidates = {
    name: obj for name, obj in list(globals().items()) if _is_model_class(obj)
}
model_class = _model_candidates.get(params['name'])

if model_class is None:
    # Fallback: use any nn.Module subclass, preferring classes defined by the
    # executed model code (their __module__ is this namespace) over classes
    # that were merely imported into it.
    _defined_here = [
        (name, obj) for name, obj in _model_candidates.items()
        if obj.__module__ == __name__
    ]
    for name, obj in (_defined_here or list(_model_candidates.items())):
        model_class = obj
        print(f"‚ö†Ô∏è Using {name} (expected {params['name']})")
        break

if model_class:
    # Instantiate model; the config keys are passed as constructor keywords.
    try:
        model = model_class(**config_dict)
        model.eval()

        total_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

        print(f"‚úÖ Model instantiated: {model_class.__name__}")
        print(f"‚úÖ Total parameters: {total_params:,}")
        print(f"‚úÖ Trainable parameters: {trainable_params:,}")

        # Move to GPU if available
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = model.to(device)
        print(f"‚úÖ Device: {device}")

        # Display model summary (best effort: a failure here is only reported)
        print("\n--- Model Summary ---")
        try:
            # Create dummy input based on config
            vocab_size = config_dict.get('vocab_size', 50257)
            dummy_input = torch.randint(0, vocab_size, (1, 32)).to(device)
            summary(model, input_data=dummy_input, depth=3)
        except Exception as e:
            print(f"‚ö†Ô∏è Could not generate summary: {e}")

    except Exception as e:
        print(f"‚ùå Failed to instantiate model: {e}")
        raise
else:
    raise RuntimeError(f"Could not find model class '{params['name']}' in generated code")
# Create config object for test functions
class ModelConfig:
    """Attribute-style wrapper around the model configuration.

    Every keyword argument passed to the constructor becomes an instance
    attribute with the same name and value.
    """

    def __init__(self, **kwargs):
        for attr_name in kwargs:
            setattr(self, attr_name, kwargs[attr_name])

# Expose the parsed config.json values as attributes; this `config` object is
# what the test cells pass alongside `model`.
config = ModelConfig(**config_dict)
print("\n‚úÖ Ready for testing!")

---

# 🔍 Tier 1: Critical Validation

These tests verify your model is mathematically sound and ready for training.

**Estimated time:** ~1 minute

**What's tested:**
- ✅ Shape validation across edge cases
- ✅ Gradient flow (detect vanishing/exploding gradients)
- ✅ Numerical stability (NaN/Inf detection)
- ✅ Parameter initialization quality
- ✅ Memory footprint scaling
- ✅ Inference speed benchmarks

In [None]:
# Import test utilities from the cloned utils package
from utils.test_functions import (
    test_shape_robustness,
    test_gradient_flow,
    test_output_stability,
    test_parameter_initialization,
    test_memory_footprint,
    test_inference_speed
)

print("‚úÖ Test functions loaded from utils package")

In [None]:
# Tier 1 runner: executes the six critical checks in order. None of the calls
# is wrapped in try/except, so an exception raised by a check stops the cell.
_tier1_rule = "=" * 80
_tier1_divider = "-" * 80


def _tier1_heading(title):
    """Print a test title followed by the thin divider line."""
    print(title)
    print(_tier1_divider)


for _tier1_line in (_tier1_rule, "TIER 1: CRITICAL VALIDATION", _tier1_rule, ""):
    print(_tier1_line)

# Test 1: Shape Robustness
_tier1_heading("Test 1/6: Shape Validation")
shape_results = test_shape_robustness(model, config)
display(shape_results)
print()

# Test 2: Gradient Flow
_tier1_heading("Test 2/6: Gradient Flow Analysis")
grad_results = test_gradient_flow(model, config)
display(grad_results)
print()

# Test 3: Output Stability (result is kept but not displayed here)
_tier1_heading("Test 3/6: Numerical Stability")
stability_stats = test_output_stability(model, config, n_samples=100)
print()

# Test 4: Parameter Initialization
_tier1_heading("Test 4/6: Parameter Initialization")
param_results = test_parameter_initialization(model)
display(param_results)
print()

# Test 5: Memory Footprint
_tier1_heading("Test 5/6: Memory Footprint Analysis")
memory_results = test_memory_footprint(model, config)
display(memory_results)
print()

# Test 6: Inference Speed (result is kept but not displayed here)
_tier1_heading("Test 6/6: Inference Speed Benchmark")
speed_stats = test_inference_speed(model, config, n_trials=50)
print()

# NOTE(review): the closing message is printed whenever no check raised; the
# returned results are not inspected here. Confirm that the test functions
# raise on failure, otherwise "passed" may overstate the outcome.
_tier1_closing = (
    _tier1_rule,
    "‚úÖ TIER 1 VALIDATION COMPLETE",
    _tier1_rule,
    "",
    "All critical tests passed! Your model is ready for advanced analysis.",
    "",
    "Next steps:",
    "‚Ä¢ Scroll down for Tier 2 (Advanced Analysis)",
    "‚Ä¢ Or jump to Tier 3 (Training & Fine-Tuning)",
)
for _tier1_line in _tier1_closing:
    print(_tier1_line)

---

# 🔬 Tier 2: Advanced Analysis

Deep dive into model behavior with advanced diagnostic tools.

**Estimated time:** ~3-5 minutes

**What's tested:**
- 🎯 **Attention Patterns:** Visualize attention weights, detect collapsed attention, analyze head specialization
- 🔍 **Attribution Analysis:** Identify which input tokens contribute most to predictions (using Captum)
- 🛡️ **Robustness Testing:** Measure stability under input perturbations and noise

**Note:** These tests are optional but highly recommended for understanding model behavior.

In [None]:
# Import Tier 2 test functions
from utils.test_functions import (
    test_attention_patterns,
    test_attribution_analysis,
    test_robustness
)

# Tier 2 runner: each analysis is optional, so every call is guarded and a
# failure is reported as "skipped" instead of stopping the cell.
_tier2_rule = "=" * 80
_tier2_divider = "-" * 80

for _tier2_line in (_tier2_rule, "TIER 2: ADVANCED ANALYSIS", _tier2_rule, ""):
    print(_tier2_line)

# Test 1: Attention Patterns
print("Test 1/3: Attention Pattern Analysis")
print(_tier2_divider)
try:
    attention_results = test_attention_patterns(model, config)
    if attention_results is not None:
        display(attention_results)
    print("‚úÖ Attention analysis complete")
except Exception as attention_err:
    print(f"‚ö†Ô∏è Attention analysis skipped: {attention_err}")
print()

# Test 2: Attribution Analysis
print("Test 2/3: Input Attribution Analysis")
print(_tier2_divider)
try:
    attribution_results = test_attribution_analysis(model, config)
    if attribution_results is not None:
        print("\nTop Contributing Tokens:")
        # Each entry is unpacked as a (token, score) pair.
        for token, score in attribution_results.get("top_tokens", []):
            print(f"  {token:20s}: {score:+.4f}")
    print("‚úÖ Attribution analysis complete")
except Exception as attribution_err:
    print(f"‚ö†Ô∏è Attribution analysis skipped: {attribution_err}")
print()

# Test 3: Robustness Testing
print("Test 3/3: Robustness Under Noise")
print(_tier2_divider)
try:
    robustness_results = test_robustness(model, config, n_samples=20)
    if robustness_results is not None:
        display(robustness_results)
    print("‚úÖ Robustness analysis complete")
except Exception as robustness_err:
    print(f"‚ö†Ô∏è Robustness analysis skipped: {robustness_err}")
print()

_tier2_closing = (
    _tier2_rule,
    "‚úÖ TIER 2 ANALYSIS COMPLETE",
    _tier2_rule,
    "",
    "Next: Scroll down for Tier 3 (Training & Fine-Tuning)",
)
for _tier2_line in _tier2_closing:
    print(_tier2_line)

---

# 🚀 Tier 3: Training & Production Utilities

Advanced utilities for fine-tuning, hyperparameter optimization, and production benchmarking.

**Estimated time:** ~10-20 minutes (depends on training iterations)

**What's included:**
- 🎓 **Fine-Tuning:** Basic training loop with loss tracking and gradient monitoring
- 🔧 **Hyperparameter Search:** Automated optimization using Optuna (learning rate, batch size, warmup)
- 📊 **Benchmark Comparison:** Compare your model against production baselines (distilgpt2, bert-base, etc.)

**Note:** These are compute-intensive operations. Consider using GPU runtime for faster execution.

In [None]:
# Import Tier 3 training utilities
from utils.test_functions import (
    test_fine_tuning,
    test_hyperparameter_search,
    test_benchmark_comparison
)

# Tier 3 runner: fine-tuning demo, an opt-in hyperparameter search, and a
# baseline benchmark. Each active step is guarded so a failure is reported as
# "skipped" and the cell keeps going.
_tier3_rule = "=" * 80
_tier3_divider = "-" * 80

for _tier3_line in (_tier3_rule, "TIER 3: TRAINING & PRODUCTION UTILITIES", _tier3_rule, ""):
    print(_tier3_line)

# Test 1: Fine-Tuning
print("Test 1/3: Fine-Tuning Demo")
print(_tier3_divider)
print("Running 3 epochs of fine-tuning with synthetic data...")
_fine_tune_settings = {
    "num_epochs": 3,
    "batch_size": 2,
    "learning_rate": 5e-5,
}
try:
    fine_tune_results = test_fine_tuning(model, config, **_fine_tune_settings)
    print(f"\nFinal Loss: {fine_tune_results['final_loss']:.4f}")
    print(f"Best Loss: {fine_tune_results['best_loss']:.4f}")
    print("‚úÖ Fine-tuning complete")
except Exception as fine_tune_err:
    print(f"‚ö†Ô∏è Fine-tuning skipped: {fine_tune_err}")
print()

# Test 2: Hyperparameter Search (disabled by default; opt in below)
print("Test 2/3: Hyperparameter Optimization")
print(_tier3_divider)
print("‚ö†Ô∏è Skipping hyperparameter search (compute-intensive)")
print("To enable: uncomment the code block below")
print()
# Uncomment to run:
# try:
#     hp_results = test_hyperparameter_search(
#         model,
#         config,
#         n_trials=5,
#         epochs_per_trial=2
#     )
#     print("\nBest Parameters:")
#     for param, value in hp_results['best_params'].items():
#         print(f"  {param}: {value}")
#     print("‚úÖ Hyperparameter search complete")
# except Exception as e:
#     print(f"‚ö†Ô∏è Hyperparameter search failed: {e}")

# Test 3: Benchmark Comparison
# NOTE(review): this receives the same `model` object that was handed to the
# fine-tuning step above; whether that step modified its weights is not
# visible here - confirm if the benchmark should use the original weights.
print("Test 3/3: Benchmark Against Baseline")
print(_tier3_divider)
print("Comparing against distilgpt2 baseline...")
try:
    benchmark_results = test_benchmark_comparison(
        model, config, baseline_model="distilgpt2", n_samples=10
    )
    if benchmark_results is not None:
        display(benchmark_results)
    print("‚úÖ Benchmark comparison complete")
except Exception as benchmark_err:
    print(f"‚ö†Ô∏è Benchmark comparison skipped: {benchmark_err}")
print()

_tier3_closing = (
    _tier3_rule,
    "‚úÖ TIER 3 TRAINING UTILITIES COMPLETE",
    _tier3_rule,
    "",
    "üéâ All testing tiers complete! Your model is production-ready.",
)
for _tier3_line in _tier3_closing:
    print(_tier3_line)