# 🧪 Transformer Builder - Advanced Testing Lab

Welcome! This notebook provides comprehensive testing and training capabilities for your custom transformer architecture.

**What's included:**
- ✅ **Tier 1:** Critical validation (shape, gradients, numerical stability)
- 🔬 **Tier 2:** Advanced analysis (attention patterns, robustness, profiling)
- 🚀 **Tier 3:** Training utilities (fine-tuning, hyperparameter sweeps, benchmarks)

**Quick Start:**
1. Click "Run all" (Runtime → Run all)
2. Review Tier 1 results (should complete in ~1 minute)
3. Explore Tier 2/3 sections as needed

**Source:** Generated from [Transformer Builder](https://transformer-builder.com)

---

## Setup: Install Dependencies

This may take 30-60 seconds on first run.

In [None]:
# Install core dependencies
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q transformers datasets evaluate accelerate
!pip install -q scipy matplotlib seaborn pandas tqdm
!pip install -q torchinfo  # For model summaries

print("✓ Core dependencies installed")

# Install Tier 2 dependencies (optional)
!pip install -q captum  # For attribution analysis
print("✓ Tier 2 dependencies installed")

# Install Tier 3 dependencies (optional)
!pip install -q optuna  # For hyperparameter optimization
print("✓ Tier 3 dependencies installed")

print("\n✅ All dependencies ready!")

## Load Custom Model from URL

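This cell extracts your model code and config from the URL fragment (passed from Transformer Builder). If the URL carries no model or no config, a small example transformer is written instead so the rest of the notebook can still run.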

In [None]:
import base64
import json
from google.colab import output

# JavaScript to extract URL fragment parameters.
# The script parses the text after '#' in the page URL as query-style
# parameters and returns the 'model', 'config' and 'name' values; 'name'
# falls back to 'CustomTransformer' when it is absent or empty.
js_script = """
function getFragmentParameter(name) {
  const urlParams = new URLSearchParams(window.location.hash.substring(1));
  return urlParams.get(name);
}
return {
  model: getFragmentParameter('model'),
  config: getFragmentParameter('config'),
  name: getFragmentParameter('name') || 'CustomTransformer'
};
"""

# Extract parameters: the result is indexed below with the keys
# 'model', 'config' and 'name'.
# NOTE(review): this assumes window.location in the eval_js context is the
# notebook URL that carries the fragment — TODO confirm.
params = output.eval_js(js_script)

def decode_urlsafe_b64(s):
    """Decode a URL-safe Base64 string into UTF-8 text.

    Trailing '=' padding may be omitted from the input; it is restored
    before decoding. Returns None when ``s`` is empty or None.
    """
    if not s:
        return None
    # Map the URL-safe alphabet ('-', '_') back onto the standard one ('+', '/').
    standard = s.translate(str.maketrans('-_', '+/'))
    # Number of '=' characters needed to reach a multiple of four (0 if aligned).
    pad_len = -len(standard) % 4
    raw = base64.b64decode(standard + '=' * pad_len)
    return raw.decode('utf-8')

# Materialize the model source and its config on disk. Both URL payloads must
# be present; otherwise a small example model is written so the notebook can
# still be run end to end.
if params['model'] and params['config']:
    # Decode and write model code
    model_code = decode_urlsafe_b64(params['model'])
    with open('custom_transformer.py', 'w') as f:
        f.write(model_code)

    # Decode and write config
    config_json = decode_urlsafe_b64(params['config'])
    with open('config.json', 'w') as f:
        f.write(config_json)

    print("✅ Model code loaded successfully")
    print(f"✅ Model name: {params['name']}")
    # Measure the UTF-8 encoding so the "bytes" label is accurate: len() on a
    # str counts characters, which under-reports code containing non-ASCII text.
    print(f"✅ Code size: {len(model_code.encode('utf-8')):,} bytes")
    print(f"✅ Config size: {len(config_json.encode('utf-8')):,} bytes")

    # Display model code preview (split once, reuse for both the slice and the check)
    print("\n--- Model Code Preview (first 30 lines) ---")
    model_code_lines = model_code.split('\n')
    print('\n'.join(model_code_lines[:30]))
    if len(model_code_lines) > 30:
        print("...")

else:
    print("⚠️ No model code found in URL")
    print("Loading example model for demonstration...\n")

    # Fallback: Create example model
    example_code = """import torch
import torch.nn as nn

class ExampleTransformer(nn.Module):
    def __init__(self, vocab_size=50257, d_model=512, nhead=8, num_layers=6):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.output_projection = nn.Linear(d_model, vocab_size)
    
    def forward(self, input_ids):
        x = self.embedding(input_ids)
        x = self.transformer(x)
        return self.output_projection(x)
"""

    with open('custom_transformer.py', 'w') as f:
        f.write(example_code)

    # The config keys mirror the keyword arguments of ExampleTransformer.__init__.
    with open('config.json', 'w') as f:
        json.dump({
            "vocab_size": 50257,
            "d_model": 512,
            "nhead": 8,
            "num_layers": 6
        }, f)

    # Later cells look the model class up by this name.
    params['name'] = 'ExampleTransformer'
    print("✅ Example model loaded")

## Dynamic Dependency Detection

Automatically detect and install any custom dependencies your model needs.

In [None]:
import ast
import importlib.util
import subprocess
import sys

# Parse imports from generated code
with open('custom_transformer.py', 'r') as f:
    source_code = f.read()
tree = ast.parse(source_code)

# Extract the top-level package name of every absolute import
imports = set()
for node in ast.walk(tree):
    if isinstance(node, ast.Import):
        imports.update(alias.name.split('.')[0] for alias in node.names)
    elif isinstance(node, ast.ImportFrom):
        # level > 0 marks a relative import ("from .x import y"), which refers
        # to the model's own package and is not something pip can install.
        if node.module and node.level == 0:
            imports.add(node.module.split('.')[0])

print(f"Detected imports: {', '.join(sorted(imports))}")

# Standard library modules (don't need pip install).
# sys.stdlib_module_names (Python 3.10+) supplies the complete list; the
# literal set remains as a minimal fallback for older interpreters.
stdlib_modules = {
    'abc', 'collections', 'dataclasses', 'functools', 'json', 'math',
    'typing', 'warnings', 'os', 'sys', 're', 'time', 'copy'
} | set(getattr(sys, 'stdlib_module_names', ()))

# Already installed
installed_modules = {
    'torch', 'transformers', 'numpy', 'scipy', 'matplotlib',
    'pandas', 'seaborn', 'tqdm', 'torchinfo', 'captum', 'optuna'
}


def _is_importable(module_name):
    """Return True when a top-level module can already be located."""
    try:
        return importlib.util.find_spec(module_name) is not None
    except (ImportError, ValueError):
        # find_spec raises ValueError for a loaded module without a __spec__;
        # treat any lookup failure as "not available".
        return False


# Find missing packages: skip anything that is stdlib, known-installed, or
# already importable in this environment.
missing = {
    name for name in imports - stdlib_modules - installed_modules
    if not _is_importable(name)
}

if missing:
    # NOTE(security): these names come from the model source, and an import
    # name is not always the PyPI project name, so pip may fetch an unrelated
    # package. Review the list before trusting the result.
    print(f"\nInstalling additional dependencies: {', '.join(sorted(missing))}")
    for package in sorted(missing):
        try:
            subprocess.check_call(
                [sys.executable, '-m', 'pip', 'install', '-q', package],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL
            )
            print(f"  ✅ Installed {package}")
        except subprocess.CalledProcessError:
            print(f"  ⚠️ Failed to install {package} (may not be a pip package)")
else:
    print("\n✅ All dependencies already installed")

## Import and Instantiate Model

Load your custom transformer and prepare for testing.

In [None]:
import torch
import torch.nn as nn
from torchinfo import summary

# Import the custom model
# NOTE(security): this executes code that was delivered through the notebook
# URL (or the bundled example). Only open builder links you trust.
# Reading inside a `with` block closes the file handle deterministically.
with open('custom_transformer.py') as f:
    model_source = f.read()
exec(model_source)

# Load config
with open('config.json') as f:
    config_dict = json.load(f)


def _is_model_class(obj):
    """Return True for any nn.Module subclass other than nn.Module itself."""
    return isinstance(obj, type) and issubclass(obj, nn.Module) and obj is not nn.Module


# Find the model class: first try the expected name directly.
model_class = globals().get(params['name'])
if not _is_model_class(model_class):
    model_class = None

if model_class is None:
    # Fallback: find any nn.Module subclass. Classes defined in this namespace
    # (i.e. by the executed model code) are preferred over classes that the
    # model code merely imported, such as torch's own layers.
    candidates = [
        (name, obj) for name, obj in list(globals().items()) if _is_model_class(obj)
    ]
    defined_here = [pair for pair in candidates if pair[1].__module__ == __name__]
    for name, obj in (defined_here or candidates)[:1]:
        model_class = obj
        print(f"⚠️ Using {name} (expected {params['name']})")

if model_class is None:
    raise RuntimeError(f"Could not find model class '{params['name']}' in generated code")

# Instantiate model; every config entry is passed as a keyword argument.
try:
    model = model_class(**config_dict)
    model.eval()

    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"✅ Model instantiated: {model_class.__name__}")
    print(f"✅ Total parameters: {total_params:,}")
    print(f"✅ Trainable parameters: {trainable_params:,}")

    # Move to GPU if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    print(f"✅ Device: {device}")

    # Display model summary; a failure here is reported but does not abort the cell.
    print("\n--- Model Summary ---")
    try:
        # Create dummy input based on config: one sequence of 32 random token ids
        vocab_size = config_dict.get('vocab_size', 50257)
        dummy_input = torch.randint(0, vocab_size, (1, 32)).to(device)
        summary(model, input_data=dummy_input, depth=3)
    except Exception as e:
        print(f"⚠️ Could not generate summary: {e}")

except Exception as e:
    print(f"❌ Failed to instantiate model: {e}")
    raise

# Create config object for test functions
class ModelConfig:
    """Attribute-style view of a configuration mapping.

    Each keyword argument given to the constructor becomes an instance
    attribute with the same name and value.
    """

    def __init__(self, **kwargs):
        for attr_name in kwargs:
            setattr(self, attr_name, kwargs[attr_name])

# Expose the config entries as attributes; this object is what the test
# calls in later cells receive as their `config` argument.
config = ModelConfig(**config_dict)
print("\n✅ Ready for testing!")

---

# 🔍 Tier 1: Critical Validation

These tests verify your model is mathematically sound and ready for training.

**Estimated time:** ~1 minute

**What's tested:**
- ✅ Shape validation across edge cases
- ✅ Gradient flow (detect vanishing/exploding gradients)
- ✅ Numerical stability (NaN/Inf detection)
- ✅ Parameter initialization quality
- ✅ Memory footprint scaling
- ✅ Inference speed benchmarks

In [None]:
# Import test utilities
!wget -q https://raw.githubusercontent.com/matt-hans/transformer-builder-colab-templates/main/utils/test_functions.py
from test_functions import (
    test_shape_robustness,
    test_gradient_flow,
    test_output_stability,
    test_parameter_initialization,
    test_memory_footprint,
    test_inference_speed
)

print("✅ Test functions loaded")

In [None]:
def _run_tier1_test(number, title, test_fn, *args, show_result=True, total=6, **kwargs):
    """Print a numbered header, run one Tier 1 test, and return its result.

    Args:
        number: 1-based position of the test within the tier.
        title: Human-readable test name shown in the header.
        test_fn: Callable to execute.
        *args, **kwargs: Forwarded unchanged to ``test_fn``.
        show_result: When True, render the returned value with ``display``.
        total: Number of tests in the tier, used in the "n/total" header.

    Returns:
        Whatever ``test_fn`` returns.
    """
    print(f"Test {number}/{total}: {title}")
    print("-" * 80)
    result = test_fn(*args, **kwargs)
    if show_result:
        display(result)
    print()
    return result


print("=" * 80)
print("TIER 1: CRITICAL VALIDATION")
print("=" * 80)
print()

# Each result is kept in its own variable so it can be inspected afterwards.
shape_results = _run_tier1_test(
    1, "Shape Validation", test_shape_robustness, model, config)
grad_results = _run_tier1_test(
    2, "Gradient Flow Analysis", test_gradient_flow, model, config)
# The stability and speed stats are stored but not rendered with display().
stability_stats = _run_tier1_test(
    3, "Numerical Stability", test_output_stability, model, config,
    n_samples=100, show_result=False)
param_results = _run_tier1_test(
    4, "Parameter Initialization", test_parameter_initialization, model)
memory_results = _run_tier1_test(
    5, "Memory Footprint Analysis", test_memory_footprint, model, config)
speed_stats = _run_tier1_test(
    6, "Inference Speed Benchmark", test_inference_speed, model, config,
    n_trials=50, show_result=False)

print("=" * 80)
print("✅ TIER 1 VALIDATION COMPLETE")
print("=" * 80)
print()
# This cell never inspects the returned results, so it reports completion only
# rather than claiming that every test passed.
print("All Tier 1 tests ran to completion. Review the results above before moving on to advanced analysis.")
print()
print("Next steps:")
print("• Scroll down for Tier 2 (Advanced Analysis)")
print("• Or jump to Tier 3 (Training & Fine-Tuning)")