# WeightWatcher Quantization Analysis - Qwen2.5 0.5B

This notebook analyzes how different quantization levels (FP16 baseline, 8-bit, and, when CUDA is available, 4-bit) affect the **alpha (α)** metric computed by WeightWatcher.

## Background

**WeightWatcher** uses Heavy-Tailed Random Matrix Theory (HTRMT) to analyze neural network layers without needing training or test data. The key metric is **alpha (α)**:

- α ∈ [2, 6]: Well-trained layer
- α > 6: Undertrained or poorly regularized
- α < 2: Over-regularized or corrupted

**Research Question**: How does quantization affect the alpha metric?

In [None]:
# Imports
import sys
import torch
from transformers import AutoModelForCausalLM
import weightwatcher as ww
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Plot defaults applied to every figure created below.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 8)

# MPS is only probed on macOS; every other platform reports 'N/A'.
if sys.platform == 'darwin':
    mps_status = torch.backends.mps.is_available()
else:
    mps_status = 'N/A'

# Environment report, useful when comparing runs across machines.
print(f"PyTorch version: {torch.__version__}")
print(f"WeightWatcher version: {ww.__version__}")
print(f"Platform: {sys.platform}")
print(f"MPS available: {mps_status}")

## Configuration

In [None]:
# Configuration
MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"

# Output locations, relative to the notebook's working directory.
RESULTS_DIR = Path("../results/metrics")
# The plotting cells save figures into ../results/plots. plt.savefig does not
# create missing directories, so that folder is created here together with the
# metrics folder.
PLOTS_DIR = RESULTS_DIR.parent / "plots"
for output_dir in (RESULTS_DIR, PLOTS_DIR):
    output_dir.mkdir(parents=True, exist_ok=True)

# Device selection: prefer CUDA, then Apple MPS, and fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"

print(f"Using device: {DEVICE}")

## Helper Functions

In [None]:
def analyze_model_ww(model, model_name, quantization):
    """Analyze `model` with WeightWatcher and return the resulting table.

    The frame returned by `WeightWatcher.analyze()` gets two extra columns,
    `model_name` and `quantization`, and a short summary of its `alpha`
    column is printed (row count, range, mean, and how many values fall
    inside [2, 6]).

    Args:
        model: Object handed to `ww.WeightWatcher(model=...)`.
        model_name: Value written to the `model_name` column.
        quantization: Label written to the `quantization` column and used
            in the progress message.

    Returns:
        The tagged results frame.
    """
    print(f"Analyzing {quantization} model...")

    # analyze() produces the table whose 'alpha' column is summarised below.
    results = ww.WeightWatcher(model=model).analyze()

    # Tag every row so frames from different runs can be told apart.
    results['model_name'] = model_name
    results['quantization'] = quantization

    alpha = results['alpha']
    n_layers = len(results)

    print(f"  Layers analyzed: {n_layers}")
    print(f"  Alpha range: [{alpha.min():.2f}, {alpha.max():.2f}]")
    print(f"  Alpha mean: {alpha.mean():.2f}")

    # Count the rows whose alpha lies in the closed interval [2, 6].
    in_range = (alpha >= 2) & (alpha <= 6)
    print(f"  Layers in optimal range [2,6]: {in_range.sum()}/{n_layers}")

    return results

def clear_memory():
    """Run the garbage collector, then empty the CUDA or MPS cache.

    CUDA is checked first; the MPS cache is only emptied when CUDA is not
    available. On a machine with neither backend only `gc.collect()` runs.
    """
    import gc

    gc.collect()

    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        return

    if torch.backends.mps.is_available():
        torch.mps.empty_cache()

## 1. Load and Analyze FP16 Model (Baseline)

In [None]:
# Baseline: load the checkpoint with float16 weights.
print("Loading FP16 model...")
fp16_load_kwargs = dict(
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True,
)
model_fp16 = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **fp16_load_kwargs)

print(f"Model loaded: {type(model_fp16).__name__}")

# Parameter count is reported in billions.
total_params = sum(param.numel() for param in model_fp16.parameters())
print(f"Total parameters: {total_params / 1e9:.2f}B")

In [None]:
# Run WeightWatcher on the FP16 baseline.
results_fp16 = analyze_model_ww(
    model=model_fp16,
    model_name=MODEL_NAME,
    quantization="fp16",
)

# Persist the table next to the other metrics.
fp16_csv_path = RESULTS_DIR / "results_fp16.csv"
results_fp16.to_csv(fp16_csv_path, index=False)
print("\nResults saved!")

# Preview the first ten rows as the cell output.
results_fp16.head(n=10)

In [None]:
# Clean up
# Drop the notebook's reference to the FP16 model and clear cached GPU/MPS
# memory before the next model is loaded.
# NOTE: `del` removes the name, so re-running this cell without re-running
# the loading cell first raises NameError.
del model_fp16
clear_memory()

## 2. Load and Analyze 8-bit Model

In [None]:
print("Loading 8-bit model...")

# Choose the backend by capability, not by operating system: the bitsandbytes
# path is only taken when CUDA is present (the same gate the 4-bit cell uses).
# Every other machine, macOS or a CPU-only Linux/Windows box, falls back to
# the simulated quantization helper instead of failing inside bitsandbytes.
if torch.cuda.is_available():
    # CUDA: Use bitsandbytes
    print("  Using bitsandbytes (CUDA)")
    from transformers import BitsAndBytesConfig

    # Pass the 8-bit request through `quantization_config`, matching the
    # 4-bit cell; the bare `load_in_8bit=True` kwarg is deprecated.
    model_8bit = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map="auto",
        low_cpu_mem_usage=True
    )
else:
    # No CUDA: Use simulated quantization
    print("  Using simulated 8-bit quantization (Mac-compatible)")
    # Project-local helper; its implementation is not visible from this
    # notebook. NOTE(review): presumably quantizes the weights of the loaded
    # FP16 model - confirm in quantization_utils.
    from quantization_utils import apply_quantization_to_model

    model_8bit = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16,
        device_map="auto",
        low_cpu_mem_usage=True
    )
    model_8bit = apply_quantization_to_model(model_8bit, bits=8, symmetric=True)

print("Model loaded!")

In [None]:
# Run WeightWatcher on the 8-bit model.
results_8bit = analyze_model_ww(
    model=model_8bit,
    model_name=MODEL_NAME,
    quantization="8bit",
)

# Persist the table next to the other metrics.
int8_csv_path = RESULTS_DIR / "results_8bit.csv"
results_8bit.to_csv(int8_csv_path, index=False)
print("\nResults saved!")

# Preview the first ten rows as the cell output.
results_8bit.head(n=10)

In [None]:
# Clean up
# Drop the notebook's reference to the 8-bit model and clear cached GPU/MPS
# memory before the next model is loaded.
# NOTE: `del` removes the name, so re-running this cell without re-running
# the loading cell first raises NameError.
del model_8bit
clear_memory()

## 3. Load and Analyze 4-bit Model (if available)

In [None]:
# The 4-bit path goes through bitsandbytes and is only attempted with CUDA.
# Without CUDA, `model_4bit` is set to None so later cells can skip it.
if not torch.cuda.is_available():
    print("4-bit quantization requires CUDA. Skipping...")
    model_4bit = None
else:
    print("Loading 4-bit model...")
    from transformers import BitsAndBytesConfig

    # NF4 quantization with double quantization and float16 compute.
    nf4_settings = {
        "load_in_4bit": True,
        "bnb_4bit_quant_type": "nf4",
        "bnb_4bit_compute_dtype": torch.float16,
        "bnb_4bit_use_double_quant": True,
    }
    bnb_config = BitsAndBytesConfig(**nf4_settings)

    model_4bit = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=bnb_config,
        device_map="auto",
        low_cpu_mem_usage=True,
    )

    print("Model loaded!")

In [None]:
# `model_4bit` is None when the loading cell skipped the 4-bit model; in that
# case `results_4bit` is None as well so the comparison cells can leave it out.
if model_4bit is None:
    results_4bit = None
else:
    # Run WeightWatcher on the 4-bit model.
    results_4bit = analyze_model_ww(model_4bit, MODEL_NAME, "4bit")

    # Persist the table next to the other metrics.
    nf4_csv_path = RESULTS_DIR / "results_4bit.csv"
    results_4bit.to_csv(nf4_csv_path, index=False)
    print("\nResults saved!")

    # Explicit display() because this is not the last expression of the cell.
    display(results_4bit.head(10))

    # Release the model before the comparison section.
    del model_4bit
    clear_memory()

## 4. Comparative Analysis

In [None]:
# Map display label -> results frame, in the order the levels were analyzed.
all_results = dict([
    ('FP16', results_fp16),
    ('8-bit', results_8bit),
])

# The 4-bit entry only exists when that analysis actually ran.
if results_4bit is not None:
    all_results.update({'4-bit': results_4bit})

print(f"Comparing {len(all_results)} quantization levels")

### Alpha Distribution Comparison

In [None]:
# One histogram panel per quantization level, laid out side by side.
n_panels = len(all_results)
fig, axes = plt.subplots(1, n_panels, figsize=(6 * n_panels, 5))

# With a single panel plt.subplots returns a bare Axes; wrap it so zip works.
if n_panels == 1:
    axes = [axes]

for ax, (quant, results) in zip(axes, all_results.items()):
    alpha_values = results['alpha']
    alpha_mean = alpha_values.mean()

    ax.hist(alpha_values, bins=30, alpha=0.7, edgecolor='black')
    # Shade the [2, 6] band and mark the mean with a dashed line.
    ax.axvspan(2, 6, alpha=0.2, color='green', label='Optimal [2,6]')
    ax.axvline(
        alpha_mean,
        color='red',
        linestyle='--',
        linewidth=2,
        label=f'Mean: {alpha_mean:.2f}',
    )

    ax.set_title(f'Alpha Distribution - {quant}', fontsize=14, fontweight='bold')
    ax.set_xlabel('Alpha (α)')
    ax.set_ylabel('Number of Layers')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('../results/plots/alpha_distributions_notebook.png', dpi=300, bbox_inches='tight')
plt.show()

### Boxplot Comparison

In [None]:
fig, ax = plt.subplots(figsize=(12, 7))

# One box per quantization level, in the insertion order of all_results.
labels = [level for level in all_results]
data = [all_results[level]['alpha'] for level in labels]

bp = ax.boxplot(data, labels=labels, patch_artist=True, notch=True, showmeans=True)

# Give each box its own semi-transparent colour from the 'husl' palette.
colors = sns.color_palette('husl', len(data))
for box_index, patch in enumerate(bp['boxes']):
    patch.set_facecolor(colors[box_index])
    patch.set_alpha(0.7)

# Shade the [2, 6] band for reference.
ax.axhspan(2, 6, alpha=0.15, color='green', label='Optimal Range [2,6]')
ax.set_title('Alpha Distribution Comparison', fontsize=16, fontweight='bold')
ax.set_xlabel('Quantization Level', fontsize=14)
ax.set_ylabel('Alpha (α)', fontsize=14)
ax.legend()
ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('../results/plots/alpha_boxplot_notebook.png', dpi=300, bbox_inches='tight')
plt.show()

### Summary Statistics Table

In [None]:
# Build one pre-formatted summary row per quantization level.
summary_data = []

for quant, results in all_results.items():
    alpha = results['alpha']
    n_layers = len(results)
    # Number of rows whose alpha lies in the closed interval [2, 6].
    optimal = (alpha.ge(2) & alpha.le(6)).sum()

    row = {'Quantization': quant, 'Layers': n_layers}
    # Descriptive statistics, all rendered with three decimals.
    for column, value in (
        ('Mean α', alpha.mean()),
        ('Std α', alpha.std()),
        ('Median α', alpha.median()),
        ('Min α', alpha.min()),
        ('Max α', alpha.max()),
    ):
        row[column] = f"{value:.3f}"
    row['Optimal Layers'] = f"{optimal}/{n_layers}"
    row['Optimal %'] = f"{(optimal/n_layers*100):.1f}%"

    summary_data.append(row)

summary_df = pd.DataFrame(summary_data)
print("\nSUMMARY STATISTICS")
print("=" * 100)
display(summary_df)

# Written one level above RESULTS_DIR (i.e. into ../results).
summary_csv_path = RESULTS_DIR / '../quantization_comparison_notebook.csv'
summary_df.to_csv(summary_csv_path, index=False)
print("\nSaved to: quantization_comparison_notebook.csv")

### Change from Baseline (FP16)

In [None]:
# Mean alpha of the FP16 run is the reference for every other level.
baseline_mean = results_fp16['alpha'].mean()

print(f"Baseline (FP16) mean alpha: {baseline_mean:.3f}\n")
print("Change from baseline:")
print("-" * 60)

for quant, results in all_results.items():
    if quant == 'FP16':
        continue

    mean_diff = results['alpha'].mean() - baseline_mean
    pct_change = (mean_diff / baseline_mean) * 100

    print(f"{quant:10s}: {mean_diff:+.3f} ({pct_change:+.2f}%)")

    # Layer-wise correlation.
    # Prefer pairing rows by `layer_id` so the comparison still works when the
    # two runs contain a different number of rows, instead of silently
    # skipping it. NOTE(review): assumes `layer_id` identifies the same layer
    # in both runs - confirm for bitsandbytes-loaded models.
    if 'layer_id' in results.columns and 'layer_id' in results_fp16.columns:
        paired = results_fp16[['layer_id', 'alpha']].merge(
            results[['layer_id', 'alpha']],
            on='layer_id',
            suffixes=('_fp16', '_quant'),
        )
        corr = paired['alpha_quant'].corr(paired['alpha_fp16'])
        print(f"            Correlation with FP16: {corr:.3f}")
        if len(paired) != len(results_fp16) or len(paired) != len(results):
            print(f"            (computed on {len(paired)} layers present in both runs)")
    elif len(results) == len(results_fp16):
        # Fallback: no layer identifier available, pair rows by index.
        corr = results['alpha'].corr(results_fp16['alpha'])
        print(f"            Correlation with FP16: {corr:.3f}")
    else:
        # Make the omission visible rather than printing nothing.
        print(
            "            Correlation with FP16: skipped "
            f"(layer counts differ: {len(results)} vs {len(results_fp16)})"
        )
    print()

### Layer-wise Alpha Comparison

In [None]:
fig, ax = plt.subplots(figsize=(16, 7))

# One line per quantization level, each with its own 'husl' colour.
colors = sns.color_palette('husl', len(all_results))

for line_color, (quant, results) in zip(colors, all_results.items()):
    # The x axis is simply the row position within that run's results frame.
    layer_positions = range(len(results))
    ax.plot(
        layer_positions,
        results['alpha'],
        marker='o',
        markersize=3,
        linewidth=1.5,
        alpha=0.7,
        label=quant,
        color=line_color,
    )

# Shade the [2, 6] band for reference.
ax.axhspan(2, 6, alpha=0.1, color='green', label='Optimal Range')
ax.set_title('Layer-wise Alpha Comparison', fontsize=16, fontweight='bold')
ax.set_xlabel('Layer Index', fontsize=14)
ax.set_ylabel('Alpha (α)', fontsize=14)
ax.legend(loc='best')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('../results/plots/layerwise_comparison_notebook.png', dpi=300, bbox_inches='tight')
plt.show()

## 5. Insights and Conclusions

In [None]:
# Assemble the report line by line, then print it in one go.
banner = "=" * 80

report_lines = [
    banner,
    "KEY INSIGHTS",
    banner,
    "",
    "1. Alpha Metric Stability:",
    "   - Alpha values indicate layer quality based on spectral properties",
    "   - Optimal range is [2, 6] for well-generalized layers",
    "",
    "2. Quantization Impact:",
]

# Share of rows whose alpha lies in [2, 6], per quantization level.
for quant, results in all_results.items():
    within_band = (results['alpha'] >= 2) & (results['alpha'] <= 6)
    optimal_pct = within_band.sum() / len(results) * 100
    report_lines.append(f"   - {quant}: {optimal_pct:.1f}% of layers in optimal range")

report_lines += [
    "",
    "3. Recommendations:",
    "   - If alpha degrades significantly, quantization may hurt generalization",
    "   - Layers with alpha > 6 after quantization may need special handling",
    "   - Consider layer-wise mixed precision for critical layers",
    "",
    banner,
]

print("\n".join(report_lines))

## 6. Export Results

All results have been saved to:
- `../results/metrics/results_*.csv` - Individual quantization results
- `../results/plots/*.png` - Generated visualizations
- `../results/quantization_comparison_notebook.csv` - Summary comparison

In [None]:
# Closing status message listing where the outputs were written.
closing_lines = [
    "Analysis complete!",
    f"\nResults saved to: {RESULTS_DIR}",
    "Plots saved to: ../results/plots/",
]
print(*closing_lines, sep="\n")