In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Loss Landscape Analysis - Grokking Experiment

This notebook demonstrates how to visualize the loss landscape of a trained model
using the `nn_landscape` module.

Based on "Visualizing the Loss Landscape of Neural Nets" (Li et al., 2018).

In [1]:
import sys
from pathlib import Path

import torch
from torch.utils.data import DataLoader
from datasets import load_from_disk

from gradientlab.experiments.exp20260108_grokking.dataset.torch_dataset import DateToISODataset, DateCollate
from gradientlab.experiments.exp20260108_grokking.exp_config import ExpConfig

# Import the landscape module
from gradientlab.nn_landscape import LossLandscape, LandscapeConfig, VisualizationConfig

  from .autonotebook import tqdm as notebook_tqdm


## 1. Load the model and data

In [2]:
# Experiment configuration and compute-device selection
exp_cfg = ExpConfig()

# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Device: {device}")
print(f"Experiment directory: {exp_cfg.exp_dir}")

Device: cuda
Experiment directory: /media/mascit/data/Projects/python/gradientlab/src/gradientlab/experiments/exp20260108_grokking/data


In [3]:
# Load tokenizer
# NOTE(review): the model-creation cell further down rebinds `tokenizer` to the
# instance returned by ModelFactory.build_grokking_model(...). The validation
# DataLoader is built before that and keeps the instance created here —
# confirm both tokenizers are equivalent.
from gradientlab.tokenizers.byte_tokenizer import byte_tokenizer


tokenizer = byte_tokenizer()

In [4]:
# Read the prepared dataset from disk and report the size of each split
ds = load_from_disk(exp_cfg.ds_name)
train_split, test_split = ds["train"], ds["test"]
print(f"Train samples: {len(train_split)}")
print(f"Test samples: {len(test_split)}")

# Validation loader for landscape evaluation: the "test" split, read in a
# fixed (unshuffled) order with no worker subprocesses.
val_dataset = DateToISODataset(test_split)
date_collate = DateCollate(tokenizer)
val_loader = DataLoader(
    val_dataset,
    collate_fn=date_collate,
    batch_size=128,
    num_workers=0,
    shuffle=False,
)

Train samples: 4000
Test samples: 16000


In [5]:
# Create model
from gradientlab.experiments.exp20260108_grokking.modeling.factory import ModelFactory

# Resolve the checkpoint relative to the experiment directory instead of a
# machine-specific absolute path, so the notebook also runs on other checkouts.
checkpoint_path = Path(exp_cfg.exp_dir) / "model" / "model.pt"
if not checkpoint_path.is_file():
    # Fail early with a clear message rather than deep inside the model factory.
    raise FileNotFoundError(f"Model checkpoint not found: {checkpoint_path}")

# NOTE: this rebinds `tokenizer`; `val_loader` keeps the instance it was built with.
model, tokenizer, cfg = ModelFactory.build_grokking_model(checkpoint_path.as_posix())

# Move the model to the same `device` that is passed to LossLandscape, so the
# model and the landscape evaluation cannot end up on different devices.
model = model.to(device)
# Evaluation mode: the landscape is computed with a fixed, already-trained model.
model = model.eval()
print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")

Model parameters: 3,412,736


## 2. Compute the Loss Landscape

In [6]:
# Settings for the landscape computation, gathered in one mapping and then
# expanded into LandscapeConfig.
landscape_settings = dict(
    grid_size=31,               # 31x31 grid = 961 evaluations
    alpha_range=(-1.0, 1.0),
    beta_range=(-1.0, 1.0),
    num_batches=20,             # 20 batches per evaluation, for speed
    normalize_filter_wise=True,
    random_seed=42,
    use_mixed_precision=True,
)
landscape_config = LandscapeConfig(**landscape_settings)

print(f"Grid size: {landscape_config.grid_size}x{landscape_config.grid_size}")
print(f"Total evaluations: {landscape_config.grid_size ** 2}")

Grid size: 31x31
Total evaluations: 961


In [7]:
# Build the landscape object around the loaded model and the validation loader
landscape = LossLandscape(
    device=device,
    config=landscape_config,
    dataloader=val_loader,
    model=model,
)

# Slow step: runtime grows with grid_size and num_batches
landscape.compute(verbose=True)

Evaluating landscape: 100%|██████████| 961/961 [05:09<00:00,  3.11it/s]


<gradientlab.nn_landscape.landscape.LossLandscape at 0x706a4097ec10>

In [8]:
import pickle


In [9]:
# Restore a previously pickled landscape when a cache file is present.
# This cell sits before the cell that writes f.pkl, so on a fresh checkout the
# file does not exist yet; skip the load instead of aborting a top-to-bottom run.
# SECURITY: pickle.load can execute arbitrary code — only load files you created.
cache_path = Path("f.pkl")
if cache_path.is_file():
    with cache_path.open("rb") as fp:
        landscape = pickle.load(fp)
else:
    print(f"No cached landscape found at {cache_path}; skipping load.")

In [None]:

# Pickle the landscape object to f.pkl so it can be reloaded later
with open("f.pkl", mode="wb") as pickle_file:
    pickle.dump(landscape, pickle_file)

In [8]:
# Summary statistics of the computed loss grid, one line per value
loss_summary = (
    ("Center loss (reference)", landscape.center_loss),
    ("Minimum loss", landscape.min_loss),
    ("Maximum loss", landscape.max_loss),
)
for stat_label, stat_value in loss_summary:
    print(f"{stat_label}: {stat_value:.4f}")

Center loss (reference): 6.2691
Minimum loss: 6.0855
Maximum loss: 6.8170


## 3. Visualize the Landscape

In [None]:
# 3D surface view of the loss landscape (interactive figure)
surface_options = {
    "plot_type": "surface",
    "colorscale": "Viridis",
    "log_scale": True,  # log scale for better visualization
    "width": 800,
    "height": 600,
}
vis_config = VisualizationConfig(**surface_options)

fig = landscape.plot(vis_config)
fig.show()

In [11]:
# Shell escape: adds nbformat as a dev dependency of the project via uv.
# NOTE(review): presumably required so the figures shown with fig.show() can
# render inside the notebook — confirm. Environment changes like this are
# better made outside the notebook so a full re-run does not modify the project.
!uv add --dev nbformat

[2K[2mResolved [1m154 packages[0m [2min 503ms[0m[0m                                       [0m
[2K   [36m[1mBuilding[0m[39m gradientlab[2m @ file:///media/mascit/data/Projects/python/gradientlab[0m
[2K[1A   [36m[1mBuilding[0m[39m gradientlab[2m @ file:///media/mascit/data/Projects/python/gradientlab[0m
[37m⠙[0m [2mPreparing packages...[0m (0/4)
[2K[2A   [36m[1mBuilding[0m[39m gradientlab[2m @ file:///media/mascit/data/Projects/python/gradientlab[0m
[37m⠙[0m [2mPreparing packages...[0m (0/4)
[2K[2A   [36m[1mBuilding[0m[39m gradientlab[2m @ file:///media/mascit/data/Projects/python/gradientlab[0m
[37m⠙[0m [2mPreparing packages...[0m (0/4)
[2mreferencing         [0m [32m[2m------------------------------[0m[0m     0 B/26.14 KiB
[2K[3A   [36m[1mBuilding[0m[39m gradientlab[2m @ file:///media/mascit/data/Projects/python/gradientlab[0m
[37m⠙[0m [2mPreparing packages...[0m (0/4)
[2mreferencing         [0m [32m[2m-----------

In [12]:
# 2D contour view of the same loss grid
vis_config_contour = VisualizationConfig(
    num_contours=30,
    log_scale=True,
    colorscale="Viridis",
    plot_type="contour",
)

fig_contour = landscape.plot(vis_config_contour)
fig_contour.show()

In [13]:
# Surface and contour shown together in one figure
combined_options = {"plot_type": "both", "colorscale": "Viridis", "log_scale": True}
vis_config_both = VisualizationConfig(**combined_options)

fig_both = landscape.plot(vis_config_both)
fig_both.show()

## 4. Save Results

In [None]:
# Save interactive plot as HTML
output_dir = exp_cfg.exp_dir / "landscapes"
# parents=True: also create missing intermediate directories, so saving works
# even when the experiment directory itself has not been created yet.
output_dir.mkdir(parents=True, exist_ok=True)

landscape.save(
    output_dir / "landscape.html",
    vis_config=vis_config_both,
    include_data=True,  # Also saves .pt file with raw data
)

print(f"Saved to {output_dir}")

## 5. Analyze the Landscape

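Key observations to look for:
- **Sharp vs. flat minima**: Flat minima often generalize better than sharp ones.
- **Symmetry**: The shape of the landscape reveals optimization dynamics; compare how the loss changes along the two directions and on either side of the center.
- **Loss at center**: The center is the reference point of the landscape (the loaded model), so its loss should be at or near the minimum if the model is well trained.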

In [14]:
# 1D cuts through the landscape, drawn with a log-scale visualizer
from gradientlab.nn_landscape import LandscapeVisualizer

slice_vis_config = VisualizationConfig(log_scale=True)
visualizer = LandscapeVisualizer(slice_vis_config)

# Cut along the alpha direction (beta=0)
alpha_slice_inputs = (landscape.loss_grid, landscape.alphas, landscape.betas)
fig_alpha = visualizer.plot_1d_slice(
    *alpha_slice_inputs,
    direction="alpha",
    title="Loss slice along α (Direction 1)",
)
fig_alpha.show()

In [15]:
# Cut along the beta direction (alpha=0)
beta_slice_inputs = (landscape.loss_grid, landscape.alphas, landscape.betas)
fig_beta = visualizer.plot_1d_slice(
    *beta_slice_inputs,
    title="Loss slice along β (Direction 2)",
    direction="beta",
)
fig_beta.show()

In [None]:
# Cut along the diagonal, where alpha equals beta
diagonal_slice_inputs = (landscape.loss_grid, landscape.alphas, landscape.betas)
fig_diag = visualizer.plot_1d_slice(
    *diagonal_slice_inputs,
    title="Loss slice along diagonal (α = β)",
    direction="diagonal",
)
fig_diag.show()

## 6. Compare Landscapes at Different Training Stages

To see how the landscape evolves during training, use `LandscapeCallback`
in your training loop.

In [None]:
# Example: Load a previously saved landscape
# data = LossLandscape.load_data(output_dir / "landscape.pt")
# print(data.keys())  # dict_keys(['loss_grid', 'alphas', 'betas', 'direction1', 'direction2', 'reference_weights', 'config'])