In [None]:
import torch

from NeuroVisualizer.neuro_aux.AEmodel import UniformAutoencoder

from helper.neuro_viz import get_dataloader_flat

# Use the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Neuro-Visualizer
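This notebook trains an autoencoder (AE) on the flattened checkpoints of the selected runs and uses the NeuroVisualizer to plot the resulting 2-D loss landscape together with the training trajectories.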

In [None]:
# MNIST CNN runs without residual connections.
# NOTE: every configuration cell assigns the same three names
# (dataset_name, run_ids, titles), so only the cell executed last takes effect.
dataset_name = 'mnist'

# Each pair is (run id, plot title); keeping them side by side avoids mismatches.
run_ids, titles = map(list, zip(
    ("run-0011-CNN_mnist_32_0.9776", "SGD, 0.9776"),
    ("run-0012-CNN_mnist_32_0.9768", "SAM, 0.9768"),
))

In [None]:
# MNIST CNN runs: SAM vs. SGD for three seeds.
# NOTE: every configuration cell assigns the same three names
# (dataset_name, run_ids, titles), so only the cell executed last takes effect.
dataset_name = 'mnist'

# Each pair is (run id, plot title).
run_ids, titles = map(list, zip(
    ("run-0042-CNN_mnist_32_0.9387", "Seed 42, SAM, 0.9387"),
    ("run-0043-CNN_mnist_32_0.9387", "Seed 42, SGD, 0.9387"),
    ("run-0044-CNN_mnist_32_0.9445", "Seed 11, SAM, 0.9445"),
    ("run-0045-CNN_mnist_32_0.9450", "Seed 11, SGD, 0.9450"),
    ("run-0046-CNN_mnist_32_0.9403", "Seed 6, SAM, 0.9403"),
    ("run-0047-CNN_mnist_32_0.9403", "Seed 6, SGD, 0.9403"),
))

In [None]:
# MNIST ViT runs for three seeds.
# NOTE: every configuration cell assigns the same three names
# (dataset_name, run_ids, titles), so only the cell executed last takes effect.
dataset_name = 'mnist'

# Each pair is (run id, plot title).
run_ids, titles = map(list, zip(
    ("run-0048-ViT_mnist_32_0.9759", "ViT Seed 42, 0.9759"),
    ("run-0049-ViT_mnist_32_0.9778", "ViT Seed 11, 0.9778"),
    ("run-0050-ViT_mnist_32_0.9743", "ViT Seed 06, 0.9743"),
))

In [None]:
# MNIST CNN runs with residual connections.
# NOTE: every configuration cell assigns the same three names
# (dataset_name, run_ids, titles), so only the cell executed last takes effect.
dataset_name = 'mnist'

# Each pair is (run id, plot title).
run_ids, titles = map(list, zip(
    ("run-0013-CNN_mnist_32_0.9797", "SGD, 0.9797"),
    ("run-0014-CNN_mnist_32_0.9744", "SAM, 0.9744"),
))

In [None]:
# CNN x CIFAR 10: SAM vs. SGD for three seeds.
# NOTE: every configuration cell assigns the same three names
# (dataset_name, run_ids, titles), so only the cell executed last takes effect.
dataset_name = 'cifar10'

# Each pair is (run id, plot title).
run_ids, titles = map(list, zip(
    ("run-0017-CNN_cifar10_128_0.8072", "Seed 42, SAM, 0.8072"),
    ("run-0019-CNN_cifar10_128_0.8487", "Seed 42, SGD, 0.8487"),
    ("run-0021-CNN_cifar10_128_0.8054", "Seed 11, SAM, 0.8054"),
    ("run-0023-CNN_cifar10_128_0.8509", "Seed 11, SGD, 0.8509"),
    ("run-0025-CNN_cifar10_128_0.8062", "Seed 6, SAM, 0.8062"),
    ("run-0027-CNN_cifar10_128_0.8503", "Seed 6, SGD, 0.8503"),
))

In [None]:
# CNN Residual x CIFAR 10: SAM vs. SGD for three seeds.
# NOTE: every configuration cell assigns the same three names
# (dataset_name, run_ids, titles), so only the cell executed last takes effect.
dataset_name = 'cifar10'

run_ids = [
    "run-0016-CNN_cifar10_128_0.8093",  # Seed 42, SAM, Residual
    "run-0018-CNN_cifar10_128_0.8499",  # Seed 42, SGD, Residual
    "run-0020-CNN_cifar10_128_0.8079",  # Seed 11, SAM, Residual
    "run-0022-CNN_cifar10_128_0.8519",  # Seed 11, SGD, Residual
    "run-0024-CNN_cifar10_128_0.8062",  # Seed 6, SAM, Residual
    "run-0026-CNN_cifar10_128_0.8504",  # Seed 6, SGD, Residual
]

# One title per run, in the same order as run_ids; the trailing number is the
# accuracy suffix of the corresponding run id.
titles = [
    "Seed 42, SAM, Residual 0.8093",
    "Seed 42, SGD, Residual 0.8499",
    "Seed 11, SAM, Residual 0.8079",
    "Seed 11, SGD, Residual 0.8519",
    "Seed 6, SAM, Residual 0.8062",  # was "0.0.8062" (typo)
    "Seed 6, SGD, Residual 0.8504",
]

In [None]:
# Switch for LMC (presumably "linear mode connectivity" — confirm) data.
# It is forwarded to get_dataloader_flat() for AE training, appends "-LMC" to the
# AE checkpoint file name, and is used when naming the saved plot files.
include_lmc = False

### Load Paths

In [None]:
from helper.visualization import Run

# One Run object per configured run id, in the same order as run_ids / titles.
runs = [Run(run_id, dataset_name) for run_id in run_ids]

In [None]:
# Show the stored training configuration of every run for a quick sanity check.
for run in runs:
    print(run.results["train_config"])

In [None]:
# One list of checkpoint (.pt) files per run, in the same order as `runs`.
pt_files_per_run = [run.get_pt_files() for run in runs]

In [None]:
import os

# Identifier of this comparison: the "ll_flattened_weights_dir" entries of all runs,
# joined with " x ". It is reused for the AE checkpoint and for the plot file names.
vis_id = ' x '.join(run.results["ll_flattened_weights_dir"] for run in runs)
model_file = f'ae_models/{vis_id}{"-LMC" if include_lmc else ""}.pt'

# torch.save() does not create missing directories, so make sure the checkpoint
# folder exists before training/saving (same approach as for the 'plots' folder).
os.makedirs(os.path.dirname(model_file), exist_ok=True)
print(model_file)

#model_file = "ae_models/run-0016-CNN x run-0018-CNN x run-0020-CNN x run-0022-CNN x run-0024-CNN x run-0026-CNN.pt"

In [None]:
# Display the collected checkpoint files (one list per run).
pt_files_per_run

In [None]:
# Filter for final epochs only

# pt_files_per_run = []

# for run in runs:
#     min_loss = min(run.results["val_losses"])
#     max_visualize = min_loss * 1.1
#     print(max_visualize)
#     # Find in run.results["val_losses"] idx where val losses is first below max_visualize
#     idx = next((i for i, v in enumerate(run.results["val_losses"]) if v <= max_visualize), None)
#     pt_files_per_run.append(run.get_pt_files()[idx:])
#
# pt_files_per_run

## Train AE Model
Run this section to train the autoencoder (AE) on the checkpoints collected above.

In [None]:
# Batch size used for AE training (the original note suggests values of 4 - 32).
batch_size = 3 #4 - 32 Batch Size of AE Training

# Build the shuffled training loader over all checkpoints of all runs.
# The second return value is only bound here; the visualization section below
# obtains its own `transform` from a second get_dataloader_flat() call.
loader, normalizer = get_dataloader_flat(
    pt_files_per_run,
    batch_size,
    include_lmc=include_lmc,
    shuffle=True,
)

In [None]:
# Release cached, currently unused GPU memory before building the autoencoder.
torch.cuda.empty_cache()

Adjust: choose hidden dimensions small enough that the autoencoder for this model size still fits on the available GPU.

In [None]:
# The AE input size is the length of one dataset item (one flattened checkpoint).
input_dim = loader.dataset[0].shape[0]
print(f"Input dimension: {input_dim}")

# 2-D latent space so the landscape can be plotted directly.
latent_dim = 2
num_layers = 4

# Optional explicit hidden sizes (uncomment one `h` and the matching constructor call).
# Aggressive compression (scales with first hidden dim)
#h = [input_dim, 64, 32, 8]
#h = [input_dim, 126, 64, 32]
#h = [input_dim, 200, 100, 50]
#ae = UniformAutoencoder(input_dim, num_layers, latent_dim, h=h).to(device)

# NOTE: the visualization section rebuilds the AE with the same arguments;
# keep both places in sync or the checkpoint will not load.
ae = UniformAutoencoder(input_dim, num_layers, latent_dim).to(device)

In [None]:
# Count all and trainable (requires_grad) autoencoder parameters in one pass.
total_params = 0
trainable_params = 0
for p in ae.parameters():
    n_elements = p.numel()
    total_params += n_elements
    if p.requires_grad:
        trainable_params += n_elements

# Size estimate: 4 bytes per parameter, reported in MiB.
size_mb = total_params * 4 / (1024**2)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Approx. size: {size_mb:.2f} MB")

In [None]:
# Load from previous train process (if available, eg. after a crash)
# ae.load_state_dict(torch.load(model_file, weights_only=True))

In [None]:
from helper.neuro_viz import train_autoencoder

# Train the autoencoder on the checkpoint loader; `model_file` is passed as save path.
# The exact meaning of patience / last_saved_loss is defined in helper.neuro_viz —
# the inline notes below are the author's; confirm against the helper.
trained_model = train_autoencoder(
    model=ae,
    train_loader=loader,
    device=device,
    save_path=model_file,
    num_epochs=500, #1000 would be great
    lr=0.01, # Start with 0.01
    patience=15,
    avoid_overheat=False, # Avoids crashes on Nembus Computer
    last_saved_loss=0.08, # Minimum Loss to save
    verbose=False
)
# ~ 0.0173 possible (CIFAR10 CNN)

In [None]:
# Persist the current weights of `ae` to `model_file`.
# NOTE(review): train_autoencoder() received the same path as save_path; if it stores
# a best checkpoint there, this line overwrites it with the final weights — confirm.
torch.save(ae.state_dict(), model_file)

## Visualize Trajectory
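Start here if a trained autoencoder (AE) checkpoint already exists at `model_file`. The configuration and "Load Paths" cells above must still be run first, because the cells below use `runs`, `pt_files_per_run` and `model_file`.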

In [None]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt

from NeuroVisualizer.neuro_aux.AEmodel import UniformAutoencoder
from NeuroVisualizer.neuro_aux.utils import get_files, repopulate_model
from NeuroVisualizer.neuro_aux.trajectories_data import get_trajectory_dataloader

In [None]:
# Settings for the visualization pass (note: batch_size and device are re-assigned here).
batch_size = 4
loss_name = 'test_loss'
# NOTE(review): the value is 'mse' although the original note says cross entropy is
# what is actually computed — confirm how helper.neuro_viz interprets `whichloss`.
whichloss = 'mse' # this is CrossEntropyLoss
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Get file list
# pt_files = get_files(model_folder, prefix="model-")

# Load AE
# Only the length of one flattened checkpoint is needed, so load it on the CPU.
example_tensor = torch.load(pt_files_per_run[0][0], weights_only=True, map_location="cpu")
input_dim = example_tensor.shape[0]
# These must match the values used when the AE was trained, otherwise
# load_state_dict() fails on mismatching layer shapes.
latent_dim = 2
num_layers = 4
#h = [input_dim, 64, 32, 8]
#h = [input_dim, 128, 64, 16]
#h = [input_dim, 200, 100, 50]

#ae_model = UniformAutoencoder(input_dim, num_layers, latent_dim, h=h).to(device)
ae_model = UniformAutoencoder(input_dim, num_layers, latent_dim).to(device)
# map_location remaps the stored tensors to the active device, so a checkpoint
# saved on a GPU can also be restored on a CPU-only machine.
ae_model.load_state_dict(torch.load(model_file, weights_only=True, map_location=device))
_ = ae_model.eval()

In [None]:
# ---- Load data ----
from helper.neuro_viz import get_dataloader_flat

# Unshuffled loader so the checkpoints stay in run/epoch order; `transform` provides
# the mean/std used later to de-normalize decoded weights.
# NOTE(review): include_lmc is not passed here, unlike for the training loader —
# confirm that the normalization matches the one the AE was trained with.
trajectory_loader, transform = get_dataloader_flat(pt_files_per_run, batch_size, shuffle=False) #[:5] for Subset

### Repopulate original Model Architecture
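**IMPORTANT: the model created below must have exactly the same architecture as the runs being visualized (compare with the `model_info` printed by the next cell).**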

In [None]:
# Print the stored model description of every run; use it to pick the matching
# model constructor in the next cell.
for run in runs:
    print(run.results["model_info"])

In [None]:
from helper.vision_classification import init_mlp_for_dataset, init_cnn_for_dataset, init_vit_for_dataset
from helper.neuro_viz import Loss

# TODO: check that the active constructor below matches the selected runs.
# NOTE(review): on a top-to-bottom run the last configuration cell selects CIFAR-10
# CNN runs, while the active line below builds a ViT — confirm before running.

# Candidate architectures (uncomment exactly one):
#model = init_cnn_for_dataset(dataset_name, conv_dims=[8, 16], kernel_sizes=[3, 3], hidden_dims=[32], dropout=0.25, residual=False).to(device)
#model = init_cnn_for_dataset(dataset_name, conv_dims=[8, 16], kernel_sizes=[3, 3], hidden_dims=[32], dropout=0.25, residual=True).to(device)

#model = init_cnn_for_dataset(dataset_name, conv_dims=[32, 64], kernel_sizes=[3, 3], hidden_dims=[128], dropout=0.25, residual=False).to(device)
#model = init_cnn_for_dataset(dataset_name, conv_dims=[32, 64], kernel_sizes=[3, 3], hidden_dims=[128], dropout=0.25, residual=True).to(device)
#model = init_cnn_for_dataset(dataset_name, conv_dims=[64, 128, 256], kernel_sizes=[5, 3, 3], hidden_dims=[256, 128], dropout=0.2, residual=True).to(device)
#model = init_mlp_for_dataset(dataset_name, hidden_dims=[254, 64], dropout=0.1).to(device)

model = init_vit_for_dataset(dataset_name, emb_dim=32, depth=3, num_heads=4, mlp_dim=64, dropout=0.2, patch_size=7).to(device)

# Loss helper bound to the selected dataset and device.
loss_obj = Loss(dataset_name, device)

In [None]:
def count_parameters(model):
    """Return the total number of scalar entries in the trainable parameters of `model`.

    Only parameters with `requires_grad=True` are counted.
    """
    n_trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            n_trainable += param.numel()
    return n_trainable

# Print both numbers side by side as a manual sanity check that `model` fits the
# checkpoints the AE was built for.
# NOTE(review): whether they must be exactly equal depends on what the flattened
# checkpoints contain (e.g. buffers) — confirm.
print(f"Trainable parameters: {count_parameters(model):,}")
print(f"AE input/output dim:  {ae_model.encoder.fcs[0].in_features:,}")

#### Compute trajectory (Coordinates and Loss)

In [None]:
from helper.neuro_viz import compute_trajectory

# Run every checkpoint through the AE and evaluate it; yields latent coordinates,
# models, losses and two AE loss arrays (semantics defined in helper.neuro_viz).
trajectory_coordinates, trajectory_models, trajectory_losses, ae_losses_decode, ae_losses_finetuned = compute_trajectory(
    trajectory_loader,
    ae_model,
    transform,
    loss_obj,
    model,
    loss_name,
    whichloss,
    device,
    recalibrate_bn=True, # Optimizes Loss precision
)

In [None]:
from helper.neuro_viz import compute_lmc_lines

# Compute the LMC lines (latent coordinates and losses) for the selected runs;
# the result is drawn by the second landscape plot further below.
lmc_coords_list, lmc_losses_list, lmc_meta = compute_lmc_lines(
    pt_files_per_run,
    ae_model,
    transform,
    loss_obj,
    model,
    loss_name,
    whichloss,
    device)

In [None]:
# Inspect the latent coordinates of the LMC lines.
print(lmc_coords_list)

In [None]:
# Per-checkpoint AE losses and their means, as returned by compute_trajectory().
print(ae_losses_decode)
print(ae_losses_decode.mean())
print(ae_losses_finetuned)
print(ae_losses_finetuned.mean())

In [None]:
# Number of checkpoints contributed by each run
chunk_sizes = [len(run_files) for run_files in pt_files_per_run]
num_chunks = len(chunk_sizes)

# The concatenated trajectory arrays are cut at the cumulative run boundaries,
# giving one array per run (the last boundary is the total length and is dropped).
split_points = np.cumsum(chunk_sizes)[:-1]
tr_losses = np.split(trajectory_losses.cpu().numpy(), split_points)
tr_coordinates = np.split(trajectory_coordinates.cpu().numpy(), split_points)

In [None]:
# Validation losses as logged during training, one sequence per run.
real_losses = [run.results["val_losses"] for run in runs]

In [None]:
# The checkpoint files include an epoch-0 entry, so the logged validation losses are
# padded with a leading NaN to line up with the checkpoint indices.
# The list is rebuilt from `runs` on every execution, so re-running this cell does
# not prepend another NaN. `np.nan` is used because the `np.NaN` alias was removed
# in NumPy 2.0.
real_losses = [
    np.concatenate(([np.nan], run.results["val_losses"]))
    for run in runs
]

In [None]:
import matplotlib.pyplot as plt

# Compare logged vs. AE-projected validation loss, one panel per run, two per row.
cols = 2
rows = int(np.ceil(num_chunks / cols))

fig, axes = plt.subplots(rows, cols, figsize=(14, rows * 4), squeeze=False)

# axes.flat walks the grid row by row: the first `num_chunks` panels get one run
# each, any leftover panel (odd number of runs) is removed from the figure.
for i, ax in enumerate(axes.flat):
    if i >= num_chunks:
        fig.delaxes(ax)
        continue

    ax.plot(real_losses[i], label='Logged Validation Loss', marker='o')
    ax.plot(tr_losses[i], label='AE-Projected Validation Loss', marker='x')
    ax.set(
        title=titles[i],
        xlabel='Checkpoint Index',
        ylabel='Loss (Cross Entropy)',
    )
    ax.grid(True)
    ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Generate grid in latent space
from helper.neuro_viz import generate_latent_grid, compute_grid_losses, compute_grid_losses_batched
# Regular grid over the latent square [-1.1, 1.1]; `xnum` sets the grid resolution
# (the original note suggests 3 - 25).
xx, yy, grid_coords = generate_latent_grid(
    min_map=-1.1, max_map=1.1,
    xnum=15, # 3 - 25
    device=device
)

# Evaluate the loss at every grid point (batched helper from helper.neuro_viz).
grid_losses = compute_grid_losses_batched(
    grid_coords,
    transform,
    ae_model,
    model,
    loss_obj,
    loss_name,
    whichloss,
    device,
)

# Reshape the flat loss vector to the 2-D grid layout of xx / yy
grid_losses = grid_losses.view(xx.shape)

In [None]:
# Range of the grid losses (useful for judging the color scale of the plot).
print(grid_losses.min().item(), grid_losses.max().item())

In [None]:
# Decode every grid point back into flattened model weights and undo the
# normalization (multiply by std, add mean).
# This is pure inference: no_grad() avoids building an autograd graph over all
# decoded models, and the result can be converted to NumPy without detach().
with torch.no_grad():
    rec_grid_models = ae_model.decoder(grid_coords)
    rec_grid_models = rec_grid_models * transform.std.to(device) + transform.mean.to(device)

In [None]:
from helper.neuro_viz import plot_loss_landscape

# Landscape with the training trajectories only (no LMC lines, no density contours).
fig = plot_loss_landscape(
    xx, yy,
    grid_losses,
    tr_losses, # real_losses or tr_losses
    tr_coordinates,
    rec_grid_models=rec_grid_models,
    draw_density=False,
    filled_contours=True,
    trajectory_labels=titles,            # list of strs, one per trajectory - ['Test 1', 'Test 2', 'Test 3', 'Test 4'],
    label_positions=[('left', 'top'), ('left', 'bottom'), ('right', 'bottom'), ('right', 'top'), ('left', 'center'), ('center', 'bottom')], # one (ha, va) per trajectory: ('left'|'center'|'right', 'top'|'center'|'bottom')
)

In [None]:
# Save to PDF
# Write the figure to plots/ as a PDF.
os.makedirs('plots', exist_ok=True)
# NOTE: with include_lmc=False the last placeholder is empty, so the name ends in "_.pdf".
name = f"plots/loss_landscape_{vis_id}_{'lmc' if include_lmc else ''}.pdf"
fig.savefig(name, dpi=300, bbox_inches='tight', format='pdf')
print(f"Saved PDF to {name}")

In [None]:
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import matplotlib.ticker as ticker
import numpy as np

def plot_loss_landscape(
    xx, yy,
    grid_losses, trajectory_losses_list, trajectory_coords_list,
    rec_grid_models=None,
    draw_density=True,
    filled_contours=True,
    cmap='viridis',
    loss_label='Cross Entropy Loss',
    trajectory_labels=None,
    label_positions=None,
    lmc_coords_list=None,
    lmc_losses_list=None
):
    """Plot the latent-space loss landscape with training trajectories and LMC lines.

    Args:
        xx, yy: torch tensors holding the grid coordinates (converted via
            `.cpu().numpy()`).
        grid_losses: torch tensor with the loss at every grid point, shaped like `xx`.
        trajectory_losses_list: one 1-D array-like of losses per trajectory. NaN/inf
            entries (e.g. a padded epoch 0) are ignored when the color scale is built.
        trajectory_coords_list: one array per trajectory, indexed as `z[:, 0]` / `z[:, 1]`.
        rec_grid_models: decoded grid models (torch tensor); only used for the
            optional density contours.
        draw_density: draw density contours when `rec_grid_models` is given.
        filled_contours: use `contourf` (True) or labelled `contour` lines (False).
        cmap: colormap shared by the landscape and the scatter points.
        loss_label: colorbar label.
        trajectory_labels: one label per trajectory; defaults to "traj i".
        label_positions: per-trajectory `(ha, va)` tuple or `'auto'`. Missing entries
            (list shorter than the number of trajectories) fall back to `'auto'`.
        lmc_coords_list, lmc_losses_list: optional LMC lines, drawn dashed in red
            when both are given.

    Returns:
        The matplotlib figure.

    Raises:
        ValueError: if neither the grid nor the trajectories contain a finite loss.
    """
    # === PREPARE LOSSES ===
    grid_losses_pos = grid_losses.detach().cpu().numpy()

    # === SHARED COLOR SCALE ===
    # Collect grid and trajectory losses; works with an empty trajectory list too.
    loss_arrays = [grid_losses_pos.ravel()]
    loss_arrays.extend(np.asarray(t).ravel() for t in trajectory_losses_list)
    all_losses = np.concatenate(loss_arrays)

    # A single NaN would otherwise turn vmin/vmax (and every contour level) into NaN.
    finite_losses = all_losses[np.isfinite(all_losses)]
    if finite_losses.size == 0:
        raise ValueError("plot_loss_landscape: no finite loss values to build the color scale from")

    # Pad the range by a factor of 1.2; the lower bound is clipped to stay positive
    # because the scale is logarithmic.
    vmin = np.clip(finite_losses.min() / 1.2, 1e-5, None)
    vmax = finite_losses.max() * 1.2

    if vmin >= vmax or np.isclose(vmin, vmax):
        vmax = vmin * 10
        print(f"Adjusted nearly-constant losses: vmin={vmin}, vmax={vmax}")

    levels = np.logspace(np.log10(vmin), np.log10(vmax), 30)
    norm = LogNorm(vmin=vmin, vmax=vmax)

    # === BEGIN PLOTTING ===
    fig, ax = plt.subplots(figsize=(8, 6))

    # -- 1 Loss Landscape --
    X = xx.cpu().numpy()
    Y = yy.cpu().numpy()

    if filled_contours:
        contour = ax.contourf(X, Y, grid_losses_pos, levels=levels, norm=norm, cmap=cmap)
    else:
        contour = ax.contour(X, Y, grid_losses_pos, levels=levels, norm=norm, cmap=cmap)
        ax.clabel(contour, fmt="%.2e", fontsize=8)

    cbar = fig.colorbar(contour, ax=ax, shrink=0.8)
    ticks = np.logspace(np.log10(vmin), np.log10(vmax), 5)
    cbar.set_ticks(ticks)
    cbar.ax.set_ylabel(loss_label, fontsize=12)

    # -- 2 & 3: Plot Training Trajectories --
    for z, losses in zip(trajectory_coords_list, trajectory_losses_list):
        # One polyline per trajectory instead of one artist per segment.
        ax.plot(z[:, 0], z[:, 1], color='k', linewidth=1)
        ax.scatter(
            z[:, 0], z[:, 1],
            c=losses, cmap=cmap, norm=norm,
            s=40, edgecolors='k'
        )

    # -- 3b: Annotate each trajectory at its last point --
    offset_pts = 5
    n_traj = len(trajectory_coords_list)
    if trajectory_labels is None:
        trajectory_labels = [f"traj {i}" for i in range(n_traj)]
    if label_positions is None:
        label_positions = ['auto'] * n_traj

    for idx, (z, _losses, lab) in enumerate(zip(
            trajectory_coords_list, trajectory_losses_list, trajectory_labels)):
        if len(z) == 0:
            # Nothing to annotate for an empty trajectory.
            continue
        x_end, y_end = float(z[-1, 0]), float(z[-1, 1])

        # Fewer explicit positions than trajectories: place the rest automatically.
        pos = label_positions[idx] if idx < len(label_positions) else 'auto'
        if pos != 'auto':
            ha, va = pos
        elif len(z) >= 2:
            # Put the label on the side the trajectory is heading towards.
            dx = z[-1, 0] - z[-2, 0]
            dy = z[-1, 1] - z[-2, 1]
            ha = 'left'   if dx >= 0 else 'right'
            va = 'bottom' if dy >= 0 else 'top'
        else:
            # A single point has no direction; use the same placement as dx = dy = 0.
            ha, va = 'left', 'bottom'

        ox = offset_pts if ha == 'left' else (-offset_pts if ha == 'right' else 0)
        oy = offset_pts if va == 'bottom' else (-offset_pts if va == 'top' else 0)
        ax.annotate(
            lab, xy=(x_end, y_end), xytext=(ox, oy),
            textcoords='offset points', ha=ha, va=va,
            fontsize=7, bbox=dict(boxstyle="round,pad=0.2", fc="white", alpha=0.6),
            arrowprops=dict(arrowstyle='-', lw=0)
        )

    # -- 4: Plot LMC Lines (in red) --
    if lmc_coords_list is not None and lmc_losses_list is not None:
        for z, losses in zip(lmc_coords_list, lmc_losses_list):
            ax.plot(z[:, 0], z[:, 1], color='red', linewidth=1.5, linestyle='--')
            ax.scatter(
                z[:, 0], z[:, 1],
                c=losses, cmap=cmap, norm=norm,
                s=30, edgecolors='r'
            )

    # -- 5 OPTIONAL: Density Contours --
    if draw_density and rec_grid_models is not None:
        # Best effort: the density overlay is optional, so a failure is reported
        # and the rest of the figure is still returned.
        try:
            from NeuroVisualizer.neuro_aux.utils import get_density
            density = get_density(rec_grid_models.detach().cpu().numpy(), type='inverse', p=2)
            density = density.reshape(xx.shape)
            density_levels = np.logspace(
                np.log10(max(density.min(), 1e-3)), np.log10(density.max()), 15
            )
            CS_density = ax.contour(
                X, Y, density,
                levels=density_levels,
                colors='white', linewidths=0.8
            )
            ax.clabel(CS_density, fmt=ticker.FormatStrFormatter('%.1f'), fontsize=7)
        except Exception as e:
            print("Density contour skipped:", e)

    # -- 6 Labels, Grid, Style --
    ax.set_title('Loss Landscape with Training Trajectories and LMC Paths', fontsize=14)
    ax.set_xlabel('Latent Dimension 1', fontsize=12)
    ax.set_ylabel('Latent Dimension 2', fontsize=12)
    ax.grid(True, linestyle='--', alpha=0.3)

    return fig

In [None]:
# Same landscape as before, additionally drawing the LMC lines computed earlier.
fig_lmc = plot_loss_landscape(
    xx, yy,
    grid_losses,
    tr_losses, # real_losses or tr_losses
    tr_coordinates,
    rec_grid_models=rec_grid_models,
    draw_density=False,
    filled_contours=True,
    trajectory_labels=titles,            # list of strs, one per trajectory - ['Test 1', 'Test 2', 'Test 3', 'Test 4'],
    label_positions=[('left', 'top'), ('center', 'top'), ('right', 'bottom'), ('right', 'top'), ('left', 'center'), ('center', 'bottom')], # one (ha, va) per trajectory: ('left'|'center'|'right', 'top'|'center'|'bottom')
    lmc_coords_list=lmc_coords_list, # LMC line coordinates from compute_lmc_lines()
    lmc_losses_list=lmc_losses_list, # LMC line losses from compute_lmc_lines()
)

In [None]:
# Write the LMC figure to plots/ as a PDF.
# NOTE(review): the with/no part of the file name follows `include_lmc`, while the
# figure itself always receives the LMC lines — confirm the intended naming.
os.makedirs('plots', exist_ok=True)
filename = f"plots/loss_landscape_{vis_id}_{'with' if include_lmc else 'no'}_lmc.pdf"
fig_lmc.savefig(filename, dpi=300, bbox_inches='tight', format='pdf')
print(f"Saved PDF to {filename}")

In [None]:
from helper.neuro_viz import plot_density_only

# Density-only view over the same grid, with the run trajectories passed along.
fig_density = plot_density_only(xx, yy, rec_grid_models,
                                trajectory_coords_list=tr_coordinates)

In [None]:
# Write the density figure to plots/ as a PDF; the path is built once and reused
# for both saving and the confirmation message.
os.makedirs('plots', exist_ok=True)
density_pdf = f'plots/loss_landscape_{vis_id}_density.pdf'
fig_density.savefig(density_pdf, dpi=300, bbox_inches='tight', format='pdf')
print(f"Saved PDF to {density_pdf}")