In [None]:
import os
import torch
from torch import nn

from NeuroVisualizer.neuro_aux.AEmodel import UniformAutoencoder
from NeuroVisualizer.neuro_aux.utils import get_files

from helper.neuro_viz import get_dataloader_flat

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Neuro-Visualizer
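This notebook trains a NeuroVisualizer autoencoder on the flattened model checkpoints of the selected runs and uses its 2-D latent space to plot the loss landscape together with the training trajectories.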

In [None]:
# `dataset_name` is passed to the model and loss constructors further down.
dataset_name = 'cifar10'
model_name = 'CNN'
# Runs visualized together in one landscape. Keep the order in sync with `titles`
# (next cell): the labels are matched to the runs by position.
runs = [
#    "run-0011-CNN_mnist_32_0.9776",
#    "run-0012-CNN_mnist_32_0.9768",
#    "run-0007-CNN_mnist_128_0.9851",
    
    # With Residual
#    "run-0016-CNN_cifar10_128_0.8093", # Seed 42, SAM
#    "run-0018-CNN_cifar10_128_0.8499", # Seed 42
#    "run-0020-CNN_cifar10_128_0.8079", # Seed 11, SAM
#    "run-0022-CNN_cifar10_128_0.8519", # Seed 11
    
    # No Residual
    "run-0017-CNN_cifar10_128_0.8072", # Seed 42, SAM
    "run-0019-CNN_cifar10_128_0.8487", # Seed 42
    "run-0021-CNN_cifar10_128_0.8054", # Seed 11, SAM
    "run-0023-CNN_cifar10_128_0.8509", # Seed 11
]

In [None]:
# Plot labels, one per active entry of `runs` and in the same order; the trailing
# numbers are the run ids. Passed as `trajectory_labels` to the labelled landscape plot.
titles = [
    #"Seed 42, SAM, Residual 0.8093", #16
    "Seed 42, SAM, 0.8072",          #17
    #"Seed 42, SGD, Residual 0.8499", #18
    "Seed 42, SGD, 0.8487",          #19
    #"Seed 11, SAM, Residual 0.8079", #20
    "Seed 11, SAM, 0.8054",          #21
    #"Seed 11, SGD, Residual 0.8519", #22
    "Seed 11, SGD, 0.8509",          #23
]

In [None]:
from helper.data_manager import load_training_data
# Load the stored training record of every selected run
results = [load_training_data(run) for run in runs]

# Each record names the folder with its flattened weights; it doubles as run id below
run_ids = [record["ll_flattened_weights_dir"] for record in results]

# Combined identifier of this visualization, used in the AE and plot file names
vis_id = ' x '.join(run_ids)

print(run_ids)
print(vis_id)

In [None]:
model_file = f'ae_models/{vis_id}.pt'

In [None]:
#model_file = "ae_models/run-0016-CNN x run-0017-CNN x run-0018-CNN x run-0019-CNN x run-0020-CNN x run-0021-CNN.pt"

In [None]:
# Checkpoint files are read from trainings/<run_id>; adjust this path to your folder.
# `pt_files` keeps one list per run, `pt_files_flat` is the same content in a single list.
pt_files = []
pt_files_flat = []

for run_id in run_ids:
    model_folder = f"trainings/{run_id}"
    run_checkpoints = get_files(model_folder, prefix="model-")
    pt_files.append(run_checkpoints)
    pt_files_flat.extend(run_checkpoints)
    print(f"Found {len(run_checkpoints)} checkpoint files.")

## Train AE Model
Run this part to train an AE-Model

In [None]:
# Batch size of the AE training; the original note suggests values between 4 and 32.
batch_size = 8 #4 - 32 Batch Size of AE Training

# Build the training loader over the checkpoints of all runs. The second return value is
# kept as `normalizer`. NOTE(review): presumably it holds the normalization statistics of
# the loader — confirm in helper.neuro_viz.
loader, normalizer = get_dataloader_flat(pt_files_flat, batch_size,
                                         shuffle=True, oversample_later=True, # more samples from later epochs, that diverge more
                                         )

In [None]:
torch.cuda.empty_cache()

Adjust: choose hidden-layer sizes small enough that the autoencoder still fits into the memory of your GPU.

In [None]:
# The AE input size is taken from the first item of the training dataset
# (presumably one flattened checkpoint vector — confirm in get_dataloader_flat).
input_dim = loader.dataset[0].shape[0]
print(f"Input dimension: {input_dim}")

# Two latent dimensions, so the encoded checkpoints can be drawn directly in a 2-D plot.
latent_dim = 2
num_layers = 4

# Aggressive compression (scales with first hidden dim)
#h = [input_dim, 64, 32, 8]
# NOTE(review): `h` presumably lists one width per layer (len(h) == num_layers) — confirm
# against UniformAutoencoder. The same values are repeated in the visualization section
# where the saved weights are loaded again; keep both places identical.
h = [input_dim, 128, 64, 16]
ae = UniformAutoencoder(input_dim, num_layers, latent_dim, h=h).to(device)

#ae = UniformAutoencoder(input_dim, num_layers, latent_dim).to(device)

In [None]:
# Count all AE parameters and, separately, those that receive gradients
total_params = 0
trainable_params = 0
for param in ae.parameters():
    n_elements = param.numel()
    total_params += n_elements
    if param.requires_grad:
        trainable_params += n_elements

# Memory estimate assuming 4 bytes per parameter
size_mb = (total_params * 4) / (1024 ** 2)

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Approx. size: {size_mb:.2f} MB")

In [None]:
# Resume from a previous training process when a saved AE exists (e.g. after a crash).
# On a fresh start there is no file yet, so the load is skipped instead of raising and
# the notebook still survives Restart & Run All.
if os.path.isfile(model_file):
    # map_location allows weights saved on a GPU to be loaded on a CPU-only machine
    ae.load_state_dict(torch.load(model_file, map_location=device, weights_only=True))
    print(f"Resumed autoencoder weights from {model_file}")
else:
    print(f"No saved autoencoder at {model_file}; training starts from scratch")

In [None]:
from helper.neuro_viz import train_autoencoder

# Train the autoencoder on the checkpoint loader; `model_file` is handed over as save path.
trained_model = train_autoencoder(
    model=ae,
    train_loader=loader,
    device=device,
    save_path=model_file,
    num_epochs=50, # 1000 would be great
    lr=0.005, # 0.01 -
    patience=15,
    avoid_overheat=True # Avoids crashes on Nembus Computer
)

# ~ 0.0173 possible (presumably the reachable AE training loss — TODO confirm)

In [None]:
# Persist the AE weights. The target folder is created first so that saving does not
# fail on a fresh checkout where `ae_models/` does not exist yet.
os.makedirs(os.path.dirname(model_file) or ".", exist_ok=True)
torch.save(ae.state_dict(), model_file)

## Visualize Trajectory

In [None]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt

from NeuroVisualizer.neuro_aux.AEmodel import UniformAutoencoder
from NeuroVisualizer.neuro_aux.utils import get_files, repopulate_model
from NeuroVisualizer.neuro_aux.trajectories_data import get_trajectory_dataloader

In [None]:
# Settings of the visualization stage.
batch_size = 4  # NOTE: replaces the batch size that was set for the AE training above
loss_name = 'test_loss'
# NOTE(review): the value is 'mse' while the original comment says CrossEntropyLoss is
# meant — confirm how helper.neuro_viz interprets `whichloss`.
whichloss = 'mse' # this is CrossEntropyLoss
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Get file list
# pt_files = get_files(model_folder, prefix="model-")

# Derive the AE input size from the first checkpoint of the first run. Only the shape is
# needed, so the tensor is mapped to the CPU regardless of the device it was saved from.
example_tensor = torch.load(pt_files[0][0], map_location="cpu", weights_only=True)
input_dim = example_tensor.shape[0]

# These values must match the architecture the saved AE was trained with,
# otherwise load_state_dict below fails.
latent_dim = 2
num_layers = 4
#h = [input_dim, 64, 32, 8]
h = [input_dim, 128, 64, 16]

ae_model = UniformAutoencoder(input_dim, num_layers, latent_dim, h=h).to(device)
# map_location allows weights saved on a GPU to be loaded on a CPU-only machine
ae_model.load_state_dict(torch.load(model_file, map_location=device, weights_only=True))
#ae_model.eval()

In [None]:
# ---- Load data ----
from helper.neuro_viz import get_dataloader_flat

trajectory_loader, transform = get_dataloader_flat(pt_files_flat, batch_size, shuffle=False) #[:5] for Subset

### Repopulate original Model Architecture
**IMPORTANT: the model instantiated below must have exactly the same architecture as the one that produced the checkpoints.**

In [None]:
for result in results:
    print(result["model_info"])

In [None]:
from helper.vision_classification import init_mlp_for_dataset, init_cnn_for_dataset
from helper.neuro_viz import Loss

#TODO Check the model: exactly one line below must be active, and it has to match the
# architecture printed from `model_info` in the previous cell.
#model = init_cnn_for_dataset(dataset_name, conv_dims=[8, 16], kernel_sizes=[3, 3], hidden_dims=[32], dropout=0.25, residual=False).to(device)
#model = init_cnn_for_dataset(dataset_name, conv_dims=[8, 16], kernel_sizes=[3, 3], hidden_dims=[32], dropout=0.25, residual=True).to(device)

#model = init_cnn_for_dataset(dataset_name, conv_dims=[32, 64], kernel_sizes=[3, 3], hidden_dims=[128], dropout=0.25, residual=False).to(device)
#model = init_cnn_for_dataset(dataset_name, conv_dims=[32, 64], kernel_sizes=[3, 3], hidden_dims=[128], dropout=0.25, residual=True).to(device)
model = init_cnn_for_dataset(dataset_name, conv_dims=[64, 128, 256], kernel_sizes=[5, 3, 3], hidden_dims=[256, 128], dropout=0.2, residual=False).to(device)
#model = init_mlp_for_dataset(dataset_name, hidden_dims=[254, 64], dropout=0.1).to(device)
# Loss helper built for the same dataset and device
loss_obj = Loss(dataset_name, device)

In [None]:
def count_parameters(model):
    """Return the total number of elements of all parameters of `model` that require gradients."""
    n_trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            n_trainable += param.numel()
    return n_trainable

print(f"Trainable parameters: {count_parameters(model):,}")
print(f"AE input/output dim:  {ae_model.encoder.fcs[0].in_features:,}")

#### Compute trajectory (Coordinates and Loss)

In [None]:
from helper.neuro_viz import compute_trajectory

# Run the helper over all checkpoints (in file order, the loader is not shuffled).
# NOTE(review): judging by the names it returns latent coordinates, reconstructed models
# and their losses per checkpoint — confirm in helper.neuro_viz.compute_trajectory.
trajectory_coordinates, trajectory_models, trajectory_losses = compute_trajectory(
    trajectory_loader,
    ae_model,
    transform,
    loss_obj,
    model,
    loss_name,
    whichloss,
    device,
)
# Message seen earlier (presumably when `model` did not match the checkpoints):
# Mismatch in parameter size: 2340426 vs 1878346
# Mismatch in parameter size: 2340426 vs 1878346

In [None]:
num_chunks = len(pt_files)
chunk_size = len(pt_files[0])

# Number of checkpoints of every run, in the order the runs were concatenated
run_lengths = [len(run_files) for run_files in pt_files]

losses_np = trajectory_losses.cpu().numpy()
coords_np = trajectory_coordinates.cpu().numpy()
if len(losses_np) != sum(run_lengths) or len(coords_np) != sum(run_lengths):
    raise ValueError(
        f"Trajectory has {len(losses_np)} losses / {len(coords_np)} coordinates "
        f"but the runs contain {sum(run_lengths)} checkpoints"
    )

# Split at the real run boundaries. An equal-sized split (np.split(..., num_chunks))
# raises or mixes runs as soon as the runs differ in their number of checkpoints.
split_points = np.cumsum(run_lengths)[:-1]
tr_losses = np.split(losses_np, split_points)  # list of arrays
tr_coordinates = np.split(coords_np, split_points)  # list of arrays

# Validation losses logged during training, one sequence per run
real_losses = [results[i]["val_losses"] for i in range(num_chunks)]

In [None]:
# The checkpoint files contain an extra epoch-0 entry that the logged losses lack, so the
# projected loss of that first checkpoint is prepended to align both series.
# The length check makes this cell idempotent: re-running it does not prepend again.
for i in range(num_chunks):
    if len(tr_losses[i]) - len(real_losses[i]) == 1:
        first_loss = tr_losses[i][0]
        real_losses[i] = np.concatenate(([first_loss], real_losses[i]))

In [None]:
import matplotlib.pyplot as plt

# One figure per run: logged validation loss against the loss of the AE projection
for result, logged, projected in zip(results, real_losses, tr_losses):
    cmp_fig, cmp_ax = plt.subplots(figsize=(8, 4))

    cmp_ax.plot(logged, label='Logged Validation Loss', marker='o')
    cmp_ax.plot(projected, label='AE-Projected Validation Loss', marker='x')

    cmp_ax.legend()
    cmp_ax.set_title(result['train_config'])
    cmp_ax.set_xlabel('Checkpoint Index')
    cmp_ax.set_ylabel('Loss (Cross Entropy)')
    cmp_ax.grid(True)
    plt.show()

In [None]:
# Generate grid in latent space
from helper.neuro_viz import generate_latent_grid, compute_grid_losses, compute_grid_losses_batched
# Latent-space grid between -1.1 and 1.1.
# NOTE(review): `xnum` is presumably the number of grid points per axis; the original
# note suggests 3 - 25 while 30 is used here — confirm and adjust to the available memory.
xx, yy, grid_coords = generate_latent_grid(
    min_map=-1.1, max_map=1.1,
    xnum=30, # 3 - 25
    device=device
)

# Decode grid and compute losses
#model = init_mlp_for_dataset(dataset_name, hidden_dims=[254, 64], dropout=0.1).to(device)

grid_losses = compute_grid_losses_batched(
    grid_coords,
    transform,
    ae_model,
    model,
    loss_obj,
    loss_name,
    whichloss,
    device
)

# Reshape to grid (same shape as the coordinate mesh `xx`)
grid_losses = grid_losses.view(xx.shape)

In [None]:
print(grid_losses.min().item(), grid_losses.max().item())

In [None]:
# Decode the whole latent grid at once and undo the normalization.
# No gradients are needed here; disabling autograd avoids keeping the computation graph
# of every decoded model in (GPU) memory.
with torch.no_grad():
    rec_grid_models = ae_model.decoder(grid_coords)
    rec_grid_models = rec_grid_models*transform.std.to(device) + transform.mean.to(device)

If the cell above runs out of CUDA memory, decode the grid in batches instead:

In [None]:
def decode_grid_in_batches(ae_model, grid_coords, transform, device, batch_size=32):
    """Decode latent grid coordinates batch by batch and collect the results on the CPU.

    Args:
        ae_model: Autoencoder exposing ``eval()`` and a callable ``decoder``.
        grid_coords: Tensor whose first dimension indexes the grid points.
        transform: Object with ``std`` and ``mean`` tensors used to undo the normalization.
        device: Device on which each batch is decoded.
        batch_size: Number of grid points decoded per step.

    Returns:
        CPU tensor with the de-normalized decoder outputs concatenated along dim 0.

    Note:
        The model is switched to eval mode and is left in that mode.
    """
    ae_model.eval()
    scale = transform.std.to(device)
    shift = transform.mean.to(device)
    n_points = grid_coords.size(0)
    decoded = []

    with torch.no_grad():
        for start in range(0, n_points, batch_size):
            batch = grid_coords[start:start + batch_size].to(device)
            # De-normalize on the device, then move the result to the CPU right away
            decoded.append((ae_model.decoder(batch) * scale + shift).cpu())
            del batch
            torch.cuda.empty_cache()  # release cached GPU memory before the next batch

    return torch.cat(decoded, dim=0)

In [None]:
rec_grid_models = decode_grid_in_batches(
    ae_model, grid_coords, transform, device, batch_size=16
)

In [None]:
from helper.neuro_viz import plot_loss_landscape

fig = plot_loss_landscape(
    xx, yy,
    grid_losses,
    real_losses, # real_losses or tr_losses
    tr_coordinates,
    rec_grid_models=rec_grid_models,
    draw_density=False,
    filled_contours=False
)

In [None]:
# Save to PDF. The path is built once so the saved file and the printed message
# cannot drift apart.
pdf_path = f'plots/loss_landscape_{vis_id}.pdf'
os.makedirs('plots', exist_ok=True)
fig.savefig(pdf_path, dpi=300, bbox_inches='tight', format='pdf')
print(f"Saved PDF to {pdf_path}")

plt.show()

In [None]:
# NOTE(review): `trajectory_labels` and `label_positions` are parameters of the
# notebook-local `plot_loss_landscape` that is defined in a later cell. Whether the version
# imported from helper.neuro_viz accepts them is not visible here — on a fresh kernel run
# the defining cell first, or confirm the helper's signature.
fig = plot_loss_landscape(
    xx, yy,
    grid_losses,
    real_losses, # real_losses or tr_losses
    tr_coordinates,
    rec_grid_models=rec_grid_models,
    draw_density=False,
    filled_contours=False,
    trajectory_labels=titles,            # NEW: list of strs, one per trajectory - ['Test 1', 'Test 2', 'Test 3', 'Test 4'],
    label_positions=[('center', 'top'), ('right', 'top'), ('right', 'bottom'), ('right', 'bottom')], # ('left'|'center'|'right', 'top'|'center'|'bottom')
)

In [None]:
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import matplotlib.ticker as ticker
import numpy as np

def _to_numpy(values):
    """Return `values` as a NumPy array; tensors are detached and moved to the CPU first."""
    if hasattr(values, "detach"):
        return values.detach().cpu().numpy()
    return np.asarray(values)


# NOTE: once this cell has run, this definition shadows the `plot_loss_landscape`
# imported from helper.neuro_viz earlier in the notebook.
def plot_loss_landscape(
    xx, yy,
    grid_losses, trajectory_losses_list, trajectory_coords_list,
    rec_grid_models=None,
    draw_density=True,
    filled_contours=True,
    cmap='viridis',
    loss_label='Cross Entropy Loss',
    trajectory_labels=None,
    label_positions=None,
):
    """Draw the loss landscape on a latent grid together with training trajectories.

    Grid and trajectory losses share one logarithmic colour scale. Every trajectory is
    drawn as a black polyline with loss-coloured points and is annotated at its last point.

    Args:
        xx, yy: Grid coordinates (tensors or arrays) as accepted by contour/contourf.
        grid_losses: Loss per grid point, same shape as `xx` (tensor or array).
        trajectory_losses_list: One 1-D sequence of losses per trajectory.
        trajectory_coords_list: One (n_points, 2) array/tensor of latent coordinates per
            trajectory, aligned with `trajectory_losses_list`.
        rec_grid_models: Decoded grid models, only used for the optional density contours.
        draw_density: Draw density contours when `rec_grid_models` is given.
        filled_contours: Use filled contours; otherwise labelled contour lines.
        cmap: Colormap for contours and trajectory points.
        loss_label: Label of the colour bar.
        trajectory_labels: One label per trajectory; defaults to "traj <index>".
        label_positions: Per trajectory either 'auto' or a tuple
            ('left'|'center'|'right', 'top'|'center'|'bottom'). Missing entries fall back
            to 'auto', which places the label along the direction of the last step.

    Returns:
        The matplotlib figure. `plt.show()` is not called here.

    Raises:
        ValueError: If neither grid nor trajectories contain a finite loss value.
    """
    # === PREPARE LOSSES ===
    grid_losses_pos = _to_numpy(grid_losses)
    traj_coords = [_to_numpy(coords) for coords in trajectory_coords_list]
    traj_losses = [np.ravel(_to_numpy(losses)) for losses in trajectory_losses_list]

    # === SHARED COLOR SCALE ===
    all_losses = np.concatenate([grid_losses_pos.ravel(), *traj_losses])
    # NaN/inf losses (e.g. from diverged grid models) must not define the colour range
    finite_losses = all_losses[np.isfinite(all_losses)]
    if finite_losses.size == 0:
        raise ValueError("No finite loss values available for plotting")
    vmin = np.clip(finite_losses.min() / 1.2, 1e-5, None)
    vmax = finite_losses.max() * 1.2

    if vmin >= vmax or np.isclose(vmin, vmax):
        vmax = vmin * 10
        print(f"Adjusted nearly-constant losses: vmin={vmin}, vmax={vmax}")

    levels = np.logspace(np.log10(vmin), np.log10(vmax), 30)
    norm = LogNorm(vmin=vmin, vmax=vmax)

    # === BEGIN PLOTTING ===
    fig, ax = plt.subplots(figsize=(8, 6))

    # -- 1 Loss Landscape --
    X = _to_numpy(xx)
    Y = _to_numpy(yy)

    if filled_contours:
        contour = ax.contourf(X, Y, grid_losses_pos, levels=levels, norm=norm, cmap=cmap)
    else:
        contour = ax.contour(X, Y, grid_losses_pos, levels=levels, norm=norm, cmap=cmap)
        ax.clabel(contour, fmt="%.2e", fontsize=8)

    cbar = plt.colorbar(contour, ax=ax, shrink=0.8)
    ticks = np.logspace(np.log10(vmin), np.log10(vmax), 5)  # customize number here
    cbar.set_ticks(ticks)
    cbar.ax.set_ylabel(loss_label, fontsize=12)

    # -- 2 & 3: Plot Multiple Trajectories --
    for z, losses in zip(traj_coords, traj_losses):
        if len(z) == 0:
            continue  # nothing to draw for an empty trajectory
        # Lines: one polyline per trajectory instead of one artist per segment
        ax.plot(z[:, 0], z[:, 1], color='k', linewidth=1)
        # Points
        ax.scatter(
            z[:, 0], z[:, 1],
            c=losses,
            cmap=cmap,
            norm=norm,
            s=40,
            edgecolors='k'
        )

    # ===== 3b: Annotate each trajectory at its last point =====
    offset_pts = 12  # how far, in points, to shift the label

    # defaults
    n_traj = len(traj_coords)
    if trajectory_labels is None:
        trajectory_labels = [f"traj {i}" for i in range(n_traj)]
    # Pad missing positions with 'auto' so a short list does not raise IndexError
    positions = list(label_positions) if label_positions is not None else []
    positions += ['auto'] * (n_traj - len(positions))

    for z, lab, pos in zip(traj_coords, trajectory_labels, positions):
        if len(z) == 0:
            continue
        x_end, y_end = float(z[-1, 0]), float(z[-1, 1])

        # decide alignment
        if pos != 'auto':
            ha, va = pos
        elif len(z) >= 2:
            # follow the direction of the last step
            dx = z[-1, 0] - z[-2, 0]
            dy = z[-1, 1] - z[-2, 1]
            ha = 'left'   if dx >= 0 else 'right'
            va = 'bottom' if dy >= 0 else 'top'
        else:
            # a single point has no direction; use a fixed placement
            ha, va = 'left', 'bottom'

        # convert alignment into point-offset direction
        ox =  offset_pts if ha == 'left'   else (-offset_pts if ha == 'right' else 0)
        oy =  offset_pts if va == 'bottom' else (-offset_pts if va == 'top'   else 0)

        # annotate with offset
        ax.annotate(
            lab,
            xy=(x_end, y_end),
            xytext=(ox, oy),
            textcoords='offset points',
            ha=ha, va=va,
            fontsize=10,
            bbox=dict(boxstyle="round,pad=0.2", fc="white", alpha=0.6),
            arrowprops=dict(arrowstyle='-', lw=0)
        )

    # -- 4 OPTIONAL: Density Contours --
    if draw_density and rec_grid_models is not None:
        # Best effort: a failing density computation must not prevent the main plot
        try:
            from NeuroVisualizer.neuro_aux.utils import get_density
            density = get_density(_to_numpy(rec_grid_models), type='inverse', p=2)
            density = density.reshape(X.shape)
            density_levels = np.logspace(
                np.log10(max(density.min(), 1e-3)),
                np.log10(density.max()),
                15
            )
            CS_density = ax.contour(
                X, Y, density,
                levels=density_levels,
                colors='white',
                linewidths=0.8
            )
            ax.clabel(CS_density, fmt=ticker.FormatStrFormatter('%.1f'), fontsize=7)
        except Exception as e:
            print("Density contour skipped:", e)

    # -- 5 Labels, Grid, Style --
    ax.set_title('Loss Landscape with Training Trajectory', fontsize=14)
    ax.set_xlabel('Latent Dimension 1', fontsize=12)
    ax.set_ylabel('Latent Dimension 2', fontsize=12)
    ax.grid(True, linestyle='--', alpha=0.3)

    # -- 6 Show --
    # Left to the caller, which may save the figure first.
    return fig