# Random Perturbations

This notebook is a streamlined notebook for evaluating random perturbations on saved models and datasets.

## Imports

In [1]:
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

import time

# Importing our existing funcs
import os
import sys
from pathlib import Path
parent_dir = Path.cwd().parent.parent.parent
sys.path.append(str(parent_dir))
# Import modules

from perturb_funcs import ( analyze_wiggles_metrics )

from dataset_funcs import ( prepare_datasets,
                            save_model, save_dataset,
                            load_model, load_dataset,
                            load_models_and_data)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Input Parameters

In [2]:
# Perturbation Configuration
perturbation_seed = 1
num_directions = 2000
N = 100
x = np.linspace(0, 1, N)
coefficients = x**2

# Perturbation Dataset Configuration
# We use all of the base data size
# but only some of the additional data
dataset_quantities = [0, 600 - 60, 6000 - 60, 60000 - 60]

# Base output directory
base_output_dir = ""

## Model

In [3]:
import torch.nn.functional as F

# Model Definition
class NetMLP(nn.Module):
    def __init__(self, seed=None):
        super(NetMLP, self).__init__()
        if seed is not None:
            self.seed = seed
            with torch.random.fork_rng():
                torch.manual_seed(seed)
                self._initialize_layers()
        else:
            self._initialize_layers()

    def _initialize_layers(self):
        self.fc1 = nn.Linear(28*28, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        x = torch.flatten(x, 1)        # Flatten input (B, 1, 28, 28) → (B, 784)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # No softmax here
        return x


## Loading Model and Datasets

In [4]:
# Get the relative path
target_dir = Path("models_and_data") #current directory

# Lists to store loaded models and additional data
model_template = NetMLP().to(device)
loaded_models, loaded_model_data, loaded_dataset = load_models_and_data(model_template=model_template, target_dir=target_dir, device=device)
# Print dataset information
dataset_type = loaded_dataset['dataset_type']
print(f"Dataset type: {dataset_type}")
print(f"Dataset quantities: {loaded_dataset['dataset_quantities']}")

# Print tensor shapes
print("\nTensor shapes:")
for key in ["x_base_train", "y_base_train", "x_additional", "y_additional", "x_test", "y_test"]:
    if loaded_dataset[key] is not None:
        print(f"  {key}: {loaded_dataset[key].shape}")
    else:
        print(f"  {key}: None")

# Reconstruct the trained_model structure for each model, to pass into perturbations
all_models = []
for i, (model, model_data) in enumerate(zip(loaded_models, loaded_model_data)):
    trained_model = {
        'model': model,
        'train_losses': model_data['train_losses'],
        'train_accs': model_data['train_accs'],
        'test_losses': model_data['test_losses'],
        'test_accs': model_data['test_accs'],
        'additional_data': model_data['additional_data'],
        'dataset_type': model_data['dataset_type'],
    }
    all_models.append(trained_model)

# Now all_models has the same structure as your original trained_model list
print(f"Reconstructed {len(all_models)} trained models")

Looking for models and dataset in: L:\Programming\ARC\minima_sizes\MNIST\experiments - low data\model_hidden_2000\Example Folder\models_and_data
Found 4 model files:
  - model_additional_0.pt
  - model_additional_540.pt
  - model_additional_5940.pt
  - model_additional_59940.pt
✅ Model loaded into provided instance from models_and_data\model_additional_0.pt
Successfully loaded: model_additional_0.pt
✅ Model loaded into provided instance from models_and_data\model_additional_540.pt
Successfully loaded: model_additional_540.pt
✅ Model loaded into provided instance from models_and_data\model_additional_5940.pt
Successfully loaded: model_additional_5940.pt
✅ Model loaded into provided instance from models_and_data\model_additional_59940.pt
Successfully loaded: model_additional_59940.pt

Model data loaded from all models:
Model 0 (model_additional_0.pt):
  - Additional data: 0
  - Dataset type: data
  - Training accuracies: 500 entries
  - Test accuracies: 500 entries
Model 1 (model_additio

### Prepare For Perturbations

In [5]:
x_base_train = loaded_dataset['x_base_train']
y_base_train = loaded_dataset['y_base_train']
x_additional = loaded_dataset['x_additional']
y_additional = loaded_dataset['y_additional']

x_test = loaded_dataset['x_test']
y_test = loaded_dataset['y_test']

## Perturbations

We perform perturbations, sending in the models trained along with parameters to reproduce a small number of the training datasets, and evaluating their training and test losses over perturbations.

For a large dataset, we'll use the GPU.

In [6]:
criterion = nn.CrossEntropyLoss()

def loss_fn(logits, labels):
    return criterion(logits, labels)

def accuracy_fn(logits, labels):
    preds = logits.argmax(dim=1)
    return (preds == labels).float().mean()

analyze_wiggles_metrics(model_list = all_models, 
                x_base_train = x_base_train, y_base_train = y_base_train, 
                x_additional = x_additional, y_additional = y_additional,
                x_test = x_test, y_test = y_test, 
                dataset_quantities = dataset_quantities, 
                dataset_type = dataset_type, 
                metrics={"losses": loss_fn, "accuracies": accuracy_fn},
                coefficients = coefficients,
                num_directions = num_directions,
                perturbation_seed = perturbation_seed,
                base_output_dir = base_output_dir, #f"results/test_swiss_model_{model_seed}_data_{data_seed}"
                device = device) #instead of device

""" Our saved results are structured as follows:
wiggle_results: List of dictionaries containing wiggle test results
Each dictionary is of the form
{
'losses':
'coefficients':
'accuracies':
'perturbation_seed':
'perturbation_norm':
}
model: PyTorch model used in analysis (state_dict will be saved)
output_dir: Directory to save results (default: "imgs/swiss/random_dirs")
filename: Name of output file (default: "random_directions.npz")
**kwargs: Additional key-value pairs to be saved in the output file
Typically:
'additional_data':
'model_trained_data':
'dataset_type':
'base_dataset_size': 
'test_loss':
'test_accuracy':
'num_params':
"""

The number of parameters of the perturbation is 235146
Testing on data with 0 samples - 20 directions
Testing model trained on 0 additional data.
Test performance - Loss: 1.9667, Accuracy: 0.6860
Wiggle completed in 1.93 seconds for data model trained with 0 samples
Saved to data_0

Testing model trained on 540 additional data.
Test performance - Loss: 0.7910, Accuracy: 0.8752
Wiggle completed in 1.76 seconds for data model trained with 540 samples
Saved to data_0

Testing model trained on 5940 additional data.
Test performance - Loss: 0.3433, Accuracy: 0.9426
Wiggle completed in 1.71 seconds for data model trained with 5940 samples
Saved to data_0

Testing model trained on 59940 additional data.
Test performance - Loss: 0.0985, Accuracy: 0.9762
Wiggle completed in 1.71 seconds for data model trained with 59940 samples
Saved to data_0

Testing on data with 540 samples - 20 directions
Testing model trained on 540 additional data.
Test performance - Loss: 0.7910, Accuracy: 0.8752
Wiggle 

' Our saved results are structured as follows:\nwiggle_results: List of dictionaries containing wiggle test results\nEach dictionary is of the form\n{\n\'losses\':\n\'coefficients\':\n\'accuracies\':\n\'perturbation_seed\':\n\'perturbation_norm\':\n}\nmodel: PyTorch model used in analysis (state_dict will be saved)\noutput_dir: Directory to save results (default: "imgs/swiss/random_dirs")\nfilename: Name of output file (default: "random_directions.npz")\n**kwargs: Additional key-value pairs to be saved in the output file\nTypically:\n\'additional_data\':\n\'model_trained_data\':\n\'dataset_type\':\n\'base_dataset_size\': \n\'test_loss\':\n\'test_accuracy\':\n\'num_params\':\n'