In [1]:
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable
import time
import collections
import os
import sys
sys.path.append(os.path.dirname(os.getcwd()))
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))

from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor, tensor
import pandas as pd
import openml

#from aeon.regression.sklearn import RotationForestRegressor
from sklearn.metrics import root_mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

from preprocessing.stream_transforms import normalize_mean_std_traindata, normalize_streams, augment_time, add_basepoint_zero
from utils.utils import print_name, print_shape
from models import ResNet, NeuralEulerODE, RidgeClassifierCVModule

np.set_printoptions(precision=3, threshold=5) # Print options

# MNIST

In [2]:
from torchvision import datasets, transforms

# Define a transform to normalize the data.
# (0.1307, 0.3081) are presumably the MNIST pixel mean/std -- TODO confirm.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Dataset location. Override with the MNIST_DATA_DIR environment variable so the
# notebook runs on machines other than the original author's; the previous
# hard-coded path is kept as the default for backward compatibility.
mnist_path = os.environ.get("MNIST_DATA_DIR", "/home/nikita/hdd/MNIST")

# Download and load the training data.
# batch_size=len(dataset) makes the loader yield the whole split as one batch.
trainset = datasets.MNIST(mnist_path, download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=len(trainset), shuffle=False)

# Download and load the test data
testset = datasets.MNIST(mnist_path, download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=len(testset), shuffle=False)

# Pull the single full-size batch out of each loader and flatten every image
# into one feature vector per sample.
X_train, y_train_cat = next(iter(trainloader))
X_train = X_train.view(len(trainset), -1)
X_test, y_test_cat = next(iter(testloader))
X_test = X_test.view(len(testset), -1)

# Convert train and test labels to one-hot encoding (10 classes, float dtype)
y_train = F.one_hot(y_train_cat, num_classes=10).float()
y_test = F.one_hot(y_test_cat, num_classes=10).float()

# Normalize by mean and std
# NOTE(review): presumably uses statistics of the training split only -- confirm
# in preprocessing.stream_transforms.normalize_mean_std_traindata.
X_train, X_test = normalize_mean_std_traindata(X_train, X_test)
print(f"Train data shape: {X_train.shape}")
print(f"Train labels shape: {y_train.shape}")
print(f"Test data shape: {X_test.shape}")
print(f"Test labels shape: {y_test.shape}")

Train data shape: torch.Size([60000, 784])
Train labels shape: torch.Size([60000, 10])
Test data shape: torch.Size([10000, 784])
Test labels shape: torch.Size([10000, 10])


# Experiments

In [3]:
def run_allmodels_1dataset(
        generator: torch.Generator,
        X_train: Tensor,
        y_train: Tensor,
        X_test: Tensor,
        y_test: Tensor,
        ) -> Tuple[List[str], List[np.ndarray]]:
    """Fit every ResNet configuration on one dataset and record accuracy and timings.

    Args:
        generator: Random generator passed to every model constructor.
        X_train: Training inputs; ``X_train.shape[1]`` is used as the input dimension.
        y_train: Training targets, assumed one-hot with classes along dim 1.
        X_test: Test inputs.
        y_test: Test targets, assumed one-hot like ``y_train``.

    Returns:
        ``(model_names, results)``. ``results[i]`` is a NumPy array
        ``[acc_train, acc_test, t_fit, t_feat]`` for ``model_names[i]``, where
        ``t_fit`` is the wall-clock time of model construction plus ``fit`` and
        ``t_feat`` is the wall-clock time of the forward pass on ``X_test``.
    """
    D = X_train.shape[1]
    hidden_size = 128
    bottleneck_dim = 2*hidden_size

    def model_kwargs(
            n_blocks: int,
            upsample_layer: str,
            bottleneck: Optional[int] = None,
            res_layer1: Optional[str] = None,
            res_layer2: Optional[str] = None,
            sampling_method: Optional[str] = None,
            ) -> Dict[str, Any]:
        # Build one constructor-kwargs dict; optional keys are only added when
        # given so that the model's own defaults apply otherwise.
        kwargs: Dict[str, Any] = {
            "generator": generator,
            "in_dim": D,
            "hidden_size": hidden_size,
            "bottleneck_dim": bottleneck,
            "n_blocks": n_blocks,
            "upsample_layer": upsample_layer,
        }
        if sampling_method is not None:
            kwargs["sampling_method"] = sampling_method
        if res_layer1 is not None:
            kwargs["res_layer1"] = res_layer1
        if res_layer2 is not None:
            kwargs["res_layer2"] = res_layer2
        kwargs["output_layer"] = "ridge classifier"
        return kwargs

    # (name, model, kwargs). kwargs separate to save memory
    model_list = [
        #["Tabular Ridge", RidgeClassifierCVModule, {}],
        ["T=1 Dense", ResNet, model_kwargs(0, "dense")],
        ["T=1 SWIM Grad", ResNet, model_kwargs(0, "SWIM")],
        ["T=1 SWIM Unif", ResNet, model_kwargs(0, "SWIM", sampling_method="uniform")],
    ]

    for n_blocks in [2, 4]:
        depth = n_blocks + 1
        model_list += [
            [f"T={depth} ResSWIM Grad-dense", ResNet,
                model_kwargs(n_blocks, "SWIM", bottleneck_dim, "SWIM", "dense")],
            [f"T={depth} ResSWIM Grad-id", ResNet,
                model_kwargs(n_blocks, "SWIM", hidden_size, "SWIM", "identity")],
            [f"T={depth} ResSWIM Grad-dense UPDENSE", ResNet,
                model_kwargs(n_blocks, "dense", bottleneck_dim, "SWIM", "dense")],
            [f"T={depth} ResSWIM Grad-id UPDENSE", ResNet,
                model_kwargs(n_blocks, "dense", hidden_size, "SWIM", "identity")],
            [f"T={depth} ResDense", ResNet,
                model_kwargs(n_blocks, "dense", hidden_size, "dense", "identity")],
        ]

    # Recover class indices from the one-hot targets that were passed in, rather
    # than reading the notebook-level y_train_cat / y_test_cat globals. This keeps
    # the function correct for whatever data the caller supplies.
    # Assumes the models return class-index predictions, as the original
    # comparison against the categorical labels implies.
    labels_train = y_train.argmax(dim=1)
    labels_test = y_test.argmax(dim=1)

    results = []
    model_names = []
    for name, model_cls, model_args in model_list:
        print(name)
        with torch.no_grad():
            t0 = time.perf_counter()
            model = model_cls(**model_args).to(X_train.device)
            pred_train, _ = model.fit(X_train, y_train)
            t1 = time.perf_counter()
            pred_test = model(X_test)
            t2 = time.perf_counter()
            acc_train = (pred_train == labels_train).float().mean().item()
            acc_test = (pred_test == labels_test).float().mean().item()

            result = np.array( [acc_train, acc_test, t1-t0, t2-t1] )
            results.append( result )
            model_names.append( name )

    return model_names, results



def _experiments_to_frames(experiments, attributes):
    """Turn {dataset: (model_names, results)} into one single-row DataFrame per dataset."""
    return [
        pd.DataFrame(
            {
                (attribute, model_name): per_model[model_idx][attr_idx]
                for attr_idx, attribute in enumerate(attributes)
                for model_idx, model_name in enumerate(names)
            },
            index=[dataset_name],
        )
        for dataset_name, (names, per_model) in experiments.items()
    ]


def run_all_experiments(
        name_save: str = "PLACEHOLDER",
        device="cpu", #TODO only supports cpu for now due to mnist loading
        ):
    """Run all models on the notebook-level MNIST tensors and tabulate the results.

    Reads the global ``X_train``, ``y_train``, ``X_test`` and ``y_test``, seeds a
    generator on ``device`` with 999, and delegates to ``run_allmodels_1dataset``.
    The resulting table has one row per dataset and (attribute, model name)
    columns sorted by label. It is printed, pickled to
    ``MNIST_ridge_{name_save}.pkl`` in the working directory, and returned.
    """
    # Order matches the layout of each result array from run_allmodels_1dataset.
    metric_names = ["acc_train", "acc_test", "t_fit", "t_feat"]

    rng = torch.Generator(device=device).manual_seed(999)
    runs = {
        "MNIST": run_allmodels_1dataset(
            rng, X_train, y_train, X_test, y_test,
        ),
    }

    # Stack the per-dataset rows, then order the columns by label.
    table = pd.concat(_experiments_to_frames(runs, metric_names)).sort_index(axis=1)
    print(table)
    table.to_pickle(f"MNIST_ridge_{name_save}.pkl")
    return table

In [4]:
# Run every model configuration with the default arguments: prints the results
# table, writes it to MNIST_ridge_PLACEHOLDER.pkl, and returns it as the cell output.
run_all_experiments()

T=1 Dense
T=1 SWIM Grad
T=1 SWIM Unif
T=3 ResSWIM Grad-dense
T=3 ResSWIM Grad-id
T=3 ResSWIM Grad-dense UPDENSE
T=3 ResSWIM Grad-id UPDENSE
T=3 ResDense
T=5 ResSWIM Grad-dense
T=5 ResSWIM Grad-id
T=5 ResSWIM Grad-dense UPDENSE
T=5 ResSWIM Grad-id UPDENSE
T=5 ResDense
       acc_test                                           \
      T=1 Dense T=1 SWIM Grad T=1 SWIM Unif T=3 ResDense   
MNIST    0.8305        0.8607        0.8559        0.823   

                                                             \
      T=3 ResSWIM Grad-dense T=3 ResSWIM Grad-dense UPDENSE   
MNIST                 0.8876                         0.8316   

                                                                    \
      T=3 ResSWIM Grad-id T=3 ResSWIM Grad-id UPDENSE T=5 ResDense   
MNIST              0.8903                      0.8296       0.8137   

                              ...        t_fit                         \
      T=5 ResSWIM Grad-dense  ... T=3 ResDense T=3 ResSWIM Grad-dense   
MNIS

Unnamed: 0_level_0,acc_test,acc_test,acc_test,acc_test,acc_test,acc_test,acc_test,acc_test,acc_test,acc_test,...,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit
Unnamed: 0_level_1,T=1 Dense,T=1 SWIM Grad,T=1 SWIM Unif,T=3 ResDense,T=3 ResSWIM Grad-dense,T=3 ResSWIM Grad-dense UPDENSE,T=3 ResSWIM Grad-id,T=3 ResSWIM Grad-id UPDENSE,T=5 ResDense,T=5 ResSWIM Grad-dense,...,T=3 ResDense,T=3 ResSWIM Grad-dense,T=3 ResSWIM Grad-dense UPDENSE,T=3 ResSWIM Grad-id,T=3 ResSWIM Grad-id UPDENSE,T=5 ResDense,T=5 ResSWIM Grad-dense,T=5 ResSWIM Grad-dense UPDENSE,T=5 ResSWIM Grad-id,T=5 ResSWIM Grad-id UPDENSE
MNIST,0.8305,0.8607,0.8559,0.823,0.8876,0.8316,0.8903,0.8296,0.8137,0.8821,...,2.721344,4.132681,3.121404,4.03337,3.036211,2.773693,4.642193,3.718368,4.37462,3.646327


In [18]:
# Reload the results table written by run_all_experiments() with its default name_save.
# NOTE: unpickling can execute arbitrary code -- only load pickle files you produced yourself.
df = pd.read_pickle("MNIST_ridge_PLACEHOLDER.pkl")
# Test accuracy per model, averaged over the dataset rows, best first.
df["acc_test"].mean().sort_values(ascending=False)

T=3 ResSWIM Grad-id               0.8903
T=3 ResSWIM Grad-dense            0.8876
T=5 ResSWIM Grad-id               0.8860
T=5 ResSWIM Grad-dense            0.8821
T=1 SWIM Grad                     0.8607
T=1 SWIM Unif                     0.8559
T=3 ResSWIM Grad-dense UPDENSE    0.8316
T=5 ResSWIM Grad-dense UPDENSE    0.8308
T=1 Dense                         0.8305
T=3 ResSWIM Grad-id UPDENSE       0.8296
T=5 ResSWIM Grad-id UPDENSE       0.8275
T=3 ResDense                      0.8230
T=5 ResDense                      0.8137
dtype: float64

In [14]:
# Rank the models by test accuracy within each dataset row (rank 1 = highest),
# then average the ranks over rows and list the best-ranked model first.
df["acc_test"].rank(axis=1, ascending=False).mean().sort_values()

T=3 ResSWIM Grad-id                1.0
T=3 ResSWIM Grad-dense             2.0
T=5 ResSWIM Grad-id                3.0
T=5 ResSWIM Grad-dense             4.0
T=1 SWIM Grad                      5.0
T=1 SWIM Unif                      6.0
T=3 ResSWIM Grad-dense UPDENSE     7.0
T=5 ResSWIM Grad-dense UPDENSE     8.0
T=1 Dense                          9.0
T=3 ResSWIM Grad-id UPDENSE       10.0
T=5 ResSWIM Grad-id UPDENSE       11.0
T=3 ResDense                      12.0
T=5 ResDense                      13.0
dtype: float64

In [15]:
# Training accuracy per model, averaged over the dataset rows, best first.
df["acc_train"].mean().sort_values(ascending=False)

T=3 ResSWIM Grad-id               0.882433
T=3 ResSWIM Grad-dense            0.881200
T=5 ResSWIM Grad-id               0.877017
T=5 ResSWIM Grad-dense            0.872683
T=1 SWIM Grad                     0.851733
T=1 SWIM Unif                     0.847283
T=1 Dense                         0.823217
T=3 ResSWIM Grad-dense UPDENSE    0.822167
T=5 ResSWIM Grad-dense UPDENSE    0.821667
T=3 ResSWIM Grad-id UPDENSE       0.820233
T=5 ResSWIM Grad-id UPDENSE       0.819850
T=3 ResDense                      0.814833
T=5 ResDense                      0.803767
dtype: float64

In [16]:
# Rank the models by training accuracy within each dataset row (rank 1 = highest),
# then average the ranks over rows and list the best-ranked model first.
df["acc_train"].rank(axis=1, ascending=False).mean().sort_values()

T=3 ResSWIM Grad-id                1.0
T=3 ResSWIM Grad-dense             2.0
T=5 ResSWIM Grad-id                3.0
T=5 ResSWIM Grad-dense             4.0
T=1 SWIM Grad                      5.0
T=1 SWIM Unif                      6.0
T=1 Dense                          7.0
T=3 ResSWIM Grad-dense UPDENSE     8.0
T=5 ResSWIM Grad-dense UPDENSE     9.0
T=3 ResSWIM Grad-id UPDENSE       10.0
T=5 ResSWIM Grad-id UPDENSE       11.0
T=3 ResDense                      12.0
T=5 ResDense                      13.0
dtype: float64