In [1]:
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable
import time
import collections
import os
import sys
sys.path.append(os.path.dirname(os.getcwd()))
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))

from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
from torch import Tensor, tensor
import pandas as pd
import openml

#from aeon.regression.sklearn import RotationForestRegressor
from sklearn.metrics import root_mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split


np.set_printoptions(precision=3, threshold=5) # Print options
# Prefer the GPU, but fall back to the CPU so the notebook also runs on machines without CUDA.
# Kept as a plain string (as before) so every later `.to(device)` call keeps working unchanged.
device = "cuda" if torch.cuda.is_available() else "cpu"

# MNIST

In [2]:
from torchvision import datasets, transforms


def normalize_mean_std_traindata(X_train: Tensor, X_test: Tensor, eps: float = 1e-6) -> Tuple[Tensor, Tensor]:
    """Standardize train and test features using statistics of the training data only.

    The mean and standard deviation are computed along ``dim=0`` of ``X_train``
    and applied to both tensors, after which all values are clipped to
    ``[-5, 5]``.

    Args:
        X_train: Training data; statistics are taken over its first dimension.
        X_test: Test data, transformed with the training statistics.
        eps: Features whose training std is not greater than ``eps`` (constant
            columns, or NaN std from a single training row) are only centered,
            not rescaled.

    Returns:
        Tuple ``(X_train, X_test)`` of normalized, clipped tensors.
    """
    mean = X_train.mean(dim=0)
    std = X_train.std(dim=0)
    # A constant feature has std == 0, and dividing by it yields NaN (0/0) or inf,
    # which torch.clip does not remove. Use a unit scale for such features instead.
    # NaN std (single-row input) also fails the `>` comparison and gets the unit scale.
    safe_std = torch.where(std > eps, std, torch.ones_like(std))

    X_train = (X_train - mean) / safe_std
    X_test = (X_test - mean) / safe_std

    # Limit the influence of outliers.
    X_train = torch.clip(X_train, -5, 5)
    X_test = torch.clip(X_test, -5, 5)
    return X_train, X_test


# Define a transform to normalize the data
# (0.1307 / 0.3081 are presumably the MNIST pixel mean / std -- not verified in this notebook)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Dataset location. Overridable through the MNIST_PATH environment variable so the
# notebook is not tied to one machine; the default keeps the original location.
mnist_path = os.environ.get("MNIST_PATH", "/home/nikita/hdd/MNIST")

# Download and load the training data
# batch_size=len(dataset) makes the loader yield the whole split as one batch.
trainset = datasets.MNIST(mnist_path, download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=len(trainset), shuffle=False)

# Download and load the test data
testset = datasets.MNIST(mnist_path, download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=len(testset), shuffle=False)

# Flatten the data: one row per image
X_train, y_train_cat = next(iter(trainloader))
X_train = X_train.view(len(trainset), -1).to(device)
X_test, y_test_cat = next(iter(testloader))
X_test = X_test.view(len(testset), -1).to(device)

# Convert train and test labels to one-hot encoding
# The integer labels (`*_cat`) are kept as well; later cells use them for accuracy.
y_train = nn.functional.one_hot(y_train_cat, num_classes=10).float().to(device)
y_test = nn.functional.one_hot(y_test_cat, num_classes=10).float().to(device)
y_train_cat = y_train_cat.to(device)
y_test_cat = y_test_cat.to(device)

# Normalize by mean and std (statistics come from the training split only)
X_train, X_test = normalize_mean_std_traindata(X_train, X_test)
print(f"Train data shape: {X_train.shape}")
print(f"Train labels shape: {y_train.shape}")
print(f"Test data shape: {X_test.shape}")
print(f"Test labels shape: {y_test.shape}")

Train data shape: torch.Size([60000, 784])
Train labels shape: torch.Size([60000, 10])
Test data shape: torch.Size([10000, 784])
Test labels shape: torch.Size([10000, 10])


# Logistic Regression

In [3]:
from models.base import LogisticRegression

# Baseline: logistic regression fitted on the normalized pixels.
model = LogisticRegression(
        n_classes = 10,
        l2_lambda = 0.001,
        max_iter = 300,
    )
X_train_pred = model.fit_transform(X_train, y_train)
# Evaluation only: the test-set forward pass does not need an autograd graph
# (without this the result carries a grad_fn and keeps the graph alive).
with torch.no_grad():
    X_test_pred = model(X_test)

print("X_test_pred", X_test_pred)

# Accuracy = fraction of rows whose highest-scoring class equals the integer label.
train_accuracy = (torch.argmax(X_train_pred, dim=1) == y_train_cat).float().mean().item()
test_accuracy = (torch.argmax(X_test_pred, dim=1) == y_test_cat).float().mean().item()

print(f"Train accuracy: {train_accuracy}")
print(f"Test accuracy: {test_accuracy}")

X_test_pred tensor([[ -0.2361, -10.2318,   0.4055,  ...,  11.1450,   0.0553,   3.3455],
        [  5.9069,   1.3996,  13.2116,  ..., -18.7633,   4.5858, -11.9112],
        [ -5.6929,   6.3866,   1.9411,  ...,   0.8476,   0.3768,  -1.5534],
        ...,
        [ -7.5355,  -7.2969,  -2.6505,  ...,   2.3439,   4.0803,   4.8392],
        [ -2.7582,  -1.8546,  -3.1137,  ...,  -4.0667,   6.5052,  -3.2216],
        [  2.7034, -10.4991,   4.8433,  ...,  -7.0398,  -0.4861,  -4.2308]],
       device='cuda:0', grad_fn=<AddmmBackward0>)
Train accuracy: 0.9334999918937683
Test accuracy: 0.9265999794006348


# GradientRFRBoost

In [21]:
from models.random_feature_representation_boosting import GradientRFRBoostClassifier

model = GradientRFRBoostClassifier(
    in_dim = X_train.shape[1],  # taken from the data (784 here) rather than hard-coded
    hidden_dim = 512,
    n_classes = 10,
    randfeat_xt_dim = 512,
    randfeat_x0_dim = 512,
    n_layers = 3,
    l2_cls =  0.00001,
    l2_ghat = 0.0001,
    feature_type="SWIM",
    upscale_type = "SWIM",
    lbfgs_max_iter = 300,
    boost_lr = 1.0,
    use_batchnorm=True,
    do_linesearch= False,
    SWIM_scale=1.0,
    )
X_train_pred = model.fit_transform(X_train, y_train)
# Evaluation only: skip autograd bookkeeping for the test-set forward pass.
with torch.no_grad():
    X_test_pred = model(X_test)

# Accuracy = fraction of rows whose highest-scoring class equals the integer label.
train_accuracy = (torch.argmax(X_train_pred, dim=1) == y_train_cat).float().mean().item()
test_accuracy = (torch.argmax(X_test_pred, dim=1) == y_test_cat).float().mean().item()

print(f"Train accuracy: {train_accuracy}")
print(f"Test accuracy: {test_accuracy}")

#TODO NEXT: add xtx0 to the classification case

Train accuracy: 0.994533360004425
Test accuracy: 0.9603999853134155


In [16]:
def see_results_for_every_layer(X_train, y_train, X_test, y_test, model, loss_fn):
    """Print train/test loss and accuracy after the initial layer and after each boosting layer.

    The forward pass is replayed by hand under ``torch.no_grad()``:
    ``model.upscale`` followed by ``model.top_level_modules[0]`` gives the
    layer-0 predictions. Then, for every boosting step, the representation is
    updated as ``batchnorm(X + model.boost_lr * ghat_layer(feat_layer(X, X0)))``
    and scored with the matching entry of ``model.top_level_modules[1:]``.

    Args:
        X_train, X_test: Inputs passed to ``model.upscale`` and, unchanged, as
            the second argument of every random-feature layer.
        y_train, y_test: Targets handed to ``loss_fn``; ``argmax(1)`` is taken
            on them for accuracy, so they must be 2-D (e.g. one-hot).
        model: Object exposing ``upscale``, ``top_level_modules``,
            ``random_feature_layers``, ``ghat_boosting_layers``, ``batchnorms``
            and ``boost_lr``.
        loss_fn: Callable ``loss_fn(pred, target)``; its value is printed as "ce".

    Returns:
        None. All results are printed. The input tensors are not modified.
    """
    def report(layer_idx, pred_train, pred_test):
        # One metrics paragraph per layer: loss and accuracy on both splits.
        ce = loss_fn(pred_train, y_train)
        ce_test = loss_fn(pred_test, y_test)
        acc = (pred_train.argmax(1) == y_train.argmax(1)).float().mean()
        acc_test = (pred_test.argmax(1) == y_test.argmax(1)).float().mean()
        print(f"Train ce at layer {layer_idx}: {ce}")
        print(f"Test ce at layer {layer_idx}: {ce_test}")
        print(f"Train acc at layer {layer_idx}: {acc}")
        print(f"Test acc at layer {layer_idx}: {acc_test}")
        print()

    with torch.no_grad():
        # Keep the raw inputs: every random-feature layer also receives them.
        X0_train = X_train
        X0_test = X_test

        X_train = model.upscale(X0_train)
        X_test = model.upscale(X0_test)

        pred_train = model.top_level_modules[0](X_train)
        pred_test = model.top_level_modules[0](X_test)
        report(0, pred_train, pred_test)

        layers = zip(model.random_feature_layers,
                     model.ghat_boosting_layers,
                     model.top_level_modules[1:],
                     model.batchnorms)
        for t, (feat_layer, ghat_layer, classifier, batchnorm) in enumerate(layers):
            features_train = feat_layer(X_train, X0_train)
            features_test = feat_layer(X_test, X0_test)
            # Out-of-place update on purpose: `+=` would write into the tensor
            # returned by `model.upscale` / `batchnorm`, which may be the very
            # tensor the caller passed in (e.g. an identity upscale).
            X_train = batchnorm(X_train + model.boost_lr * ghat_layer(features_train))
            X_test = batchnorm(X_test + model.boost_lr * ghat_layer(features_test))

            pred_train = classifier(X_train)
            pred_test = classifier(X_test)
            report(t + 1, pred_train, pred_test)


# Print per-layer train/test cross-entropy and accuracy for the current global `model`.
# NOTE(review): this cell's execution count (16) is lower than that of the GradientRFRBoost
# fit cell above (21), so the captured numbers may come from a differently configured
# `model` -- confirm with Restart Kernel -> Run All.
see_results_for_every_layer(X_train, y_train, X_test, y_test, model, nn.functional.cross_entropy)

Train ce at layer 0: 0.3866678476333618
Test ce at layer 0: 0.3729083836078644
Train acc at layer 0: 0.8935500383377075
Test acc at layer 0: 0.8983999490737915

Train ce at layer 1: 0.24427926540374756
Test ce at layer 1: 0.2442425787448883
Train acc at layer 1: 0.9316666722297668
Test acc at layer 1: 0.9311999678611755

Train ce at layer 2: 0.23991598188877106
Test ce at layer 2: 0.24039645493030548
Train acc at layer 2: 0.9330166578292847
Test acc at layer 2: 0.932699978351593

Train ce at layer 3: 0.23564143478870392
Test ce at layer 3: 0.23655599355697632
Train acc at layer 3: 0.9344333410263062
Test acc at layer 3: 0.933899998664856



# End2End

In [13]:
from models.end2end import End2EndMLPResNet

# Baseline trained end to end; input width is taken from the data.
model = End2EndMLPResNet(
    in_dim = X_train.shape[1],
    hidden_dim = 128,
    bottleneck_dim = 32,
    out_dim = 10,
    n_blocks = 4,
    lr = 0.01,
    end_lr_factor = 0.01,
    n_epochs = 20,
    weight_decay = 0.001,
    batch_size = 512
    )
X_train_pred = model.fit_transform(X_train, y_train)
# Evaluation only: skip autograd bookkeeping for the test-set forward pass.
with torch.no_grad():
    X_test_pred = model(X_test)

print("X_test_pred", X_test_pred)

# Accuracy = fraction of rows whose highest-scoring class equals the integer label.
train_accuracy = (torch.argmax(X_train_pred, dim=1) == y_train_cat).float().mean().item()
test_accuracy = (torch.argmax(X_test_pred, dim=1) == y_test_cat).float().mean().item()

print(f"Train accuracy: {train_accuracy}")
print(f"Test accuracy: {test_accuracy}")

  0%|          | 0/20 [00:00<?, ?it/s]

100%|██████████| 20/20 [00:21<00:00,  1.09s/it]

X_test_pred tensor([[-7.71678984165191650391e-04, -7.49208033084869384766e-03,
         -1.24663859605789184570e-03,  ...,
          1.00676012039184570312e+00, -1.60436332225799560547e-03,
         -8.16347450017929077148e-03],
        [-1.68493315577507019043e-02, -1.36995688080787658691e-02,
          1.01810526847839355469e+00,  ...,
          1.14518180489540100098e-02,  4.63806092739105224609e-04,
          2.97597795724868774414e-03],
        [ 2.02718377113342285156e-03,  9.92150068283081054688e-01,
         -2.91625410318374633789e-03,  ...,
         -7.71909952163696289062e-03,  4.19247895479202270508e-03,
         -1.08579769730567932129e-02],
        ...,
        [ 6.20144605636596679688e-03, -4.16323542594909667969e-04,
          7.67238438129425048828e-04,  ...,
         -8.91387462615966796875e-05,  8.60729813575744628906e-03,
         -2.18964368104934692383e-03],
        [ 4.13244962692260742188e-03,  8.91607254743576049805e-03,
         -3.58318537473678588867e-03,  .


