In [1]:
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable
import time
import collections
import os
import sys
sys.path.append(os.path.dirname(os.getcwd()))
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))

from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
from torch import Tensor, tensor
import pandas as pd
import openml

#from aeon.regression.sklearn import RotationForestRegressor
from sklearn.metrics import root_mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

from preprocessing.stream_transforms import normalize_mean_std_traindata, normalize_streams, augment_time, add_basepoint_zero
from utils.utils import print_name, print_shape
from models import ResNet, NeuralEulerODE, RidgeCVModule, E2EResNet

np.set_printoptions(precision=3, threshold=5) # Print options

In [34]:
from models import FittableModule, create_layer
from ridge_ALOOCV import fit_ridge_ALOOCV

class StagewiseRandFeatBoostRegression(FittableModule):
    def __init__(self, 
                 generator: torch.Generator, 
                 hidden_dim: int = 128, 
                 bottleneck_dim: int = 128,
                 out_dim: int = 1,
                 n_layers: int = 5,
                 activation: nn.Module = nn.Tanh(),
                 l2_reg: float = 0.01,
                 feature_type = "SWIM", # "dense", identity
                 boost_lr: float = 1.0,
                 ):
        super(StagewiseRandFeatBoostRegression, self).__init__()
        self.generator = generator
        self.hidden_dim = hidden_dim
        self.bottleneck_dim = bottleneck_dim
        self.out_dim = out_dim
        self.n_layers = n_layers
        self.activation = activation
        self.l2_reg = l2_reg
        self.feature_type = feature_type
        self.boost_lr = boost_lr

        # save for now. for more memory efficient implementation, we can remove a lot of this
        self.W = []
        self.b = []
        self.alphas = []
        self.layers = []
        self.deltas = []


    def fit(self, X: Tensor, y: Tensor):
        # Create regressor W_0
        W, b, alpha = fit_ridge_ALOOCV(X, y)
        self.W.append(W)
        self.b.append(b)
        self.alphas.append(alpha)

        # Layerwise boosting
        for t in range(self.n_layers):
            # Step 1: Create random feature layer   
            layer = create_layer(self.generator, self.feature_type, self.hidden_dim, self.bottleneck_dim, self.activation)
            F, y = layer.fit(X, y)

            # Step 2: Obtain activation gradient and learn Delta
            # X shape (N, D) --- ResNet neurons
            # F shape (N, p) --- random features
            # y shape (N, d) --- target
            # r shape (N, D) --- residual at currect boosting iteration
            # W shape (D, d) --- top level classifier
            r = y - X @ W - b   # G = (y - X @ W - b) @ W.T
            SW, U = torch.linalg.eigh(W @ W.T)
            SF, V = torch.linalg.eigh(F.T @ F)
            Delta = (U.T @ W @ r.T @ F @ V) / (N*self.l2_reg + SW[:, None]*SF[None, :])
            Delta = (U @ Delta @ V.T).T
            #TODO de-center F and r, and include an intercept. How to do this for my special equation?

            # Step 3: Learn top level classifier
            X = X + self.boost_lr * F @ Delta
            W, b, alpha = fit_ridge_ALOOCV(X, y)

            # store
            self.layers.append(layer)
            self.deltas.append(Delta)
            self.W.append(W)
            self.b.append(b)
            self.alphas.append(alpha)

        return X @ W + b, y


    def forward(self, X: Tensor) -> Tensor:
        for layer, Delta in zip(self.layers, self.deltas):
            X = X + self.boost_lr * layer(X) @ Delta
        return X @ self.W[-1] + self.b[-1]
    
N = 100
D = 50
p = 30
d = 4
bottleneck_dim = 70

gen = torch.Generator().manual_seed(42)
X = torch.randn(N, D, generator=gen)
y = torch.randn(N, d, generator=gen)
model = StagewiseRandFeatBoostRegression(
        gen,
        hidden_dim = D,
        bottleneck_dim = bottleneck_dim,
        out_dim = d,
        n_layers = 5,
    )
_, _ = model.fit(X, y)

In [None]:
# from models import FittableModule, create_layer, ResidualBlock

# class RandFeatBoostRegression(FittableModule):
#     def __init__(self, 
#                  generator: torch.Generator, 
#                  in_dim: int = 1,
#                  hidden_size: int = 128, 
#                  out_dim: int = 1,
#                  n_blocks: int = 5,
#                  activation: nn.Module = nn.Tanh(),
#                  adam_lr: float = 1e-3,
#                  boost_lr: float = 1.0,
#                  epochs: int = 50,
#                  batch_size: int = 64,
#                  upscale_type = "SWIM", # "dense", identity
#                  second_in_resblock = "identity",
#                  ):
#         super(RandFeatBoostRegression, self).__init__()
#         self.generator = generator
#         self.hidden_size = hidden_size
#         self.out_dim = out_dim
#         self.n_blocks = n_blocks
#         self.activation = activation
#         self.adam_lr = adam_lr
#         self.boost_lr = boost_lr
#         self.epochs = epochs
#         self.batch_size = batch_size
#         self.upscale_type = upscale_type
#         self.second_in_resblock = second_in_resblock

#         self.upscale = create_layer(generator, upscale_type, in_dim, hidden_size, activation)
#         self.layers = []
#         self.deltas = []
#         self.regressors = []
#         self.reg = None


#     def fit(self, X: Tensor, y: Tensor):
#         device = X.device
#         X0 = X
#         X, y = self.upscale.fit(X, y)

#         # Create a CPU generator for DataLoader
#         data_loader_generator = torch.Generator(device='cpu')
#         data_loader_generator.manual_seed(self.generator.initial_seed())

#         # Layerwise boosting
#         for t in range(self.n_blocks):
#             layer = ResidualBlock(self.generator, self.hidden_size, self.hidden_size, self.upscale_type, self.second_in_resblock, self.activation)
#             layer.fit(X, y)

#             # Create top classifier
#             reg = RidgeCVModule()


#             #DELTA = nn.Parameter(torch.zeros(1, self.hidden_size, device=device))
#             DELTA = nn.Parameter(torch.zeros(1, 1, device=device))
#             if t > 0:
#                 classifier.weight.data = self.classifiers[-1].weight.data.clone()
#                 classifier.bias.data = self.classifiers[-1].bias.data.clone()

#             #data loader
#             dataset = torch.utils.data.TensorDataset(X, y)
#             loader = torch.utils.data.DataLoader(
#                 dataset, 
#                 batch_size=self.batch_size, 
#                 shuffle=True, 
#                 generator=data_loader_generator
#             )

#             #learn top level classifier and boost
#             params = list(classifier.parameters()) + [DELTA]
#             self.optimizer = torch.optim.Adam(params, lr=self.adam_lr, weight_decay=1e-5)
#             for epoch in tqdm(range(self.epochs)):
#                 for batch_X, batch_y in loader:
#                     self.optimizer.zero_grad()

#                     #forward pass
#                     FofX = layer(batch_X) - batch_X # due to how i programmed ResidualBlock...
#                     outputs = classifier(batch_X + DELTA * FofX)

#                     #loss and backprop
#                     loss = self.loss_fn(outputs, batch_y)
#                     loss.backward()
#                     self.optimizer.step()
            
#             #after convergence, update layers, deltas, and X
#             self.layers.append(layer)
#             self.deltas.append(DELTA)
#             self.classifiers.append(classifier)
#             with torch.no_grad():
#                 X = X + self.boost_lr * DELTA * (layer(X)-X)

#         self.classifier = classifier
#         return self(X0), y


#     def forward(self, X: Tensor) -> Tensor:
#         X = self.upscale(X)
#         for layer, DELTA in zip(self.layers, self.deltas):
#             FofX = layer(X) - X
#             X = X + self.boost_lr * DELTA * FofX
#         return self.classifier(X)

# OpenML code

In [35]:
# Fetch the collection with ID 353
collection = openml.study.get_suite(353)
dataset_ids = collection.data
metadata_list = []

# Fetch and process each dataset
for i, dataset_id in enumerate(dataset_ids):
    dataset = openml.datasets.get_dataset(dataset_id)
    X, y, categorical_indicator, attribute_names = dataset.get_data(
        target=dataset.default_target_attribute
    )
    X = np.array(X)
    y = np.array(y)[..., None]
    
    # Determine if the dataset has categorical features
    has_categorical = any(categorical_indicator)
    
    # Extract the required metadata
    metadata = {
        'dataset_id': dataset.id,
        'name': dataset.name,
        'n_obs': int(dataset.qualities['NumberOfInstances']),
        'n_features': int(dataset.qualities['NumberOfFeatures']),
        '%_unique_y': len(np.unique(y))/len(y),
        'n_unique_y': len(np.unique(y)),
        'has_categorical': has_categorical
    }
    
    metadata_list.append(metadata)
    print(f" {i+1}/{len(dataset_ids)} Processed dataset {dataset.id}: {dataset.name}")

# Create a DataFrame from the metadata list
df_metadata = pd.DataFrame(metadata_list).sort_values('%_unique_y', ascending=False).set_index("dataset_id")
df_metadata.sort_values('%_unique_y', ascending=True)

# Display the metadata DataFrame
df_metadata.loc[44962, "has_categorical"] = True
df_metadata

 1/35 Processed dataset 44956: abalone
 2/35 Processed dataset 44957: airfoil_self_noise
 3/35 Processed dataset 44958: auction_verification
 4/35 Processed dataset 44959: concrete_compressive_strength
 5/35 Processed dataset 44963: physiochemical_protein
 6/35 Processed dataset 44964: superconductivity
 7/35 Processed dataset 44965: geographical_origin_of_music
 8/35 Processed dataset 44966: solar_flare
 9/35 Processed dataset 44969: naval_propulsion_plant
 10/35 Processed dataset 44971: white_wine
 11/35 Processed dataset 44972: red_wine
 12/35 Processed dataset 44973: grid_stability
 13/35 Processed dataset 44974: video_transcoding
 14/35 Processed dataset 44975: wave_energy
 15/35 Processed dataset 44976: sarcos
 16/35 Processed dataset 44977: california_housing
 17/35 Processed dataset 44978: cpu_activity
 18/35 Processed dataset 44979: diamonds
 19/35 Processed dataset 44980: kin8nm
 20/35 Processed dataset 44981: pumadyn32nh
 21/35 Processed dataset 44983: miami_housing
 22/35 P

Unnamed: 0_level_0,name,n_obs,n_features,%_unique_y,n_unique_y,has_categorical
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
44973,grid_stability,10000,13,1.0,10000,False
44975,wave_energy,72000,49,0.999903,71993,False
44980,kin8nm,8192,9,0.999878,8191,False
44981,pumadyn32nh,8192,33,0.999878,8191,False
45402,space_ga,3107,7,0.999356,3105,False
44958,auction_verification,2043,8,0.998042,2039,True
44994,cars,804,18,0.992537,798,False
44957,airfoil_self_noise,1503,6,0.968729,1456,False
44970,QSAR_fish_toxicity,908,7,0.910793,827,False
44959,concrete_compressive_strength,1030,9,0.91068,938,False


# Download single dataset

In [36]:
def load_openml_dataset(dataset_id, 
                        normalize_X:bool = True,
                        normalize_y:bool = True,
                        train_test_size:float = 0.7,
                        split_seed:int = 0,
                        device="cpu",
                        ) -> Tuple[np.ndarray, np.ndarray]:
    # Fetch dataset from OpenML by its ID
    dataset = openml.datasets.get_dataset(dataset_id)
    df, _, categorical_indicator, attribute_names = dataset.get_data()
    df.dropna(inplace=True)
    y = np.array(df.pop(dataset.default_target_attribute))[..., None]
    X = np.array(df)
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_test_size, random_state=split_seed)

    #normalize
    if normalize_X:
        X_train, X_test = normalize_mean_std_traindata(X_train, X_test)
        X_train = np.clip(X_train, -3, 3)
        X_test = np.clip(X_test, -3, 3)
    if normalize_y:
        y_train, y_test = normalize_mean_std_traindata(y_train, y_test)

    return (torch.tensor(X_train.astype(np.float32), requires_grad=False, device=device),
            torch.tensor(X_test.astype(np.float32), requires_grad=False, device=device),
            torch.tensor(y_train.astype(np.float32), requires_grad=False, device=device),
            torch.tensor(y_test.astype(np.float32), requires_grad=False, device=device))

#dataset_id = 44971  # Replace with the dataset ID you want
dataset_id = 44971 #44970
X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id, False, False)


# Plot Activations

In [37]:
# import torch
# import torch.nn as nn
# import matplotlib.pyplot as plt
# import plotly.graph_objects as go
# from plotly.subplots import make_subplots

# def get_activation(name, activations):
#     def hook(model, input, output):
#         activations[name] = output.detach()
#     return hook


# def register_hooks(model, activations):
#     for name, layer in model.named_modules():
#         print(name)
#         if ".dense" not in name:
#             layer.register_forward_hook(get_activation(name, activations))



# def neuron_distribution_for_each_layer(X_train, y_train, X_test):
#     D = X_train.shape[1]
#     n_layers = 2
#     g1 = torch.Generator().manual_seed(0)
#     model = SampledEulerODE(g1, D, 10*D, n_layers, upsample_module='sampled', sampling_method='gradient')
#     #model = SampledResNet(g1, D, 10*D, 10*D, n_layers, upsample_module='sampled', sampling_method='gradient')
#     model.fit(X_train, y_train)

#     activations = {}
#     register_hooks(model, activations)
    
#     # Forward pass
#     model(X_test)
    
#     # Plot input data distribution
#     fig = make_subplots(rows=1, cols=1)
#     fig.add_trace(go.Histogram(x=X_train.flatten().cpu().numpy(), nbinsx=50, name='Train', histnorm='probability density', opacity=0.5))
#     fig.add_trace(go.Histogram(x=X_test.flatten().cpu().numpy(), nbinsx=50, name='Test', histnorm='probability density', opacity=0.5))
#     fig.update_layout(title_text='Input Data Distribution', xaxis_title='Input Feature Value', yaxis_title='Probability Density', barmode='overlay')
#     fig.show()

#     # Plot activations
#     for name, activation in activations.items():
#         fig = make_subplots(rows=1, cols=1)
#         fig.add_trace(go.Histogram(x=activation.flatten().cpu().numpy(), nbinsx=50, name='Activation', histnorm='probability density', opacity=0.5))
#         fig.update_layout(title_text=f'Activations at Layer: {name}', xaxis_title='Activation Value', yaxis_title='Probability Density', barmode='overlay')
#         fig.show()


# neuron_distribution_for_each_layer(X_train, y_train, X_test)

# Fit on a dataset

In [None]:
def run_allmodels_1dataset(
        generator: torch.Generator,
        X_train: Tensor,
        y_train: Tensor,
        X_test: Tensor,
        y_test: Tensor,
        ):
    
    D = X_train.shape[1]
    hidden_size = 512
    bottleneck_dim = hidden_size

    # (name, model, kwargs). kwargs separate to save memory
    model_list = [
        ["RidgeCV", RidgeCVModule, {}],

        ["T=3 End2End", E2EResNet,
                {"generator": generator,
                "in_dim": D,
                "hidden_size": hidden_size,
                "bottleneck_dim": bottleneck_dim,
                "out_dim": 1,
                "n_blocks": 2,
                "activation": nn.Tanh(),
                "loss": nn.MSELoss(),
                "lr": 1e-3,
                "epochs": 50,
                "batch_size": 64,}
                ],

        ["T=1 Dense", ResNet,
                {"generator": generator,
                 "in_dim": D,
                 "hidden_size": hidden_size,
                 "bottleneck_dim": None,
                 "n_blocks": 0,
                 "upsample_layer": "dense",}
                 ],

        ["T=1 SWIM Grad", ResNet,
                {"generator": generator,
                "in_dim": D,
                "hidden_size": hidden_size,
                "bottleneck_dim": None,
                "n_blocks": 0,
                "upsample_layer": "SWIM",}
                ],
        
        ["T=1 SWIM Unif", ResNet,
                {"generator": generator,
                "in_dim": D,
                "hidden_size": hidden_size,
                "bottleneck_dim": None,
                "n_blocks": 0,
                "upsample_layer": "SWIM",
                "sampling_method": "uniform",}
                ],
    ]

    for n_blocks in [2, 4]:
        model_list += [
        [f"T={n_blocks+1} ResSWIM Grad-dense", ResNet,
                {"generator": generator,
                "in_dim": D,
                "hidden_size": hidden_size,
                "bottleneck_dim": bottleneck_dim,
                "n_blocks": n_blocks,
                "upsample_layer": "SWIM",
                "res_layer1": "SWIM",
                "res_layer2": "dense",}
                ],

        [f"T={n_blocks+1} ResSWIM Grad-id", ResNet,
                {"generator": generator,
                "in_dim": D,
                "hidden_size": hidden_size,
                "bottleneck_dim": hidden_size,
                "n_blocks": n_blocks,
                "upsample_layer": "SWIM",
                "res_layer1": "SWIM",
                "res_layer2": "identity",}
                ],
                
        [f"T={n_blocks+1} ResDense", ResNet,
                {"generator": generator,
                "in_dim": D,
                "hidden_size": hidden_size,
                "bottleneck_dim": hidden_size,
                "n_blocks": n_blocks,
                "upsample_layer": "dense",
                "res_layer1": "dense",
                "res_layer2": "identity",}
                ],
        ]
        
    for n_layers in range(0, 30, 5):
        model_list += [
        [f"StagewiseRandFeatBoost_{n_layers}", StagewiseRandFeatBoostRegression,
                {"generator": generator,
                "hidden_dim": D,
                "bottleneck_dim": bottleneck_dim,
                "out_dim": 1,
                "n_layers": n_layers,
                "activation": nn.Tanh(),
                "l2_reg": 0.01,
                "feature_type": "SWIM",
                "boost_lr": 1.0,}
                ],
        ]
    
    results = []
    model_names = []
    for name, model, model_args in model_list:
        t0 = time.perf_counter()
        model = model(**model_args).to(X_train.device)
        pred_train, _ = model.fit(X_train, y_train)
        t1 = time.perf_counter()
        pred_test = model(X_test)
        t2 = time.perf_counter()
        rmse_train = root_mean_squared_error(y_train.cpu(), pred_train.cpu().detach()) 
        rmse_test = root_mean_squared_error(y_test.cpu(), pred_test.cpu().detach())

        result = np.array( [rmse_train, rmse_test, t1-t0, t2-t1] )
        results.append( result )
        model_names.append( name )

    return model_names, results



def run_all_experiments(
        dataset_ids: List,
        name_save: str = "PLACEHOLDER",
        device="cuda",
        ):
    # Fetch and process each dataset
    experiments = {}
    for i, dataset_id in enumerate(dataset_ids):
        X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id, split_seed=0, device=device)
        generator = torch.Generator(device=device).manual_seed(999)
        results = run_allmodels_1dataset(
            generator, X_train, y_train, X_test, y_test, 
            )
        experiments[dataset_id] = results
        print(f" {i+1}/{len(dataset_ids)} Processed dataset {dataset_id}")

    # Save results
    # Assuming experiments is a dict where keys are dataset names and values are tuples (model_names, results)
    attributes = ["RMSE_train", "RMSE_test", "t_fit", "t_feat"]
    data_list = []
    # Process the data
    for dataset_name, (model_names, results) in experiments.items():
        dataset_data = {}
        for attr_idx, attribute in enumerate(attributes):
            for model_idx, model_name in enumerate(model_names):
                dataset_data[(attribute, model_name)] = results[model_idx][attr_idx]
        data_list.append(pd.DataFrame(dataset_data, index=[dataset_name]))

    # Combine all datasets into a single DataFrame
    df = pd.concat(data_list)
    df = df.sort_index(axis=1)
    print(df)
    df.to_pickle(f"OpenML_reg_{name_save}.pkl")
    return df

In [68]:
dataset_ids_not_categorical = list(df_metadata.query("has_categorical == False").index.values)
dataset_ids_not_categorical = sorted([int(x) for x in dataset_ids_not_categorical])
run_all_experiments(dataset_ids_not_categorical, name_save="FIRSTBOOST")

100%|██████████| 50/50 [00:03<00:00, 14.43it/s]


 1/20 Processed dataset 44957


100%|██████████| 50/50 [00:02<00:00, 20.80it/s]


 2/20 Processed dataset 44959


100%|██████████| 50/50 [00:01<00:00, 29.12it/s]


 3/20 Processed dataset 44960


100%|██████████| 50/50 [01:28<00:00,  1.77s/it]


 4/20 Processed dataset 44963


100%|██████████| 50/50 [00:40<00:00,  1.24it/s]


 5/20 Processed dataset 44964


100%|██████████| 50/50 [00:02<00:00, 22.40it/s]


 6/20 Processed dataset 44965


100%|██████████| 50/50 [00:22<00:00,  2.18it/s]


 7/20 Processed dataset 44969


100%|██████████| 50/50 [00:01<00:00, 26.55it/s]


 8/20 Processed dataset 44970


100%|██████████| 50/50 [00:09<00:00,  5.18it/s]


 9/20 Processed dataset 44971


100%|██████████| 50/50 [00:03<00:00, 14.66it/s]


 10/20 Processed dataset 44972


100%|██████████| 50/50 [00:19<00:00,  2.59it/s]


 11/20 Processed dataset 44973


100%|██████████| 50/50 [02:14<00:00,  2.69s/it]


 12/20 Processed dataset 44975


100%|██████████| 50/50 [01:18<00:00,  1.57s/it]


 13/20 Processed dataset 44976


100%|██████████| 50/50 [00:38<00:00,  1.31it/s]


 14/20 Processed dataset 44977


100%|██████████| 50/50 [00:16<00:00,  3.10it/s]


 15/20 Processed dataset 44978


100%|██████████| 50/50 [00:16<00:00,  3.07it/s]


 16/20 Processed dataset 44980


100%|██████████| 50/50 [00:16<00:00,  3.11it/s]


 17/20 Processed dataset 44981


100%|██████████| 50/50 [00:27<00:00,  1.85it/s]


 18/20 Processed dataset 44983


100%|██████████| 50/50 [00:01<00:00, 26.09it/s]


 19/20 Processed dataset 44994


100%|██████████| 50/50 [00:06<00:00,  8.05it/s]


 20/20 Processed dataset 45402
      RMSE_test                                                     \
        RidgeCV StagewiseRandFeatBoost_0 StagewiseRandFeatBoost_10   
44957  0.674484                 0.674487                  0.402275   
44959  0.542088                 0.542044                  0.329459   
44960  0.304327                 0.304327                  0.169761   
44963  0.839762                 0.839761                  0.730424   
44964  0.517322                 0.517327                  0.411197   
44965  0.914663                 0.914638                  0.995283   
44969  0.413739                 0.408667                  0.200477   
44970  0.666021                 0.666545                  0.632870   
44971  0.878211                 0.878160                  0.827621   
44972  0.766536                 0.766350                  0.762375   
44973  0.595158                 0.595154                  0.235134   
44975  0.006491                 0.006491                  0

Unnamed: 0_level_0,RMSE_test,RMSE_test,RMSE_test,RMSE_test,RMSE_test,RMSE_test,RMSE_test,RMSE_test,RMSE_test,RMSE_test,...,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit
Unnamed: 0_level_1,RidgeCV,StagewiseRandFeatBoost_0,StagewiseRandFeatBoost_10,StagewiseRandFeatBoost_15,StagewiseRandFeatBoost_20,StagewiseRandFeatBoost_25,StagewiseRandFeatBoost_5,T=1 Dense,T=1 SWIM Grad,T=1 SWIM Unif,...,T=1 Dense,T=1 SWIM Grad,T=1 SWIM Unif,T=3 End2End,T=3 ResDense,T=3 ResSWIM Grad-dense,T=3 ResSWIM Grad-id,T=5 ResDense,T=5 ResSWIM Grad-dense,T=5 ResSWIM Grad-id
44957,0.674484,0.674487,0.402275,0.371531,0.363176,0.381567,0.410049,0.42468,0.476077,0.500399,...,0.037149,0.048563,0.026278,3.478067,0.044773,0.039301,0.04036,0.044366,0.096211,0.066376
44959,0.542088,0.542044,0.329459,0.35048,0.340559,0.33459,0.360222,0.377728,0.423298,0.44655,...,0.01691,0.028934,0.027148,2.412283,0.027065,0.02322,0.036922,0.021435,0.030333,0.026865
44960,0.304327,0.304327,0.169761,0.093226,0.100749,0.074538,0.232315,0.200248,0.259649,0.264216,...,0.022585,0.039492,0.034956,1.725706,0.033834,0.045645,0.047782,0.051409,0.053225,0.065124
44963,0.839762,0.839761,0.730424,0.723815,0.719567,0.717562,0.740267,0.768821,0.792099,0.793024,...,1.129858,1.001937,1.027092,88.67626,1.006271,1.023794,1.028898,1.076974,1.083936,1.082646
44964,0.517322,0.517327,0.411197,0.401247,0.399572,0.393684,0.426311,0.507481,0.491303,0.492552,...,0.409294,0.372071,0.402858,40.495464,0.41461,0.402672,0.398201,0.369224,0.460022,0.444525
44965,0.914663,0.914638,0.995283,1.148351,1.233783,1.440395,0.902871,0.92286,0.906198,0.905319,...,0.013971,0.029078,0.025057,2.243213,0.035805,0.044113,0.040106,0.031932,0.056503,0.153849
44969,0.413739,0.408667,0.200477,0.102135,0.091919,0.508549,0.584405,0.034592,0.067562,0.449469,...,0.138217,0.198264,0.234294,22.922965,0.211904,0.241977,0.202942,0.220156,0.230138,0.257736
44970,0.666021,0.666545,0.63287,0.621211,0.650791,0.657732,0.627557,0.610808,0.638345,0.648441,...,0.024857,0.032486,0.032628,1.891859,0.041233,0.040841,0.054705,0.042325,0.059595,0.045046
44971,0.878211,0.87816,0.827621,0.836399,0.830669,0.841934,0.825289,0.829079,0.832194,0.835323,...,0.05931,0.066782,0.120872,9.679895,0.115978,0.093079,0.1206,0.145675,0.161961,0.142813
44972,0.766536,0.76635,0.762375,0.760444,0.760841,0.749766,0.755722,0.760606,0.770519,0.771048,...,0.037638,0.042334,0.040502,3.422929,0.045032,0.051529,0.050775,0.040645,0.039347,0.033065


In [70]:
df_reg = pd.read_pickle("OpenML_reg_FIRSTBOOST.pkl")
df_reg["RMSE_test"].mean().sort_values()

T=3 End2End                  0.440158
StagewiseRandFeatBoost_15    0.445969
StagewiseRandFeatBoost_20    0.450481
StagewiseRandFeatBoost_10    0.451058
StagewiseRandFeatBoost_5     0.478081
StagewiseRandFeatBoost_25    0.480310
T=5 ResSWIM Grad-dense       0.485768
T=3 ResSWIM Grad-id          0.486900
T=3 ResSWIM Grad-dense       0.487529
T=5 ResSWIM Grad-id          0.489470
T=1 Dense                    0.497886
T=1 SWIM Grad                0.498991
T=3 ResDense                 0.511270
T=1 SWIM Unif                0.522699
T=5 ResDense                 0.525264
StagewiseRandFeatBoost_0     0.579274
RidgeCV                      0.579533
dtype: float64

In [71]:
df_reg["RMSE_test"].rank(axis=1).mean().sort_values()

StagewiseRandFeatBoost_20     4.60
StagewiseRandFeatBoost_15     4.70
StagewiseRandFeatBoost_25     5.20
StagewiseRandFeatBoost_10     5.80
StagewiseRandFeatBoost_5      6.20
T=3 End2End                   6.70
T=5 ResSWIM Grad-dense        8.35
T=3 ResSWIM Grad-id           8.80
T=1 Dense                     8.85
T=3 ResSWIM Grad-dense        8.85
T=5 ResSWIM Grad-id           9.70
T=1 SWIM Grad                10.90
T=3 ResDense                 11.25
T=5 ResDense                 11.80
T=1 SWIM Unif                12.00
StagewiseRandFeatBoost_0     14.45
RidgeCV                      14.85
dtype: float64

In [72]:
df_reg["RMSE_train"].mean().sort_values()

T=3 End2End                  0.285152
StagewiseRandFeatBoost_25    0.355267
StagewiseRandFeatBoost_20    0.361087
StagewiseRandFeatBoost_15    0.370432
StagewiseRandFeatBoost_10    0.396441
StagewiseRandFeatBoost_5     0.437691
T=3 ResSWIM Grad-dense       0.457572
T=5 ResSWIM Grad-dense       0.457998
T=5 ResSWIM Grad-id          0.459707
T=3 ResSWIM Grad-id          0.460334
T=1 Dense                    0.471736
T=1 SWIM Grad                0.476854
T=3 ResDense                 0.480694
T=5 ResDense                 0.489757
T=1 SWIM Unif                0.499885
StagewiseRandFeatBoost_0     0.568376
RidgeCV                      0.569632
dtype: float64

In [73]:
df_reg["RMSE_train"].rank(axis=1).mean().sort_values()

StagewiseRandFeatBoost_25     2.05
StagewiseRandFeatBoost_20     2.70
StagewiseRandFeatBoost_15     3.95
T=3 End2End                   4.05
StagewiseRandFeatBoost_10     5.45
StagewiseRandFeatBoost_5      6.95
T=3 ResSWIM Grad-dense        9.50
T=5 ResSWIM Grad-dense        9.65
T=1 Dense                    10.00
T=3 ResSWIM Grad-id          10.20
T=5 ResSWIM Grad-id          10.60
T=3 ResDense                 11.05
T=5 ResDense                 11.25
T=1 SWIM Grad                11.90
T=1 SWIM Unif                12.75
StagewiseRandFeatBoost_0     15.15
RidgeCV                      15.80
dtype: float64

In [None]:
# NEXT: implement boosting for the RandFeat models
# ALSO: I should also do boosting for learned Nets ...



#TODO NOTE NOTE next: add end2end and randfeatboost to regression models
#          i might also need to implement the gradient approach before this?  maybe not.  at least do line search probabily