In [1]:
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable
import time
import collections
import os
import sys
sys.path.append(os.path.dirname(os.getcwd()))
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))

from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
from torch import Tensor, tensor
import pandas as pd
import openml

#from aeon.regression.sklearn import RotationForestRegressor
from sklearn.metrics import root_mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

from preprocessing.stream_transforms import normalize_mean_std_traindata, normalize_streams, augment_time, add_basepoint_zero
from utils.utils import print_name, print_shape
from models import ResNet, NeuralEulerODE, RidgeCVModule, E2EResNet

np.set_printoptions(precision=3, threshold=5) # Print options

In [None]:
from models import FittableModule, create_layer, ResidualBlock

class StagewiseRandFeatBoostRegression(FittableModule):
    """Stagewise random-feature boosting regressor (work in progress).

    ``fit`` first fits a ``RidgeCVModule`` on the input and then, for each of
    ``n_layers`` stages, fits a random-feature layer built by ``create_layer``.
    The Delta update (step 2) and the per-stage top-level regressor (step 3)
    are not finished; ``forward`` currently returns its input unchanged.

    Args:
        generator: Random generator handed to ``create_layer``.
        hidden_dim: Third positional argument given to ``create_layer``.
        bottleneck_dim: Fourth positional argument given to ``create_layer``.
        out_dim: Stored on the instance; not read by ``fit``/``forward`` yet.
        n_layers: Number of boosting stages run by ``fit``.
        activation: Activation module handed to ``create_layer``.
        upscale_type: Stored on the instance; not read by ``fit`` yet.
        second_in_resblock: Stored on the instance; not read by ``fit`` yet.
    """

    def __init__(self, 
                 generator: torch.Generator, 
                 hidden_dim: int = 128, 
                 bottleneck_dim: int = 128,
                 out_dim: int = 1,
                 n_layers: int = 5,
                 activation: nn.Module = nn.Tanh(),

                 #TODO 
                 upscale_type = "SWIM", # "dense", identity
                 second_in_resblock = "identity",
                 ):
        super(StagewiseRandFeatBoostRegression, self).__init__()
        self.generator = generator
        self.hidden_dim = hidden_dim
        self.bottleneck_dim = bottleneck_dim
        self.out_dim = out_dim
        self.n_layers = n_layers
        self.activation = activation

        # TODO
        self.upscale_type = upscale_type
        self.second_in_resblock = second_in_resblock

        # Containers that fit() appends to. They must exist before fit() runs,
        # otherwise the first append raises AttributeError.
        self.layers = []
        self.deltas = []
        self.regressors = []

        # self.upscale = create_layer(generator, upscale_type, in_dim, hidden_size, activation)
        # self.reg = None


    def fit(self, X: Tensor, y: Tensor):
        """Fit the initial regressor and the stagewise layers.

        Args:
            X: Training inputs.
            y: Training targets.

        Returns:
            ``(self(X0), y)`` where ``X0`` is the input as passed in and ``y`` is
            whatever the last ``layer.fit`` call returned as its second value.
        """
        # Remember the untouched input once, before any stage rebinds X. Capturing
        # it inside the loop would keep only the last stage's input and leave the
        # name unbound when n_layers == 0.
        X0 = X

        # Create regressor W_0
        regressor = RidgeCVModule()
        regressor.fit(X, y)
        self.regressors.append(regressor)

        # Layerwise boosting
        for t in range(self.n_layers):
            # Step 1: Create random feature layer
            # NOTE(review): the layer type is hard-coded to "SWIM" and the input
            # width is taken to be hidden_dim, yet no upscale layer is applied to
            # X beforehand -- confirm this matches the intended design.
            layer = create_layer(self.generator, "SWIM", self.hidden_dim, self.bottleneck_dim, self.activation)
            X, y = layer.fit(X, y)
            self.layers.append(layer)

            # Step 2: Obtain activation gradient and learn Delta
            # X shape (N, D)
            # y shape (N, p)
            # W shape (D, p)
            # r shape (D, p)
            #
            # NOTE(review): unfinished draft. `W`, `x`, `r`, `N` and `lambda_reg`
            # are not defined anywhere in this method, so fit() raises NameError
            # here. The shape notes above also look inconsistent with the products
            # below (`r.T @ x` needs `r` to have as many rows as `x`) -- confirm
            # the intended shapes before wiring in real values. Delta is computed
            # but neither stored in self.deltas nor applied to X.
            SW, U = np.linalg.eigh(W.T @ W)
            SX, V = np.linalg.eigh(x.T @ x)
            Delta = (U.T @ W.T @ r.T @ x @ V) / (N*lambda_reg + SW[:, None]*SX[None, :])
            Delta = U @ Delta @ V.T

            # Step 3: Learn top level classifier


        return self(X0), y


    def forward(self, X: Tensor) -> Tensor:
        """Return ``X`` unchanged (placeholder until the boosting pass is implemented)."""
        return X

In [None]:
# from models import FittableModule, create_layer, ResidualBlock

# class RandFeatBoostRegression(FittableModule):
#     def __init__(self, 
#                  generator: torch.Generator, 
#                  in_dim: int = 1,
#                  hidden_size: int = 128, 
#                  out_dim: int = 1,
#                  n_blocks: int = 5,
#                  activation: nn.Module = nn.Tanh(),
#                  adam_lr: float = 1e-3,
#                  boost_lr: float = 1.0,
#                  epochs: int = 50,
#                  batch_size: int = 64,
#                  upscale_type = "SWIM", # "dense", identity
#                  second_in_resblock = "identity",
#                  ):
#         super(RandFeatBoostRegression, self).__init__()
#         self.generator = generator
#         self.hidden_size = hidden_size
#         self.out_dim = out_dim
#         self.n_blocks = n_blocks
#         self.activation = activation
#         self.adam_lr = adam_lr
#         self.boost_lr = boost_lr
#         self.epochs = epochs
#         self.batch_size = batch_size
#         self.upscale_type = upscale_type
#         self.second_in_resblock = second_in_resblock

#         self.upscale = create_layer(generator, upscale_type, in_dim, hidden_size, activation)
#         self.layers = []
#         self.deltas = []
#         self.regressors = []
#         self.reg = None


#     def fit(self, X: Tensor, y: Tensor):
#         device = X.device
#         X0 = X
#         X, y = self.upscale.fit(X, y)

#         # Create a CPU generator for DataLoader
#         data_loader_generator = torch.Generator(device='cpu')
#         data_loader_generator.manual_seed(self.generator.initial_seed())

#         # Layerwise boosting
#         for t in range(self.n_blocks):
#             layer = ResidualBlock(self.generator, self.hidden_size, self.hidden_size, self.upscale_type, self.second_in_resblock, self.activation)
#             layer.fit(X, y)

#             # Create top classifier
#             reg = RidgeCVModule()


#             #DELTA = nn.Parameter(torch.zeros(1, self.hidden_size, device=device))
#             DELTA = nn.Parameter(torch.zeros(1, 1, device=device))
#             if t > 0:
#                 classifier.weight.data = self.classifiers[-1].weight.data.clone()
#                 classifier.bias.data = self.classifiers[-1].bias.data.clone()

#             #data loader
#             dataset = torch.utils.data.TensorDataset(X, y)
#             loader = torch.utils.data.DataLoader(
#                 dataset, 
#                 batch_size=self.batch_size, 
#                 shuffle=True, 
#                 generator=data_loader_generator
#             )

#             #learn top level classifier and boost
#             params = list(classifier.parameters()) + [DELTA]
#             self.optimizer = torch.optim.Adam(params, lr=self.adam_lr, weight_decay=1e-5)
#             for epoch in tqdm(range(self.epochs)):
#                 for batch_X, batch_y in loader:
#                     self.optimizer.zero_grad()

#                     #forward pass
#                     FofX = layer(batch_X) - batch_X # due to how i programmed ResidualBlock...
#                     outputs = classifier(batch_X + DELTA * FofX)

#                     #loss and backprop
#                     loss = self.loss_fn(outputs, batch_y)
#                     loss.backward()
#                     self.optimizer.step()
            
#             #after convergence, update layers, deltas, and X
#             self.layers.append(layer)
#             self.deltas.append(DELTA)
#             self.classifiers.append(classifier)
#             with torch.no_grad():
#                 X = X + self.boost_lr * DELTA * (layer(X)-X)

#         self.classifier = classifier
#         return self(X0), y


#     def forward(self, X: Tensor) -> Tensor:
#         X = self.upscale(X)
#         for layer, DELTA in zip(self.layers, self.deltas):
#             FofX = layer(X) - X
#             X = X + self.boost_lr * DELTA * FofX
#         return self.classifier(X)

# OpenML code

In [2]:
# Collect summary metadata for every dataset in OpenML benchmark suite 353.
collection = openml.study.get_suite(353)
dataset_ids = collection.data
metadata_list = []

# Fetch and process each dataset
for i, dataset_id in enumerate(dataset_ids):
    dataset = openml.datasets.get_dataset(dataset_id)
    X, y, categorical_indicator, attribute_names = dataset.get_data(
        target=dataset.default_target_attribute
    )
    # Only the target is needed for the statistics below; the feature table is
    # left as returned instead of being converted to an array that is never read.
    y = np.array(y)[..., None]
    n_unique_y = len(np.unique(y))

    # Determine if the dataset has categorical features
    has_categorical = any(categorical_indicator)

    # Extract the required metadata
    # ('%_unique_y' is stored as a fraction in [0, 1], not as a percentage.)
    metadata = {
        'dataset_id': dataset.id,
        'name': dataset.name,
        'n_obs': int(dataset.qualities['NumberOfInstances']),
        'n_features': int(dataset.qualities['NumberOfFeatures']),
        '%_unique_y': n_unique_y/len(y),
        'n_unique_y': n_unique_y,
        'has_categorical': has_categorical
    }

    metadata_list.append(metadata)
    print(f" {i+1}/{len(dataset_ids)} Processed dataset {dataset.id}: {dataset.name}")

# One row per dataset, most continuous-looking targets first.
df_metadata = (
    pd.DataFrame(metadata_list)
    .sort_values('%_unique_y', ascending=False)
    .set_index("dataset_id")
)

# Manual override: treat dataset 44962 as categorical even though its indicator
# says otherwise. Guarded so that a missing id does not make `.loc` silently
# append a new, otherwise-empty row.
if 44962 in df_metadata.index:
    df_metadata.loc[44962, "has_categorical"] = True

# Display the metadata DataFrame
df_metadata

 1/35 Processed dataset 44956: abalone
 2/35 Processed dataset 44957: airfoil_self_noise
 3/35 Processed dataset 44958: auction_verification
 4/35 Processed dataset 44959: concrete_compressive_strength
 5/35 Processed dataset 44963: physiochemical_protein
 6/35 Processed dataset 44964: superconductivity
 7/35 Processed dataset 44965: geographical_origin_of_music
 8/35 Processed dataset 44966: solar_flare
 9/35 Processed dataset 44969: naval_propulsion_plant
 10/35 Processed dataset 44971: white_wine
 11/35 Processed dataset 44972: red_wine
 12/35 Processed dataset 44973: grid_stability
 13/35 Processed dataset 44974: video_transcoding
 14/35 Processed dataset 44975: wave_energy
 15/35 Processed dataset 44976: sarcos
 16/35 Processed dataset 44977: california_housing
 17/35 Processed dataset 44978: cpu_activity
 18/35 Processed dataset 44979: diamonds
 19/35 Processed dataset 44980: kin8nm
 20/35 Processed dataset 44981: pumadyn32nh
 21/35 Processed dataset 44983: miami_housing
 22/35 P

Unnamed: 0_level_0,name,n_obs,n_features,%_unique_y,n_unique_y,has_categorical
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
44973,grid_stability,10000,13,1.0,10000,False
44975,wave_energy,72000,49,0.999903,71993,False
44980,kin8nm,8192,9,0.999878,8191,False
44981,pumadyn32nh,8192,33,0.999878,8191,False
45402,space_ga,3107,7,0.999356,3105,False
44958,auction_verification,2043,8,0.998042,2039,True
44994,cars,804,18,0.992537,798,False
44957,airfoil_self_noise,1503,6,0.968729,1456,False
44970,QSAR_fish_toxicity,908,7,0.910793,827,False
44959,concrete_compressive_strength,1030,9,0.91068,938,False


# Download single dataset

In [3]:
def load_openml_dataset(dataset_id, 
                        normalize_X:bool = True,
                        normalize_y:bool = True,
                        train_test_size:float = 0.7,
                        split_seed:int = 0,
                        device="cpu",
                        ) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    """Download one OpenML dataset and return a float32 train/test split.

    Rows containing missing values are dropped, the dataset's default target
    attribute becomes ``y`` (with a trailing singleton axis), and every other
    column becomes ``X``.

    Args:
        dataset_id: OpenML dataset id.
        normalize_X: If True, pass the features through
            ``normalize_mean_std_traindata`` and then clip them to [-3, 3].
        normalize_y: If True, pass the targets through
            ``normalize_mean_std_traindata``.
        train_test_size: Forwarded to ``train_test_split`` as ``train_size``.
        split_seed: Forwarded to ``train_test_split`` as ``random_state``.
        device: Device on which the returned tensors are created.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as float32 tensors on ``device``.
    """
    # Fetch dataset from OpenML by its ID
    dataset = openml.datasets.get_dataset(dataset_id)
    df, _, _, _ = dataset.get_data()
    df = df.dropna()  # rebind rather than mutate the frame handed back by openml
    y = np.array(df.pop(dataset.default_target_attribute))[..., None]
    X = np.array(df)
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_test_size, random_state=split_seed)

    #normalize
    if normalize_X:
        X_train, X_test = normalize_mean_std_traindata(X_train, X_test)
        # Clip after normalisation to limit the influence of extreme values.
        X_train = np.clip(X_train, -3, 3)
        X_test = np.clip(X_test, -3, 3)
    if normalize_y:
        y_train, y_test = normalize_mean_std_traindata(y_train, y_test)

    def to_tensor(array):
        # Single conversion path so all four outputs share dtype and device.
        return torch.tensor(array.astype(np.float32), requires_grad=False, device=device)

    return (to_tensor(X_train), to_tensor(X_test), to_tensor(y_train), to_tensor(y_test))

# Dataset used for the single-dataset experiments below
# (44971 = white_wine; 44970 is kept as an alternative id).
dataset_id = 44971 #44970
# Load the raw split: neither features nor targets are normalised here.
X_train, X_test, y_train, y_test = load_openml_dataset(
    dataset_id,
    normalize_X=False,
    normalize_y=False,
)


# Plot Activations

In [4]:
# import torch
# import torch.nn as nn
# import matplotlib.pyplot as plt
# import plotly.graph_objects as go
# from plotly.subplots import make_subplots

# def get_activation(name, activations):
#     def hook(model, input, output):
#         activations[name] = output.detach()
#     return hook


# def register_hooks(model, activations):
#     for name, layer in model.named_modules():
#         print(name)
#         if ".dense" not in name:
#             layer.register_forward_hook(get_activation(name, activations))



# def neuron_distribution_for_each_layer(X_train, y_train, X_test):
#     D = X_train.shape[1]
#     n_layers = 2
#     g1 = torch.Generator().manual_seed(0)
#     model = SampledEulerODE(g1, D, 10*D, n_layers, upsample_module='sampled', sampling_method='gradient')
#     #model = SampledResNet(g1, D, 10*D, 10*D, n_layers, upsample_module='sampled', sampling_method='gradient')
#     model.fit(X_train, y_train)

#     activations = {}
#     register_hooks(model, activations)
    
#     # Forward pass
#     model(X_test)
    
#     # Plot input data distribution
#     fig = make_subplots(rows=1, cols=1)
#     fig.add_trace(go.Histogram(x=X_train.flatten().cpu().numpy(), nbinsx=50, name='Train', histnorm='probability density', opacity=0.5))
#     fig.add_trace(go.Histogram(x=X_test.flatten().cpu().numpy(), nbinsx=50, name='Test', histnorm='probability density', opacity=0.5))
#     fig.update_layout(title_text='Input Data Distribution', xaxis_title='Input Feature Value', yaxis_title='Probability Density', barmode='overlay')
#     fig.show()

#     # Plot activations
#     for name, activation in activations.items():
#         fig = make_subplots(rows=1, cols=1)
#         fig.add_trace(go.Histogram(x=activation.flatten().cpu().numpy(), nbinsx=50, name='Activation', histnorm='probability density', opacity=0.5))
#         fig.update_layout(title_text=f'Activations at Layer: {name}', xaxis_title='Activation Value', yaxis_title='Probability Density', barmode='overlay')
#         fig.show()


# neuron_distribution_for_each_layer(X_train, y_train, X_test)

# Fit on a dataset

In [5]:
def run_allmodels_1dataset(
        generator: torch.Generator,
        X_train: Tensor,
        y_train: Tensor,
        X_test: Tensor,
        y_test: Tensor,
        ):
    """Fit every benchmark model on one train/test split and time each of them.

    Args:
        generator: Random generator shared by all models that take one.
        X_train, y_train: Training features and targets.
        X_test, y_test: Held-out features and targets.

    Returns:
        ``(model_names, results)``: two parallel lists. Each result is
        ``np.array([rmse_train, rmse_test, t_fit, t_predict])``, where ``t_fit``
        covers model construction plus ``fit`` and ``t_predict`` covers the
        forward pass on ``X_test``.
    """
    in_dim = X_train.shape[1]
    hidden_size = 128
    bottleneck_dim = 2*hidden_size

    def resnet_kwargs(bottleneck, n_blocks, upsample_layer, **extra):
        # Arguments shared by every ResNet variant; `extra` carries the rest.
        return {
            "generator": generator,
            "in_dim": in_dim,
            "hidden_size": hidden_size,
            "bottleneck_dim": bottleneck,
            "n_blocks": n_blocks,
            "upsample_layer": upsample_layer,
            **extra,
        }

    e2e_kwargs = {
        "generator": generator,
        "in_dim": in_dim,
        "hidden_size": hidden_size,
        "bottleneck_dim": bottleneck_dim,
        "out_dim": 1,
        "n_blocks": 2,
        "activation": nn.Tanh(),
        "loss": nn.MSELoss(),
        "lr": 1e-3,
        "epochs": 50,
        "batch_size": 64,
    }

    # (name, model class, kwargs). Models are only instantiated inside the loop
    # below, so at most one of them is alive at a time.
    candidates = [
        ("RidgeCV", RidgeCVModule, {}),
        ("T=3 End2End", E2EResNet, e2e_kwargs),
        ("T=1 Dense", ResNet, resnet_kwargs(None, 0, "dense")),
        ("T=1 SWIM Grad", ResNet, resnet_kwargs(None, 0, "SWIM")),
        ("T=1 SWIM Unif", ResNet,
            resnet_kwargs(None, 0, "SWIM", sampling_method="uniform")),
    ]

    # Deeper residual variants.
    for n_blocks in (2, 4):
        candidates.extend([
            (f"T={n_blocks+1} ResSWIM Grad-dense", ResNet,
                resnet_kwargs(bottleneck_dim, n_blocks, "SWIM",
                              res_layer1="SWIM", res_layer2="dense")),
            (f"T={n_blocks+1} ResSWIM Grad-id", ResNet,
                resnet_kwargs(hidden_size, n_blocks, "SWIM",
                              res_layer1="SWIM", res_layer2="identity")),
            (f"T={n_blocks+1} ResDense", ResNet,
                resnet_kwargs(hidden_size, n_blocks, "dense",
                              res_layer1="dense", res_layer2="identity")),
        ])

    model_names = []
    results = []
    for label, model_cls, kwargs in candidates:
        started = time.perf_counter()
        fitted = model_cls(**kwargs).to(X_train.device)
        pred_train, _ = fitted.fit(X_train, y_train)
        fit_done = time.perf_counter()
        pred_test = fitted(X_test)
        predict_done = time.perf_counter()

        rmse_train = root_mean_squared_error(y_train.cpu(), pred_train.cpu().detach())
        rmse_test = root_mean_squared_error(y_test.cpu(), pred_test.cpu().detach())

        results.append(
            np.array([rmse_train, rmse_test, fit_done - started, predict_done - fit_done])
        )
        model_names.append(label)

    return model_names, results



def run_all_experiments(
        dataset_ids: List,
        name_save: str = "PLACEHOLDER",
        device="cpu",
        seed: int = 999,
        split_seed: int = 0,
        ):
    """Run every model on every dataset and collect the metrics in one table.

    Args:
        dataset_ids: OpenML dataset ids to evaluate.
        name_save: Suffix of the pickle file the table is written to
            (``OpenML_reg_{name_save}.pkl`` in the working directory).
        device: Device used for the data tensors and the random generator.
        seed: Seed of the generator created afresh for each dataset.
        split_seed: Seed of the train/test split of each dataset.

    Returns:
        DataFrame indexed by dataset id whose columns are
        ``(attribute, model_name)`` pairs, with attribute one of
        ``RMSE_train``, ``RMSE_test``, ``t_fit``, ``t_feat``.

    Raises:
        ValueError: If ``dataset_ids`` is empty.
    """
    # Fail before any download, with a message that names the actual problem
    # (an empty list would otherwise only surface later inside pd.concat).
    if len(dataset_ids) == 0:
        raise ValueError("dataset_ids is empty: there is nothing to run")

    # Fetch and process each dataset
    experiments = {}
    for i, dataset_id in enumerate(dataset_ids):
        X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id, split_seed=split_seed, device=device)
        # Fresh generator per dataset so every dataset starts from the same seed.
        generator = torch.Generator(device=device).manual_seed(seed)
        results = run_allmodels_1dataset(
            generator, X_train, y_train, X_test, y_test, 
            )
        experiments[dataset_id] = results
        print(f" {i+1}/{len(dataset_ids)} Processed dataset {dataset_id}")

    # Save results
    # `experiments` maps dataset id -> (model_names, results); each entry of
    # `results` holds the four values listed in `attributes`, in that order.
    attributes = ["RMSE_train", "RMSE_test", "t_fit", "t_feat"]
    data_list = []
    for dataset_id, (model_names, results) in experiments.items():
        row = {
            (attribute, model_name): results[model_idx][attr_idx]
            for attr_idx, attribute in enumerate(attributes)
            for model_idx, model_name in enumerate(model_names)
        }
        data_list.append(pd.DataFrame(row, index=[dataset_id]))

    # Combine all datasets into a single DataFrame
    df = pd.concat(data_list)
    df = df.sort_index(axis=1)
    print(df)
    df.to_pickle(f"OpenML_reg_{name_save}.pkl")
    return df

In [6]:
# Benchmark only the datasets without categorical features, passing the ids
# on as plain Python ints.
numeric_only = df_metadata.query("has_categorical == False")
dataset_ids_not_categorical = [int(dataset_id) for dataset_id in numeric_only.index.values]
run_all_experiments(dataset_ids_not_categorical)

100%|██████████| 50/50 [00:23<00:00,  2.14it/s]


 1/20 Processed dataset 44973


100%|██████████| 50/50 [05:51<00:00,  7.02s/it]


 2/20 Processed dataset 44975


100%|██████████| 50/50 [00:17<00:00,  2.83it/s]


 3/20 Processed dataset 44980


100%|██████████| 50/50 [00:17<00:00,  2.80it/s]


 4/20 Processed dataset 44981


100%|██████████| 50/50 [00:06<00:00,  7.20it/s]


 5/20 Processed dataset 45402


100%|██████████| 50/50 [00:01<00:00, 25.94it/s]


 6/20 Processed dataset 44994


100%|██████████| 50/50 [00:03<00:00, 14.41it/s]


 7/20 Processed dataset 44957


100%|██████████| 50/50 [00:02<00:00, 24.65it/s]


 8/20 Processed dataset 44970


100%|██████████| 50/50 [00:02<00:00, 20.43it/s]


 9/20 Processed dataset 44959


100%|██████████| 50/50 [00:01<00:00, 26.58it/s]


 10/20 Processed dataset 44960


100%|██████████| 50/50 [01:37<00:00,  1.96s/it]


 11/20 Processed dataset 44963


100%|██████████| 50/50 [01:44<00:00,  2.09s/it]


 12/20 Processed dataset 44976


100%|██████████| 50/50 [00:44<00:00,  1.13it/s]


 13/20 Processed dataset 44977


100%|██████████| 50/50 [00:29<00:00,  1.67it/s]


 14/20 Processed dataset 44983


100%|██████████| 50/50 [00:46<00:00,  1.09it/s]


 15/20 Processed dataset 44964


100%|██████████| 50/50 [00:02<00:00, 19.97it/s]


 16/20 Processed dataset 44965


100%|██████████| 50/50 [00:17<00:00,  2.81it/s]


 17/20 Processed dataset 44978


100%|██████████| 50/50 [00:25<00:00,  1.94it/s]


 18/20 Processed dataset 44969


100%|██████████| 50/50 [00:03<00:00, 13.69it/s]


 19/20 Processed dataset 44972


100%|██████████| 50/50 [00:10<00:00,  4.58it/s]


 20/20 Processed dataset 44971
      RMSE_test                                                    \
        RidgeCV T=1 Dense T=1 SWIM Grad T=1 SWIM Unif T=3 End2End   
44973  0.595158  0.487190      0.484540      0.515265    0.267764   
44975  0.006491  0.216284      0.014534      0.016062    0.035913   
44980  0.771311  0.626406      0.603183      0.579103    0.332173   
44981  0.904478  0.906060      0.904416      0.904333    0.764078   
45402  0.706690  0.594246      0.681441      0.733225    0.618801   
44994  0.296725  0.232125      0.256265      0.259723    0.294330   
44957  0.674484  0.423936      0.469324      0.523909    0.424904   
44970  0.666021  0.614415      0.620453      0.635694    0.683948   
44959  0.542088  0.423389      0.364107      0.446359    0.381762   
44960  0.304327  0.201810      0.275287      0.266248    0.254899   
44963  0.839762  0.773993      0.789532      0.790094    0.604565   
44976  0.294862  0.284154      0.233588      0.245341    0.142780   
449

Unnamed: 0_level_0,RMSE_test,RMSE_test,RMSE_test,RMSE_test,RMSE_test,RMSE_test,RMSE_test,RMSE_test,RMSE_test,RMSE_test,...,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit,t_fit
Unnamed: 0_level_1,RidgeCV,T=1 Dense,T=1 SWIM Grad,T=1 SWIM Unif,T=3 End2End,T=3 ResDense,T=3 ResSWIM Grad-dense,T=3 ResSWIM Grad-id,T=5 ResDense,T=5 ResSWIM Grad-dense,...,T=1 Dense,T=1 SWIM Grad,T=1 SWIM Unif,T=3 End2End,T=3 ResDense,T=3 ResSWIM Grad-dense,T=3 ResSWIM Grad-id,T=5 ResDense,T=5 ResSWIM Grad-dense,T=5 ResSWIM Grad-id
44973,0.595158,0.48719,0.48454,0.515265,0.267764,0.462851,0.476432,0.501349,0.50746,0.484716,...,0.181305,0.294013,0.255261,24.392364,0.177129,0.303514,0.224949,0.192235,0.340333,0.29786
44975,0.006491,0.216284,0.014534,0.016062,0.035913,0.267795,0.03148,0.029316,0.398372,0.059381,...,1.858905,1.613717,1.590133,356.992386,1.603182,1.984443,1.848183,1.654306,2.337145,2.173944
44980,0.771311,0.626406,0.603183,0.579103,0.332173,0.615005,0.57576,0.547171,0.633022,0.540944,...,0.075352,0.183124,0.145012,17.700974,0.175246,0.172382,0.265026,0.189547,0.250708,0.182656
44981,0.904478,0.90606,0.904416,0.904333,0.764078,0.909947,0.904919,0.9041,0.91058,0.904503,...,0.128444,0.17066,0.186523,17.919285,0.222856,0.233243,0.234229,0.193021,0.226908,0.206999
45402,0.70669,0.594246,0.681441,0.733225,0.618801,0.641916,0.651372,0.748465,0.579334,0.717807,...,0.038897,0.088248,0.057958,6.964257,0.082471,0.140973,0.181281,0.081883,0.167284,0.161679
44994,0.296725,0.232125,0.256265,0.259723,0.29433,0.255599,0.247598,0.237971,0.265426,0.242917,...,0.019839,0.04158,0.036466,1.939736,0.052477,0.135522,0.093249,0.054872,0.070515,0.09362
44957,0.674484,0.423936,0.469324,0.523909,0.424904,0.429798,0.343744,0.354992,0.44801,0.362259,...,0.034389,0.106709,0.143593,3.481455,0.099679,0.113773,0.128989,0.118105,0.137354,0.13218
44970,0.666021,0.614415,0.620453,0.635694,0.683948,0.613921,0.626454,0.625207,0.622942,0.631894,...,0.044006,0.078031,0.130627,2.043959,0.100299,0.065376,0.063853,0.045134,0.130529,0.075617
44959,0.542088,0.423389,0.364107,0.446359,0.381762,0.436573,0.355477,0.358918,0.429209,0.375161,...,0.043546,0.068022,0.099432,2.469734,0.116321,0.131063,0.121981,0.122685,0.163284,0.078237
44960,0.304327,0.20181,0.275287,0.266248,0.254899,0.239604,0.167304,0.219069,0.223315,0.197696,...,0.035328,0.09518,0.086452,1.891372,0.078895,0.139757,0.113925,0.146919,0.117862,0.102822


In [12]:
# Reload the saved benchmark table and show the mean test RMSE per model,
# lowest (best) first.
df_reg = pd.read_pickle("OpenML_reg_PLACEHOLDER.pkl")
mean_rmse_test = df_reg["RMSE_test"].mean()
mean_rmse_test.sort_values()

T=3 End2End               0.445906
T=3 ResSWIM Grad-dense    0.480450
T=5 ResSWIM Grad-id       0.485050
T=3 ResSWIM Grad-id       0.488523
T=5 ResSWIM Grad-dense    0.492524
T=1 SWIM Grad             0.497886
T=1 Dense                 0.504190
T=3 ResDense              0.513892
T=1 SWIM Unif             0.521881
T=5 ResDense              0.522799
RidgeCV                   0.579533
dtype: float64

In [13]:
# Rank the models within each dataset by test RMSE (axis=1), then average the
# ranks over datasets; lowest first.
rank_rmse_test = df_reg["RMSE_test"].rank(axis=1)
rank_rmse_test.mean().sort_values()

T=3 ResSWIM Grad-dense    4.25
T=5 ResSWIM Grad-id       4.55
T=3 ResSWIM Grad-id       4.70
T=3 End2End               4.85
T=1 Dense                 5.40
T=1 SWIM Grad             5.50
T=5 ResSWIM Grad-dense    5.90
T=3 ResDense              6.40
T=5 ResDense              7.20
T=1 SWIM Unif             7.95
RidgeCV                   9.30
dtype: float64

In [14]:
# Mean training RMSE per model across datasets, lowest first.
mean_rmse_train = df_reg["RMSE_train"].mean()
mean_rmse_train.sort_values()

T=3 End2End               0.288300
T=3 ResSWIM Grad-dense    0.452473
T=5 ResSWIM Grad-id       0.455653
T=3 ResSWIM Grad-id       0.459548
T=5 ResSWIM Grad-dense    0.459733
T=1 Dense                 0.475713
T=3 ResDense              0.475718
T=1 SWIM Grad             0.476500
T=5 ResDense              0.490480
T=1 SWIM Unif             0.495691
RidgeCV                   0.569632
dtype: float64

In [15]:
# Rank the models within each dataset by training RMSE (axis=1), then average
# the ranks over datasets; lowest first.
rank_rmse_train = df_reg["RMSE_train"].rank(axis=1)
rank_rmse_train.mean().sort_values()

T=3 End2End                2.25
T=3 ResSWIM Grad-dense     4.75
T=3 ResDense               5.30
T=5 ResSWIM Grad-id        5.40
T=3 ResSWIM Grad-id        5.45
T=5 ResSWIM Grad-dense     5.50
T=1 Dense                  6.00
T=5 ResDense               6.55
T=1 SWIM Grad              6.65
T=1 SWIM Unif              7.95
RidgeCV                   10.20
dtype: float64

In [None]:
# NEXT: implement boosting for the RandFeat models
# ALSO: I should also do boosting for learned Nets ...



#TODO NOTE NOTE next: add end2end and randfeatboost to regression models
#          I might also need to implement the gradient approach before this? Maybe not. At least do line search, probably.