In [1]:
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable, Type
import abc

from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
import torch.utils.data
from torch import Tensor
from sklearn.linear_model import RidgeClassifierCV
import xgboost as xgb

from models.ridge_ALOOCV import fit_ridge_ALOOCV
from models.sandwiched_least_squares import sandwiched_LS_dense, sandwiched_LS_diag, sandwiched_LS_scalar

In [2]:
# Make a synthetic multi-output linear regression problem (X, y).
# The global torch RNG is seeded first so that Restart Kernel -> Run All
# regenerates exactly the same data on every run.
SEED = 0
N = 100  # number of samples (rows of X and y)
D = 10   # input dimension (columns of X)
d = 3    # output dimension (columns of y)

torch.manual_seed(SEED)
X = torch.randn(N, D)
w_true = torch.randn(D, d)  # ground-truth linear map from inputs to targets
y = X @ w_true + torch.randn(N, d) * 0.1  # linear targets plus Gaussian noise scaled by 0.1

In [3]:
from models.models import GreedyRandFeatBoostRegression

# Greedy random-feature boosting, configured with the "dense" sandwich solver.
model = GreedyRandFeatBoostRegression(
    hidden_dim=128,
    bottleneck_dim=128,
    out_dim=d,  # one output per target column of y
    n_layers=5,
    l2_reg=0.01,
    feature_type="SWIM",
    upscale="dense",
    sandwich_solver="dense",
)

# Fit on the full synthetic dataset, then display the model's output on the
# same inputs it was trained on.
model.fit(X, y)
model(X)

tensor([[-5.9119e+00, -8.0872e-01,  2.3483e-01],
        [-1.1253e+00,  1.1894e+00, -8.7050e-01],
        [ 2.2871e+00,  2.0951e+00, -5.5235e+00],
        [ 2.4135e+00, -2.1053e+00,  2.1494e+00],
        [ 4.4306e+00, -2.4564e+00, -8.2134e-01],
        [ 2.9608e+00, -3.8776e+00, -5.1961e-01],
        [-3.5687e+00,  6.2360e+00,  2.0958e+00],
        [ 3.1340e+00, -1.6999e+00,  1.1293e+00],
        [ 3.4200e+00,  2.1405e+00, -2.1301e-01],
        [ 2.3651e+00, -4.8967e+00, -2.8566e+00],
        [-9.5591e-02,  1.5292e+00, -2.2835e-02],
        [-4.3355e+00,  1.3344e+00,  3.7357e+00],
        [ 2.4427e+00,  1.7571e-03,  1.0338e+00],
        [ 9.0770e-01,  1.8662e+00, -2.6352e+00],
        [ 7.5678e+00, -7.2194e+00, -8.8440e-01],
        [ 1.8518e+00, -3.1571e+00,  1.2767e+00],
        [ 6.9717e+00, -5.3179e+00, -3.5196e+00],
        [-5.7705e+00,  2.7684e+00,  3.0489e+00],
        [-2.2329e+00,  5.0188e+00, -1.0776e-01],
        [ 5.8608e+00, -5.5761e+00, -1.9349e-02],
        [-2.2222e+00

In [4]:
# Greedy random-feature boosting, configured with the "diag" sandwich solver.
# All other constructor arguments match the "dense" run.
model = GreedyRandFeatBoostRegression(
    hidden_dim=128,
    bottleneck_dim=128,
    out_dim=d,  # one output per target column of y
    n_layers=5,
    l2_reg=0.01,
    feature_type="SWIM",
    upscale="dense",
    sandwich_solver="diag",
)

# Fit on the full synthetic dataset, then display the model's output on the
# same inputs it was trained on.
model.fit(X, y)
model(X)

tensor([[-6.0011, -0.7917,  0.2629],
        [-1.2617,  1.2017, -1.0472],
        [ 2.2231,  2.1766, -5.4348],
        [ 2.4016, -2.0325,  2.2176],
        [ 4.4571, -2.4429, -0.8092],
        [ 2.9974, -3.7914, -0.5704],
        [-3.4667,  6.0456,  2.2543],
        [ 3.1422, -1.6700,  1.0765],
        [ 3.4947,  2.1311, -0.1798],
        [ 2.3866, -4.9515, -2.8623],
        [ 0.0646,  1.5709, -0.0280],
        [-4.3550,  1.2555,  3.5970],
        [ 2.4866,  0.0508,  1.1737],
        [ 0.8701,  1.8389, -2.7273],
        [ 7.4696, -7.3960, -0.9260],
        [ 1.6854, -3.1058,  1.1876],
        [ 7.0639, -5.2081, -3.4403],
        [-5.7380,  2.9352,  2.9761],
        [-2.2743,  4.9693, -0.0610],
        [ 5.6812, -5.5519, -0.0458],
        [-2.1800, -3.0504, -0.4466],
        [ 1.6485,  1.4827, -1.2235],
        [ 2.4048,  0.3574,  2.7305],
        [-4.3212, -1.1489,  1.0364],
        [ 0.5485, -0.7732,  2.8302],
        [-0.8743, -3.5150, -1.0759],
        [-1.7491,  1.3062,  2.3330],
 

In [5]:
# Greedy random-feature boosting, configured with the "scalar" sandwich solver.
# All other constructor arguments match the "dense" and "diag" runs.
model = GreedyRandFeatBoostRegression(
    hidden_dim=128,
    bottleneck_dim=128,
    out_dim=d,  # one output per target column of y
    n_layers=5,
    l2_reg=0.01,
    feature_type="SWIM",
    upscale="dense",
    sandwich_solver="scalar",
)

# Fit on the full synthetic dataset, then display the model's output on the
# same inputs it was trained on.
model.fit(X, y)
model(X)

tensor([[-6.0167, -0.7788,  0.2569],
        [-1.2544,  1.1959, -1.0448],
        [ 2.1836,  2.1971, -5.4493],
        [ 2.3977, -2.0348,  2.2217],
        [ 4.4650, -2.4348, -0.8098],
        [ 3.0085, -3.7888, -0.5694],
        [-3.4622,  6.0298,  2.2648],
        [ 3.1481, -1.6628,  1.0763],
        [ 3.5095,  2.1351, -0.1756],
        [ 2.3863, -4.9625, -2.8608],
        [ 0.0745,  1.5703, -0.0287],
        [-4.3673,  1.2589,  3.5891],
        [ 2.4955,  0.0530,  1.1769],
        [ 0.8756,  1.8478, -2.7301],
        [ 7.4694, -7.4240, -0.9178],
        [ 1.6724, -3.0857,  1.1807],
        [ 7.0588, -5.2014, -3.4455],
        [-5.7447,  2.9471,  2.9675],
        [-2.2712,  4.9674, -0.0636],
        [ 5.6728, -5.5670, -0.0414],
        [-2.1768, -3.0609, -0.4479],
        [ 1.6583,  1.4858, -1.2176],
        [ 2.4018,  0.3672,  2.7274],
        [-4.3169, -1.1492,  1.0413],
        [ 0.5385, -0.7621,  2.8253],
        [-0.8781, -3.5130, -1.0784],
        [-1.7413,  1.3212,  2.3356],
 

In [6]:
from models.models import GradientRandFeatBoostRegression

# Gradient-based random-feature boosting. Unlike the greedy runs, this
# constructor call passes neither l2_reg nor sandwich_solver.
model = GradientRandFeatBoostRegression(
    hidden_dim=128,
    bottleneck_dim=128,
    out_dim=d,  # one output per target column of y
    n_layers=5,
    feature_type="SWIM",
    upscale="dense",
)

# Fit on the full synthetic dataset, then display the model's output on the
# same inputs it was trained on.
model.fit(X, y)
model(X)

tensor([[-6.0166, -0.7788,  0.2570],
        [-1.2546,  1.1959, -1.0448],
        [ 2.1838,  2.1970, -5.4495],
        [ 2.3976, -2.0348,  2.2218],
        [ 4.4651, -2.4349, -0.8098],
        [ 3.0087, -3.7888, -0.5694],
        [-3.4622,  6.0299,  2.2648],
        [ 3.1482, -1.6628,  1.0764],
        [ 3.5095,  2.1352, -0.1756],
        [ 2.3864, -4.9626, -2.8609],
        [ 0.0745,  1.5703, -0.0287],
        [-4.3674,  1.2590,  3.5889],
        [ 2.4955,  0.0530,  1.1770],
        [ 0.8757,  1.8478, -2.7302],
        [ 7.4695, -7.4241, -0.9178],
        [ 1.6727, -3.0858,  1.1807],
        [ 7.0588, -5.2015, -3.4456],
        [-5.7448,  2.9472,  2.9674],
        [-2.2711,  4.9674, -0.0636],
        [ 5.6727, -5.5670, -0.0414],
        [-2.1767, -3.0609, -0.4479],
        [ 1.6581,  1.4858, -1.2176],
        [ 2.4019,  0.3673,  2.7275],
        [-4.3170, -1.1492,  1.0412],
        [ 0.5386, -0.7622,  2.8254],
        [-0.8781, -3.5130, -1.0784],
        [-1.7414,  1.3214,  2.3355],
 