In [1]:
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable, Type
import abc

from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
import torch.utils.data
from torch import Tensor
from sklearn.linear_model import RidgeClassifierCV
import xgboost as xgb

from models.ridge_ALOOCV import fit_ridge_ALOOCV
from models.sandwiched_least_squares import sandwiched_LS_dense, sandwiched_LS_diag, sandwiched_LS_scalar

In [2]:
# Make regression data X, y
N = 1000
N_test = 1000
D = 10
d = 3
X = torch.randn(N, D)
X_test = torch.randn(N_test, D)
w_true = torch.randn(D, d)
y = (X @ w_true)**2 + torch.randn(N, d) * 0.1  # Adding some noise
y_test = X_test @ w_true + torch.randn(N_test, d) * 0.1  # Adding some noise

In [3]:
from models.models import GreedyRandFeatBoostRegression

#dense      
model = GreedyRandFeatBoostRegression(
     hidden_dim=128, 
     bottleneck_dim=128, 
     out_dim=d, 
     n_layers=5, 
     l2_reg=1, 
     feature_type="SWIM", 
     upscale="dense", 
     sandwich_solver="dense"
     )
model.fit(X, y)
out = model(X)
out_test = model(X_test)
print("out_test", out_test)
rmse = torch.sqrt(F.mse_loss(out, y))
rmse_test = torch.sqrt(F.mse_loss(out_test, y_test))
print("train rmse", rmse)
print("test rmse", rmse_test)

out_test tensor([[14.1505,  4.9657,  2.2133],
        [34.0824,  0.1670,  3.2919],
        [ 1.2454, 16.6651,  0.1111],
        ...,
        [16.8812,  2.3296,  0.8744],
        [31.5556, -0.3879,  1.0041],
        [ 4.3109,  1.9008, 14.1609]])
train rmse tensor(3.0064)
test rmse tensor(13.0762)


In [13]:
#diag
model = GreedyRandFeatBoostRegression(
     hidden_dim=128, 
     bottleneck_dim=128, 
     out_dim=d, 
     n_layers=5, 
     l2_reg=1, 
     feature_type="dense", 
     upscale="dense", 
     sandwich_solver="diag"
     )
model.fit(X, y)
out = model(X)
out_test = model(X_test)
print("out_test", out_test)
rmse = torch.sqrt(F.mse_loss(out, y))
rmse_test = torch.sqrt(F.mse_loss(out_test, y_test))
print("train rmse", rmse)
print("test rmse", rmse_test)

out_test tensor([[ 8.0390,  2.8302,  3.4341],
        [40.1104,  0.3094,  3.5824],
        [ 1.4164, 22.1556,  2.5424],
        ...,
        [15.4025,  3.0093,  3.4398],
        [23.7276, 12.1786, -0.3501],
        [ 2.8341, -0.4218, 13.2662]])
train rmse tensor(3.8690)
test rmse tensor(12.6157)


In [14]:
#scalar
model = GreedyRandFeatBoostRegression(
     hidden_dim=128, 
     bottleneck_dim=128, 
     out_dim=d, 
     n_layers=5, 
     l2_reg=100, 
     feature_type="dense", 
     upscale="dense", 
     sandwich_solver="scalar"
     )
model.fit(X, y)
out = model(X)
out_test = model(X_test)
print("out_test", out_test)
rmse = torch.sqrt(F.mse_loss(out, y))
rmse_test = torch.sqrt(F.mse_loss(out_test, y_test))
print("train rmse", rmse)
print("test rmse", rmse_test)

out_test tensor([[11.0543,  6.0320,  5.5818],
        [ 9.3400,  5.8141,  3.9483],
        [10.5396,  6.5556,  4.8600],
        ...,
        [10.7161,  5.9109,  4.6081],
        [ 7.3195,  7.1261,  3.8941],
        [ 9.6393,  6.2542,  6.0133]])
train rmse tensor(10.1228)
test rmse tensor(8.0810)


In [11]:
from models.models import GradientRandFeatBoostRegression
        
model = GradientRandFeatBoostRegression(
     hidden_dim=128, 
     bottleneck_dim=128, 
     out_dim=d, 
     n_layers=5, 
     feature_type="dense", 
     upscale="dense", 
     )
model.fit(X, y)
out = model(X)
out_test = model(X_test)
print("out_test", out_test)
rmse = torch.sqrt(F.mse_loss(out, y))
rmse_test = torch.sqrt(F.mse_loss(out_test, y_test))
print("train rmse", rmse)
print("test rmse", rmse_test)

out_test tensor([[20.0471, -0.6396,  4.5811],
        [26.8040,  1.1201,  3.9404],
        [10.2454, 11.4717, -0.0879],
        ...,
        [ 9.8228,  3.6318,  4.6900],
        [30.8450,  7.5195,  2.5502],
        [-2.1990,  2.3350, 13.0190]])
train rmse tensor(5.9362)
test rmse tensor(12.0385)


# End2End

In [16]:
from models.models import End2EndMLPResNet

model = End2EndMLPResNet(
        in_dim=D,
        hidden_dim=32,
        bottleneck_dim=32,
        out_dim=d,
        n_blocks=3,
        loss = "mse",
        lr = 0.0001,
        n_epochs = 30,
        end_lr_factor= 0.1,
        weight_decay = 0.001,
        batch_size = 64,
        )
model.fit(X, y)
out = model(X)
out_test = model(X_test)
print("out_test", out_test)
rmse = torch.sqrt(F.mse_loss(out, y))
rmse_test = torch.sqrt(F.mse_loss(out_test, y_test))
print("train rmse", rmse)
print("test rmse", rmse_test)

100%|██████████| 30/30 [00:02<00:00, 14.41it/s]

out_test tensor([[ 3.9181,  1.1993, -0.2938],
        [ 4.5210, -0.3196, -0.8746],
        [-0.4343,  0.8500,  1.0927],
        ...,
        [ 2.3061, -0.7238, -0.9102],
        [ 2.7411,  3.0595,  0.2622],
        [-2.1720,  0.6815,  0.7976]], grad_fn=<AddmmBackward0>)
train rmse tensor(11.6170, grad_fn=<SqrtBackward0>)
test rmse tensor(3.5688, grad_fn=<SqrtBackward0>)





# Next model f(x_t, x_0)