In [1]:
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable, Type
import abc

from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim
import torch.utils.data
from torch import Tensor


from models.ridge_ALOOCV import fit_ridge_ALOOCV
from models.sandwiched_least_squares import sandwiched_LS_dense, sandwiched_LS_diag, sandwiched_LS_scalar

In [2]:
# Make synthetic regression data X, y.
# Targets are the elementwise square of a random linear projection of the
# inputs, plus additive Gaussian noise.
SEED = 0         # seeds torch's global RNG so a fresh Restart & Run All reproduces the data
N = 1000         # number of training samples
N_test = 1000    # number of test samples
D = 10           # input dimension
d = 2            # output dimension
NOISE_STD = 0.1  # scale of the additive Gaussian noise on the targets

torch.manual_seed(SEED)

# The order of the random draws below is kept as-is: X, X_test, w_true, then noise.
X = torch.randn(N, D)
X_test = torch.randn(N_test, D)
w_true = torch.randn(D, d)
y = (X @ w_true)**2 + torch.randn(N, d) * NOISE_STD
y_test = (X_test @ w_true)**2 + torch.randn(N_test, d) * NOISE_STD

In [3]:
from models.base import RidgeCVModule

n_runs = 10  # number of fit/evaluate repetitions

# Ridge model configured with an alpha range [1e-6, 1e6] and 10 alphas.
# NOTE(review): presumably the penalty is selected among these candidates
# during fit() -- confirm in models.base.
model = RidgeCVModule(
    lower_alpha=1e-6,
    upper_alpha=1e6,
    n_alphas=10,
)

results = []
for run_idx in range(n_runs):
    # The same model instance is re-fit on every repetition.
    model.fit(X, y)
    # Metrics only: no autograd graph is needed for evaluation.
    with torch.no_grad():
        out = model(X)
        out_test = model(X_test)
        rmse = torch.sqrt(nn.functional.mse_loss(out, y))
        rmse_test = torch.sqrt(nn.functional.mse_loss(out_test, y_test))
    # Stack the two 0-dim metric tensors instead of re-wrapping them with torch.tensor.
    results.append(torch.stack([rmse, rmse_test]))
results = torch.stack(results)  # shape (n_runs, 2); columns are [train, test]
print("train rmse", results[:, 0].mean(), "std", results[:, 0].std())
print("test rmse", results[:, 1].mean(), "std", results[:, 1].std())
print("train", results[:, 0])
print("test", results[:, 1])

train rmse tensor(19.4404) std tensor(0.)
test rmse tensor(19.6950) std tensor(0.)
train tensor([19.4404, 19.4404, 19.4404, 19.4404, 19.4404, 19.4404, 19.4404, 19.4404,
        19.4404, 19.4404])
test tensor([19.6950, 19.6950, 19.6950, 19.6950, 19.6950, 19.6950, 19.6950, 19.6950,
        19.6950, 19.6950])


In [4]:
from models.base import RidgeModule

n_runs = 10  # number of fit/evaluate repetitions

# Ridge model with a fixed regularisation value l2_reg = 1e+2.
model = RidgeModule(
    l2_reg=1e+2,
)

results = []
for run_idx in range(n_runs):
    # The same model instance is re-fit on every repetition.
    model.fit(X, y)
    # Metrics only: no autograd graph is needed for evaluation.
    with torch.no_grad():
        out = model(X)
        out_test = model(X_test)
        rmse = torch.sqrt(nn.functional.mse_loss(out, y))
        rmse_test = torch.sqrt(nn.functional.mse_loss(out_test, y_test))
    # Stack the two 0-dim metric tensors instead of re-wrapping them with torch.tensor.
    results.append(torch.stack([rmse, rmse_test]))
results = torch.stack(results)  # shape (n_runs, 2); columns are [train, test]
print("train rmse", results[:, 0].mean(), "std", results[:, 0].std())
print("test rmse", results[:, 1].mean(), "std", results[:, 1].std())
print("train", results[:, 0])
print("test", results[:, 1])

train rmse tensor(19.4891) std tensor(0.)
test rmse tensor(19.6898) std tensor(0.)
train tensor([19.4891, 19.4891, 19.4891, 19.4891, 19.4891, 19.4891, 19.4891, 19.4891,
        19.4891, 19.4891])
test tensor([19.6898, 19.6898, 19.6898, 19.6898, 19.6898, 19.6898, 19.6898, 19.6898,
        19.6898, 19.6898])


In [5]:
from models.random_feature_representation_boosting import GreedyRFRBoostRegressor

n_runs = 10  # number of fit/evaluate repetitions

# Greedy RFRBoost regressor: 3 layers, "dense" sandwich solver, SWIM features,
# iid upscaling, batchnorm enabled.
model = GreedyRFRBoostRegressor(
    in_dim=D,
    out_dim=d,
    hidden_dim=128,
    n_layers=3,
    randfeat_xt_dim=128,
    randfeat_x0_dim=128,
    l2_reg=0.1,
    l2_ghat=0.001,
    boost_lr=1.0,
    sandwich_solver="dense",
    feature_type="SWIM",
    upscale_type="iid",
    use_batchnorm=True,
)

results = []
for run_idx in range(n_runs):
    # The same model instance is re-fit on every repetition.
    # NOTE(review): presumably fit() redraws the random features each time -- confirm.
    model.fit(X, y)
    # Metrics only: no autograd graph is needed for evaluation.
    with torch.no_grad():
        out = model(X)
        out_test = model(X_test)
        rmse = torch.sqrt(nn.functional.mse_loss(out, y))
        rmse_test = torch.sqrt(nn.functional.mse_loss(out_test, y_test))
    # Stack the two 0-dim metric tensors instead of re-wrapping them with torch.tensor.
    results.append(torch.stack([rmse, rmse_test]))
results = torch.stack(results)  # shape (n_runs, 2); columns are [train, test]
print("train rmse", results[:, 0].mean(), "std", results[:, 0].std())
print("test rmse", results[:, 1].mean(), "std", results[:, 1].std())
print("train", results[:, 0])
print("test", results[:, 1])

train rmse tensor(2.19900298118591308594) std tensor(0.09725400060415267944)
test rmse tensor(4.06416225433349609375) std tensor(0.15032306313514709473)
train tensor([2.09480309486389160156, 2.26082038879394531250, 2.31999373435974121094,
        2.25505876541137695312, 2.22264671325683593750, 2.23533487319946289062,
        2.00615024566650390625, 2.23011541366577148438, 2.10482168197631835938,
        2.26028704643249511719])
test tensor([3.89539861679077148438, 4.09609794616699218750, 4.25324440002441406250,
        4.07946491241455078125, 4.07455873489379882812, 4.29382181167602539062,
        3.80272769927978515625, 4.13765668869018554688, 3.95891666412353515625,
        4.04973220825195312500])


In [6]:
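# NOTE(review): metrics pasted from an earlier run, kept for comparison. The model
# and hyperparameters that produced them are not recorded here -- presumably a
# previous configuration of the GreedyRFRBoostRegressor cell above; confirm or remove.
# train rmse tensor(2.3312) std tensor(0.5462)
# test rmse tensor(3.3370) std tensor(0.6041)
# train tensor([2.5459, 1.9770, 2.1338, 3.6567, 1.9788, 2.5264, 2.1827, 1.9025, 2.6022,
#         1.8058])
# test tensor([3.7197, 2.8087, 2.9656, 4.6065, 2.9207, 3.3118, 3.1447, 3.0470, 4.0548,
#         2.7906])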

In [7]:
from models.random_feature_representation_boosting import GreedyRFRBoostRegressor

n_runs = 10  # number of fit/evaluate repetitions

# Greedy RFRBoost regressor: 5 layers, "diag" sandwich solver, SWIM features,
# iid upscaling.
model = GreedyRFRBoostRegressor(
    in_dim=D,
    out_dim=d,
    hidden_dim=128,
    n_layers=5,
    randfeat_xt_dim=128,
    randfeat_x0_dim=128,
    l2_reg=0.01,
    l2_ghat=0.001,
    boost_lr=1.0,
    sandwich_solver="diag",
    feature_type="SWIM",
    upscale_type="iid",
)

results = []
for run_idx in range(n_runs):
    # The same model instance is re-fit on every repetition.
    # NOTE(review): presumably fit() redraws the random features each time -- confirm.
    model.fit(X, y)
    # Metrics only: no autograd graph is needed for evaluation.
    with torch.no_grad():
        out = model(X)
        out_test = model(X_test)
        rmse = torch.sqrt(nn.functional.mse_loss(out, y))
        rmse_test = torch.sqrt(nn.functional.mse_loss(out_test, y_test))
    # Stack the two 0-dim metric tensors instead of re-wrapping them with torch.tensor.
    results.append(torch.stack([rmse, rmse_test]))
results = torch.stack(results)  # shape (n_runs, 2); columns are [train, test]
print("train rmse", results[:, 0].mean(), "std", results[:, 0].std())
print("test rmse", results[:, 1].mean(), "std", results[:, 1].std())
print("train", results[:, 0])
print("test", results[:, 1])

train rmse tensor(8.67100143432617187500) std tensor(1.08087289333343505859)
test rmse tensor(9.87579059600830078125) std tensor(1.23907089233398437500)
train tensor([ 9.00820636749267578125,  7.39738178253173828125,
         8.20129013061523437500,  9.24033355712890625000,
         8.00307273864746093750,  8.97256088256835937500,
         8.89156150817871093750, 11.20808506011962890625,
         7.85321378707885742188,  7.93431186676025390625])
test tensor([10.33485794067382812500,  8.42857551574707031250,
         9.14044666290283203125, 10.79509639739990234375,
         8.88615894317626953125, 10.16255187988281250000,
        10.21448993682861328125, 12.64363288879394531250,
         9.00464057922363281250,  9.14745712280273437500])


In [8]:
from models.random_feature_representation_boosting import GreedyRFRBoostRegressor

n_runs = 10  # number of fit/evaluate repetitions

# Greedy RFRBoost regressor: 5 layers, "scalar" sandwich solver, SWIM features,
# iid upscaling.
model = GreedyRFRBoostRegressor(
    in_dim=D,
    out_dim=d,
    hidden_dim=128,
    n_layers=5,
    randfeat_xt_dim=128,
    randfeat_x0_dim=128,
    l2_reg=0.01,
    l2_ghat=0.000001,
    boost_lr=1.0,
    sandwich_solver="scalar",
    feature_type="SWIM",
    upscale_type="iid",
)

results = []
for run_idx in range(n_runs):
    # The same model instance is re-fit on every repetition.
    # NOTE(review): presumably fit() redraws the random features each time -- confirm.
    model.fit(X, y)
    # Metrics only: no autograd graph is needed for evaluation.
    with torch.no_grad():
        out = model(X)
        out_test = model(X_test)
        rmse = torch.sqrt(nn.functional.mse_loss(out, y))
        rmse_test = torch.sqrt(nn.functional.mse_loss(out_test, y_test))
    # Stack the two 0-dim metric tensors instead of re-wrapping them with torch.tensor.
    results.append(torch.stack([rmse, rmse_test]))
results = torch.stack(results)  # shape (n_runs, 2); columns are [train, test]
print("train rmse", results[:, 0].mean(), "std", results[:, 0].std())
print("test rmse", results[:, 1].mean(), "std", results[:, 1].std())
print("train", results[:, 0])
print("test", results[:, 1])

train rmse tensor(12.69861030578613281250) std tensor(0.00417760899290442467)
test rmse tensor(15.31273937225341796875) std tensor(0.00493232859298586845)
train tensor([12.69724369049072265625, 12.69814872741699218750,
        12.69791984558105468750, 12.70291519165039062500,
        12.69032859802246093750, 12.70163631439208984375,
        12.70002079010009765625, 12.69331264495849609375,
        12.70150566101074218750, 12.70306015014648437500])
test tensor([15.30890178680419921875, 15.31397247314453125000,
        15.31033992767333984375, 15.31766605377197265625,
        15.30603408813476562500, 15.31798648834228515625,
        15.31353759765625000000, 15.30506992340087890625,
        15.31529808044433593750, 15.31858539581298828125])


In [9]:
from models.random_feature_representation_boosting import GradientRFRBoostRegressor

n_runs = 5  # number of fit/evaluate repetitions

# Gradient RFRBoost regressor: 5 layers, SWIM features, iid upscaling.
model = GradientRFRBoostRegressor(
    in_dim=D,
    out_dim=d,
    hidden_dim=128,
    n_layers=5,
    randfeat_xt_dim=128,
    randfeat_x0_dim=128,
    l2_reg=0.1,
    l2_ghat=0.000001,
    boost_lr=1.0,
    feature_type="SWIM",
    upscale_type="iid",
)

results = []
for run_idx in range(n_runs):
    # The same model instance is re-fit on every repetition; fit() runs with
    # autograd enabled, only the evaluation below is wrapped in no_grad.
    model.fit(X, y)
    # Metrics only: no autograd graph is needed for evaluation.
    # NOTE(review): assumes the model's forward pass does not itself rely on
    # autograd -- confirm in models.random_feature_representation_boosting.
    with torch.no_grad():
        out = model(X)
        out_test = model(X_test)
        rmse = torch.sqrt(nn.functional.mse_loss(out, y))
        rmse_test = torch.sqrt(nn.functional.mse_loss(out_test, y_test))
    # Stack the two 0-dim metric tensors instead of re-wrapping them with torch.tensor.
    results.append(torch.stack([rmse, rmse_test]))
results = torch.stack(results)  # shape (n_runs, 2); columns are [train, test]
print("train rmse", results[:, 0].mean(), "std", results[:, 0].std())
print("test rmse", results[:, 1].mean(), "std", results[:, 1].std())
print("train", results[:, 0])
print("test", results[:, 1])

train rmse tensor(1.00112879276275634766) std tensor(0.04419291764497756958)
test rmse tensor(2.71724319458007812500) std tensor(0.26086470484733581543)
train tensor([1.02486670017242431641, 1.06815397739410400391, 0.97267210483551025391,
        0.96833872795104980469, 0.97161233425140380859])
test tensor([2.87840771675109863281, 3.08482718467712402344, 2.43233823776245117188,
        2.58510398864746093750, 2.60553836822509765625])


# End2End: MLP ResNet trained end-to-end (`End2EndMLPResNet`)

In [10]:
from old.models import End2EndMLPResNet

n_runs = 5  # number of fit/evaluate repetitions

# MLP ResNet configured with 3 blocks, MSE loss, 30 epochs, batch size 64.
model = End2EndMLPResNet(
    in_dim=D,
    hidden_dim=128,
    bottleneck_dim=32,
    out_dim=d,
    n_blocks=3,
    loss="mse",
    lr=0.1,
    n_epochs=30,
    end_lr_factor=0.1,
    weight_decay=0.001,
    batch_size=64,
)

results = []
for run_idx in range(n_runs):
    # The same model instance is re-fit on every repetition.
    # NOTE(review): whether fit() re-initialises the weights or continues from the
    # previous repetition is not visible here -- confirm in old.models.
    model.fit(X, y)
    # Metrics only: disable autograd so evaluating the network builds no graph.
    with torch.no_grad():
        out = model(X)
        out_test = model(X_test)
        rmse = torch.sqrt(nn.functional.mse_loss(out, y))
        rmse_test = torch.sqrt(nn.functional.mse_loss(out_test, y_test))
    # Stack the two 0-dim metric tensors instead of re-wrapping them with torch.tensor.
    results.append(torch.stack([rmse, rmse_test]))
results = torch.stack(results)  # shape (n_runs, 2); columns are [train, test]
print("train rmse", results[:, 0].mean(), "std", results[:, 0].std())
print("test rmse", results[:, 1].mean(), "std", results[:, 1].std())
print("train", results[:, 0])
print("test", results[:, 1])

100%|██████████| 30/30 [00:02<00:00, 11.75it/s]
100%|██████████| 30/30 [00:02<00:00, 12.01it/s]
100%|██████████| 30/30 [00:02<00:00, 11.04it/s]
100%|██████████| 30/30 [00:02<00:00, 10.43it/s]
100%|██████████| 30/30 [00:02<00:00, 11.01it/s]

train rmse tensor(2.86127591133117675781) std tensor(1.92053639888763427734)
test rmse tensor(3.33172488212585449219) std tensor(1.81116974353790283203)
train tensor([1.59191846847534179688, 4.87590885162353515625, 1.38130116462707519531,
        5.04846191406250000000, 1.40878915786743164062])
test tensor([2.17513227462768554688, 5.21700000762939453125, 1.73355352878570556641,
        5.39392375946044921875, 2.13901424407958984375])



