In [1]:
import torch

In [2]:
torch.cuda.is_available()

True

In [3]:
ρ = 0.1 # initialize ro
n = 1000 # sample size
p = 20 # dimension

In [4]:
def generate_dataset(n, p, ρ, r, β, duplication):
    X = torch.zeros((duplication, n, p))
    i = torch.arange(p).view(-1, 1)
    j = torch.arange(p).view(1, -1)
    
    Σ = torch.pow(ρ, torch.abs(i - j).float())
    L = torch.linalg.cholesky(Σ)
    Z = torch.randn(duplication, n, p)
    X = Z @ torch.transpose(L, 1, 0)
    
    σ = (1-r)/r * (β.T @ Σ @ β)
    
    ε = torch.randn(n) * torch.sqrt(σ)
    
    Y = X @ β + ε
    
    return X, Y, {
        'Σ': Σ,
        'ε': ε
    }   

In [5]:
X, Y, other = generate_dataset(n=100, p=10, ρ=0, β=torch.ones(10), r=0.8, duplication=1000)

  σ = (1-r)/r * (β.T @ Σ @ β)


In [6]:
from torch import nn

xtx = X.permute(0, 2, 1)@X

In [7]:
xtx.shape

torch.Size([1000, 10, 10])

In [8]:
Y = Y.unsqueeze(-1)

In [9]:
from torch.optim import Adam, SGD
from torch.nn.functional import mse_loss, l1_loss
from tqdm import tqdm

with torch.no_grad():
    β_ols = torch.linalg.pinv(X) @ Y  # Initial OLS estimates
    weights = 1 / (β_ols.abs() + 1e-5)  # Adaptive weights
    wtw = weights.permute(0, 2, 1) @ weights  # W^T W

def lasso(λ, epochs=200, lr=0.025, adaptive=False, wtw=wtw, weights=weights):
    p = X.shape[-1]
    β = torch.randn((1000, p, 1), device='cuda', requires_grad=True)
    optimizer = Adam([β], lr=lr)
    loss_l = torch.zeros(epochs)
    ones = torch.ones((1000, 1), device='cuda')
    # pbar = tqdm(total=epochs, desc='LASSO')
    for epoch in range(epochs):
        optimizer.zero_grad()
        mse = torch.mean(((Y-X@β)**2), dim=1)
        l1norm = torch.sum(β.abs(), dim=1)
        # print(mse.shape, l1norm.shape)
        loss = mse + λ*l1norm
        loss.backward(ones)
        loss_l[epoch] = loss[-1]
        # pbar.set_postfix_str(f'loss: {loss}')
        # pbar.update()
        optimizer.step()
    # rss = torch.sum(((Y-X@β)**2), dim=1).squeeze(-1)
    mse = torch.mean(((Y-X@β)**2), dim=1).flatten()
    # k = (β.abs() > 1e-5).sum()
    k = 100
    aic = 100 * torch.log(mse*0.01) + torch.full(size=(1000, ), fill_value=2*k, device='cuda')
    bic = 100 * torch.log(mse*0.01) + torch.full(size=(1000, ), fill_value=torch.log(torch.tensor(100))*k, device='cuda')
    
    return β, aic, bic, mse, loss_l

In [28]:
def adaptive_lasso(λ, epochs=200, lr = 0.025, adaptive=False, wtw=wtw, weights=weights):
    p = X.shape[-1]
    β = torch.randn((1000, p, 1), device='cuda', requires_grad=True)
    optimizer = Adam([β], lr=lr)
    loss_l = torch.zeros(epochs, device='cuda')
    ones = torch.ones((1000, 1), device='cuda')

    # with torch.no_grad():
    #     β_ols = torch.linalg.pinv(X) @ Y  # Initial OLS estimates
    #     weights = 1 / (β_ols.abs() + 1e-5)  # Adaptive weights
    # global weights
    if weights.device != 'cuda':
        weights = weights.cuda()
    
    for epoch in range(epochs):
        optimizer.zero_grad()
        mse = torch.mean(((Y - X @ β) ** 2), dim=1)
        l1norm = torch.sum(weights * β.abs(), dim=1)  # Adaptive L1 penalty
        loss = mse + λ * l1norm
        loss.backward(ones)
        loss_l[epoch] = loss[-1]
        optimizer.step()
    
    # rss = torch.sum(((Y - X @ β) ** 2), dim=1).squeeze(-1)
    mse = torch.mean(((Y - X @ β) ** 2), dim=1).flatten()
    # k = (β.abs() > 1e-5).sum()
    k = 100
    aic = 100 * torch.log(mse * 0.01) + torch.full(size=(1000,), fill_value=2 * k, device='cuda')
    bic = 100 * torch.log(mse * 0.01) + torch.full(size=(1000,), fill_value=torch.log(torch.tensor(100)) * k, device='cuda')
    
    return β, aic, bic, mse, loss_l
    

In [29]:
def ridge(λ, adaptive=False, wtw=wtw, weights=weights):
    # closed form solution
    
    global X, xtx, Y
    if xtx.device != 'cuda':
        xtx = xtx.to('cuda')
    if Y.device != 'cuda':
        Y = Y.to('cuda')
    if X.device != 'cuda':
        X = X.to('cuda')
    try:
        β = torch.linalg.solve(xtx + λ * torch.eye(xtx.shape[-1], device='cuda'), X.permute(0, 2, 1)@Y)
    except:
        # β = torch.linalg.pinv(xtx + λ * torch.eye(xtx.shape[-1], device='cuda')) @ X.permute(0, 2, 1)@Y
        # add a small value to lambda
        β = torch.linalg.solve(xtx + (λ + 1e-5) * torch.eye(xtx.shape[-1], device='cuda'), X.permute(0, 2, 1)@Y)
    
    # use pinv
    # β = torch.linalg.pinv(xtx + λ * torch.eye(xtx.shape[-1], device='cuda')) @ X.permute(0, 2, 1)@Y
    
    
    # report AIC, BIC
    # rss = torch.sum(((Y-X@β)**2), dim=1).squeeze(-1)
    mse = torch.mean(((Y-X@β)**2), dim=1).flatten()
    # k = (β.abs() > 1e-5).sum()
    k = 100
    aic = 100 * torch.log(mse) + torch.full(size=(1000, ), fill_value=2*k, device='cuda')
    bic = 100 * torch.log(mse) + torch.full(size=(1000, ), fill_value=torch.log(torch.tensor(100))*k, device='cuda')
    
    return β, aic, bic, mse
    

In [30]:
def ridge_adaptive(λ, adaptive=True, wtw=wtw, weights=weights):
    global X, xtx, Y
    if xtx.device != 'cuda':
        xtx = xtx.to('cuda')
    if Y.device != 'cuda':
        Y = Y.to('cuda')
    if X.device != 'cuda':
        X = X.to('cuda')
    
    # global wtw
    if wtw.device != 'cuda':
        wtw = wtw.to('cuda')
    
    try:
        β = torch.linalg.solve(xtx + λ * wtw, X.permute(0, 2, 1) @ Y)
    except:
        # add purturbation to lambda
        β = torch.linalg.pinv(xtx + (λ+1e-5) * wtw) @ X.permute(0, 2, 1) @ Y
    # use pinv
    # β = torch.linalg.pinv(xtx + λ * wtw) @ X.permute(0, 2, 1) @ Y
    
    # rss = torch.sum(((Y - X @ β) ** 2), dim=1).squeeze(-1)
    mse = torch.mean(((Y - X @ β) ** 2), dim=1).flatten()
    # k = (β.abs() > 1e-5).sum()
    k = 100
    aic = 100 * torch.log(mse) + torch.full(size=(1000,), fill_value=2 * k, device='cuda')
    bic = 100 * torch.log(mse) + torch.full(size=(1000,), fill_value=torch.log(torch.tensor(100)) * k, device='cuda')
    
    return β, aic, bic, mse

In [31]:
def loo_cv(model, λ, adaptive=False, weights=weights, wtw=wtw):
    global X, Y
    n = X.shape[1]
    rss = torch.zeros((1000, n), device='cuda')
    # for i in tqdm(range(n)):
    for i in range(n):
        X_ = torch.cat((X[:, :i], X[:, i+1:]), dim=1)
        Y_ = torch.cat((Y[:, :i], Y[:, i+1:]), dim=1)
        res = model(λ, adaptive=adaptive, weights=weights, wtw=wtw)
        match res:
            case (β, _, _, mse):
                β, _, _, mse = res
            case (β, _, _, mse, loss):
                β, _, _, mse, loss = res
        # rss[:, i] = torch.sum(((Y_ - X_ @ β) ** 2), dim=1).squeeze(-1)
        if mse.dim() == 1:
            mse = mse.unsqueeze(-1)
        rss[:, i] = torch.mean(mse, dim=1)
    mse = model(λ, adaptive=adaptive, weights=weights, wtw=wtw)[3]
    
    return torch.mean(rss, dim=1), mse

In [32]:
# print("Using model: ridge")
# loo_cv(ridge, 0.1)

# print("Using model: ridge_adaptive")
# loo_cv(ridge_adaptive, 0.1)

# print("Using model: lasso")
# loo_cv(lasso, 0.1)

# print("Using model: adaptive_lasso")
# loo_cv(adaptive_lasso, 0.1)

In [33]:
a = loo_cv(ridge, 0.1)
a[0].shape, a[1].shape

(torch.Size([1000]), torch.Size([1000]))

In [34]:
def put_together(n, p, rho, beta, wtw, weights):
    global X, Y, xtx
    X, Y, _ = generate_dataset(n, p, rho, 0.8, beta, 1000)
    if len(Y.shape) == 2:
        Y = Y.unsqueeze(-1)
    xtx = X.permute(0, 2, 1) @ X

    # print(X.shape, Y.shape, xtx.shape)

    # aic, bic, loocv mat
    best_ridge_aic = torch.full((1000,), float('inf'), device='cuda')
    best_ridge_adaptive_aic = torch.full((1000,), float('inf'), device='cuda')
    best_lasso_aic = torch.full((1000,), float('inf'), device='cuda')
    best_lasso_adaptive_aic = torch.full((1000,), float('inf'), device='cuda')

    best_ridge_aic_mse = torch.full((1000,), float('inf'), device='cuda')
    best_ridge_adaptive_aic_mse = torch.full((1000,), float('inf'), device='cuda')
    best_lasso_aic_mse = torch.full((1000,), float('inf'), device='cuda')
    best_lasso_adaptive_aic_mse = torch.full((1000,), float('inf'), device='cuda')

    best_ridge_bic = torch.full((1000,), float('inf'), device='cuda')
    best_ridge_adaptive_bic = torch.full((1000,), float('inf'), device='cuda')
    best_lasso_bic = torch.full((1000,), float('inf'), device='cuda')
    best_lasso_adaptive_bic = torch.full((1000,), float('inf'), device='cuda')

    best_ridge_bic_mse = torch.full((1000,), float('inf'), device='cuda')
    best_ridge_adaptive_bic_mse = torch.full((1000,), float('inf'), device='cuda')
    best_lasso_bic_mse = torch.full((1000,), float('inf'), device='cuda')
    best_lasso_adaptive_bic_mse = torch.full((1000,), float('inf'), device='cuda')

    best_ridge_loocv = torch.full((1000,), float('inf'), device='cuda')
    best_ridge_adaptive_loocv = torch.full((1000,), float('inf'), device='cuda')
    best_lasso_loocv = torch.full((1000,), float('inf'), device='cuda')
    best_lasso_adaptive_loocv = torch.full((1000,), float('inf'), device='cuda')
    best_ridge_loocv_mse = torch.full((1000,), float('inf'), device='cuda')
    best_ridge_adaptive_loocv_mse = torch.full((1000,), float('inf'), device='cuda')
    best_lasso_loocv_mse = torch.full((1000,), float('inf'), device='cuda')
    best_lasso_adaptive_loocv_mse = torch.full((1000,), float('inf'), device='cuda')
    
    pbar = tqdm(total=10, desc='λ')
    pbar.update(0)
    for i,λ in enumerate(torch.linspace(0, 0.5, 10)):
        β_ridge, aic_ridge, bic_ridge, mse_ridge = ridge(λ, adaptive=False, wtw=wtw, weights=weights)
        β_ridge_adaptive, aic_ridge_adaptive, bic_ridge_adaptive, mse_ridge_adaptive = ridge_adaptive(λ, adaptive=True, wtw=wtw, weights=weights)
        β_lasso, aic_lasso, bic_lasso, mse_lasso, loss_lasso = lasso(λ, adaptive=False, wtw=wtw, weights=weights)
        β_adaptive_lasso, aic_adaptive_lasso, bic_adaptive_lasso, mse_adaptive_lasso, loss_adaptive_lasso = adaptive_lasso(λ, adaptive=True, wtw=wtw, weights=weights)

        # print('loocv')

        # loocv
        loocv_ridge, mse_ridge = loo_cv(ridge, λ, False, weights, wtw)
        loocv_ridge_adaptive, mse_ridge_adaptive = loo_cv(ridge_adaptive, λ, True, weights, wtw)
        loocv_lasso, mse_lasso = loo_cv(lasso, λ, False, weights, wtw)
        loocv_adaptive_lasso, mse_adaptive_lasso = loo_cv(adaptive_lasso, λ, True, weights, wtw)


        # index for improved loocv ridge
        ridge_better_ind = loocv_ridge < best_ridge_loocv

        # update the best loocv and loocv mse
        best_ridge_loocv[ridge_better_ind] = loocv_ridge[ridge_better_ind]
        best_ridge_loocv_mse[ridge_better_ind] = mse_ridge[ridge_better_ind]


        # index for improved loocv adaptive ridge
        ridge_adaptive_better_ind = loocv_ridge_adaptive < best_ridge_adaptive_loocv

        # update the best loocv and loocv mse
        best_ridge_adaptive_loocv[ridge_adaptive_better_ind] = loocv_ridge_adaptive[ridge_adaptive_better_ind]
        best_ridge_adaptive_loocv_mse[ridge_adaptive_better_ind] = mse_ridge_adaptive[ridge_adaptive_better_ind]

        # index for improved loocv lasso
        lasso_better_ind = loocv_lasso < best_lasso_loocv

        # update the best loocv and loocv mse
        best_lasso_loocv[lasso_better_ind] = loocv_lasso[lasso_better_ind]
        best_lasso_loocv_mse[lasso_better_ind] = mse_lasso[lasso_better_ind]


        # index for improved loocv adaptive lasso
        adaptive_lasso_better_ind = loocv_adaptive_lasso < best_lasso_adaptive_loocv

        # update the best loocv and loocv mse
        best_lasso_adaptive_loocv[adaptive_lasso_better_ind] = loocv_adaptive_lasso[adaptive_lasso_better_ind]
        best_lasso_adaptive_loocv_mse[adaptive_lasso_better_ind] = mse_adaptive_lasso[adaptive_lasso_better_ind]


        # the indexes of which aic is smaller than the previous aic
        ridge_better_ind = aic_ridge < best_ridge_aic
        # print(ridge_better_ind.shape, ridge_better_ind)
        # update the best aic and aic mse
        best_ridge_aic[ridge_better_ind] = aic_ridge[ridge_better_ind]
        best_ridge_aic_mse[ridge_better_ind] = mse_ridge[ridge_better_ind]

        # best aic for adaptive ridge
        ridge_adaptive_better_ind = torch.where(aic_ridge_adaptive < best_ridge_adaptive_aic)

        best_ridge_adaptive_aic[ridge_adaptive_better_ind] = aic_ridge_adaptive[ridge_adaptive_better_ind]
        best_ridge_adaptive_aic_mse[ridge_adaptive_better_ind] = mse_ridge_adaptive[ridge_adaptive_better_ind]

        # best aic for lasso
        lasso_better_ind = aic_lasso < best_lasso_aic
        best_lasso_aic[lasso_better_ind] = aic_lasso[lasso_better_ind]
        best_lasso_aic_mse[lasso_better_ind] = mse_lasso[lasso_better_ind]

        # best aic for adaptive lasso   
        adaptive_lasso_better_ind = aic_adaptive_lasso < best_lasso_adaptive_aic

        best_lasso_adaptive_aic[adaptive_lasso_better_ind] = aic_adaptive_lasso[adaptive_lasso_better_ind]
        best_lasso_adaptive_aic_mse[adaptive_lasso_better_ind] = mse_adaptive_lasso[adaptive_lasso_better_ind]

        # best bic for ridge
        ridge_better_ind = bic_ridge < best_ridge_bic

        best_ridge_bic[ridge_better_ind] = bic_ridge[ridge_better_ind]
        best_ridge_bic_mse[ridge_better_ind] = mse_ridge[ridge_better_ind]

        # best bic for adaptive ridge
        ridge_adaptive_better_ind = bic_ridge_adaptive < best_ridge_adaptive_bic

        best_ridge_adaptive_bic[ridge_adaptive_better_ind] = bic_ridge_adaptive[ridge_adaptive_better_ind]
        best_ridge_adaptive_bic_mse[ridge_adaptive_better_ind] = mse_ridge_adaptive[ridge_adaptive_better_ind]

        # best bic for lasso
        lasso_better_ind = bic_lasso < best_lasso_bic

        best_lasso_bic[lasso_better_ind] = bic_lasso[lasso_better_ind]
        best_lasso_bic_mse[lasso_better_ind] = mse_lasso[lasso_better_ind]

        # best bic for adaptive lasso
        adaptive_lasso_better_ind = bic_adaptive_lasso < best_lasso_adaptive_bic

        best_lasso_adaptive_bic[adaptive_lasso_better_ind] = bic_adaptive_lasso[adaptive_lasso_better_ind]
        best_lasso_adaptive_bic_mse[adaptive_lasso_better_ind] = mse_adaptive_lasso[adaptive_lasso_better_ind]

        pbar.update(1)
        pbar.set_postfix_str(f'λ: {λ}')

    # now we have the best mses for aic and bic
    # output the average best mse for all cases
    # return torch.mean(best_ridge_aic_mse), torch.mean(best_ridge_adaptive_aic_mse), torch.mean(best_lasso_aic_mse), torch.mean(best_lasso_adaptive_aic_mse), torch.mean(best_ridge_bic_mse), torch.mean(best_ridge_adaptive_bic_mse), torch.mean(best_lasso_bic_mse), torch.mean(best_lasso_adaptive_bic_mse)
    return {
        'ridge_aic': torch.mean(best_ridge_aic_mse),
        'ridge_adaptive_aic': torch.mean(best_ridge_adaptive_aic_mse),
        'lasso_aic': torch.mean(best_lasso_aic_mse),
        'lasso_adaptive_aic': torch.mean(best_lasso_adaptive_aic_mse),
        'ridge_bic': torch.mean(best_ridge_bic_mse),
        'ridge_adaptive_bic': torch.mean(best_ridge_adaptive_bic_mse),
        'lasso_bic': torch.mean(best_lasso_bic_mse),
        'lasso_adaptive_bic': torch.mean(best_lasso_adaptive_bic_mse),
        'ridge_loocv': torch.mean(best_ridge_loocv_mse),
        'ridge_adaptive_loocv': torch.mean(best_ridge_adaptive_loocv_mse),
        'lasso_loocv': torch.mean(best_lasso_loocv_mse),
        'lasso_adaptive_loocv': torch.mean(best_lasso_adaptive_loocv_mse)
    }

In [35]:
# ans1 = put_together(100, 10, 0.1, torch.ones(10))

In [36]:
def experiments():
    n = 100
    ps = [50, 25, 10]
    ρs = [0, 0.25, 0.5]
    
    global X, Y
    
    results = {}
    for p in ps:
        β_sparse = torch.zeros((p))
        β_dense = torch.ones((p))
        β_sparse[:int(p**0.5)] = 2 / p**0.5
        β_dense[:] = 5 / torch.arange(1, p+1).float().sqrt()
        
        for ρ in ρs:
            X, Y, _ = generate_dataset(n, p, 0, 0.8, β_sparse, 1000)
            xtx = X.permute(0, 2, 1)@X
            with torch.no_grad():
                # β_ols = torch.linalg.pinv(X) @ Y  # Initial OLS estimates
                β_ols = torch.linalg.lstsq(X, Y).solution
                weights = 1 / (β_ols.abs() + 1e-5)  # Adaptive weights
                if weights.dim() == 2:
                    weights = weights.unsqueeze(-1)
                wtw = weights.permute(0, 2, 1)@weights
            results[f'{n=}_{p=}_{ρ=}_sparse'] = put_together(n, p, 0, β_sparse, wtw, weights)
            results[f'{n=}_{p=}_{ρ=}_dense'] = put_together(n, p, 0, β_dense, wtw, weights)
            
    return results
    
    
    
    
    

In [37]:
results = experiments()



λ: 100%|██████████| 10/10 [26:31<00:00, 159.16s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [17:25<00:00, 104.55s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [38:09<00:00, 228.92s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [34:02<00:00, 204.25s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [32:00<00:00, 192.10s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [32:06<00:00, 192.69s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [13:57<00:00, 83.80s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [13:57<00:00, 83.75s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [14:08<00:00, 84.87s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [13:58<00:00, 83.89s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [14:01<00:00, 84.17s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [13:50<00:00, 83.00s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [13:49<00:00, 82.95s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [13:43<00:00, 82.30s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [13:19<00:00, 79.93s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [12:23<00:00, 74.35s/it, λ: 0.5]
λ: 100%|██████████| 10/10 [12:46<00:00, 76.63s/it,

In [38]:
results

{'n=100_p=50_ρ=0_sparse': {'ridge_aic': tensor(0.0721, device='cuda:0'),
  'ridge_adaptive_aic': tensor(0.0721, device='cuda:0'),
  'lasso_aic': tensor(0.1603, device='cuda:0', grad_fn=<MeanBackward0>),
  'lasso_adaptive_aic': tensor(0.1773, device='cuda:0', grad_fn=<MeanBackward0>),
  'ridge_bic': tensor(0.0721, device='cuda:0'),
  'ridge_adaptive_bic': tensor(0.0721, device='cuda:0'),
  'lasso_bic': tensor(0.1603, device='cuda:0', grad_fn=<MeanBackward0>),
  'lasso_adaptive_bic': tensor(0.1773, device='cuda:0', grad_fn=<MeanBackward0>),
  'ridge_loocv': tensor(0.0721, device='cuda:0'),
  'ridge_adaptive_loocv': tensor(0.0721, device='cuda:0'),
  'lasso_loocv': tensor(0.1463, device='cuda:0', grad_fn=<MeanBackward0>),
  'lasso_adaptive_loocv': tensor(0.1694, device='cuda:0', grad_fn=<MeanBackward0>)},
 'n=100_p=50_ρ=0_dense': {'ridge_aic': tensor(15.5376, device='cuda:0'),
  'ridge_adaptive_aic': tensor(15.5376, device='cuda:0'),
  'lasso_aic': tensor(20.7056, device='cuda:0', grad_fn

In [40]:
import pandas as pd

df = pd.DataFrame(results)
df.to_csv('results.csv')
df

Unnamed: 0,n=100_p=50_ρ=0_sparse,n=100_p=50_ρ=0_dense,n=100_p=50_ρ=0.25_sparse,n=100_p=50_ρ=0.25_dense,n=100_p=50_ρ=0.5_sparse,n=100_p=50_ρ=0.5_dense,n=100_p=25_ρ=0_sparse,n=100_p=25_ρ=0_dense,n=100_p=25_ρ=0.25_sparse,n=100_p=25_ρ=0.25_dense,n=100_p=25_ρ=0.5_sparse,n=100_p=25_ρ=0.5_dense,n=100_p=10_ρ=0_sparse,n=100_p=10_ρ=0_dense,n=100_p=10_ρ=0.25_sparse,n=100_p=10_ρ=0.25_dense,n=100_p=10_ρ=0.5_sparse,n=100_p=10_ρ=0.5_dense
ridge_aic,"tensor(0.0721, device='cuda:0')","tensor(15.5376, device='cuda:0')","tensor(0.0629, device='cuda:0')","tensor(16.8556, device='cuda:0')","tensor(0.0615, device='cuda:0')","tensor(14.2813, device='cuda:0')","tensor(0.1451, device='cuda:0')","tensor(17.7191, device='cuda:0')","tensor(0.1760, device='cuda:0')","tensor(14.6512, device='cuda:0')","tensor(0.1572, device='cuda:0')","tensor(21.4199, device='cuda:0')","tensor(0.2812, device='cuda:0')","tensor(17.8453, device='cuda:0')","tensor(0.2612, device='cuda:0')","tensor(15.7942, device='cuda:0')","tensor(0.2117, device='cuda:0')","tensor(17.5308, device='cuda:0')"
ridge_adaptive_aic,"tensor(0.0721, device='cuda:0')","tensor(15.5376, device='cuda:0')","tensor(0.0629, device='cuda:0')","tensor(16.8556, device='cuda:0')","tensor(0.0615, device='cuda:0')","tensor(14.2813, device='cuda:0')","tensor(0.1451, device='cuda:0')","tensor(17.7191, device='cuda:0')","tensor(0.1760, device='cuda:0')","tensor(14.6512, device='cuda:0')","tensor(0.1572, device='cuda:0')","tensor(21.4199, device='cuda:0')","tensor(0.2812, device='cuda:0')","tensor(17.8453, device='cuda:0')","tensor(0.2612, device='cuda:0')","tensor(15.7942, device='cuda:0')","tensor(0.2117, device='cuda:0')","tensor(17.5308, device='cuda:0')"
lasso_aic,"tensor(0.1603, device='cuda:0', grad_fn=<MeanB...","tensor(20.7056, device='cuda:0', grad_fn=<Mean...","tensor(0.1465, device='cuda:0', grad_fn=<MeanB...","tensor(21.9002, device='cuda:0', grad_fn=<Mean...","tensor(0.1437, device='cuda:0', grad_fn=<MeanB...","tensor(19.3557, device='cuda:0', grad_fn=<Mean...","tensor(0.1680, device='cuda:0', grad_fn=<MeanB...","tensor(21.7464, device='cuda:0', grad_fn=<Mean...","tensor(0.2009, device='cuda:0', grad_fn=<MeanB...","tensor(18.6939, device='cuda:0', grad_fn=<Mean...","tensor(0.1804, device='cuda:0', grad_fn=<MeanB...","tensor(25.6585, device='cuda:0', grad_fn=<Mean...","tensor(0.2873, device='cuda:0', grad_fn=<MeanB...","tensor(21.5131, device='cuda:0', grad_fn=<Mean...","tensor(0.2697, device='cuda:0', grad_fn=<MeanB...","tensor(19.5003, device='cuda:0', grad_fn=<Mean...","tensor(0.2190, device='cuda:0', grad_fn=<MeanB...","tensor(21.1763, device='cuda:0', grad_fn=<Mean..."
lasso_adaptive_aic,"tensor(0.1773, device='cuda:0', grad_fn=<MeanB...","tensor(20.1790, device='cuda:0', grad_fn=<Mean...","tensor(0.1688, device='cuda:0', grad_fn=<MeanB...","tensor(21.5623, device='cuda:0', grad_fn=<Mean...","tensor(0.1669, device='cuda:0', grad_fn=<MeanB...","tensor(18.8046, device='cuda:0', grad_fn=<Mean...","tensor(0.1683, device='cuda:0', grad_fn=<MeanB...","tensor(21.4606, device='cuda:0', grad_fn=<Mean...","tensor(0.2024, device='cuda:0', grad_fn=<MeanB...","tensor(18.4426, device='cuda:0', grad_fn=<Mean...","tensor(0.1791, device='cuda:0', grad_fn=<MeanB...","tensor(25.2796, device='cuda:0', grad_fn=<Mean...","tensor(0.2890, device='cuda:0', grad_fn=<MeanB...","tensor(21.3381, device='cuda:0', grad_fn=<Mean...","tensor(0.2686, device='cuda:0', grad_fn=<MeanB...","tensor(19.4312, device='cuda:0', grad_fn=<Mean...","tensor(0.2191, device='cuda:0', grad_fn=<MeanB...","tensor(20.9192, device='cuda:0', grad_fn=<Mean..."
ridge_bic,"tensor(0.0721, device='cuda:0')","tensor(15.5376, device='cuda:0')","tensor(0.0629, device='cuda:0')","tensor(16.8556, device='cuda:0')","tensor(0.0615, device='cuda:0')","tensor(14.2813, device='cuda:0')","tensor(0.1451, device='cuda:0')","tensor(17.7191, device='cuda:0')","tensor(0.1760, device='cuda:0')","tensor(14.6512, device='cuda:0')","tensor(0.1572, device='cuda:0')","tensor(21.4199, device='cuda:0')","tensor(0.2812, device='cuda:0')","tensor(17.8453, device='cuda:0')","tensor(0.2612, device='cuda:0')","tensor(15.7942, device='cuda:0')","tensor(0.2117, device='cuda:0')","tensor(17.5308, device='cuda:0')"
ridge_adaptive_bic,"tensor(0.0721, device='cuda:0')","tensor(15.5376, device='cuda:0')","tensor(0.0629, device='cuda:0')","tensor(16.8556, device='cuda:0')","tensor(0.0615, device='cuda:0')","tensor(14.2813, device='cuda:0')","tensor(0.1451, device='cuda:0')","tensor(17.7191, device='cuda:0')","tensor(0.1760, device='cuda:0')","tensor(14.6512, device='cuda:0')","tensor(0.1572, device='cuda:0')","tensor(21.4199, device='cuda:0')","tensor(0.2812, device='cuda:0')","tensor(17.8453, device='cuda:0')","tensor(0.2612, device='cuda:0')","tensor(15.7942, device='cuda:0')","tensor(0.2117, device='cuda:0')","tensor(17.5308, device='cuda:0')"
lasso_bic,"tensor(0.1603, device='cuda:0', grad_fn=<MeanB...","tensor(20.7056, device='cuda:0', grad_fn=<Mean...","tensor(0.1465, device='cuda:0', grad_fn=<MeanB...","tensor(21.9002, device='cuda:0', grad_fn=<Mean...","tensor(0.1437, device='cuda:0', grad_fn=<MeanB...","tensor(19.3557, device='cuda:0', grad_fn=<Mean...","tensor(0.1680, device='cuda:0', grad_fn=<MeanB...","tensor(21.7464, device='cuda:0', grad_fn=<Mean...","tensor(0.2009, device='cuda:0', grad_fn=<MeanB...","tensor(18.6939, device='cuda:0', grad_fn=<Mean...","tensor(0.1804, device='cuda:0', grad_fn=<MeanB...","tensor(25.6585, device='cuda:0', grad_fn=<Mean...","tensor(0.2873, device='cuda:0', grad_fn=<MeanB...","tensor(21.5131, device='cuda:0', grad_fn=<Mean...","tensor(0.2697, device='cuda:0', grad_fn=<MeanB...","tensor(19.5003, device='cuda:0', grad_fn=<Mean...","tensor(0.2190, device='cuda:0', grad_fn=<MeanB...","tensor(21.1763, device='cuda:0', grad_fn=<Mean..."
lasso_adaptive_bic,"tensor(0.1773, device='cuda:0', grad_fn=<MeanB...","tensor(20.1790, device='cuda:0', grad_fn=<Mean...","tensor(0.1688, device='cuda:0', grad_fn=<MeanB...","tensor(21.5623, device='cuda:0', grad_fn=<Mean...","tensor(0.1669, device='cuda:0', grad_fn=<MeanB...","tensor(18.8046, device='cuda:0', grad_fn=<Mean...","tensor(0.1683, device='cuda:0', grad_fn=<MeanB...","tensor(21.4606, device='cuda:0', grad_fn=<Mean...","tensor(0.2024, device='cuda:0', grad_fn=<MeanB...","tensor(18.4426, device='cuda:0', grad_fn=<Mean...","tensor(0.1791, device='cuda:0', grad_fn=<MeanB...","tensor(25.2796, device='cuda:0', grad_fn=<Mean...","tensor(0.2890, device='cuda:0', grad_fn=<MeanB...","tensor(21.3381, device='cuda:0', grad_fn=<Mean...","tensor(0.2686, device='cuda:0', grad_fn=<MeanB...","tensor(19.4312, device='cuda:0', grad_fn=<Mean...","tensor(0.2191, device='cuda:0', grad_fn=<MeanB...","tensor(20.9192, device='cuda:0', grad_fn=<Mean..."
ridge_loocv,"tensor(0.0721, device='cuda:0')","tensor(15.5376, device='cuda:0')","tensor(0.0629, device='cuda:0')","tensor(16.8556, device='cuda:0')","tensor(0.0615, device='cuda:0')","tensor(14.2813, device='cuda:0')","tensor(0.1451, device='cuda:0')","tensor(17.7191, device='cuda:0')","tensor(0.1760, device='cuda:0')","tensor(14.6512, device='cuda:0')","tensor(0.1572, device='cuda:0')","tensor(21.4199, device='cuda:0')","tensor(0.2812, device='cuda:0')","tensor(17.8453, device='cuda:0')","tensor(0.2612, device='cuda:0')","tensor(15.7942, device='cuda:0')","tensor(0.2117, device='cuda:0')","tensor(17.5308, device='cuda:0')"
ridge_adaptive_loocv,"tensor(0.0721, device='cuda:0')","tensor(15.5376, device='cuda:0')","tensor(0.0629, device='cuda:0')","tensor(16.8556, device='cuda:0')","tensor(0.0615, device='cuda:0')","tensor(14.2813, device='cuda:0')","tensor(0.1451, device='cuda:0')","tensor(17.7191, device='cuda:0')","tensor(0.1760, device='cuda:0')","tensor(14.6512, device='cuda:0')","tensor(0.1572, device='cuda:0')","tensor(21.4199, device='cuda:0')","tensor(0.2812, device='cuda:0')","tensor(17.8453, device='cuda:0')","tensor(0.2612, device='cuda:0')","tensor(15.7942, device='cuda:0')","tensor(0.2117, device='cuda:0')","tensor(17.5308, device='cuda:0')"


In [44]:
new_results = {
    k:{k1:v1.item() for k1,v1 in v.items()} for k,v in results.items()
}

In [45]:
new_results

{'n=100_p=50_ρ=0_sparse': {'ridge_aic': 0.07209954410791397,
  'ridge_adaptive_aic': 0.07209954410791397,
  'lasso_aic': 0.16029036045074463,
  'lasso_adaptive_aic': 0.177296444773674,
  'ridge_bic': 0.07209954410791397,
  'ridge_adaptive_bic': 0.07209954410791397,
  'lasso_bic': 0.16029036045074463,
  'lasso_adaptive_bic': 0.177296444773674,
  'ridge_loocv': 0.07209954410791397,
  'ridge_adaptive_loocv': 0.07209954410791397,
  'lasso_loocv': 0.14632420241832733,
  'lasso_adaptive_loocv': 0.1693868190050125},
 'n=100_p=50_ρ=0_dense': {'ridge_aic': 15.537561416625977,
  'ridge_adaptive_aic': 15.537561416625977,
  'lasso_aic': 20.705595016479492,
  'lasso_adaptive_aic': 20.179044723510742,
  'ridge_bic': 15.537561416625977,
  'ridge_adaptive_bic': 15.537561416625977,
  'lasso_bic': 20.705595016479492,
  'lasso_adaptive_bic': 20.179044723510742,
  'ridge_loocv': 15.537561416625977,
  'ridge_adaptive_loocv': 15.537561416625977,
  'lasso_loocv': 20.40380096435547,
  'lasso_adaptive_loocv': 

In [46]:
new_df = pd.DataFrame(new_results)

In [47]:
new_df.to_csv('results.csv')

In [48]:
new_df

Unnamed: 0,n=100_p=50_ρ=0_sparse,n=100_p=50_ρ=0_dense,n=100_p=50_ρ=0.25_sparse,n=100_p=50_ρ=0.25_dense,n=100_p=50_ρ=0.5_sparse,n=100_p=50_ρ=0.5_dense,n=100_p=25_ρ=0_sparse,n=100_p=25_ρ=0_dense,n=100_p=25_ρ=0.25_sparse,n=100_p=25_ρ=0.25_dense,n=100_p=25_ρ=0.5_sparse,n=100_p=25_ρ=0.5_dense,n=100_p=10_ρ=0_sparse,n=100_p=10_ρ=0_dense,n=100_p=10_ρ=0.25_sparse,n=100_p=10_ρ=0.25_dense,n=100_p=10_ρ=0.5_sparse,n=100_p=10_ρ=0.5_dense
ridge_aic,0.0721,15.537561,0.062941,16.85561,0.061467,14.281297,0.14505,17.719103,0.175983,14.651215,0.157178,21.419931,0.281173,17.845337,0.26124,15.794201,0.211736,17.530773
ridge_adaptive_aic,0.0721,15.537561,0.062941,16.85561,0.061467,14.281297,0.14505,17.719103,0.175983,14.651215,0.157178,21.419931,0.281173,17.845337,0.26124,15.794201,0.211736,17.530773
lasso_aic,0.16029,20.705595,0.146496,21.900187,0.143737,19.355692,0.167975,21.746393,0.200925,18.693914,0.180369,25.658451,0.287331,21.51305,0.269665,19.500269,0.219003,21.17625
lasso_adaptive_aic,0.177296,20.179045,0.168771,21.562302,0.166903,18.804558,0.168337,21.46056,0.202352,18.442568,0.179143,25.279562,0.288986,21.338072,0.26856,19.431223,0.219129,20.919161
ridge_bic,0.0721,15.537561,0.062941,16.85561,0.061467,14.281297,0.14505,17.719103,0.175983,14.651215,0.157178,21.419931,0.281173,17.845337,0.26124,15.794201,0.211736,17.530773
ridge_adaptive_bic,0.0721,15.537561,0.062941,16.85561,0.061467,14.281297,0.14505,17.719103,0.175983,14.651215,0.157178,21.419931,0.281173,17.845337,0.26124,15.794201,0.211736,17.530773
lasso_bic,0.16029,20.705595,0.146496,21.900187,0.143737,19.355692,0.167975,21.746393,0.200925,18.693914,0.180369,25.658451,0.287331,21.51305,0.269665,19.500269,0.219003,21.17625
lasso_adaptive_bic,0.177296,20.179045,0.168771,21.562302,0.166903,18.804558,0.168337,21.46056,0.202352,18.442568,0.179143,25.279562,0.288986,21.338072,0.26856,19.431223,0.219129,20.919161
ridge_loocv,0.0721,15.537561,0.062941,16.85561,0.061467,14.281297,0.14505,17.719103,0.175983,14.651215,0.157178,21.419931,0.281173,17.845337,0.26124,15.794201,0.211736,17.530773
ridge_adaptive_loocv,0.0721,15.537561,0.062941,16.85561,0.061467,14.281297,0.14505,17.719103,0.175983,14.651215,0.157178,21.419931,0.281173,17.845337,0.26124,15.794201,0.211736,17.530773


In [52]:
pd.set_option('display.precision', 4)
new_df


Unnamed: 0,n=100_p=50_ρ=0_sparse,n=100_p=50_ρ=0_dense,n=100_p=50_ρ=0.25_sparse,n=100_p=50_ρ=0.25_dense,n=100_p=50_ρ=0.5_sparse,n=100_p=50_ρ=0.5_dense,n=100_p=25_ρ=0_sparse,n=100_p=25_ρ=0_dense,n=100_p=25_ρ=0.25_sparse,n=100_p=25_ρ=0.25_dense,n=100_p=25_ρ=0.5_sparse,n=100_p=25_ρ=0.5_dense,n=100_p=10_ρ=0_sparse,n=100_p=10_ρ=0_dense,n=100_p=10_ρ=0.25_sparse,n=100_p=10_ρ=0.25_dense,n=100_p=10_ρ=0.5_sparse,n=100_p=10_ρ=0.5_dense
ridge_aic,0.0721,15.5376,0.0629,16.8556,0.0615,14.2813,0.1451,17.7191,0.176,14.6512,0.1572,21.4199,0.2812,17.8453,0.2612,15.7942,0.2117,17.5308
ridge_adaptive_aic,0.0721,15.5376,0.0629,16.8556,0.0615,14.2813,0.1451,17.7191,0.176,14.6512,0.1572,21.4199,0.2812,17.8453,0.2612,15.7942,0.2117,17.5308
lasso_aic,0.1603,20.7056,0.1465,21.9002,0.1437,19.3557,0.168,21.7464,0.2009,18.6939,0.1804,25.6585,0.2873,21.5131,0.2697,19.5003,0.219,21.1763
lasso_adaptive_aic,0.1773,20.179,0.1688,21.5623,0.1669,18.8046,0.1683,21.4606,0.2024,18.4426,0.1791,25.2796,0.289,21.3381,0.2686,19.4312,0.2191,20.9192
ridge_bic,0.0721,15.5376,0.0629,16.8556,0.0615,14.2813,0.1451,17.7191,0.176,14.6512,0.1572,21.4199,0.2812,17.8453,0.2612,15.7942,0.2117,17.5308
ridge_adaptive_bic,0.0721,15.5376,0.0629,16.8556,0.0615,14.2813,0.1451,17.7191,0.176,14.6512,0.1572,21.4199,0.2812,17.8453,0.2612,15.7942,0.2117,17.5308
lasso_bic,0.1603,20.7056,0.1465,21.9002,0.1437,19.3557,0.168,21.7464,0.2009,18.6939,0.1804,25.6585,0.2873,21.5131,0.2697,19.5003,0.219,21.1763
lasso_adaptive_bic,0.1773,20.179,0.1688,21.5623,0.1669,18.8046,0.1683,21.4606,0.2024,18.4426,0.1791,25.2796,0.289,21.3381,0.2686,19.4312,0.2191,20.9192
ridge_loocv,0.0721,15.5376,0.0629,16.8556,0.0615,14.2813,0.1451,17.7191,0.176,14.6512,0.1572,21.4199,0.2812,17.8453,0.2612,15.7942,0.2117,17.5308
ridge_adaptive_loocv,0.0721,15.5376,0.0629,16.8556,0.0615,14.2813,0.1451,17.7191,0.176,14.6512,0.1572,21.4199,0.2812,17.8453,0.2612,15.7942,0.2117,17.5308
