## Regression part b

Implement two-level cross-validation (see algorithm 6 of the lecture notes). We will use
2-level cross-validation to compare the models with K1 = K2 = 10 folds
. As a baseline
model, we will apply a linear regression model with no features, i.e. it computes the mean
of y on the training data, and use this value to predict y on the test data.


In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import sklearn.model_selection 
from sklearn.linear_model import Ridge
#from toolbox_02450 import train_neural_net, draw_neural_net, visualize_decision_boundary,correlated_ttest
import torch
from scipy import stats
from Data_preprocessing import *


In [4]:
df

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
0,160,12.00,5.73,23.11,1,49,25.30,97.20,52,1
1,144,0.01,4.41,28.61,0,55,28.87,2.06,63,1
2,118,0.08,3.48,32.28,1,52,29.14,3.81,46,0
3,170,7.50,6.41,38.03,1,51,31.99,24.26,58,1
4,134,13.60,3.50,27.78,1,60,25.99,57.34,49,1
...,...,...,...,...,...,...,...,...,...,...
457,214,0.40,5.98,31.72,0,64,28.45,0.00,58,0
458,182,4.20,4.41,32.10,0,52,28.61,18.72,52,1
459,108,3.00,1.59,15.23,0,40,20.09,26.64,55,0
460,118,5.40,11.61,30.79,0,64,27.35,23.97,40,0


In [19]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import sklearn.linear_model as lm
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from scipy.io import loadmat

# Load dataset
attribute_names = ['sbp','tobacco','chd','adiposity','typea','obesity','alcohol','age','famhist']
class_name      = ['ldl']

# Adjust X and y to keep selected attributes
X = df[attribute_names].values.astype(np.float32)
y = df[class_name].values.astype(np.float32).ravel()

# Compute dataset properties
N, M = X.shape  # Number of observations and attributes

# Standardize data
X_standardized = zscore(X, ddof=1)

# Overwrite X with standardized data as we will only use standardized data in the following
X = X_standardized

# Define K1 (outer CV) and K2 (inner CV)
K1, K2 = 10, 10
outer_cv = KFold(n_splits=K1, shuffle=True, random_state=42)

# Hyperparameter search space
lambda_values = [0.01, 0.1, 1, 10, 100]
h_values = [1, 5, 10, 20]

# Neural Network Model
def create_ann(input_size, hidden_units):
    model = nn.Sequential(
        nn.Linear(input_size, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, 1)
    )
    return model

# Initialize performance tracking
Error_train_baseline = np.empty(K1)
Error_test_baseline = np.empty(K1)
Error_train_lr = np.empty(K1)
Error_test_lr = np.empty(K1)
Error_train_ann = np.empty(K1)
Error_test_ann = np.empty(K1)

for k, (train_index, test_index) in enumerate(outer_cv.split(X)):
    X_train, y_train = X[train_index, :], y[train_index]
    X_test, y_test = X[test_index, :], y[test_index]
    
    # Baseline Model (No Features)
    y_pred_baseline = np.full_like(y_test, y_train.mean())
    Error_train_baseline[k] = np.mean((y_train - y_train.mean()) ** 2)
    Error_test_baseline[k] = np.mean((y_test - y_pred_baseline) ** 2)
    
    # Inner cross-validation for hyperparameter tuning
    inner_cv = KFold(n_splits=K2, shuffle=True, random_state=42)
    best_lambda, best_h = None, None
    min_error_lr, min_error_ann = float("inf"), float("inf")
    
    for train_inner_idx, val_inner_idx in inner_cv.split(X_train):
        X_train_inner, X_val_inner = X_train[train_inner_idx], X_train[val_inner_idx]
        y_train_inner, y_val_inner = y_train[train_inner_idx], y_train[val_inner_idx]
        
        # Standardize data
        scaler = StandardScaler().fit(X_train_inner)
        X_train_inner_scaled = scaler.transform(X_train_inner)
        X_val_inner_scaled = scaler.transform(X_val_inner)
        
        # Ridge Regression Hyperparameter Tuning
        for lambda_ in lambda_values:
            model_lr = lm.Ridge(alpha=lambda_).fit(X_train_inner_scaled, y_train_inner)
            val_error = np.mean((y_val_inner - model_lr.predict(X_val_inner_scaled)) ** 2)
            if val_error < min_error_lr:
                min_error_lr = val_error
                best_lambda = lambda_
        
        # ANN Hyperparameter Tuning
        for h in h_values:
            model_ann = create_ann(M, h)
            criterion = nn.MSELoss()
            optimizer = optim.Adam(model_ann.parameters(), lr=0.01)
            
            X_train_tensor = torch.tensor(X_train_inner_scaled, dtype=torch.float32)
            y_train_tensor = torch.tensor(y_train_inner.reshape(-1, 1), dtype=torch.float32)
            X_val_tensor = torch.tensor(X_val_inner_scaled, dtype=torch.float32)
            y_val_tensor = torch.tensor(y_val_inner.reshape(-1, 1), dtype=torch.float32)
            
            for epoch in range(100):
                optimizer.zero_grad()
                y_pred = model_ann(X_train_tensor)
                loss = criterion(y_pred, y_train_tensor)
                loss.backward()
                optimizer.step()
            
            with torch.no_grad():
                val_pred = model_ann(X_val_tensor)
                val_error = criterion(val_pred, y_val_tensor).item()
            
            if val_error < min_error_ann:
                min_error_ann = val_error
                best_h = h
    
    # Train final models using the best hyperparameters
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    model_lr = lm.Ridge(alpha=best_lambda).fit(X_train_scaled, y_train)
    Error_train_lr[k] = np.mean((y_train - model_lr.predict(X_train_scaled)) ** 2)
    Error_test_lr[k] = np.mean((y_test - model_lr.predict(X_test_scaled)) ** 2)
    
    model_ann = create_ann(M, best_h)
    optimizer = optim.Adam(model_ann.parameters(), lr=0.01)
    X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test.reshape(-1, 1), dtype=torch.float32)
    
    for epoch in range(100):
        optimizer.zero_grad()
        y_pred = model_ann(X_train_tensor)
        loss = criterion(y_pred, y_train_tensor)
        loss.backward()
        optimizer.step()
    
    with torch.no_grad():
        test_pred = model_ann(X_test_tensor)
        Error_train_ann[k] = criterion(model_ann(X_train_tensor), y_train_tensor).item()
        Error_test_ann[k] = criterion(test_pred, y_test_tensor).item()
    
    print(f"Outer fold {k+1}/{K1} completed.")

# Print results
print("\nFinal Results:")
print(f"Baseline Model: Train Error = {Error_train_baseline.mean():.4f}, Test Error = {Error_test_baseline.mean():.4f}")
print(f"Ridge Regression: Train Error = {Error_train_lr.mean():.4f}, Test Error = {Error_test_lr.mean():.4f}")
print(f"ANN Model: Train Error = {Error_train_ann.mean():.4f}, Test Error = {Error_test_ann.mean():.4f}")


Outer fold 1/10 completed.
Outer fold 2/10 completed.
Outer fold 3/10 completed.
Outer fold 4/10 completed.
Outer fold 5/10 completed.
Outer fold 6/10 completed.
Outer fold 7/10 completed.
Outer fold 8/10 completed.
Outer fold 9/10 completed.
Outer fold 10/10 completed.

Final Results:
Baseline Model: Train Error = 4.2790, Test Error = 4.2914
Ridge Regression: Train Error = 3.2964, Test Error = 3.4238
ANN Model: Train Error = 3.3073, Test Error = 3.9032


In [None]:
""" 
In this section, we will compare three models: the regularized linear regression model from the previous section, 
an artificial neural network (ANN) and a baseline. 
We are interested in two questions: Is one model better than the other? Is either model better than
 a trivial baseline?. We will attempt to answer these questions with two-level cross-validation.

 1. Implement two-level cross-validation (see algorithm 6 of the lecture notes). We will use
 2-level cross-validation to compare the models with K1 = K2 = 10 folds. As a baseline model, we will apply a linear regression model with no features, i.e. it computes the mean
 of y on the training data, and use this value to predict y on the test data.

 Make sure you can fit an ANN model to the data. As complexity-controlling parameter
 for the ANN, we will use the number of hidden units h. Based on a few test-runs, select
 a reasonable range of values for h (which should include h = 1), and describe the range of
 values you will use for h and lambda.
 """


from dtuimldmtools import (
    draw_neural_net,
    train_neural_net,
    visualize_decision_boundary,
)

attribute_names = ['sbp','tobacco','chd','adiposity','typea','obesity','alcohol','age','famhist']
class_name      = ['ldl']

# Adjust X and y to keep selected attributes
X = df[attribute_names].values.astype(np.float32)
y = df[class_name].values.astype(np.float32).ravel()

# Compute dataset properties
N, M = X.shape  # Number of observations and attributes

# Standardize data
X_standardized = zscore(X, ddof=1)

# Overwrite X with standardized data as we will only use standardized data in the following
X = X_standardized


# Define K1 (outer CV) and K2 (inner CV) folds
K1 = 10 # Number of outer loops
K2 = 10 # Number of inner loops
outer_cv = sklearn.model_selection.KFold(n_splits=K1, shuffle=True, random_state=42)

# Define ANN hyperparameters
min_n_hidden_units = 1  # Minimum number of hidden units
max_n_hidden_units = 5  # Maximum number of hidden units
max_iter = 10000  # Maximum number of iterations for training
loss_fn   = torch.nn.MSELoss()

# Hyperparameters search space 
lambda_values = [0.01, 0.1, 1, 10, 100] 
h_values = [1, 5, 10, 20]

# Define lists to store outer CV results
error_train_baseline = np.empty(K1)
error_test_baseline = np.empty(K1)
error_test_rlr = np.empty(K1)
error_test_ann = np.empty(K1)
best_lamdas = []
best_hs = []
results_table = []


# Loop over outer folds
for k, (train_index, test_index) in enumerate(outer_cv.split(X)):
    print(f"Outer fold {k+1}/{K1}")

    # Extract training and test set for current CV fold
    X_train_outer = X[train_index,:]
    y_train_outer = y[train_index]
    X_test_outer = X[test_index,:]
    y_test_outer = y[test_index]
  

    # Baseline model: mean of y_train (no features)
    y_pred_baseline = np.full_like(y_test_outer, np.mean(y_train_outer))
    error_train_baseline[k] =  np.square(y_train_outer - y_pred_baseline).sum() / len(y_train_outer)
    error_test_baseline[k] = np.square(y_test_outer - y_pred_baseline).sum() / len(y_test_outer)
    
    
    # Inner cross-validation for RLR and ANN hyperparameter tuning
    inner_cv = sklearn.model_selection.KFold(n_splits=K2, shuffle=True, random_state=42)
    best_lambda = None
    best_h = None
    min_error_rlr = np.inf
    min_error_ann = np.inf

    # Loop over inner folds
    for train_inner_index, val_index in inner_cv.split(X_train_outer):

        # Extract training and validation set for current CV fold
        X_train_inner = X_train_outer[train_inner_index,:]
        y_train_inner = y_train_outer[train_inner_index]
        X_val = X_train_outer[val_index,:]
        y_val = y_train_outer[val_index]
      


        # Hyperparameter tuning for RLR
        for lambda_value in lambda_values:
        
            model_rlr = Ridge(alpha=lambda_value)  # Define model with regularization parameter lambda
            model_rlr.fit(X_train_inner, y_train_inner)  # Train
            y_pred_rlr = model_rlr.predict(X_val)  # Predict validation set
            error_rlr = np.square(y_val - y_pred_rlr).sum() / len(y_val)  # Compute validation error

            if error_rlr < min_error_rlr:
                min_error_rlr = error_rlr
                best_lambda = lambda_value
        
        # Hyperparameter tuning for ANN

        # Convert data to PyTorch tensors
        X_train_inner_tensor = torch.tensor(X_train_inner, dtype=torch.float32)
        y_train_inner_tensor = torch.tensor(y_train_inner, dtype=torch.float32).view(-1, 1)
        X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
        y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)

        for n_hidden_units in range(min_n_hidden_units,max_n_hidden_units + 1):
            # Define ANN model
            model = lambda: torch.nn.Sequential(
                torch.nn.Linear(M, n_hidden_units),  # M features to H hidden units
                # 1st transfer function, either Tanh or ReLU:
                torch.nn.Tanh(),  # torch.nn.ReLU(),
                torch.nn.Linear(n_hidden_units, 1),  # H hidden units to 1 output neuron
                # No final transfer function, 
            )

            # Train the ANN model using the training data
            net, final_loss, learning_curve = train_neural_net(model, 
                                                               loss_fn,
                                                               X=X_train_inner_tensor, 
                                                               y=y_train_inner_tensor, 
                                                               n_replicates=1,
                                                               max_iter=max_iter)

            # Determine estimated y values for the validation set
            y_est_inner = net(X_val_tensor).detach().numpy()

            # Compute validation error
            error_ann = np.square(y_val - y_est_inner).sum() / len(y_val)

            # Check if the current error is less than the minimum error
            if error_ann < min_error_ann:
                min_error_ann = error_ann
                best_h = n_hidden_units
                best_model = net


       
        




Fold 1: Best Lambda for RLR: 100
Fold 2: Best Lambda for RLR: 100
Fold 3: Best Lambda for RLR: 0.01
Fold 4: Best Lambda for RLR: 100
Fold 5: Best Lambda for RLR: 1
Fold 6: Best Lambda for RLR: 0.01
Fold 7: Best Lambda for RLR: 100
Fold 8: Best Lambda for RLR: 0.01
Fold 9: Best Lambda for RLR: 100
Fold 10: Best Lambda for RLR: 100


In [8]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
import numpy as np
import torch
from dtuimldmtools import train_neural_net
import warnings
from sklearn.exceptions import ConvergenceWarning

# Attribute selection
attribute_names = ['sbp','tobacco','chd','adiposity','typea','obesity','alcohol','age','famhist']
class_name = ['ldl']

# Prepare data
X = df[attribute_names].values.astype(np.float32)
y = df[class_name].values.astype(np.float32).ravel()
N, M = X.shape

# Standardize
X = zscore(X, ddof=1)

# Cross-validation setup
K1 = 10
K2 = 5
outer_cv = KFold(n_splits=K1, shuffle=True, random_state=42)

# Hyperparameter ranges
lambda_values = [0.01, 0.1, 1, 10, 100]
h_values = [1, 2, 3, 5]
max_iter = 2000
loss_fn = torch.nn.MSELoss()

# Storage
error_train_baseline = np.empty(K1)
error_test_baseline = np.empty(K1)
error_test_rlr = np.empty(K1)
error_test_ann = np.empty(K1)
best_lamdas = []
best_hs = []
results_table = []

# Outer CV loop
for k, (train_index, test_index) in enumerate(outer_cv.split(X)):
    print(f"Outer fold {k+1}/{K1}")

    X_train_outer, y_train_outer = X[train_index], y[train_index]
    X_test_outer, y_test_outer = X[test_index], y[test_index]

    # --- Baseline model ---
    y_pred_baseline = np.full_like(y_test_outer, np.mean(y_train_outer))
    error_train_baseline[k] = np.square(y_train_outer - np.mean(y_train_outer)).sum() / len(y_train_outer)
    error_test_baseline[k] = np.square(y_test_outer - y_pred_baseline).sum() / len(y_test_outer)

    # --- Inner CV for hyperparameter tuning ---
    inner_cv = KFold(n_splits=K2, shuffle=True, random_state=42)
    best_lambda = None
    best_h = None
    min_error_rlr = np.inf
    min_error_ann = np.inf

    for train_inner_index, val_index in inner_cv.split(X_train_outer):
        X_train_inner = X_train_outer[train_inner_index]
        y_train_inner = y_train_outer[train_inner_index]
        X_val = X_train_outer[val_index]
        y_val = y_train_outer[val_index]

        # --- RLR tuning ---
        for lambda_value in lambda_values:
            model_rlr = Ridge(alpha=lambda_value)
            model_rlr.fit(X_train_inner, y_train_inner)
            y_pred_rlr = model_rlr.predict(X_val)
            error_rlr = np.square(y_val - y_pred_rlr).sum() / len(y_val)

            if error_rlr < min_error_rlr:
                min_error_rlr = error_rlr
                best_lambda = lambda_value

        # --- ANN tuning ---
        X_train_inner_tensor = torch.tensor(X_train_inner, dtype=torch.float32)
        y_train_inner_tensor = torch.tensor(y_train_inner, dtype=torch.float32).view(-1, 1)
        X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
        y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)

        for h in h_values:
            model = lambda: torch.nn.Sequential(
                torch.nn.Linear(M, h),
                torch.nn.Tanh(),
                torch.nn.Linear(h, 1),
            )

            with warnings.catch_warnings():
                warnings.simplefilter("ignore", ConvergenceWarning)
                net, final_loss, learning_curve = train_neural_net(
                    model, loss_fn,
                    X=X_train_inner_tensor,
                    y=y_train_inner_tensor,
                    n_replicates=1,
                    max_iter=max_iter
                )

            y_est_val = net(X_val_tensor).detach().numpy()
            error_ann = np.square(y_val - y_est_val).sum() / len(y_val)

            if error_ann < min_error_ann:
                min_error_ann = error_ann
                best_h = h
                best_ann_model = net

    # Save best hyperparameters
    best_lamdas.append(best_lambda)
    best_hs.append(best_h)

    # --- Train best models on outer training set ---
    # RLR
    final_rlr = Ridge(alpha=best_lambda)
    final_rlr.fit(X_train_outer, y_train_outer)
    y_test_pred_rlr = final_rlr.predict(X_test_outer)
    error_test_rlr[k] = np.square(y_test_outer - y_test_pred_rlr).sum() / len(y_test_outer)

    # ANN
    X_train_outer_tensor = torch.tensor(X_train_outer, dtype=torch.float32)
    y_train_outer_tensor = torch.tensor(y_train_outer, dtype=torch.float32).view(-1, 1)
    X_test_tensor = torch.tensor(X_test_outer, dtype=torch.float32)

    best_ann = lambda: torch.nn.Sequential(
        torch.nn.Linear(M, best_h),
        torch.nn.Tanh(),
        torch.nn.Linear(best_h, 1),
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        trained_ann, _, _ = train_neural_net(best_ann, loss_fn,
                                             X=X_train_outer_tensor,
                                             y=y_train_outer_tensor,
                                             n_replicates=1,
                                             max_iter=max_iter)

    y_test_pred_ann = trained_ann(X_test_tensor).detach().numpy()
    error_test_ann[k] = np.square(y_test_outer - y_test_pred_ann).sum() / len(y_test_outer)

    # --- Store results ---
    results_table.append({
        "Fold": k+1,
        "lambda*": best_lambda,
        "h*": best_h,
        "Baseline Test Error": error_test_baseline[k],
        "RLR Test Error": error_test_rlr[k],
        "ANN Test Error": error_test_ann[k]
    })


Outer fold 1/10

	Replicate: 1/1
		Iter	Loss			Rel. loss
		1000	16.207216	0.00082922744
		2000	7.632791	0.0006451087
		Final loss:
		2000	7.632791	0.0006451087

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	7.280691	0.0017139013
		2000	3.5814657	0.0001591438
		Final loss:
		2000	3.5814657	0.0001591438

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.4360847	0.0010515479
		2000	3.3162155	0.00011149637
		Final loss:
		2000	3.3162155	0.00011149637

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.2795722	0.00010794499
		2000	2.9918659	6.0400558e-05
		Final loss:
		2000	2.9918659	6.0400558e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	13.536421	0.0010224166
		2000	5.905298	0.0006121744
		Final loss:
		2000	5.905298	0.0006121744

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	7.7830997	0.0014897201
		2000	3.4614646	0.0001575679
		Final loss:
		2000	3.4614646	0.0001575679

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.7695584	0.00063265284
		2000	3.2003467	5.378447e-05
		Final loss:
		2000	3.2003467	5.378447e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.4303567	0.00080141675
		2000	2.8602207	0.000103518316
		Final loss:
		2000	2.8602207	0.000103518316

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	10.7294035	0.00097357744
		2000	5.214158	0.00045366274
		Final loss:
		2000	5.214158	0.00045366274

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	7.5863523	0.0020741022
		2000	3.8032153	0.0001303128
		Final loss:
		2000	3.8032153	0.0001303128

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.168792	0.0007077557
		2000	3.448579	6.477559e-05
		Final loss:
		2000	3.448579	6.477559e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.6777565	0.00028152997
		2000	3.248053	6.804045e-05
		Final loss:
		2000	3.248053	6.804045e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	11.05074	0.0015407454
		2000	4.3108244	0.0005153058
		Final loss:
		2000	4.3108244	0.0005153058

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.2782826	0.0017262435
		2000	2.9330633	5.1126623e-05
		Final loss:
		2000	2.9330633	5.1126623e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.200888	0.0014202434
		2000	2.8854237	0.00011607974
		Final loss:
		2000	2.8854237	0.00011607974

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.036166	0.00051055214
		2000	2.5958233	8.146165e-05
		Final loss:
		2000	2.5958233	8.146165e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	10.501074	0.0015397861
		2000	4.373277	0.0002817742
		Final loss:
		2000	4.373277	0.0002817742

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	8.88243	0.0016630191
		2000	3.8698988	0.0001496862
		Final loss:
		2000	3.8698988	0.0001496862

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.447535	0.00081887125
		2000	3.549123	0.000121910954
		Final loss:
		2000	3.549123	0.000121910954

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.6885078	0.00021965706
		2000	3.180179	9.655226e-05
		Final loss:
		2000	3.180179	9.655226e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	8.970718	0.0010957015
		2000	4.541369	0.00035466428
		Final loss:
		2000	4.541369	0.00035466428
Outer fold 2/10

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	13.423947	0.0008530636
		2000	6.4779334	0.00056772167
		Final loss:
		2000	6.4779334	0.00056772167

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.597267	0.0015259858
		2000	3.6555147	9.1301925e-05
		Final loss:
		2000	3.6555147	9.1301925e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.155023	0.001670982
		2000	3.369345	8.504761e-05
		Final loss:
		2000	3.369345	8.504761e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.3723505	0.0002601718
		2000	2.8720107	0.000110397115
		Final loss:
		2000	2.8720107	0.000110397115

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	14.025117	0.0009427132
		2000	6.323362	0.0006219618
		Final loss:
		2000	6.323362	0.0006219618

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	8.293403	0.0018960646
		2000	3.782272	0.00020179992
		Final loss:
		2000	3.782272	0.00020179992

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.5828643	0.0015703174
		2000	3.2097757	8.3482504e-05
		Final loss:
		2000	3.2097757	8.3482504e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.5407245	0.00041266726
		2000	3.1093235	0.00010319875
		Final loss:
		2000	3.1093235	0.00010319875

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	7.5096188	0.00094609614
		2000	4.2315774	0.0002711603
		Final loss:
		2000	4.2315774	0.0002711603

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.4397674	0.0007528535
		2000	3.5264826	5.9221136e-05
		Final loss:
		2000	3.5264826	5.9221136e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.017121	0.00069961004
		2000	3.3078032	9.0016845e-05
		Final loss:
		2000	3.3078032	9.0016845e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.2707767	0.0014295446
		2000	3.0253549	0.00011354772
		Final loss:
		2000	3.0253549	0.00011354772

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	14.375162	0.0016188354
		2000	5.1214557	0.0006502002
		Final loss:
		2000	5.1214557	0.0006502002

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.7194145	0.00097543566
		2000	2.9003892	5.934652e-05
		Final loss:
		2000	2.9003892	5.934652e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.82676	0.0010786084
		2000	2.9334686	6.8510395e-05
		Final loss:
		2000	2.9334686	6.8510395e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	2.9009523	0.00022152522
		2000	2.6140397	7.9070174e-05
		Final loss:
		2000	2.6140397	7.9070174e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.8936133	0.0009394229
		2000	3.5238802	0.0001942083
		Final loss:
		2000	3.5238802	0.0001942083

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.9417512	0.0010615407
		2000	3.05924	5.0576557e-05
		Final loss:
		2000	3.05924	5.0576557e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.5956943	0.0008282134
		2000	2.9261777	6.183784e-05
		Final loss:
		2000	2.9261777	6.183784e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.084358	0.00042264824
		2000	2.7466505	4.7913178e-05
		Final loss:
		2000	2.7466505	4.7913178e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	7.442088	0.0019432039
		2000	3.5239522	0.000141247
		Final loss:
		2000	3.5239522	0.000141247
Outer fold 3/10

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.6667576	0.00084798073
		2000	3.556981	0.00017618648
		Final loss:
		2000	3.556981	0.00017618648

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.8947423	0.0010736677
		2000	3.0166886	5.097382e-05
		Final loss:
		2000	3.0166886	5.097382e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.4495482	0.0006538232
		2000	2.922027	7.277615e-05
		Final loss:
		2000	2.922027	7.277615e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	2.8876636	0.00018623089
		2000	2.5790467	9.946035e-05
		Final loss:
		2000	2.5790467	9.946035e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	10.684647	0.0011923824
		2000	4.497518	0.0005521771
		Final loss:
		2000	4.497518	0.0005521771

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.5537326	0.0006633444
		2000	2.9357908	0.0001386892
		Final loss:
		2000	2.9357908	0.0001386892

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.0866203	0.0003921613
		2000	2.6895907	9.998149e-05
		Final loss:
		2000	2.6895907	9.998149e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	2.914483	0.00064077484


  if loss_value < best_final_loss:


		2000	2.4614441	6.770144e-05
		Final loss:
		2000	2.4614441	6.770144e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss
		1000	5.389856	0.0008742855
		2000	3.5488365	0.00012346561
		Final loss:
		2000	3.5488365	0.00012346561

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.78523	0.0015819147
		2000	3.2937722	0.00011551257
		Final loss:
		2000	3.2937722	0.00011551257

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.909346	0.0010634537
		2000	3.0100915	0.00010755075
		Final loss:
		2000	3.0100915	0.00010755075

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.2617779	0.00046926676
		2000	2.6747248	7.38005e-05
		Final loss:
		2000	2.6747248	7.38005e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	7.382852	0.00084737485
		2000	4.220028	0.00029606916
		Final loss:
		2000	4.220028	0.00029606916

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	9.934169	0.0021366915
		2000	3.6267254	0.00022372669
		Final loss:
		2000	3.6267254	0.00022372669

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.6250443	0.0008304432
		2000	3.0429366	4.434497e-05
		Final loss:
		2000	3.0429366	4.434497e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.3539267	0.00032909279
		2000	2.8990517	9.6129545e-05
		Final loss:
		2000	2.8990517	9.6129545e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.136082	0.00090349844
		2000	3.780468	0.00018512718
		Final loss:
		2000	3.780468	0.00018512718

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.4634624	0.0012468515
		2000	3.5493476	0.00022054586
		Final loss:
		2000	3.5493476	0.00022054586

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.2305374	0.0014951553
		2000	3.0300643	0.00010518988
		Final loss:
		2000	3.0300643	0.00010518988

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.4758763	0.0006709316
		2000	2.7974453	0.000113509814
		Final loss:
		2000	2.7974453	0.000113509814

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.7431927	0.0010339685
		2000	3.72937	0.00024140527
		Final loss:
		2000	3.72937	0.00024140527
Outer fold 4/10

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	14.618945	0.00092438
		2000	6.5759864	0.0006444326
		Final loss:
		2000	6.5759864	0.0006444326

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.0510216	0.0010815497
		2000	3.3834927	6.813523e-05
		Final loss:
		2000	3.3834927	6.813523e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.926874	0.0008403119
		2000	3.0411325	7.878374e-05
		Final loss:
		2000	3.0411325	7.878374e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.1656027	0.00024049947
		2000	2.7974074	6.724069e-05
		Final loss:
		2000	2.7974074	6.724069e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	7.4792047	0.0011582976
		2000	3.876436	0.0002913238
		Final loss:
		2000	3.876436	0.0002913238

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.730862	0.0012432443
		2000	3.2320223	8.991464e-05
		Final loss:
		2000	3.2320223	8.991464e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.81709	0.00083777163
		2000	3.0358775	5.606989e-05
		Final loss:
		2000	3.0358775	5.606989e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	2.988785	0.00022801348
		2000	2.6101775	6.923237e-05
		Final loss:
		2000	2.6101775	6.923237e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	13.649125	0.00094759633
		2000	6.264951	0.00057697546
		Final loss:
		2000	6.264951	0.00057697546

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	8.077416	0.0014549418
		2000	4.0190864	0.0001782889
		Final loss:
		2000	4.0190864	0.0001782889

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.7585795	0.00049028813
		2000	3.2448266	4.1806394e-05
		Final loss:
		2000	3.2448266	4.1806394e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.527997	0.0004517643
		2000	3.1123304	5.8599024e-05
		Final loss:
		2000	3.1123304	5.8599024e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	7.375956	0.0011127017
		2000	4.1513567	0.00021015506
		Final loss:
		2000	4.1513567	0.00021015506

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.7074175	0.0011099407
		2000	3.783122	0.00014889798
		Final loss:
		2000	3.783122	0.00014889798

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.8866432	0.00037282673
		2000	3.3775287	9.189928e-05
		Final loss:
		2000	3.3775287	9.189928e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.6568902	0.00027655473
		2000	3.1558177	7.932006e-05
		Final loss:
		2000	3.1558177	7.932006e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	13.116503	0.0008316686
		2000	6.4335203	0.0005559434
		Final loss:
		2000	6.4335203	0.0005559434

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.5941978	0.001218097
		2000	3.6396704	0.00010014794
		Final loss:
		2000	3.6396704	0.00010014794

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.234257	0.0010267737
		2000	3.183771	6.36487e-05
		Final loss:
		2000	3.183771	6.36487e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.5929055	0.0006582375
		2000	2.9999208	7.772047e-05
		Final loss:
		2000	2.9999208	7.772047e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	8.325926	0.0014606885
		2000	4.084201	0.00025387036
		Final loss:
		2000	4.084201	0.00025387036
Outer fold 5/10

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	10.196219	0.0010863169
		2000	4.5688286	0.0005094711
		Final loss:
		2000	4.5688286	0.0005094711

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.0115285	0.0010586935
		2000	3.0443463	5.3094882e-05
		Final loss:
		2000	3.0443463	5.3094882e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.8949218	0.0010526669
		2000	2.9451132	0.0001072525
		Final loss:
		2000	2.9451132	0.0001072525

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.272911	0.00082095584
		2000	2.6983902	7.297661e-05
		Final loss:
		2000	2.6983902	7.297661e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.582174	0.00080009963
		2000	3.7310183	0.000137306
		Final loss:
		2000	3.7310183	0.000137306

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.68503	0.0012284457
		2000	3.4376042	0.00011844607
		Final loss:
		2000	3.4376042	0.00011844607

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.9904177	0.0009605973
		2000	3.2281146	7.968526e-05
		Final loss:
		2000	3.2281146	7.968526e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.2198064	0.00017723827
		2000	2.9146154	6.862637e-05
		Final loss:
		2000	2.9146154	6.862637e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	14.5746355	0.0008789193
		2000	6.8051033	0.00061498093
		Final loss:
		2000	6.8051033	0.00061498093

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.8841767	0.0015648828
		2000	3.3431642	8.164929e-05
		Final loss:
		2000	3.3431642	8.164929e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.9848633	0.0007686566
		2000	3.2944086	4.6242723e-05
		Final loss:
		2000	3.2944086	4.6242723e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.6094842	0.0007755251
		2000	3.0367024	6.131437e-05
		Final loss:
		2000	3.0367024	6.131437e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	9.388531	0.001189897
		2000	4.282002	0.00040651578
		Final loss:
		2000	4.282002	0.00040651578

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.0166926	0.0008557876
		2000	3.2243094	5.2793283e-05
		Final loss:
		2000	3.2243094	5.2793283e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.5132775	0.00042687423
		2000	2.9484684	9.831833e-05
		Final loss:
		2000	2.9484684	9.831833e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.3178692	0.00021825983
		2000	2.8081348	9.049822e-05
		Final loss:
		2000	2.8081348	9.049822e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	7.50802	0.0012988128
		2000	3.3797908	0.0003797971
		Final loss:
		2000	3.3797908	0.0003797971

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.770112	0.0013483865
		2000	2.7848341	0.00013148459
		Final loss:
		2000	2.7848341	0.00013148459

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	2.931998	0.00052047713
		2000	2.543268	4.302709e-05
		Final loss:
		2000	2.543268	4.302709e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	2.6385674	0.00038098948
		2000	2.354108	5.7724938e-05
		Final loss:
		2000	2.354108	5.7724938e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.760004	0.001095723
		2000	3.267779	7.94476e-05
		Final loss:
		2000	3.267779	7.94476e-05
Outer fold 6/10

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	12.310301	0.001023951
		2000	5.490315	0.00056594546
		Final loss:
		2000	5.490315	0.00056594546

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.45224	0.0009991053
		2000	3.3177567	0.00010440369
		Final loss:
		2000	3.3177567	0.00010440369

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.2424417	0.0010016881
		2000	3.2379644	8.967603e-05
		Final loss:
		2000	3.2379644	8.967603e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.248578	0.00028746593
		2000	2.8747973	5.8050464e-05
		Final loss:
		2000	2.8747973	5.8050464e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	15.152143	0.0026411393
		2000	4.2297845	0.0006064748
		Final loss:
		2000	4.2297845	0.0006064748

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.151233	0.0012730329
		2000	3.0265315	9.885433e-05
		Final loss:
		2000	3.0265315	9.885433e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.6748443	0.0016827347
		2000	2.7037485	9.3198374e-05
		Final loss:
		2000	2.7037485	9.3198374e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	2.684586	0.00018211652
		2000	2.453886	4.857744e-05
		Final loss:
		2000	2.453886	4.857744e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.135219	0.00084838475
		2000	3.9338162	0.00016094749
		Final loss:
		2000	3.9338162	0.00016094749

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.9494457	0.00092113914
		2000	3.6049757	9.046583e-05
		Final loss:
		2000	3.6049757	9.046583e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.4292426	0.0016758351
		2000	3.4513185	8.5099855e-05
		Final loss:
		2000	3.4513185	8.5099855e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.4707537	0.00023741741
		2000	3.105646	7.1467206e-05
		Final loss:
		2000	3.105646	7.1467206e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	11.092386	0.0010725973
		2000	4.983112	0.000516845
		Final loss:
		2000	4.983112	0.000516845

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.2170463	0.0010352185
		2000	3.5022588	0.000110134286
		Final loss:
		2000	3.5022588	0.000110134286

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.7353606	0.0019506398
		2000	3.288796	0.00013054498
		Final loss:
		2000	3.288796	0.00013054498

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.472135	0.00031796106
		2000	3.0068886	7.318005e-05
		Final loss:
		2000	3.0068886	7.318005e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	14.522059	0.0008449931
		2000	6.838027	0.0006302691
		Final loss:
		2000	6.838027	0.0006302691

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.6798253	0.0010524986
		2000	3.445078	0.00017948703
		Final loss:
		2000	3.445078	0.00017948703

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.4025686	0.0005911127
		2000	3.018567	8.418988e-05
		Final loss:
		2000	3.018567	8.418988e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.3567126	0.00057094725
		2000	2.8796947	8.3200044e-05
		Final loss:
		2000	2.8796947	8.3200044e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	9.606579	0.001314432
		2000	4.359902	0.0004028616
		Final loss:
		2000	4.359902	0.0004028616
Outer fold 7/10

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.775732	0.0010041463
		2000	3.6227431	0.00030053713
		Final loss:
		2000	3.6227431	0.00030053713

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.6705747	0.0014586287
		2000	2.9972415	9.329875e-05
		Final loss:
		2000	2.9972415	9.329875e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.6816502	0.0013641549
		2000	2.7420492	6.5989974e-05
		Final loss:
		2000	2.7420492	6.5989974e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	2.895633	0.00015913266
		2000	2.565333	9.590349e-05
		Final loss:
		2000	2.565333	9.590349e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	10.154788	0.001033766
		2000	4.848738	0.00044087178
		Final loss:
		2000	4.848738	0.00044087178

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.1051083	0.001197963
		2000	3.555986	9.948801e-05
		Final loss:
		2000	3.555986	9.948801e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.2896886	0.0011546069
		2000	3.3291397	2.3417777e-05
		Final loss:
		2000	3.3291397	2.3417777e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.4426153	0.00029210182
		2000	2.9887033	7.968713e-05
		Final loss:
		2000	2.9887033	7.968713e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	7.9858737	0.00087512063
		2000	4.4636006	0.0003066089
		Final loss:
		2000	4.4636006	0.0003066089

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.670056	0.0011555076
		2000	3.6582005	0.00012355413
		Final loss:
		2000	3.6582005	0.00012355413

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.1844697	0.0018402018
		2000	3.5043578	0.00014292086
		Final loss:
		2000	3.5043578	0.00014292086

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.5437872	0.0005778522
		2000	2.9888453	7.928455e-05
		Final loss:
		2000	2.9888453	7.928455e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	10.210986	0.0009224699
		2000	5.1014023	0.00045770867
		Final loss:
		2000	5.1014023	0.00045770867

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.3755374	0.00085137325
		2000	3.3168821	7.8918376e-05
		Final loss:
		2000	3.3168821	7.8918376e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.6174424	0.0003867965
		2000	3.12738	7.325732e-05
		Final loss:
		2000	3.12738	7.325732e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.2218835	0.00019598693
		2000	2.8950512	7.353657e-05
		Final loss:
		2000	2.8950512	7.353657e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.0484605	0.0007779621
		2000	3.57241	9.235805e-05
		Final loss:
		2000	3.57241	9.235805e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.499467	0.0016795323
		2000	3.2929113	7.333945e-05
		Final loss:
		2000	3.2929113	7.333945e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.933986	0.0012699224
		2000	3.2581925	0.00014098854
		Final loss:
		2000	3.2581925	0.00014098854

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.2044296	0.00018418743
		2000	2.8855865	8.212147e-05
		Final loss:
		2000	2.8855865	8.212147e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	17.165972	0.0013057499
		2000	6.6202245	0.0006963792
		Final loss:
		2000	6.6202245	0.0006963792
Outer fold 8/10

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	8.585895	0.0012328496
		2000	4.228398	0.0003345899
		Final loss:
		2000	4.228398	0.0003345899

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.174656	0.000993428
		2000	3.3092961	3.9407118e-05
		Final loss:
		2000	3.3092961	3.9407118e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.0677314	0.00090321765
		2000	3.2570245	9.786062e-05
		Final loss:
		2000	3.2570245	9.786062e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.2126071	0.00016739746
		2000	2.7687776	8.851293e-05
		Final loss:
		2000	2.7687776	8.851293e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.769883	0.00089674914
		2000	3.842537	0.0002698324
		Final loss:
		2000	3.842537	0.0002698324

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.8223286	0.0014407187
		2000	3.3201392	9.880054e-05
		Final loss:
		2000	3.3201392	9.880054e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.7728546	0.0009868605
		2000	3.0740266	6.5067805e-05
		Final loss:
		2000	3.0740266	6.5067805e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.1285396	0.00018240778
		2000	2.7694073	0.00011457281
		Final loss:
		2000	2.7694073	0.00011457281

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	14.894468	0.00093132653
		2000	6.6433296	0.0006514525
		Final loss:
		2000	6.6433296	0.0006514525

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	7.167708	0.0023642438
		2000	3.4447224	9.1352435e-05
		Final loss:
		2000	3.4447224	9.1352435e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.306985	0.0011775893
		2000	3.1859949	7.7745775e-05
		Final loss:
		2000	3.1859949	7.7745775e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.580702	0.0007794937
		2000	2.836828	8.1348044e-05
		Final loss:
		2000	2.836828	8.1348044e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.414024	0.00086920446
		2000	4.038261	0.00017354722
		Final loss:
		2000	4.038261	0.00017354722

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.395355	0.0013419901
		2000	3.7195153	9.966458e-05
		Final loss:
		2000	3.7195153	9.966458e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.1775703	0.0009923939
		2000	3.3182306	5.7908648e-05
		Final loss:
		2000	3.3182306	5.7908648e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.4413033	0.00016500126
		2000	3.1161747	7.45917e-05
		Final loss:
		2000	3.1161747	7.45917e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	9.856353	0.001189183
		2000	4.414261	0.0004515439
		Final loss:
		2000	4.414261	0.0004515439

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.829692	0.0013765243
		2000	3.5413048	0.00016983229
		Final loss:
		2000	3.5413048	0.00016983229

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.7446482	0.0006731026
		2000	3.0670438	9.117558e-05
		Final loss:
		2000	3.0670438	9.117558e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.119688	0.00018621025
		2000	2.774464	6.7796704e-05
		Final loss:
		2000	2.774464	6.7796704e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	12.811365	0.0013168143
		2000	5.267444	0.00057197385
		Final loss:
		2000	5.267444	0.00057197385
Outer fold 9/10

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	12.123226	0.0010238776
		2000	5.3720536	0.0005837166
		Final loss:
		2000	5.3720536	0.0005837166

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.9876885	0.0013741609
		2000	3.5383146	0.00021766374
		Final loss:
		2000	3.5383146	0.00021766374

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.2378664	0.0014756335
		2000	2.8843372	8.942985e-05
		Final loss:
		2000	2.8843372	8.942985e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.1141162	0.00037286413
		2000	2.6361566	0.00011412445
		Final loss:
		2000	2.6361566	0.00011412445

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	8.288435	0.00088403566
		2000	4.4569607	0.00036479975
		Final loss:
		2000	4.4569607	0.00036479975

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.645292	0.00092268595
		2000	3.3637047	6.711863e-05
		Final loss:
		2000	3.3637047	6.711863e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.8036149	0.00075011776
		2000	3.2147503	3.0406294e-05
		Final loss:
		2000	3.2147503	3.0406294e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.216244	0.0002530882
		2000	2.877426	4.1510277e-05
		Final loss:
		2000	2.877426	4.1510277e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	9.78799	0.0014068035
		2000	4.2589626	0.00039954062
		Final loss:
		2000	4.2589626	0.00039954062

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.884957	0.0013126857
		2000	3.4546394	0.00012358885
		Final loss:
		2000	3.4546394	0.00012358885

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.952309	0.0009461887
		2000	3.1891649	9.1048096e-05
		Final loss:
		2000	3.1891649	9.1048096e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.1953986	0.00039700783
		2000	2.8723385	6.465675e-05
		Final loss:
		2000	2.8723385	6.465675e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	13.250224	0.0012385795
		2000	5.2714286	0.0006225889
		Final loss:
		2000	5.2714286	0.0006225889

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.6150203	0.0013628287
		2000	3.1479323	5.8845082e-05
		Final loss:
		2000	3.1479323	5.8845082e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.4318416	0.0006458851
		2000	2.9903746	8.1157195e-05
		Final loss:
		2000	2.9903746	8.1157195e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.550418	0.00087221793
		2000	2.8532808	0.00010878257
		Final loss:
		2000	2.8532808	0.00010878257

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	10.088101	0.0010946451
		2000	4.505039	0.0005257745
		Final loss:
		2000	4.505039	0.0005257745

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.89425	0.0018338313
		2000	2.9258206	7.251885e-05
		Final loss:
		2000	2.9258206	7.251885e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.5088143	0.0008862133
		2000	2.8600018	6.310188e-05
		Final loss:
		2000	2.8600018	6.310188e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	2.9255867	0.00020589364
		2000	2.6205838	6.231686e-05
		Final loss:
		2000	2.6205838	6.231686e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.310712	0.0013705901
		2000	3.4468405	0.00019018182
		Final loss:
		2000	3.4468405	0.00019018182
Outer fold 10/10

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	9.529319	0.00096771674
		2000	4.764792	0.00041773915
		Final loss:
		2000	4.764792	0.00041773915

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.9161344	0.0011717718
		2000	3.4895458	9.4141156e-05
		Final loss:
		2000	3.4895458	9.4141156e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.8191998	0.0006456952
		2000	3.2434766	9.4888666e-05
		Final loss:
		2000	3.2434766	9.4888666e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.913376	0.001065521
		2000	2.968923	0.00010430497
		Final loss:
		2000	2.968923	0.00010430497

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	11.484712	0.0013285938
		2000	4.756101	0.000549012
		Final loss:
		2000	4.756101	0.000549012

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	8.921834	0.0019321837
		2000	3.4895856	0.00023565843
		Final loss:
		2000	3.4895856	0.00023565843

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.549413	0.0014896842
		2000	3.1109993	7.348977e-05
		Final loss:
		2000	3.1109993	7.348977e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.0498064	0.00029322656
		2000	2.6943786	6.8484565e-05
		Final loss:
		2000	2.6943786	6.8484565e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	7.3585052	0.0009490137
		2000	4.138777	0.00025916004
		Final loss:
		2000	4.138777	0.00025916004

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.486575	0.001089867
		2000	3.8533213	0.00017426735
		Final loss:
		2000	3.8533213	0.00017426735

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.3676085	0.0021328367
		2000	3.4584558	7.575698e-05
		Final loss:
		2000	3.4584558	7.575698e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.8387628	0.0008346578
		2000	3.2795458	0.00012742452
		Final loss:
		2000	3.2795458	0.00012742452

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	6.635843	0.0008126976
		2000	4.2435555	0.00017189252
		Final loss:
		2000	4.2435555	0.00017189252

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.188184	0.0009405313
		2000	3.7832303	6.42131e-05
		Final loss:
		2000	3.7832303	6.42131e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.9609933	0.0015729801
		2000	3.362724	8.904301e-05
		Final loss:
		2000	3.362724	8.904301e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	4.063631	0.00088913523
		2000	3.1608217	7.753531e-05
		Final loss:
		2000	3.1608217	7.753531e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	8.7889185	0.001364261
		2000	3.9175847	0.00034914535
		Final loss:
		2000	3.9175847	0.00034914535

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	5.018126	0.0015097226
		2000	3.240951	9.2167684e-05
		Final loss:
		2000	3.240951	9.2167684e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.664317	0.001036645
		2000	3.022195	3.8575334e-05
		Final loss:
		2000	3.022195	3.8575334e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	3.1219113	0.00048219305
		2000	2.6594005	8.668524e-05
		Final loss:
		2000	2.6594005	8.668524e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss


  if loss_value < best_final_loss:


		1000	8.110571	0.0010508022
		2000	4.2131815	0.00031453435
		Final loss:
		2000	4.2131815	0.00031453435


  if loss_value < best_final_loss:


In [9]:
print("--- Results ---")
for result in results_table:
    print(f"Fold {result['Fold']}:")
    print(f"  lambda*: {result['lambda*']}")
    print(f"  h*: {result['h*']}")
    print(f"  Baseline Test Error: {result['Baseline Test Error']}")
    print(f"  RLR Test Error: {result['RLR Test Error']}")
    print(f"  ANN Test Error: {result['ANN Test Error']}")

--- Results ---
Fold 1:
  lambda*: 100
  h*: 1
  Baseline Test Error: 2.8038408756256104
  RLR Test Error: 2.1936287879943848
  ANN Test Error: 170.8423614501953
Fold 2:
  lambda*: 100
  h*: 2
  Baseline Test Error: 4.23134183883667
  RLR Test Error: 3.319079875946045
  ANN Test Error: 229.40969848632812
Fold 3:
  lambda*: 100
  h*: 1
  Baseline Test Error: 5.703014373779297
  RLR Test Error: 4.96721076965332
  ANN Test Error: 295.3771057128906
Fold 4:
  lambda*: 100
  h*: 1
  Baseline Test Error: 2.57759428024292
  RLR Test Error: 1.8649108409881592
  ANN Test Error: 140.0723876953125
Fold 5:
  lambda*: 100
  h*: 2
  Baseline Test Error: 5.7758708000183105
  RLR Test Error: 4.7767767906188965
  ANN Test Error: 305.45343017578125
Fold 6:
  lambda*: 10
  h*: 1
  Baseline Test Error: 5.032508373260498
  RLR Test Error: 3.8410308361053467
  ANN Test Error: 264.9061279296875
Fold 7:
  lambda*: 100
  h*: 1
  Baseline Test Error: 3.5277955532073975
  RLR Test Error: 3.0595483779907227
  ANN 