In [1]:
import warnings
warnings.filterwarnings("ignore")
import time
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from numpy.linalg import norm
import sklearn.datasets as skds
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import robust_scale
from sklearn import datasets
import torch
from optimization import Friedman, train_model

In [2]:
def save_data(n_sim, n_epochs, noise, beta_mse=None, elapsed_time=None, loss_history=None):
    """Write one simulation run's tracked results to a ``.npz`` file in the working directory.

    Parameters
    ----------
    n_sim, n_epochs, noise :
        Run settings; they are only used to build the output file name.
    beta_mse, elapsed_time, loss_history : optional
        The result containers to store. Any that is left as ``None`` is looked
        up under the same name in the notebook's global namespace, which is how
        the existing three-argument calls keep working.

    Raises
    ------
    NameError
        If a container is neither passed in nor defined globally.

    Notes
    -----
    When the containers are dicts (as in this notebook), ``np.savez`` stores
    each one as a pickled 0-d object array, so read them back with
    ``np.load(file, allow_pickle=True)[key].item()``.
    """
    file = f'friedman_sim_data_obs_{n_sim}sims_{n_epochs}_epochs_noise{noise}.npz'

    supplied = {
        'beta_mse': beta_mse,
        'elapsed_time': elapsed_time,
        'loss_history': loss_history,
    }
    payload = {}
    for key, value in supplied.items():
        if value is None:
            # Backward-compatible fallback to the notebook-level variable.
            try:
                value = globals()[key]
            except KeyError:
                raise NameError(f"name '{key}' is not defined") from None
        payload[key] = value

    np.savez(file, **payload)

## Noise = 0

In [None]:
# True Friedman #1 coefficients -- DO NOT CHANGE.
# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_friedman1.html
b = np.array([10, 20, 0.5, 10, 5])
n_epochs = 250
n_sim = 100
noise = 0

# One tracker per optimizer: per-epoch loss, coefficient MSE and elapsed time.
methods = ('sgd', 'lbfgs')
loss_history = {m: np.zeros((n_sim, n_epochs)) for m in methods}
beta_mse = {m: np.zeros(n_sim) for m in methods}
elapsed_time = {m: np.zeros(n_sim) for m in methods}

for i in tqdm(range(n_sim)):  # 100 simulations take 6 minutes
    # The simulation index doubles as the random seed, so both optimizers see the same data.
    X, y = datasets.make_friedman1(n_samples=100, n_features=5, noise=noise, random_state=i)
    X_new = torch.tensor(X, requires_grad=True)
    y_new = torch.tensor(y, requires_grad=True)

    for m in methods:
        # NOTE(review): train_model is assumed to return one loss value per epoch -- confirm.
        b_hat, loss_hist, elapsed = train_model(X_new, y_new, method=m, n_epochs=n_epochs)

        squared_err = (b - np.array(b_hat)) ** 2
        beta_mse[m][i] = float(squared_err.mean())
        elapsed_time[m][i] = elapsed
        loss_history[m][i] = loss_hist

save_data(n_sim, n_epochs, noise)

  0%|          | 0/100 [00:00<?, ?it/s]

In [None]:
def get_finite_vals(array):
    """Return only the finite entries of ``array`` (NaN and +/-inf are dropped)."""
    return array[np.isfinite(array)]


# Left panel: coefficient MSE per optimizer; right panel: training time per optimizer.
fig, axes = plt.subplots(figsize=(12, 4), ncols=2)
panels = [
    (axes[0], beta_mse, 'MSE', f'MSE: {noise} Noise'),
    (axes[1], elapsed_time, 'Time (s)', f'SGD vs LBFGS Time Elapsed : {noise} Noise'),
]
for axis, results, ylabel, title in panels:
    # Two separate calls: the SGD violin uses the default position (labelled at x=1),
    # the LBFGS violin is placed explicitly at x=1.5.
    axis.violinplot(get_finite_vals(results['sgd']))
    axis.violinplot(get_finite_vals(results['lbfgs']), positions=[1.5])
    axis.set_xticks([1, 1.5], ['SGD', 'LBFGS'])
    axis.set_title(title)
    axis.set_ylabel(ylabel)

In [None]:
# One-column frames so pandas can draw the histograms and compute the summary table.
sgd_mse_df = pd.DataFrame(beta_mse['sgd'])
lbfgs_mse_df = pd.DataFrame(beta_mse['lbfgs'])

fig, ax = plt.subplots(1, 2, figsize=(20, 18))
hist_specs = [
    (ax[0], sgd_mse_df, 'SGD', {}),
    (ax[1], lbfgs_mse_df, 'LBFGS', {'color': 'orange'}),
]
for axis, frame, label, style in hist_specs:
    frame.hist(ax=axis, bins=20, **style)
    # Set the title after hist() so it replaces whatever title pandas put on the axis.
    axis.set_title(f'Distribution of {label} MSE (Friedman, {noise} noise) across {n_sim} Simulations')
    axis.set_xlabel('MSE')
    axis.set_ylabel('Count')

# Side-by-side describe() statistics for both optimizers, exported to Excel.
five_num_summary_mse = pd.concat([sgd_mse_df.describe(), lbfgs_mse_df.describe()], axis=1)
five_num_summary_mse.columns = ['SGD', 'LBFGS']
fname = f'friedman_sim_data_{n_sim}sims_{n_epochs}epochs_{noise}noise_mse5summ.xlsx'
five_num_summary_mse.to_excel(fname)

## Noise = 50

In [None]:
# True Friedman #1 coefficients -- DO NOT CHANGE.
# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_friedman1.html
b = np.array([10, 20, 0.5, 10, 5])
n_epochs = 250
n_sim = 100
noise = 50

# One tracker per optimizer: per-epoch loss, coefficient MSE and elapsed time.
methods = ('sgd', 'lbfgs')
loss_history = {m: np.zeros((n_sim, n_epochs)) for m in methods}
beta_mse = {m: np.zeros(n_sim) for m in methods}
elapsed_time = {m: np.zeros(n_sim) for m in methods}

for i in tqdm(range(n_sim)):  # 100 simulations take 6 minutes
    # The simulation index doubles as the random seed, so both optimizers see the same data.
    X, y = datasets.make_friedman1(n_samples=100, n_features=5, noise=noise, random_state=i)
    X_new = torch.tensor(X, requires_grad=True)
    y_new = torch.tensor(y, requires_grad=True)

    for m in methods:
        # NOTE(review): train_model is assumed to return one loss value per epoch -- confirm.
        b_hat, loss_hist, elapsed = train_model(X_new, y_new, method=m, n_epochs=n_epochs)

        squared_err = (b - np.array(b_hat)) ** 2
        beta_mse[m][i] = float(squared_err.mean())
        elapsed_time[m][i] = elapsed
        loss_history[m][i] = loss_hist

save_data(n_sim, n_epochs, noise)

In [None]:
def get_finite_vals(array):
    """Return only the finite entries of ``array`` (NaN and +/-inf are dropped)."""
    return array[np.isfinite(array)]


# Left panel: coefficient MSE per optimizer; right panel: training time per optimizer.
fig, axes = plt.subplots(figsize=(12, 4), ncols=2)
panels = [
    (axes[0], beta_mse, 'MSE', f'MSE: {noise} Noise'),
    (axes[1], elapsed_time, 'Time (s)', f'SGD vs LBFGS Time Elapsed : {noise} Noise'),
]
for axis, results, ylabel, title in panels:
    # Two separate calls: the SGD violin uses the default position (labelled at x=1),
    # the LBFGS violin is placed explicitly at x=1.5.
    axis.violinplot(get_finite_vals(results['sgd']))
    axis.violinplot(get_finite_vals(results['lbfgs']), positions=[1.5])
    axis.set_xticks([1, 1.5], ['SGD', 'LBFGS'])
    axis.set_title(title)
    axis.set_ylabel(ylabel)

In [None]:
# One-column frames so pandas can draw the histograms and compute the summary table.
sgd_mse_df = pd.DataFrame(beta_mse['sgd'])
lbfgs_mse_df = pd.DataFrame(beta_mse['lbfgs'])

fig, ax = plt.subplots(1, 2, figsize=(20, 18))
hist_specs = [
    (ax[0], sgd_mse_df, 'SGD', {}),
    (ax[1], lbfgs_mse_df, 'LBFGS', {'color': 'orange'}),
]
for axis, frame, label, style in hist_specs:
    frame.hist(ax=axis, bins=20, **style)
    # Set the title after hist() so it replaces whatever title pandas put on the axis.
    axis.set_title(f'Distribution of {label} MSE (Friedman, {noise} noise) across {n_sim} Simulations')
    axis.set_xlabel('MSE')
    axis.set_ylabel('Count')

# Side-by-side describe() statistics for both optimizers, exported to Excel.
five_num_summary_mse = pd.concat([sgd_mse_df.describe(), lbfgs_mse_df.describe()], axis=1)
five_num_summary_mse.columns = ['SGD', 'LBFGS']
fname = f'friedman_sim_data_{n_sim}sims_{n_epochs}epochs_{noise}noise_mse5summ.xlsx'
five_num_summary_mse.to_excel(fname)

## Noise = 100

In [None]:
# True Friedman #1 coefficients -- DO NOT CHANGE.
# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_friedman1.html
b = np.array([10, 20, 0.5, 10, 5])
n_epochs = 250
n_sim = 100
noise = 100

# One tracker per optimizer: per-epoch loss, coefficient MSE and elapsed time.
methods = ('sgd', 'lbfgs')
loss_history = {m: np.zeros((n_sim, n_epochs)) for m in methods}
beta_mse = {m: np.zeros(n_sim) for m in methods}
elapsed_time = {m: np.zeros(n_sim) for m in methods}

for i in tqdm(range(n_sim)):  # 100 simulations take 6 minutes
    # The simulation index doubles as the random seed, so both optimizers see the same data.
    X, y = datasets.make_friedman1(n_samples=100, n_features=5, noise=noise, random_state=i)
    X_new = torch.tensor(X, requires_grad=True)
    y_new = torch.tensor(y, requires_grad=True)

    for m in methods:
        # NOTE(review): train_model is assumed to return one loss value per epoch -- confirm.
        b_hat, loss_hist, elapsed = train_model(X_new, y_new, method=m, n_epochs=n_epochs)

        squared_err = (b - np.array(b_hat)) ** 2
        beta_mse[m][i] = float(squared_err.mean())
        elapsed_time[m][i] = elapsed
        loss_history[m][i] = loss_hist

save_data(n_sim, n_epochs, noise)

In [None]:
def get_finite_vals(array):
    """Return only the finite entries of ``array`` (NaN and +/-inf are dropped)."""
    return array[np.isfinite(array)]


# Left panel: coefficient MSE per optimizer; right panel: training time per optimizer.
fig, axes = plt.subplots(figsize=(12, 4), ncols=2)
panels = [
    (axes[0], beta_mse, 'MSE', f'MSE: {noise} Noise'),
    (axes[1], elapsed_time, 'Time (s)', f'SGD vs LBFGS Time Elapsed : {noise} Noise'),
]
for axis, results, ylabel, title in panels:
    # Two separate calls: the SGD violin uses the default position (labelled at x=1),
    # the LBFGS violin is placed explicitly at x=1.5.
    axis.violinplot(get_finite_vals(results['sgd']))
    axis.violinplot(get_finite_vals(results['lbfgs']), positions=[1.5])
    axis.set_xticks([1, 1.5], ['SGD', 'LBFGS'])
    axis.set_title(title)
    axis.set_ylabel(ylabel)

In [None]:
# One-column frames so pandas can draw the histograms and compute the summary table.
sgd_mse_df = pd.DataFrame(beta_mse['sgd'])
lbfgs_mse_df = pd.DataFrame(beta_mse['lbfgs'])

fig, ax = plt.subplots(1, 2, figsize=(20, 18))
hist_specs = [
    (ax[0], sgd_mse_df, 'SGD', {}),
    (ax[1], lbfgs_mse_df, 'LBFGS', {'color': 'orange'}),
]
for axis, frame, label, style in hist_specs:
    frame.hist(ax=axis, bins=20, **style)
    # Set the title after hist() so it replaces whatever title pandas put on the axis.
    axis.set_title(f'Distribution of {label} MSE (Friedman, {noise} noise) across {n_sim} Simulations')
    axis.set_xlabel('MSE')
    axis.set_ylabel('Count')

# Side-by-side describe() statistics for both optimizers, exported to Excel.
five_num_summary_mse = pd.concat([sgd_mse_df.describe(), lbfgs_mse_df.describe()], axis=1)
five_num_summary_mse.columns = ['SGD', 'LBFGS']
fname = f'friedman_sim_data_{n_sim}sims_{n_epochs}epochs_{noise}noise_mse5summ.xlsx'
five_num_summary_mse.to_excel(fname)

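The surprising thing here is that hand calculations give the wrong result: with noise, plugging the true $\beta$ into the Friedman formula does not reproduce the simulated $y$ (compare the two cells below). The L-BFGS model is still able to perform even with the noise and recover the true $\beta$, whereas the SGD algorithm is not able to discern it.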

In [None]:
# Regenerate a single Friedman #1 sample with noise=100 (seed 1) and wrap it as tensors.
X, y = datasets.make_friedman1(n_samples=100, n_features=5, noise=100, random_state=1)
X_new, y_new = (torch.tensor(arr, requires_grad=True) for arr in (X, y))

In [None]:
# Friedman #1 formula evaluated by hand from the true betas in `b`, one term per line.
(
    b[0] * torch.sin(torch.pi * X_new[:, 0] * X_new[:, 1])
    + b[1] * (X_new[:, 2] - b[2]) ** 2
    + b[3] * X_new[:, 3]
    + b[4] * X_new[:, 4]
)

In [None]:
# Display the simulated response tensor so it can be compared with the hand-computed values.
y_new

## Sanity Check

In [None]:
# Regenerate a single Friedman #1 sample with noise=0 (seed 1) and wrap it as tensors.
X, y = datasets.make_friedman1(n_samples=100, n_features=5, noise=0, random_state=1)
X_new, y_new = (torch.tensor(arr, requires_grad=True) for arr in (X, y))

With 0 noise, it matches.

In [None]:

# Friedman #1 formula evaluated by hand from the true betas in `b`, one term per line.
(
    b[0] * torch.sin(torch.pi * X_new[:, 0] * X_new[:, 1])
    + b[1] * (X_new[:, 2] - b[2]) ** 2
    + b[3] * X_new[:, 3]
    + b[4] * X_new[:, 4]
)

In [None]:
# Display the simulated response tensor so it can be compared with the hand-computed values.
y_new