# 7. More L1 Regularization on Simulated Data

## Setup and Data

In [32]:
import numpy as np
import pandas as pd
import gpflow as gpf
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score


In [33]:
def sim_data(n=500, m=7, beta=None, sigma=1, seed=22):
    np.random.seed(seed)
    
    X = np.random.randn(n, m)
    
    # Define a beta vector with strong and weak signals
    if beta is None:
        beta = np.zeros(m)
        beta[0] = 3.5   # strong positive
        beta[2] = -5.0  # strong negative
        beta[4] = 0.7   # weak positive
        beta[6] = -0.7  # weak negative

    # Linear component
    linear_term = X @ beta

    # Add noise
    noise = np.random.normal(0, sigma, n)

    # Final output
    y = linear_term + noise

    return X, y.reshape(-1, 1), beta

In [34]:
X, y, beta = sim_data()

Xtrain_unscaled, Xtest_unscaled, ytrain, ytest = train_test_split(X, y, test_size = 0.2, random_state = 22)

ytrain = ytrain.reshape(-1)
ytest = ytest.reshape(-1)

m = X.shape[1]

In [35]:
# make sure we scale data
scaler = StandardScaler()
Xtrain = scaler.fit_transform(Xtrain_unscaled)
Xtest = scaler.transform(Xtest_unscaled)
ytrain = ytrain.reshape(-1,1)


## GP Params

In [36]:
A_init = tf.zeros((m, 1), dtype=tf.float64)
b_init = tf.zeros((1,), dtype=tf.float64)

kernel = gpf.kernels.SquaredExponential(lengthscales=np.ones(m))
likelihood = gpf.likelihoods.Gaussian()
mean_function = gpf.functions.Linear(A=A_init, b = b_init) 

opt = gpf.optimizers.Scipy()

## Original GP

In [37]:
# define initial model
og_mod = gpf.models.GPR(
    data=(Xtrain, ytrain), 
    kernel=kernel, 
    likelihood = likelihood,
    mean_function = mean_function
    )

opt.minimize(og_mod.training_loss, og_mod.trainable_variables)

  message: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH
  success: True
   status: 0
      fun: 581.8860653637489
        x: [ 9.140e+00  1.127e+01 ... -6.771e-01 -6.535e-02]
      nit: 39
      jac: [ 3.297e-10  2.966e-10 ... -5.701e-03 -6.891e-04]
     nfev: 47
     njev: 47
 hess_inv: <17x17 LbfgsInvHessProduct with dtype=float64>

## Alternating Optimization

In [38]:
# define initial alternating model
alt_mod = gpf.models.GPR(
    data=(Xtrain, ytrain), 
    kernel=kernel, 
    likelihood = likelihood,
    mean_function = mean_function
    )

In [39]:
X_current = Xtrain
m = X_current.shape[1]
current_kernel = kernel
active_dims = list(range(Xtrain.shape[1]))
count = 0
tol = 1e-6

while True:

    count += 1

    print(f"\n --- Beginning Iteration {count} --- ")

    alt_las = LassoCV()
    alt_las.fit(X_current, ytrain.reshape(-1))
    print(f'Lasso Best Alpha: {alt_las.alpha_:.3f}')
    print(f'Lasso Coefficients: {alt_las.coef_}')

    # Threshold and select features
    select_coef = np.abs(alt_las.coef_) >= 0.1
    prev_active_dims = active_dims # save old value

    active_dims = np.where(select_coef)[0].tolist() # set new value
    print(f'Selected Features: {select_coef}')
    print('active dims', active_dims)

    # Update X_current
    X_current = X_current[:, active_dims]

    # Update m
    m = X_current.shape[1]

    # Update kernel
    alt_mod.kernel = gpf.kernels.SquaredExponential(
        lengthscales = np.ones(len(active_dims)),
        active_dims=list(range(m))
    )
    
    # Optimize
    prev_A = alt_mod.mean_function.A # store old values
    opt.minimize(alt_mod.training_loss, alt_mod.trainable_variables)
    current_A = alt_mod.mean_function.A # set new values


    if count >= 20:
        print('Too Many Iterations')
        break

    # stop when active dimensions and linear weights stabilize, 
    if active_dims == prev_active_dims and tf.reduce_max(tf.abs(current_A - prev_A)) < tol:
        print(f"\n ! Converged on iteration {count} ! ")
        break


 --- Beginning Iteration 1 --- 
Lasso Best Alpha: 0.005
Lasso Coefficients: [ 3.50246206 -0.00902632 -5.03780693 -0.05565943  0.65500079 -0.07854696
 -0.67178148]
Selected Features: [ True False  True False  True False  True]
active dims [0, 2, 4, 6]

 --- Beginning Iteration 2 --- 
Lasso Best Alpha: 0.005
Lasso Coefficients: [ 3.49948751 -5.03870955  0.65797738 -0.67252924]
Selected Features: [ True  True  True  True]
active dims [0, 1, 2, 3]

 --- Beginning Iteration 3 --- 
Lasso Best Alpha: 0.005
Lasso Coefficients: [ 3.49948751 -5.03870955  0.65797738 -0.67252924]
Selected Features: [ True  True  True  True]
active dims [0, 1, 2, 3]

 ! Converged on iteration 3 ! 


In [41]:
alt_mod.mean_function.A

<Parameter: name=identity, dtype=float64, shape=[7, 1], fn="identity", numpy=
array([[ 3.50164879e+00],
       [ 3.20450392e-03],
       [-5.04164825e+00],
       [-6.83075088e-02],
       [ 6.67237398e-01],
       [-8.13309070e-02],
       [-6.76059616e-01]])>

## Comparing Performance

In [42]:
y_mean_og_mod, y_var_og_mod = og_mod.predict_y(Xtest)
rmse = mean_squared_error(ytest, y_mean_og_mod, squared=False)
print(f'RMSE Alternating Mod: {rmse:.3f}')

RMSE Alternating Mod: 0.972


In [43]:
y_mean_alt_mod, y_var_alt_mod = alt_mod.predict_y(Xtest)
rmse = mean_squared_error(ytest, y_mean_alt_mod, squared=False)
print(f'RMSE Alternating Mod: {rmse:.3f}')

RMSE Alternating Mod: 0.981


In [45]:
og_mod.kernel.lengthscales

<Parameter: name=softplus, dtype=float64, shape=[7], fn="softplus", numpy=
array([ 9.14007293, 11.27289865,  9.91632504,  7.06933958, 11.80933738,
        5.60697016,  7.19383501])>

In [46]:
alt_mod.kernel.lengthscales

<Parameter: name=softplus, dtype=float64, shape=[4], fn="softplus", numpy=array([2.22679813e-01, 8.17813845e+02, 3.63859493e+02, 7.61408766e+02])>

In [50]:
og_mod.mean_function.A

<Parameter: name=identity, dtype=float64, shape=[7, 1], fn="identity", numpy=
array([[ 3.50164879e+00],
       [ 3.20450392e-03],
       [-5.04164825e+00],
       [-6.83075088e-02],
       [ 6.67237398e-01],
       [-8.13309070e-02],
       [-6.76059616e-01]])>

In [48]:
alt_mod.mean_function.A

<Parameter: name=identity, dtype=float64, shape=[7, 1], fn="identity", numpy=
array([[ 3.50164879e+00],
       [ 3.20450392e-03],
       [-5.04164825e+00],
       [-6.83075088e-02],
       [ 6.67237398e-01],
       [-8.13309070e-02],
       [-6.76059616e-01]])>