# Section 2

In [None]:
import cvxpy as cp
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
class color:
    """ANSI escape codes for styling text passed to ``print``.

    Prepend one of the codes to a string and append ``END`` to switch the
    styling off again.
    """
    # Reset code: ends whatever styling was started by the codes below.
    END = '\033[0m'

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Foreground colors.
    DARKCYAN = '\033[36m'
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    PURPLE = '\033[95m'
    CYAN = '\033[96m'

In [None]:
# Usage demo: wrap the text between a style code and the reset code.
print('To use color, just append color.BOLD to the beginning of the printed string and color.END to the end:')
print(color.BOLD, 'Like This!', color.END, sep='')

Implement the mean absolute error:
$$
MAE = \frac{1}{N}\sum_{i=1}^N |y_i-x_i^\top\theta|
$$

In [None]:
import math
def get_MAE(theta, X, y):
    """Return the mean absolute error of the linear model ``X @ theta``.

    Parameters
    ----------
    theta : array-like with ``d`` entries
        Model coefficients. Shapes ``(1, d)``, ``(d,)`` and ``(d, 1)`` are
        all accepted; the values are flattened before use.
    X : array-like of shape (N, d)
        Design matrix with one sample per row.
    y : array-like with ``N`` entries
        Targets. Both ``(N,)`` and ``(N, 1)`` are accepted.

    Returns
    -------
    float
        ``mean(|y_i - x_i^T theta|)`` over the ``N`` samples.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of targets.
    AssertionError
        If the result disagrees with ``mean_absolute_error`` from sklearn.
    """
    # Work on flat vectors: subtracting an (N,) target from an (N, 1)
    # prediction would silently broadcast to an (N, N) matrix and give a
    # wrong error value.
    ypred = np.asarray(X) @ np.asarray(theta).reshape(-1)
    ytrue = np.asarray(y).reshape(-1)
    if ypred.shape != ytrue.shape:
        raise ValueError(
            'Got {} predictions but {} targets'.format(ypred.size, ytrue.size))

    # Plain Python float, so callers can print, compare and collect the value
    # without dealing with a length-1 array.
    mae = float(np.mean(np.abs(ypred - ytrue)))

    # compare to MAE from sklearn (its signature is (y_true, y_pred)):
    mae_sklr = mean_absolute_error(ytrue, ypred)
    assert math.isclose(mae, mae_sklr)
    return mae

In [None]:
from sklearn.datasets import load_diabetes

diabetes = load_diabetes()

# Split the samples 50/50 into a training and a test part. The targets are
# turned into a column vector first, and the fixed random_state makes the
# split repeatable.
X, X_test, Y, Y_test = train_test_split(
    diabetes['data'],
    diabetes['target'][:, np.newaxis],
    test_size=0.5,
    random_state=0,
)

# Add bias column to data: a trailing column of ones lets the linear model
# learn an intercept.
X = np.hstack((X, np.ones((X.shape[0], 1))))
X_test = np.hstack((X_test, np.ones((X_test.shape[0], 1))))

In [None]:
# Show the text stored under the dataset's 'DESCR' key.
print(diabetes['DESCR'])

## Question 2.2
Implement mean-absolute-error regression with LASSO ($\ell_1$) regularization below. Use $\lambda=0.5$. Hints: in the `X` matrix, rows represent data samples. Also, don't forget to add a column of ones to capture the intercept. Use the `GLPK` solver.

In [None]:
# Function to solve LPs
def solve_LP(X, Y, lambda_, solver=None):
    """Solve the L1-regularised absolute-error regression as a linear program.

    The problem solved is

        minimise   sum_i Z_i + lambda_ * sum_j delta_j
        subject to -Z_i     <= y_i - x_i^T theta <= Z_i
                   -delta_j <= theta_j           <= delta_j

    so that at the optimum ``Z_i = |y_i - x_i^T theta|`` and
    ``delta_j = |theta_j|``. Note that the data term is the *sum* (not the
    mean) of the absolute residuals and that every column of ``X`` is
    penalised, including a bias column if one is present.

    Parameters
    ----------
    X : array of shape (N, d)
        Design matrix with one sample per row.
    Y : array with N entries
        Targets; ``(N,)`` and ``(N, 1)`` are both accepted.
    lambda_ : float
        Weight of the L1 penalty.
    solver : str, optional
        Name of the CVXPY solver to use. When omitted, GLPK is used if it is
        installed (as the exercise requests); otherwise CVXPY picks its
        default solver.

    Returns
    -------
    theta_opt : ndarray of shape (1, d)
        Optimal coefficients.
    opt_value : float
        Optimal objective value.
    dual_value : ndarray
        Dual variables of the first constraint, ``Y - X @ theta.T <= Z``.

    Raises
    ------
    RuntimeError
        If the solver does not return a solution.
    """
    N, d = X.shape
    # Make the targets an (N, 1) column so they line up with X @ theta.T.
    Y = np.asarray(Y).reshape(N, 1)

    # auxiliary variables:
    Z = cp.Variable((N, 1))
    # delta must have the same shape as theta: comparing a (1, d) expression
    # with a (d, 1) one is not the element-wise bound |theta_j| <= delta_j
    # (such a comparison is either rejected or broadcast to all (i, j) pairs).
    delta = cp.Variable((1, d))

    # variable to solve:
    theta = cp.Variable((1, d))

    residual = Y - X @ theta.T

    # linear program:
    prob = cp.Problem(
        cp.Minimize(cp.sum(Z) + lambda_ * cp.sum(delta)),
        [
            residual <= Z,
            -residual <= Z,
            theta <= delta,
            -theta <= delta,
        ])

    # solve LP:
    if solver is None and cp.GLPK in cp.installed_solvers():
        solver = cp.GLPK
    prob.solve(solver=solver)

    theta_opt = theta.value
    if theta_opt is None:
        raise RuntimeError(
            'LP solver returned no solution (status: {})'.format(prob.status))
    opt_value = prob.value
    dual_value = prob.constraints[0].dual_value
    return theta_opt, opt_value, dual_value

In [None]:
# Regularisation weight prescribed for this question.
lambda_ = 0.5
theta_opt, opt_value, dual_value = solve_LP(X, Y, lambda_)

# Print results:
print("\nThe optimal value is", opt_value)
print("A solution theta is", theta_opt, sep="\n")
print(f'Shape of solution:{theta_opt.shape}')

# The dual solution returned by solve_LP is kept in dual_value but is not
# printed here.

In [None]:
# Error of the fitted model on the data it was trained on ...
print(color.BOLD, 'Training Results', color.END, sep='')
print('MAE: {}'.format(
    get_MAE(theta_opt, X, Y)))
print('\n')
# ... and on the held-out samples.
print(color.BOLD, 'Test Results', color.END, sep='')
print('MAE: {}'.format(
    get_MAE(theta_opt, X_test, Y_test)))

## Question 2.3
Implement Cross-Validation for your MAE LASSO regression. You may recycle any functions used above. Hint: Use the `sklearn` function `train_test_split`, which can be used to randomly split the data.

Use cross-validation to tune the hyperparameter $\lambda$. Randomly select 75% of the data to construct $D_{\mathrm{train}}$ and use the rest of the data to construct $D_{\mathrm{val}}$. Use 50 logarithmically spaced values in [10e-5; 10e-1] as candidates for $\lambda$, and select the one that performs best on the validation set in terms of MAE. Then compare the test performance against the training performance again.

In [None]:
# Float-literal check: 10e-5 means 10 * 10**-5 (= 1e-4) and 10e-1 equals 1.0.
(10e-5, 10e-1, 1e-1)

In [None]:
# Hyperparameters:
lambda_ = np.logspace(-5, -1, 50, base = 10)
lambda_

In [None]:
# 75% split: hold out a quarter of the *training* data as validation set.
# X and Y already carry the bias column, and X_test / Y_test are deliberately
# left untouched: selecting lambda on the test samples would make the test
# error reported afterwards optimistically biased.
X_train, X_val, Y_train, Y_val = train_test_split(X, Y,
                                                  test_size=0.25,
                                                  random_state=0)

e_train, e_val, thetas, opt_val = [], [], [], []

# Cross-validation for 50 values of lambda:
for lam in lambda_:
    # Loop-local names, so the solution of the fixed-lambda fit is not clobbered.
    theta_lam, value_lam, _ = solve_LP(X_train, Y_train, lam)
    thetas.append(theta_lam)
    opt_val.append(value_lam)

    # evaluate on training set:
    e_train.append(get_MAE(theta_lam, X_train, Y_train))
    # evaluate on validation set:
    e_val.append(get_MAE(theta_lam, X_val, Y_val))

# take hyperparameter with smallest validation error:
best_idx = int(np.argmin(e_val))
best_lambda = lambda_[best_idx]
best_theta = thetas[best_idx]
best_value = opt_val[best_idx]

print('---Optimal values--')
print(f'Optimal lambda: {best_lambda}')
print(f'Optimal value: {best_value}')
print(f'Optimal theta:\n {best_theta}')

In [None]:
# Error of the selected model on the X / Y arrays ...
print(color.BOLD, 'Training Results', color.END, sep='')
print('MAE: {}'.format(
    get_MAE(best_theta, X, Y)))
print('\n')
# ... and on the X_test / Y_test arrays.
print(color.BOLD, 'Test Results', color.END, sep='')
print('MAE: {}'.format(
    get_MAE(best_theta, X_test, Y_test)))

In [None]:
# Training and validation error as a function of the regularisation weight.
fig, ax = plt.subplots()
# np.ravel flattens the collected errors in case they are length-1 arrays.
ax.plot(lambda_, np.ravel(e_train), label='training MAE')
ax.plot(lambda_, np.ravel(e_val), label='validation MAE')
# Mark the candidate with the smallest validation error.
ax.axvline(lambda_[int(np.argmin(e_val))], color='grey', linestyle='--',
           label=r'selected $\lambda$')
ax.set_xscale('log')
ax.set_xlabel(r'$\lambda$')
ax.set_ylabel('MAE')
ax.set_title('MAE LASSO regression: error vs. regularisation weight')
ax.legend();