In [1]:
from pandas import read_csv
import matplotlib.pyplot as plt
import numpy as np
from numpy import mean, std, absolute, arange
from sklearn.model_selection import cross_val_score, RepeatedKFold, GridSearchCV
from sklearn.linear_model import Ridge, RidgeCV

# Define Functions

In [2]:
def ridgeRegressionCG(Au, bu, xu, phi=0, i_max=1e6, epsilon=1e-6):
    iCheckFPErr = np.max([50, int(np.sqrt(np.size(Au, 1)))])
    i = 0
    x = Au.T @ xu; b = Au.T @ bu
    r = b - (Au.T @ (Au @ x) + phi * x)
    d = r
    delta_new = r.T.dot(r)
    delta_0 = delta_new
    X = []; X.append(x)
    while (i < i_max) & (delta_new > epsilon ** 2 * delta_0):
        q = Au.T @ (Au @ d) + phi * d
        alpha = delta_new / (d.T.dot(q))
        x = x + alpha * d
        if i % iCheckFPErr == 0:
            r = b - (Au.T @ (Au @ x) + phi * x)
        else:
            r = r - alpha * q
        delta_old = delta_new
        delta_new = r.T.dot(r)
        beta = delta_new / delta_old
        d = r + beta * d
        X.append(x)
        i += 1
    return x, np.array(X), i

# Load dataset

In [3]:
# Load dataset
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.csv'
dataframe = read_csv(url, header=None)

print("Shape summary:\n", dataframe.shape)
print("Summary of first few lines:\n", dataframe.head())

# Extract data
data = dataframe.values
X, y = data[:, :-1], data[:, -1]

Shape summary:
 (506, 14)
Summary of first few lines:
         0     1     2   3      4      5     6       7   8      9     10  \
0  0.00632  18.0  2.31   0  0.538  6.575  65.2  4.0900   1  296.0  15.3   
1  0.02731   0.0  7.07   0  0.469  6.421  78.9  4.9671   2  242.0  17.8   
2  0.02729   0.0  7.07   0  0.469  7.185  61.1  4.9671   2  242.0  17.8   
3  0.03237   0.0  2.18   0  0.458  6.998  45.8  6.0622   3  222.0  18.7   
4  0.06905   0.0  2.18   0  0.458  7.147  54.2  6.0622   3  222.0  18.7   

       11    12    13  
0  396.90  4.98  24.0  
1  396.90  9.14  21.6  
2  392.83  4.03  34.7  
3  394.63  2.94  33.4  
4  396.90  5.33  36.2  


# Evaluate Ridge Regression Model by Cross-Validation

In [4]:
# Define model
model = Ridge(alpha=1.0)

# Define model evaluation method
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# Evaluate model
scores = cross_val_score(model, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)

# Force scores to be positive
scores = absolute(scores)
print('Mean MAE: %.3f (%.3f)' % (mean(scores), std(scores)))

Mean MAE: 3.382 (0.519)


# Prediction

In [5]:
# Fit model
model.fit(X, y)

# Define new data
row = [0.00632, 18.00, 2.310, 0, 0.5380, 6.5750, 65.20, 4.0900, 1, 296.0, 15.30, 396.90, 4.98]

# Make a prediction
yhat = model.predict([row])

# Summarize prediction
print('Predicted: %.3f' % yhat)

Predicted: 30.253


# Tuning Ridge Hyperparameters

In [6]:
# Define model
model = Ridge()

# Define model evaluation method
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# Define grid
grid = dict()
grid['alpha'] = arange(0, 1, 0.01)

# Define search
search = GridSearchCV(model, grid, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)

# Perform the search
results = search.fit(X, y)

# Summarize
print('MAE: %.3f' % results.best_score_)
print('Config: %s' % results.best_params_)

MAE: -3.379
Config: {'alpha': 0.51}


# Tuning Ridge Hyperparameters via the RidgeCV class of Scikit-Learn

In [7]:
# Define model evaluation method
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# Define model
model = RidgeCV(alphas=arange(0, 1, 0.01), cv=cv, scoring='neg_mean_absolute_error')

# Fit model
model.fit(X, y)

# Summarize chosen configuration
print('alpha: %f' % model.alpha_)

alpha: 0.510000


# Solve using CG-based Ridge Regression

In [8]:
# Fit model
Au = X; bu = y.reshape(-1, 1); xu_0 = np.zeros(np.shape(bu))
w_cgm, _, _ = ridgeRegressionCG(Au, bu, xu_0, phi=0.51)

# Define new data
row = [0.00632, 18.00, 2.310, 0, 0.5380, 6.5750, 65.20, 4.0900, 1, 296.0, 15.30, 396.90, 4.98]
Au_hat = np.array(row)

# Predict
bu_hat = Au_hat @ w_cgm
print('Predicted value: %.3f' % bu_hat)

Predicted value: 29.143
