In [1]:
from sklearn import datasets
import numpy as np

In [2]:
# Load the Boston housing data as a feature matrix X and a target vector y.
# NOTE(review): `load_boston` is reported as deprecated in scikit-learn 1.0 and
# removed in 1.2, where looking it up raises instead of returning the loader.
# In that case fall back to the original StatLib copy of the data (needs
# network access). Confirm against the scikit-learn version this notebook pins.
try:
    X, y = datasets.load_boston(return_X_y=True)
except (AttributeError, ImportError):
    from urllib.request import urlopen

    # The StatLib file starts with a 22-line text header. Each record is then
    # spread over two lines (11 + 3 numbers), i.e. 14 values per sample, the
    # last of which is the target.
    with urlopen("http://lib.stat.cmu.edu/datasets/boston") as response:
        raw_lines = response.read().decode("utf-8", errors="replace").splitlines()[22:]
    records = np.array(" ".join(raw_lines).split(), dtype=float).reshape(-1, 14)
    X, y = records[:, :13], records[:, 13]
print(X.shape)
print(y.shape)

(506, 13)
(506,)


In [3]:
# Create virtual (polynomial) features and an intercept column:
#   second degree of the first variable
#   second degree of the eighth variable
#   second and third degrees of the eleventh variable

# Guard against re-running this cell: X is overwritten below, so a second run
# would silently stack new features on top of the already-augmented matrix.
assert X.shape[1] == 13, (
    "expected the 13 raw features; re-run the data-loading cell first"
)

squares = np.power(X[:, [0, 7, 10]], 2)   # columns: x0^2, x7^2, x10^2
cube = np.power(X[:, 10], 3)              # x10^3

# add a dimension with all 1 to account for the intercept term
interc = np.ones((X.shape[0], 1))

# Final layout: [1, 13 raw features, x0^2, x7^2, x10^2, x10^3]
X = np.hstack((interc, X, squares, cube[:, np.newaxis]))
print(X.shape)

(506, 18)


In [4]:
# Sequential (unshuffled) train/test split: the first `train_ratio` share of
# the rows is used for training, the remaining rows for testing.
train_ratio = 0.8
cutoff = int(train_ratio * X.shape[0])
X_tr, X_te = X[:cutoff], X[cutoff:]
y_tr, y_te = y[:cutoff], y[cutoff:]
print('Train/Test: %d/%d' % (len(X_tr), len(X_te)))

Train/Test: 404/102


In [5]:
# linear regression using the normal equation
def pseudo_inverse(A):
    """Return the Moore-Penrose pseudo-inverse of ``A``.

    For a matrix with full column rank this equals the normal-equation form
    ``inv(A.T @ A) @ A.T``. It is computed with ``np.linalg.pinv`` (SVD based)
    rather than by inverting ``A.T @ A`` explicitly: forming and inverting the
    Gram matrix squares the condition number and fails outright when the
    columns of ``A`` are linearly dependent.

    Parameters
    ----------
    A : array_like, shape (n_samples, n_features)
        Design matrix.

    Returns
    -------
    numpy.ndarray, shape (n_features, n_samples)
        The pseudo-inverse, so that ``pseudo_inverse(A) @ y`` is the
        least-squares solution of ``A @ beta = y``.
    """
    return np.linalg.pinv(A)

In [15]:
# least-squares coefficients of the polynomial model, fitted on the training set
beta = pseudo_inverse(X_tr) @ y_tr

In [7]:
# evaluation functions
def MSE(prediction, reference):
    """Mean squared error between ``prediction`` and ``reference``.

    Both inputs are converted to float arrays first, so plain Python sequences
    are accepted and unsigned-integer inputs cannot wrap around in the
    subtraction.

    Parameters
    ----------
    prediction, reference : array_like
        Predicted and true values. They are combined with NumPy broadcasting,
        so they should have the same shape.

    Returns
    -------
    numpy.float64
        Mean of the squared element-wise differences.
    """
    residual = np.asarray(prediction, dtype=float) - np.asarray(reference, dtype=float)
    return np.mean(residual ** 2)

def MAE(prediction, reference):
    """Mean absolute error between ``prediction`` and ``reference``.

    Both inputs are converted to float arrays first, so plain Python sequences
    are accepted and unsigned-integer inputs cannot wrap around in the
    subtraction.

    Parameters
    ----------
    prediction, reference : array_like
        Predicted and true values. They are combined with NumPy broadcasting,
        so they should have the same shape.

    Returns
    -------
    numpy.float64
        Mean of the absolute element-wise differences.
    """
    residual = np.asarray(prediction, dtype=float) - np.asarray(reference, dtype=float)
    return np.mean(np.absolute(residual))

In [8]:
# evaluate the unregularized fit on the held-out test set
pred = X_te @ beta
mse, mae = MSE(pred, y_te), MAE(pred, y_te)
print(mse, mae, sep='\n')

28.8534678918
4.32632613636


### Next: regularized (ridge) regression

In [9]:
# regularized linear regression 
def regularized_pseudo_inverse(A, theta):
    """Return the L2-regularized pseudo-inverse of ``A``.

    Computes ``(A.T @ A + theta * I)^-1 @ A.T`` by solving the linear system
    with ``np.linalg.solve`` instead of forming the inverse explicitly, which
    is cheaper and numerically more accurate.

    Parameters
    ----------
    A : numpy.ndarray, shape (n_samples, n_features)
        Design matrix.
    theta : float
        Regularization strength added to every diagonal entry of ``A.T @ A``
        (this includes the column used for the intercept, if there is one).

    Returns
    -------
    numpy.ndarray, shape (n_features, n_samples)
        Matrix ``P`` such that ``P @ y`` is the regularized least-squares
        solution.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``A.T @ A + theta * I`` is singular.
    """
    gram = np.matmul(A.T, A) + theta * np.identity(A.shape[1])
    # Solve gram @ P = A.T for P, i.e. P = gram^-1 @ A.T without inverting.
    return np.linalg.solve(gram, A.T)

In [12]:
# regularized fit on the training set; theta is the regularization strength
theta = 0.5
beta_regularized = regularized_pseudo_inverse(X_tr, theta) @ y_tr

In [13]:
# evaluate the regularized fit on the held-out test set
pred_2 = X_te @ beta_regularized
mse, mae = MSE(pred_2, y_te), MAE(pred_2, y_te)
for score in (mse, mae):
    print(score)

27.4731591897
4.27189745795
