In [55]:
import numpy as np
import pandas as pd
import sklearn.model_selection as skm

from ISLP import load_data
from sklearn.model_selection import train_test_split
from matplotlib.pyplot import subplots
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_squared_error, r2_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LassoCV, RidgeCV

In [56]:
Boston = load_data('Boston')
Boston

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,6.48,22.0


In [57]:
y = Boston['crim']
X = Boston[Boston.columns.drop('crim')]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [58]:
X_train.shape

(404, 12)

### (a)

#### Lasso

In [59]:


lasso = LassoCV(cv=10, random_state=1, alphas=range(1,100))
lasso.fit(X_train, y_train)
test_score = lasso.score(X_test, y_test)
y_predicted = lasso.predict(X_test)

print(f"R-squired from lasso: {test_score:.2f}")
print(f"Test MSE from lasso: {mean_squared_error(y_test, y_predicted):.3}")



R-squired from lasso: 0.51
Test MSE from lasso: 27.0


#### Ridge

In [60]:
ridge = RidgeCV(cv=10, alphas=range(1,100))
ridge.fit(X_train, y_train)
test_score = ridge.score(X_test, y_test)
y_predicted = ridge.predict(X_test)

print(f"R-squired from rigde: {test_score:.3f}")
print(f"Test MSE: {mean_squared_error(y_test, y_predicted):.4}")

R-squired from rigde: 0.544
Test MSE: 25.12


#### Best subset selection

In [66]:
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.linear_model import LinearRegression

best_col_names, best_score, mse = any, 0, 0

for i in range(1, len(X.columns)):
    select = SelectKBest(score_func=f_regression, k=i)    
    select.fit_transform(X_train, y_train)            
    filter = select.get_support()    
    
    # names of the best i features
    col_names = X_train.columns[filter]        
    X_train_short = X_train[col_names]
    X_test_short = X_test[col_names]

    reg = LinearRegression()
    model = reg.fit(X_train_short, y_train)        
    y_pred_test = model.predict(X_test_short)        
    score = r2_score(y_test, y_pred_test)

    if score > best_score:
        best_score = score
        best_col_names = col_names
        mse = mean_squared_error(y_test, y_pred_test)

    
print(f"R-squired from best subset selection: {best_score:.3f}")
print(f"Test MSE: {mse:.4}")
print('Selected predictors: ', best_col_names.to_list())

R-squired from best subset selection: 0.545
Test MSE: 25.08
Selected predictors:  ['indus', 'rad', 'tax', 'lstat']


### PCA

In [92]:
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline

for n in range(1, 13):
    pca = PCA(n_components=3).fit(X_train)

    pcr = make_pipeline(StandardScaler(), PCA(n_components=n), LinearRegression())
    pcr.fit(X_train, y_train)

    test_score = pcr.score(X_test, y_test)
    y_predicted = pcr.predict(X_test)
    mse = mean_squared_error(y_test, y_predicted)

    print(f"n = {n}: R-squired {test_score:.3f}, MSE {mse}")    

n = 1: R-squired 0.386, MSE 33.81886443351021
n = 2: R-squired 0.390, MSE 33.585831964087305
n = 3: R-squired 0.430, MSE 31.413042014716456
n = 4: R-squired 0.438, MSE 30.963739154647165
n = 5: R-squired 0.456, MSE 29.97867658533334
n = 6: R-squired 0.469, MSE 29.235434475018565
n = 7: R-squired 0.530, MSE 25.872810555360402
n = 8: R-squired 0.536, MSE 25.552465065056833
n = 9: R-squired 0.537, MSE 25.52769084724572
n = 10: R-squired 0.540, MSE 25.345619696028162
n = 11: R-squired 0.524, MSE 26.21027016430195
n = 12: R-squired 0.538, MSE 25.468734105594564


All tested methods gives R-squired in the range 0.51 - 0.54.  

Best results for subset selection, worst for PCA

### (b)

In (a) the Ridge and the Lasso already used with cv. Rigde and best features selection has the best results.

### (c)

From the best features selection we know, that the best features are: **['indus', 'rad', 'tax', 'lstat']**

We can obtain R-squired of 0.55 with those predictors.