# Regression

### Dataset

In [1]:
from sklearn.datasets import load_boston

dataset = load_boston()

In [2]:
for key, value in dataset.items():
    print(key, len(value))

data 506
target 506
feature_names 13
DESCR 2379


In [3]:
print(dataset['DESCR'])

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

In [4]:
for index, datum in enumerate(zip(dataset['data'], dataset['target'])):
    if index in [1, 11, 111]:
        print(datum)

(array([2.7310e-02, 0.0000e+00, 7.0700e+00, 0.0000e+00, 4.6900e-01,
       6.4210e+00, 7.8900e+01, 4.9671e+00, 2.0000e+00, 2.4200e+02,
       1.7800e+01, 3.9690e+02, 9.1400e+00]), 21.6)
(array([1.1747e-01, 1.2500e+01, 7.8700e+00, 0.0000e+00, 5.2400e-01,
       6.0090e+00, 8.2900e+01, 6.2267e+00, 5.0000e+00, 3.1100e+02,
       1.5200e+01, 3.9690e+02, 1.3270e+01]), 18.9)
(array([1.0084e-01, 0.0000e+00, 1.0010e+01, 0.0000e+00, 5.4700e-01,
       6.7150e+00, 8.1600e+01, 2.6775e+00, 6.0000e+00, 4.3200e+02,
       1.7800e+01, 3.9559e+02, 1.0160e+01]), 22.8)


### Data splitting

In [5]:
import numpy as np
from sklearn.model_selection import train_test_split

X = np.asarray(dataset['data'])
y = np.asarray(dataset['target'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

### Lasso

In [6]:
#L1
from sklearn.linear_model import Lasso

#Models
Lasso_01 = Lasso(alpha=0.1)
Lasso_001 = Lasso(alpha=0.01)

#Fitting
Lasso_01.fit(X_train, y_train)
Lasso_001.fit(X_train, y_train)

#Score
print("Lasso (a=0.1): {:.5f}".format(Lasso_01.score(X_test, y_test)))
print("Lasso (a=0.01): {:.5f}".format(Lasso_001.score(X_test, y_test)))

Lasso (a=0.1): 0.71409
Lasso (a=0.01): 0.73324


In [7]:
#Let's find the important features!
Lasso_feature_selection = Lasso(alpha=1)
Lasso_feature_selection.fit(X_train, y_train)

np.set_printoptions(precision=2)
abs_feat = np.abs(Lasso_feature_selection.coef_)
perc_95 = np.percentile(Lasso_feature_selection.coef_, 95)

print("Coefficients list: \n{}\n".format(Lasso_feature_selection.coef_))
print("Indexes of important features: \n{}".format(np.argwhere(abs_feat > perc_95).flatten()))


Coefficients list: 
[-0.02  0.05 -0.    0.   -0.    1.    0.02 -0.6   0.23 -0.02 -0.62  0.01
 -0.74]

Indexes of important features: 
[ 5  7 10 12]


In [8]:
# 5f: average number of rooms per dwelling
# 7f: weighted distances to five Boston employment centres
# 10f: pupil-teacher ratio by town
# 12f: % lower status of the population
New_Lasso = Lasso(alpha=0.1)
New_Lasso.fit(X_train[:, (5,7,10,12)], y_train)
print("Lasso (using 4 important features): {:.5f}".format(New_Lasso.score(X_test[:, (5,7,10,12)], y_test)))
print("Lasso (using all features): {:.5f}".format(Lasso_01.score(X_test, y_test)))

Lasso (using 4 important features): 0.68186
Lasso (using all features): 0.71409


### Ridge Regression

In [9]:
#L2
from sklearn.linear_model import Ridge

#Models
Ridge_all = Ridge(alpha=0.1)
Ridge_part = Ridge(alpha=0.1)

#Fitting
Ridge_all.fit(X_train, y_train)
Ridge_part.fit(X_train[:, (5,7,10,12)], y_train)

#Score
print("Ridge (using 4 important features): {:.5f}".format(Ridge_part.score(X_test[:, (5,7,10,12)], y_test)))
print("Ridge (using all features): {:.5f}".format(Ridge_all.score(X_test, y_test)))

Ridge (using 4 important features): 0.68311
Ridge (using all features): 0.73414


### ElasticNet

In [10]:
#L1 & L2 linear combination
from sklearn.linear_model import ElasticNet

#Models
EN_all = ElasticNet(alpha=0.1)
EN_part = ElasticNet(alpha=0.1)

#Fitting
EN_all.fit(X_train, y_train)
EN_part.fit(X_train[:, (5,7,10,12)], y_train)

#Score
print("ElasticNet (using 4 important features): {:.5f}".format(EN_part.score(X_test[:, (5,7,10,12)], y_test)))
print("ElasticNet (using all features): {:.5f}".format(EN_all.score(X_test, y_test)))


ElasticNet (using 4 important features): 0.68071
ElasticNet (using all features): 0.71548


### SVM

In [11]:
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

#Models
SVM_rbf_all = make_pipeline(StandardScaler(), SVR(kernel='rbf'))
SVM_rbf_part = make_pipeline(StandardScaler(), SVR(kernel='rbf'))

SVM_linear_all = make_pipeline(StandardScaler(), SVR(kernel='linear'))
SVM_linear_part = make_pipeline(StandardScaler(), SVR(kernel='linear'))

#Fitting
SVM_rbf_all.fit(X_train, y_train)
SVM_rbf_part.fit(X_train[:, (5,7,10,12)], y_train)

SVM_linear_all.fit(X_train, y_train)
SVM_linear_part.fit(X_train[:, (5,7,10,12)], y_train)

#Score
print("SVM Linear (4 f.): {:.5f}".format(SVM_linear_part.score(X_test[:, (5,7,10,12)], y_test)))
print("SVM Linear (all f.): {:.5f}\n".format(SVM_linear_all.score(X_test, y_test)))

print("SVM RBF (4 f.): {:.5f}".format(SVM_rbf_part.score(X_test[:, (5,7,10,12)], y_test)))
print("SVM RBF (all f.): {:.5f}".format(SVM_linear_all.score(X_test, y_test)))


SVM Linear (4 f.): 0.67233
SVM Linear (all f.): 0.69492

SVM RBF (4 f.): 0.67926
SVM RBF (all f.): 0.69492


### Boosted Decision Tree

In [12]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor

#Models
Ada_all = AdaBoostRegressor(DecisionTreeRegressor(max_depth=5), n_estimators=50)
Ada_part = AdaBoostRegressor(DecisionTreeRegressor(max_depth=5), n_estimators=50)

#Fitting
Ada_all.fit(X_train, y_train)
Ada_part.fit(X_train[:, (5,7,10,12)], y_train)

#Score
print("Boosted Decision Tree (4 f.): {:.5f}".format(Ada_part.score(X_test[:, (5,7,10,12)], y_test)))
print("Boosted Decision Tree (all f.): {:.5f}\n".format(Ada_all.score(X_test, y_test)))

Boosted Decision Tree (4 f.): 0.85291
Boosted Decision Tree (all f.): 0.82870



### Random Forest

In [13]:
from sklearn.ensemble import RandomForestRegressor

#Models
RF_all = RandomForestRegressor(bootstrap=True, n_estimators=50)
RF_part = RandomForestRegressor(bootstrap=True, n_estimators=50)

#Fitting
RF_all.fit(X_train, y_train)
RF_part.fit(X_train[:, (5,7,10,12)], y_train)

#Score
print("Random Forest (4 f.): {:.5f}".format(RF_part.score(X_test[:, (5,7,10,12)], y_test)))
print("Random Forest (all f.): {:.5f}\n".format(RF_all.score(X_test, y_test)))

Random Forest (4 f.): 0.81941
Random Forest (all f.): 0.78890



### Gradient Boosting

In [14]:
from sklearn.ensemble import GradientBoostingRegressor

#Models
GB_all = GradientBoostingRegressor(n_estimators=50)
GB_part = GradientBoostingRegressor(n_estimators=50)

#Fitting
GB_all.fit(X_train, y_train)
GB_part.fit(X_train[:, (5,7,10,12)], y_train)

#Score
print("Gradient Boosting (4 f.): {:.5f}".format(GB_part.score(X_test[:, (5,7,10,12)], y_test)))
print("Gradient Boosting (all f.): {:.5f}\n".format(GB_all.score(X_test, y_test)))

Gradient Boosting (4 f.): 0.86142
Gradient Boosting (all f.): 0.87053

