In [28]:
!pip install --upgrade scikit-learn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
import pandas as pd
import sklearn
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
import sklearn.metrics as metrics
import matplotlib as plt
import numpy as np

In [4]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor

In [5]:
housing_data = datasets.fetch_california_housing(as_frame=True)

In [6]:
print(housing_data.feature_names)
print(housing_data.target)
print(housing_data.data.head())

['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
0        4.526
1        3.585
2        3.521
3        3.413
4        3.422
         ...  
20635    0.781
20636    0.771
20637    0.923
20638    0.847
20639    0.894
Name: MedHouseVal, Length: 20640, dtype: float64
   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  
0    -122.23  
1    -122.22  
2    -122.24  
3    -122.25  
4    -122.25  


In [7]:
X_train, X_test, y_train, y_test = train_test_split(housing_data.data, housing_data.target, test_size=0.2,random_state=109)

print(len(X_train))

print(len(X_test))

16512
4128


In [8]:
steps = [('scaler', StandardScaler()), ('SVM', SVR())]
#StandardScaler() Standardizes features by removing the mean and scaling to unit variance
#SVC is an SVM written in C

from sklearn.pipeline import Pipeline
pipeline = Pipeline(steps) # define the pipeline object


In [11]:
parameters = {'SVM__C':[0.1, 1], 'SVM__gamma': [0.01, 0.1]}
grid = GridSearchCV(pipeline, param_grid=parameters, cv=5)

In [12]:
grid.fit(X_train, y_train)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('scaler', StandardScaler()),
                                       ('SVM', SVR())]),
             param_grid={'SVM__C': [0.1, 1], 'SVM__gamma': [0.01, 0.1]})

In [13]:
housing_predictions = grid.predict(X_test)
grid_mse = metrics.mean_squared_error(y_test, housing_predictions)

grid_mse = np.sqrt(grid_mse)

grid_mse

0.60319920708803

In [14]:
grid.best_params_

{'SVM__C': 1, 'SVM__gamma': 0.1}

In [50]:
parameters = {'SVM__C':[1], 'SVM__gamma': [0.1]}
grid = GridSearchCV(pipeline, param_grid=parameters, cv=5)
grid.fit(X_train, y_train)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('scaler', StandardScaler()),
                                       ('SVM', SVR())]),
             param_grid={'SVM__C': [1], 'SVM__gamma': [0.1]})

In [15]:
grad_b_tree_1 = GradientBoostingRegressor(random_state=0, n_estimators=3, learning_rate=1, max_depth=6)

grad_b_tree_1.fit(X_train, y_train)

GradientBoostingRegressor(learning_rate=1, max_depth=6, n_estimators=3,
                          random_state=0)

In [16]:
housing_predictions = grad_b_tree_1.predict(X_test)

grad_b_tree_1_mse = metrics.mean_squared_error(y_test, housing_predictions)



grad_b_tree_1_mse = np.sqrt(grad_b_tree_1_mse)


grad_b_tree_1_mse

0.5778133468835156

In [17]:
grad_b_tree_3 = GradientBoostingRegressor(random_state=0, n_estimators=500, learning_rate=0.02, max_depth=8)

grad_b_tree_3.fit(X_train, y_train)

GradientBoostingRegressor(learning_rate=0.02, max_depth=8, n_estimators=500,
                          random_state=0)

In [18]:
housing_predictions = grad_b_tree_3.predict(X_test)

grad_b_tree_3_mse = metrics.mean_squared_error(y_test, housing_predictions)



grad_b_tree_3_mse = np.sqrt(grad_b_tree_3_mse)


grad_b_tree_3_mse

0.46278899921044225

In [20]:
grad_b_tree_3 = GradientBoostingRegressor(random_state=0, n_estimators=500, learning_rate=0.02, max_depth=12)

grad_b_tree_3.fit(X_train, y_train)

GradientBoostingRegressor(learning_rate=0.02, max_depth=12, n_estimators=500,
                          random_state=0)

In [21]:
housing_predictions = grad_b_tree_3.predict(X_test)

grad_b_tree_3_mse = metrics.mean_squared_error(y_test, housing_predictions)



grad_b_tree_3_mse = np.sqrt(grad_b_tree_3_mse)


grad_b_tree_3_mse

0.4876974925974807

In [22]:
grad_b_tree_3 = GradientBoostingRegressor(random_state=0, n_estimators=500, learning_rate=0.02, max_depth=10)

grad_b_tree_3.fit(X_train, y_train)

GradientBoostingRegressor(learning_rate=0.02, max_depth=10, n_estimators=500,
                          random_state=0)

In [23]:
housing_predictions = grad_b_tree_3.predict(X_test)

grad_b_tree_3_mse = metrics.mean_squared_error(y_test, housing_predictions)



grad_b_tree_3_mse = np.sqrt(grad_b_tree_3_mse)


grad_b_tree_3_mse

0.4737588827298426

In [52]:
ada_1 = AdaBoostRegressor(learning_rate=0.01, n_estimators=5000)

ada_1.fit(X_train, y_train)

AdaBoostRegressor(learning_rate=0.01, n_estimators=5000)

In [53]:
housing_predictions = ada_1.predict(X_test)

ada_1_mse = metrics.mean_squared_error(y_test, housing_predictions)



ada_1_mse = np.sqrt(ada_1_mse)


ada_1_mse

0.780451492745576

In [40]:
from sklearn.linear_model import LassoCV, RidgeCV, ElasticNetCV

In [41]:
Ridge_reg = RidgeCV()
Ridge_reg.fit(X_train, y_train)

RidgeCV(alphas=array([ 0.1,  1. , 10. ]))

In [42]:
housing_predictions = Ridge_reg.predict(X_test)

Ridge_reg_mse = metrics.mean_squared_error(y_test, housing_predictions)



Ridge_reg_mse = np.sqrt(Ridge_reg_mse)


Ridge_reg_mse

0.7368936995791103

In [46]:
Elastic_reg = ElasticNetCV()
Elastic_reg.fit(X_train, y_train)

ElasticNetCV()

In [47]:
housing_predictions = Elastic_reg.predict(X_test)

Elastic_reg_mse = metrics.mean_squared_error(y_test, housing_predictions)



Elastic_reg_mse = np.sqrt(Elastic_reg_mse)


Elastic_reg_mse

0.7539903938255673

In [55]:
from sklearn.ensemble import StackingRegressor

print(X_train.shape)
print(y_train.shape)
estimators = [('tree', grad_b_tree_3), ('svm1', grid), ('ada1', ada_1)]
clf = StackingRegressor(estimators=estimators)
clf.fit(X_train, y_train)



(16512, 8)
(16512,)


StackingRegressor(estimators=[('tree',
                               GradientBoostingRegressor(learning_rate=0.02,
                                                         max_depth=8,
                                                         n_estimators=500,
                                                         random_state=0)),
                              ('svm1',
                               GridSearchCV(cv=5,
                                            estimator=Pipeline(steps=[('scaler',
                                                                       StandardScaler()),
                                                                      ('SVM',
                                                                       SVR())]),
                                            param_grid={'SVM__C': [1],
                                                        'SVM__gamma': [0.1]})),
                              ('ada1',
                               AdaBoostRegressor(learni

In [56]:
housing_predictions = clf.predict(X_test)

clf_mse = metrics.mean_squared_error(y_test, housing_predictions)



clf_mse = np.sqrt(clf_mse)


clf_mse

#0.46256 mse score with just grad_b_tree 1 and 3 

0.46227134831820116