In [1]:
import pandas as pd

### Loading Dataset

In [2]:
# Read the preprocessed insurance table from disk into a DataFrame.
insurance_data = pd.read_csv(filepath_or_buffer='datasets/insurance_processed.csv')

# Preview 10 randomly drawn rows (unseeded, so the rows shown differ between runs).
insurance_data.sample(n=10)

Unnamed: 0,age,bmi,children,region,charges,sex_female,sex_male,smoker_no,smoker_yes
829,39,21.85,1,1,6117.4945,0,1,1,0
1022,47,36.08,1,2,42211.1382,0,1,0,1
63,28,25.935,1,1,4133.64165,1,0,1,0
721,53,36.6,3,3,11264.541,0,1,1,0
782,51,35.97,1,2,9386.1613,0,1,1,0
506,22,31.35,1,1,2643.2685,0,1,1,0
248,19,20.9,1,3,1832.094,0,1,1,0
763,27,26.03,0,0,3070.8087,0,1,1,0
681,19,20.3,0,3,1242.26,0,1,1,0
847,23,50.38,1,2,2438.0552,0,1,1,0


In [3]:
# Feature matrix: every column except the regression target 'charges'.
X = insurance_data.drop(columns='charges')

# Target vector: the 'charges' column on its own.
Y = insurance_data['charges']

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# partition -- and therefore every score computed from it -- identical on each
# Restart & Run All; outputs recorded from an earlier unseeded run will differ
# until the notebook is re-executed.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

##### Fit a DecisionTreeRegressor to the training set

In [5]:
from sklearn.tree import DecisionTreeRegressor

In [6]:
# Stage 1: a shallow tree (depth capped at 3) fitted directly to the training targets.
tree_reg1 = DecisionTreeRegressor(max_depth=3)
tree_reg1.fit(X=x_train, y=y_train)

DecisionTreeRegressor(max_depth=3)

##### Now train a second DecisionTreeRegressor on the residual errors made by the first predictor

In [7]:
# Residuals left by stage 1 on the training set; these become the targets for stage 2.
stage1_fit = tree_reg1.predict(x_train)
y2 = y_train - stage1_fit

In [8]:
# Stage 2: same tree configuration, but trained on the stage-1 residuals (y2).
tree_reg2 = DecisionTreeRegressor(max_depth=3)
tree_reg2.fit(X=x_train, y=y2)

DecisionTreeRegressor(max_depth=3)

##### Now we train a third regressor on the residual errors made by the second predictor

In [9]:
# Residuals still unexplained after stage 2; these become the targets for stage 3.
stage2_fit = tree_reg2.predict(x_train)
y3 = y2 - stage2_fit

In [10]:
# Stage 3: same tree configuration, trained on the stage-2 residuals (y3).
tree_reg3 = DecisionTreeRegressor(max_depth=3)
tree_reg3.fit(X=x_train, y=y3)

DecisionTreeRegressor(max_depth=3)

##### Now we have an ensemble containing three trees. It can make predictions on a new instance simply by adding up the predictions of all three trees.

In [11]:
# The ensemble's prediction on the held-out rows is the element-wise sum of the
# three stage predictions, accumulated in stage order.
ensemble_trees = (tree_reg1, tree_reg2, tree_reg3)
stage_predictions = [member.predict(x_test) for member in ensemble_trees]
y_pred = sum(stage_predictions)

In [12]:
from sklearn.metrics import r2_score

# R^2 of the hand-built three-tree ensemble on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.8072394014855426

### GradientBoostingRegressor
https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html

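The following code creates the same ensemble as the one we built above. With `n_estimators=3`, `max_depth=3` and `learning_rate=1.0`, each of the three trees is fitted to the residuals of the previous stage and its prediction is added at full weight.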

In [13]:
from sklearn.ensemble import GradientBoostingRegressor

In [14]:
# Three boosting stages of depth-3 trees, each added at full weight
# (learning_rate=1.0), mirroring the three manually chained trees.
gbr = GradientBoostingRegressor(
    n_estimators=3,
    learning_rate=1.0,
    max_depth=3,
)

gbr.fit(X=x_train, y=y_train)

GradientBoostingRegressor(learning_rate=1.0, n_estimators=3)

In [15]:
# Predict on the test split with the boosted model. Note that y_pred is
# reassigned here, replacing the manual ensemble's predictions.
y_pred = gbr.predict(X=x_test)

# R^2 of the GradientBoostingRegressor on the same test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.8072394014855427