# Data Science Bootcamp
# <center> **Aula 20b -- Random Forests** </center>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_iris

from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.tree import plot_tree
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

## Load data

In [None]:
# Load the iris dataset; X is the feature matrix and y the class labels.
iris = load_iris()
X, y = iris['data'], iris['target']

In [None]:
# One fixed colour per class label, so the scatter plot is colour-coded by class.
colors = {0:'red', 1:'blue', 2:'green'}
# c holds the colour of every sample, in the same order as y.
c = [colors[label] for label in y]

In [None]:
# Column indices of the two features of X used for the 2-D scatter plots,
# the two-feature models and the decision-region plots below.
u = 2
v = 3

In [None]:
# Scatter plot of feature u against feature v, coloured per sample with the list c.
plt.figure()
plt.scatter(x=X[:, u], y=X[:, v], c=c)
plt.show()

In [None]:
# Split the samples by class (0, 1, 2).
# The indices are 1-D (np.flatnonzero), so every X_k is a plain 2-D array of
# rows of X. Indexing with np.argwhere output would give an extra axis that
# then has to be flattened with a hard-coded reshape(-1, 4).
y0 = np.flatnonzero(y == 0)
y1 = np.flatnonzero(y == 1)
y2 = np.flatnonzero(y == 2)

X0 = X[y0]
X1 = X[y1]
X2 = X[y2]

# Binary problem with classes 0 and 1 (labelled 0 and 1).
# The label vectors are sized from the data rather than assuming 50 samples per class.
X01 = np.vstack([X0, X1])
y01 = np.concatenate([np.zeros(len(X0), dtype=int), np.ones(len(X1), dtype=int)])

# Binary problem with classes 1 and 2, relabelled as 0 (class 1) and 1 (class 2).
X12 = np.vstack([X1, X2])
y12 = np.concatenate([np.zeros(len(X1), dtype=int), np.ones(len(X2), dtype=int)])

In [None]:
# Sanity check: shapes of the full dataset and of the two binary subsets.
print(X.shape, y.shape)
print(X01.shape, y01.shape)
print(X12.shape, y12.shape)

In [None]:
# Hold out 20% of the samples for testing. The split is stratified on y and
# uses a fixed random_state so that it is reproducible between runs.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, y, train_size=0.8, stratify=y,
                                                random_state=2021)

In [None]:
# Sanity check: shapes of the train and test partitions.
print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape)

# Random Forest

## Ensemble Random Forest

In [None]:
# Random forest of 500 shallow (depth-2) trees grown on bootstrap samples.
# random_state=None means the fitted forest differs from run to run.
model = RandomForestClassifier(
    n_estimators=500,
    max_depth=2,
    criterion='gini',
    bootstrap=True,
    random_state=None,
)

In [None]:
# Fit the forest on the full dataset, using only the two features u and v.
model.fit(X[:,[u,v]], y)

In [None]:
# Display the first fitted tree of the forest.
model.estimators_[0]

In [None]:
# Draw the first three trees of the forest side by side.
# A wide figure is created explicitly, with the same size as the tree plots of
# the boosting models below, so that the three trees stay legible.
plt.figure(figsize=(20,5))
plt.subplot(1,3,1)
plot_tree(model.estimators_[0])
plt.subplot(1,3,2)
plot_tree(model.estimators_[1])
plt.subplot(1,3,3)
plot_tree(model.estimators_[2])
plt.show()

In [None]:
# Predict on the same samples the forest was fitted on (training-set predictions).
yhat = model.predict(X[:,[u,v]])
print(yhat)

In [None]:
# Confusion matrix of true labels y against the predictions yhat.
cfmatrix = confusion_matrix(y, yhat)
print(cfmatrix)

In [None]:
# Show the confusion matrix as an annotated heatmap.
plt.figure()
sns.heatmap(data=cfmatrix, annot=True)
plt.show()

In [None]:
# Classification report (scikit-learn's per-class text summary) for y against yhat.
print(classification_report(y, yhat))

In [None]:
# Grid for the decision-region plot: step h, covering the range of features
# u (horizontal) and v (vertical) with a margin of 1 on every side.
h = 0.01
x_min = X[:, u].min() - 1
x_max = X[:, u].max() + 1
y_min = X[:, v].min() - 1
y_max = X[:, v].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

In [None]:
# Classify every grid point with the forest and draw the resulting decision
# regions, with the data points overlaid and coloured by their true class.
Z = model.predict(np.column_stack([xx.ravel(), yy.ravel()])).reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(x=X[:, u], y=X[:, v], c=y, s=20, edgecolor='k')
plt.title('Random Forest (500 trees)')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.show()

## Ada Boosting

In [None]:
# AdaBoost with 500 depth-2 decision trees as weak learners; alpha is the learning rate.
alpha = 0.1
treemodel = DecisionTreeClassifier(criterion='gini', max_depth=2, random_state=None)

# The weak learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, whereas the positional form works with both old and new versions.
Adamodel = AdaBoostClassifier(treemodel, n_estimators=500,
                              learning_rate=alpha, random_state=None)

In [None]:
# Fit AdaBoost on the full dataset, using only the two features u and v.
Adamodel.fit(X[:,[u,v]], y)

In [None]:
# Display the first fitted weak learner of the AdaBoost ensemble.
Adamodel.estimators_[0]

In [None]:
# Draw the first three AdaBoost weak learners side by side on one wide figure.
plt.figure(figsize=(20, 5))

plt.subplot(131)
plot_tree(Adamodel.estimators_[0])

plt.subplot(132)
plot_tree(Adamodel.estimators_[1])

plt.subplot(133)
plot_tree(Adamodel.estimators_[2])

plt.show()

In [None]:
# Predict on the same samples AdaBoost was fitted on (training-set predictions).
yhat = Adamodel.predict(X[:,[u,v]])
print(yhat)

In [None]:
# Confusion matrix of true labels y against the AdaBoost predictions yhat.
cfmatrix = confusion_matrix(y, yhat)
print(cfmatrix)

In [None]:
# Show the AdaBoost confusion matrix as an annotated heatmap.
plt.figure()
sns.heatmap(data=cfmatrix, annot=True)
plt.show()

In [None]:
# Classification report (scikit-learn's per-class text summary) for the AdaBoost predictions.
print(classification_report(y, yhat))

In [None]:
# Grid for the decision-region plot: step h, covering the range of features
# u (horizontal) and v (vertical) with a margin of 1 on every side.
h = 0.01
x_min = X[:, u].min() - 1
x_max = X[:, u].max() + 1
y_min = X[:, v].min() - 1
y_max = X[:, v].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

In [None]:
# Classify every grid point with AdaBoost and draw the resulting decision
# regions, with the data points overlaid and coloured by their true class.
Z = Adamodel.predict(np.column_stack([xx.ravel(), yy.ravel()])).reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(x=X[:, u], y=X[:, v], c=y, s=20, edgecolor='k')
plt.title('Adaptive Boosting (500 trees)')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.show()

## Gradient Boosting

In [None]:
# Gradient boosting with 500 stages; alpha is the learning rate and each stage
# is fitted on a 90% subsample of the data (subsample=0.9).
alpha = 0.1
Gradmodel = GradientBoostingClassifier(
    n_estimators=500,
    learning_rate=alpha,
    subsample=0.9,
    random_state=None,
)

In [None]:
# Fit gradient boosting on the full dataset, using only the two features u and v.
Gradmodel.fit(X[:,[u,v]], y)

In [None]:
# Display the first entry of the first row of the fitted estimators.
# NOTE(review): per the scikit-learn docs estimators_ is indexed [stage][class],
# so this should be the first boosting stage's tree for class 0 - confirm.
Gradmodel.estimators_[0][0]

In [None]:
# Draw the first-column tree of the first three rows of Gradmodel.estimators_
# side by side on one wide figure.
plt.figure(figsize=(20, 5))

plt.subplot(131)
plot_tree(Gradmodel.estimators_[0][0])

plt.subplot(132)
plot_tree(Gradmodel.estimators_[1][0])

plt.subplot(133)
plot_tree(Gradmodel.estimators_[2][0])

plt.show()

In [None]:
# Predict on the same samples gradient boosting was fitted on (training-set predictions).
yhat = Gradmodel.predict(X[:,[u,v]])
print(yhat)

In [None]:
# Confusion matrix of true labels y against the gradient boosting predictions yhat.
cfmatrix = confusion_matrix(y, yhat)
print(cfmatrix)

In [None]:
# Show the gradient boosting confusion matrix as an annotated heatmap.
plt.figure()
sns.heatmap(data=cfmatrix, annot=True)
plt.show()

In [None]:
# Classification report (scikit-learn's per-class text summary) for the gradient boosting predictions.
print(classification_report(y, yhat))

In [None]:
# Grid for the decision-region plot: step h, covering the range of features
# u (horizontal) and v (vertical) with a margin of 1 on every side.
h = 0.01
x_min = X[:, u].min() - 1
x_max = X[:, u].max() + 1
y_min = X[:, v].min() - 1
y_max = X[:, v].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

In [None]:
# Classify every grid point with gradient boosting and draw the resulting
# decision regions, with the data points overlaid and coloured by their true class.
plt.figure(figsize=(6, 6))
Z = Gradmodel.predict(np.column_stack([xx.ravel(), yy.ravel()])).reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(x=X[:, u], y=X[:, v], c=y, s=20, edgecolor='k')
plt.title('Gradient Boosting (500 trees)')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.show()

# Comparación

In [None]:
# Hyper-parameters for the comparison models below.
alpha = 0.1  # learning rate of the two boosting models
D = 3  # maximum tree depth
N = 200  # number of estimators per ensemble

In [None]:
# Three ensembles with the same number of estimators (N) and tree depth (D),
# so that their test performance can be compared.
RF =  RandomForestClassifier(n_estimators=N, criterion='gini', max_depth=D,
                             bootstrap=True, random_state=None)

treemodel = DecisionTreeClassifier(criterion='gini', max_depth=D, random_state=None)

# The weak learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, whereas the positional form works with both old and new versions.
Ada = AdaBoostClassifier(treemodel, n_estimators=N,
                         learning_rate=alpha, random_state=None)

Grad = GradientBoostingClassifier(n_estimators=N, subsample=0.9, max_depth=D,
                                  learning_rate=alpha, random_state=None)

In [None]:
# Fit the three ensembles on the same training split (all columns of X).
RF.fit(Xtrain, Ytrain)
Ada.fit(Xtrain, Ytrain)
Grad.fit(Xtrain, Ytrain)

In [None]:
# Training curves of the two boosting models: AdaBoost's estimator errors on
# the left and the gradient boosting training score on the right.
plt.figure(figsize=(8, 3))

plt.subplot(121)
plt.title('Ada Boosting error')
plt.plot(Ada.estimator_errors_)

plt.subplot(122)
plt.title('Gradient Boosting loss')
plt.plot(Grad.train_score_)

plt.show()

In [None]:
# Predictions of each fitted ensemble on the held-out test split.
yRF, yAD, yGR = (clf.predict(Xtest) for clf in (RF, Ada, Grad))

In [None]:
# Test-set confusion matrix of each ensemble (true labels against predictions).
RFcm, ADcm, GRcm = (confusion_matrix(Ytest, pred) for pred in (yRF, yAD, yGR))

In [None]:
# Print the three test confusion matrices (random forest, AdaBoost, gradient
# boosting), separated by blank lines.
print(RFcm, '\n')
print(ADcm, '\n')
print(GRcm)

In [None]:
# Print the test-set classification report of each ensemble (random forest,
# AdaBoost, gradient boosting), separated by blank lines.
print(classification_report(Ytest, yRF), '\n')
print(classification_report(Ytest, yAD), '\n')
print(classification_report(Ytest, yGR))

## Feature importances

In [None]:
def plot_importances(model, features, modelname='Model', fsz=(5,5)):
    """Draw a horizontal bar chart of a model's feature importances.

    The bars are sorted in ascending order of importance, so bar position 0
    holds the least important feature.

    Parameters
    ----------
    model : object
        Fitted estimator exposing a ``feature_importances_`` array.
    features : sequence
        Feature names, indexed in the same order as ``feature_importances_``.
    modelname : str
        Text placed in front of "Feature Importances" in the plot title.
    fsz : tuple
        Figure size passed to ``plt.figure``.
    """
    importances = model.feature_importances_
    order = np.argsort(importances)  # indices from least to most important
    positions = range(len(order))
    labels = [features[k] for k in order]

    plt.figure(figsize=fsz)
    plt.title('{} Feature Importances'.format(modelname))
    plt.barh(positions, importances[order], color='b', align='center')
    plt.yticks(positions, labels)
    plt.xlabel('Relative Importance')
    plt.show()

In [None]:
# Feature names of the iris dataset, used as bar labels in the importance plots.
names = iris.feature_names

In [None]:
# Feature importances of the random forest fitted on the training split.
plot_importances(RF, names, modelname='Random Forest', fsz=(4,3))

In [None]:
# Feature importances of the AdaBoost model fitted on the training split.
plot_importances(Ada, names, modelname='Ada Boosting', fsz=(4,3))

In [None]:
# Feature importances of the gradient boosting model fitted on the training split.
plot_importances(Grad, names, modelname='Gradient Boosting', fsz=(4,3))