# 7.3.2 Visualización del ajuste de boosted trees en regresión

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from scipy.interpolate import griddata
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

import warnings
warnings.filterwarnings("ignore")

Link: https://youtu.be/jucweMJNkwA?si=8aRWYDPwa_rh5PB5

Link: https://blog.tensorflow.org/2019/03/how-to-train-boosted-trees-models-in-tensorflow.html

Los ensambles de árboles de regresión permiten aproximar relaciones muy complejas entre las variables de entrada y las variables de salida. En esta lección se presenta un ejemplo de regresión usando scikit-learn, el cual fue adaptado de la documentación de TensorFlow.

## 7.3.2.1 Datasets

In [None]:
# Training data: npts points sampled uniformly from the square
# [-2, 2] x [-2, 2], with target z = x * exp(-x^2 - y^2).
npts = 5000
np.random.seed(0)  # fixed seed so every run samples the same points

# x is drawn before y on purpose: the draw order determines the sampled values.
x = np.random.uniform(low=-2, high=2, size=npts)
y = np.random.uniform(low=-2, high=2, size=npts)
z = x * np.exp(-(x ** 2) - (y ** 2))

df = pd.DataFrame(dict(x=x, y=y, z=z))

NUM_EXAMPLES = len(df)

In [None]:
#
# Prediction data: a regular grid on which the fitted surfaces are evaluated
#

# The axes are plain 1-D arrays. (They used to be wrapped in one-element tuples
# by stray trailing commas, which only worked because np.meshgrid flattens its
# inputs.) The y axis runs from -2.1 to 2.1, slightly wider than the
# [-2, 2] range the training points are sampled from.
xi = np.linspace(-2.0, 2.0, 200)
yi = np.linspace(-2.1, 2.1, 210)
xi, yi = np.meshgrid(xi, yi)

# One row per grid node, in the row-major order produced by flatten().
df_predict = pd.DataFrame(
    {
        "x": xi.flatten(),
        "y": yi.flatten(),
    }
)

# Shape used to fold flat model predictions back into the grid layout.
predict_shape = xi.shape

## 7.3.2.2 Gráfica de contorno

In [None]:
def plot_contour(x, y, z, **kwargs):
    """Draw a filled contour plot of ``z`` over the grid ``(x, y)``.

    Opens a new 10x8 inch figure, draws 15 black contour lines, overlays a
    15-level filled contour plot, attaches a colorbar to the filled contours
    and limits both axes to [-2, 2]. The figure is left as the current pyplot
    figure so callers can keep adding artists (title, scatter, text).

    Parameters
    ----------
    x, y, z
        Passed unchanged to ``plt.contour`` and ``plt.contourf``.
    **kwargs
        Extra keyword arguments forwarded to ``plt.contourf``. ``cmap``
        defaults to ``"RdBu_r"`` and can be overridden here.

    Returns
    -------
    None
    """
    plt.figure(figsize=(10, 8))

    plt.contour(x, y, z, 15, linewidths=1.0, colors="k")

    # Caller-supplied options win over the default colormap.
    filled_options = {"cmap": "RdBu_r", **kwargs}
    filled = plt.contourf(x, y, z, 15, **filled_options)

    # Pass the mappable explicitly instead of relying on pyplot's notion of
    # the "current" image.
    plt.colorbar(filled)
    plt.xlim(-2, 2)
    plt.ylim(-2, 2)

In [None]:
# Linearly interpolate the scattered training targets onto the prediction grid
# so the training surface can be drawn with the same contour helper.
zi = griddata(
    points=df[["x", "y"]].values,
    values=df["z"],
    xi=(xi, yi),
    method="linear",
)

plot_contour(xi, yi, zi)
# Overlay the sampled training points on top of the interpolated surface.
plt.scatter(df["x"], df["y"], marker=".")
plt.title("Contour on training data");

## 7.3.2.3 Regresión lineal

In [None]:
# Baseline: a linear model fitted on (x, y) -> z. fit() returns the estimator
# itself, so construction and fitting are chained.
linearRegression = LinearRegression(fit_intercept=True, n_jobs=-1).fit(
    df[["x", "y"]].values, df["z"]
)

# Predict on the flat grid rows, then fold the result back into the grid shape.
plot_contour(
    xi,
    yi,
    linearRegression.predict(df_predict[["x", "y"]].values).reshape(predict_shape),
)

## 7.3.2.4 Gradient Boosting Regressor

In [None]:
def create_gbt(n_estimators):
    """Fit a gradient-boosted regressor with ``n_estimators`` trees and plot it.

    The model is trained on the module-level ``df`` (features ``x``/``y``,
    target ``z``), evaluated on ``df_predict`` and its predicted surface is
    drawn with ``plot_contour``. A text label with the number of trees is
    added at data coordinates (-2.0, 2.1). Nothing is returned.
    """
    # Every hyper-parameter is spelled out; only the ensemble size varies
    # between calls.
    hyperparams = dict(
        loss="squared_error",
        learning_rate=0.1,
        n_estimators=n_estimators,
        subsample=1.0,
        criterion="friedman_mse",
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_depth=6,
        min_impurity_decrease=0.0,
        init=None,
        random_state=12345,
        max_features=None,
        alpha=0.9,
        verbose=0,
        max_leaf_nodes=None,
        warm_start=False,
        validation_fraction=0.1,
        n_iter_no_change=None,
        tol=0.0001,
    )
    model = GradientBoostingRegressor(**hyperparams)

    train_features = df[["x", "y"]].values
    model.fit(train_features, df["z"])

    # Predict on the flat grid rows, then restore the grid layout for plotting.
    grid_features = df_predict[["x", "y"]].values
    surface = model.predict(grid_features).reshape(predict_shape)
    plot_contour(xi, yi, surface)

    label = "# trees: {}".format(n_estimators)
    plt.text(-2.0, 2.1, label, size=20)

In [None]:
# Fit and plot the boosted ensemble with a single tree.
create_gbt(1)

In [None]:
# Fit and plot the boosted ensemble with 5 trees.
create_gbt(5)

In [None]:
# Fit and plot the boosted ensemble with 10 trees.
create_gbt(10)

In [None]:
# Fit and plot the boosted ensemble with 20 trees.
create_gbt(20)

In [None]:
# Fit and plot the boosted ensemble with 50 trees.
create_gbt(50)

## 7.3.2.5 Parte 2: Error de predicción sobre los datos de entrenamiento

In [None]:
def create_gbt2(n_estimators):
    """Fit a gradient-boosted regressor and store its training predictions.

    The model is trained on the module-level ``df`` (features ``x``/``y``,
    target ``z``) and then predicts on those same rows.

    Side effect: the predictions are written into ``df`` as the column
    ``"predicciones"`` (overwriting it if it already exists), and that same
    ``df`` object is returned, not a copy.
    """
    # Every hyper-parameter is spelled out; only the ensemble size varies
    # between calls.
    hyperparams = dict(
        loss="squared_error",
        learning_rate=0.1,
        n_estimators=n_estimators,
        subsample=1.0,
        criterion="friedman_mse",
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_depth=6,
        min_impurity_decrease=0.0,
        init=None,
        random_state=12345,
        max_features=None,
        alpha=0.9,
        verbose=0,
        max_leaf_nodes=None,
        warm_start=False,
        validation_fraction=0.1,
        n_iter_no_change=None,
        tol=0.0001,
    )
    model = GradientBoostingRegressor(**hyperparams)

    train_features = df[["x", "y"]].values
    model.fit(train_features, df["z"])

    # In-sample predictions: the model is evaluated on the rows it was fitted on.
    df["predicciones"] = model.predict(df[["x", "y"]].values)

    return df

In [None]:
# create_gbt2 writes the "predicciones" column into the global df and returns
# that same DataFrame, so df2 and df refer to one object (not a copy).
df2 = create_gbt2(2)
df2.head()

In [None]:
# The predictions were made on the same rows the model was fitted on, so this
# is the training (in-sample) error; no held-out test set exists here.
mse = mean_squared_error(df2["z"], df2["predicciones"])
print(f"Mean Squared Error on the training set: {mse}")

In [None]:
# Actual vs. predicted target on the training rows. The rows are in random
# order, so only markers are drawn: joining consecutive rows with a line would
# produce a meaningless scribble.
plt.plot(
    df2["z"],
    df2["predicciones"],
    marker="o",
    linestyle="none",
    color="b",
    label="Datos de entrenamiento",
)

# Labels describe what is actually plotted.
plt.title("Valores reales vs. predicciones")
plt.xlabel("z real")
plt.ylabel("z predicho")
plt.legend()

plt.show()

## 7.3.2.6 Parte 3: Carga del dataset Titanic

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
# TensorFlow 2.x runs eagerly by default and no longer exposes the top-level
# tf.enable_eager_execution(), so calling it unconditionally raises
# AttributeError there. Only switch eager mode on when it is off (TensorFlow
# 1.x), using the compat entry point.
if not tf.executing_eagerly():
    tf.compat.v1.enable_eager_execution()

# Load dataset (requires network access to the public TensorFlow bucket).
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')

# pop() removes the label column from each feature frame and returns it.
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')

In [None]:
# NOTE(review): looks like a leftover "ran to the end" marker — confirm it is
# still needed.
print('ok_')