In [1]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_curve, plot_roc_curve, auc, roc_auc_score, classification_report, confusion_matrix, plot_confusion_matrix
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import utils
import preprocessing

In [2]:
# Build the modelling frame in three explicit stages: fetch the data,
# run the project's feature engineering on it, then apply the base
# preprocessing (part 1). All three helpers live in the local `utils` /
# `preprocessing` modules.
raw_data = utils.get_data()
engineered_data = utils.feature_engineering(raw_data)
df = preprocessing.preprocessing_base_parte_1(engineered_data)

In [6]:
# Hyper-parameter grid explored by the grid search.
# NOTE(review): 'deviance' was renamed to 'log_loss' in newer scikit-learn
# releases; it is kept here because the notebook also relies on the older
# plot_roc_curve / plot_confusion_matrix helpers. Confirm the pinned version.
params = {
    'max_depth': range(1, 8),
    'loss': ('deviance', 'exponential'),
    'max_features': ('sqrt', 'log2'),
    'learning_rate': [0.005, 0.01, 0.05, 0.075, 0.1, 0.25, 0.5],
}

# With max_features set to 'sqrt' / 'log2' the booster draws a random subset
# of features at every split, so an unseeded estimator gives different scores
# (and possibly different best_params_) on every run. Seed it with the same
# value used for the train/test split so the search is reproducible.
model = GradientBoostingClassifier(random_state=112)

In [7]:
# Exhaustive search over `params`, ranking candidates by ROC AUC and using
# every available core (n_jobs=-1).
gscv = GridSearchCV(
    estimator=model,
    param_grid=params,
    scoring='roc_auc',
    n_jobs=-1,
)

In [8]:
# Name of the column the classifier predicts.
TARGET_COL = 'tiene_alto_valor_adquisitivo'

# Hold out a test set with a fixed seed so the split is reproducible.
# `columns=` replaces the positional `axis=1` argument, which pandas
# deprecated in 1.3 and rejects from 2.0 onwards.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=TARGET_COL),
    df[TARGET_COL],
    random_state=112,
)

In [None]:
# Run the full grid search on the training split.
gscv.fit(X_train, y_train)

# Hold-out ROC AUC, computed from the probability in column index 1 of
# predict_proba (the second class in the estimator's class ordering,
# presumably the positive class).
test_probabilities = gscv.predict_proba(X_test)
score = roc_auc_score(y_test, test_probabilities[:, 1])

In [None]:
# Show the hyper-parameter combination selected by the grid search.
gscv.best_params_

In [None]:
# Show the ROC AUC obtained on the held-out test split.
score

In [None]:
# Per-class precision / recall / F1 on the test split. The report is a
# pre-formatted string, so it is printed rather than displayed.
y_pred = gscv.predict(X_test)
print(classification_report(y_test, y_pred))

In [None]:
# Confusion matrix of the tuned model on the test split.
fig, ax = plt.subplots(figsize=(15, 7))
ax.grid(False)  # grid lines would cut through the matrix cells
plot_confusion_matrix(
    gscv,
    X_test,
    y_test,
    cmap=plt.cm.Blues,
    # Take the tick labels from the fitted estimator so they always follow
    # the order in which the matrix rows/columns are laid out. The previous
    # hard-coded ['1', '0'] was reversed relative to scikit-learn's sorted
    # class order, so the axes were mislabelled.
    display_labels=gscv.classes_,
    ax=ax,
)
plt.show()

In [None]:
# ROC curve of the tuned model on the held-out test split.
plot_roc_curve(estimator=gscv, X=X_test, y=y_test)