In [28]:
import xgboost as xgb
from bayes_opt import BayesianOptimization
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import make_scorer, accuracy_score
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [3]:
# Load the pre-cleaned dataset (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='./dados/cleaned_dataset.csv')

In [7]:
# Remove the 'Id' column; it is not one of the model features selected below.
# errors='ignore' keeps this cell idempotent: re-running it after 'Id' has
# already been dropped is a no-op instead of raising KeyError.
df = df.drop(columns=['Id'], errors='ignore')

In [9]:
# Columns fed to the model as inputs; 'Churned' is the label to predict.
feature_columns = ['Score', 'Gender', 'Age', 'Assets', 'Products', 'Active']
X = df[feature_columns]
y = df['Churned']

In [10]:
# Hold out 10% of the rows as the test set; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [21]:
def xgb_cv(max_depth, learning_rate, n_estimators, gamma, min_child_weight, subsample, colsample_bytree):
    """Objective for the Bayesian search: mean 3-fold CV accuracy of an XGBoost classifier.

    The optimiser proposes every hyperparameter as a float, so the
    integer-valued ones (max_depth, n_estimators, min_child_weight) are
    truncated with int() before being passed to XGBoost.

    Reads the notebook-level X_train / y_train.

    Args:
        max_depth: tree depth (truncated to int).
        learning_rate: boosting learning rate.
        n_estimators: number of boosting rounds (truncated to int).
        gamma: passed through to XGBClassifier unchanged.
        min_child_weight: truncated to int before being passed on.
        subsample: passed through to XGBClassifier unchanged.
        colsample_bytree: passed through to XGBClassifier unchanged.

    Returns:
        Mean accuracy over the 3 folds, or 0.0 (the worst possible accuracy)
        when the evaluation raises ValueError or produces a NaN score.
    """
    # The optimiser registers whatever this function returns as the target of
    # the probed point; it does not skip non-numeric results. Returning None
    # (or NaN) would therefore poison the surrogate model's training data, so
    # failures are mapped to a finite worst-case score instead.
    failed_score = 0.0

    try:
        model = xgb.XGBClassifier(
            max_depth=int(max_depth),
            learning_rate=learning_rate,
            n_estimators=int(n_estimators),
            gamma=gamma,
            min_child_weight=int(min_child_weight),
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            objective='binary:logistic',
            eval_metric='logloss',
            random_state=42
        )

        # Perform cross-validation and take the mean accuracy across folds
        cv_result = cross_val_score(model, X_train, y_train, cv=3, scoring=make_scorer(accuracy_score))
        mean_accuracy = cv_result.mean()

    except ValueError as e:
        print("Error encountered during function evaluation:", e)
        return failed_score

    # With cross_val_score's default error_score (NaN), a fold whose fit fails
    # is scored NaN rather than raising, which makes the mean NaN as well.
    if pd.isna(mean_accuracy):
        print("Error encountered during function evaluation:", "cross-validation produced a NaN score")
        return failed_score

    return mean_accuracy


In [22]:
# Search space for the Bayesian optimiser: one (lower, upper) bound pair per
# hyperparameter accepted by xgb_cv. All values are sampled as floats;
# xgb_cv truncates max_depth, n_estimators and min_child_weight to integers.
param_bounds = dict(
    max_depth=(3, 10),
    learning_rate=(0.01, 0.3),
    n_estimators=(50, 500),
    gamma=(0, 5),
    min_child_weight=(1, 10),
    subsample=(0.5, 1.0),
    colsample_bytree=(0.5, 1.0),
)


In [23]:
# Bayesian search that maximises xgb_cv over param_bounds.
# Budget: 5 initial points followed by 25 optimisation iterations, i.e. 30
# evaluations of xgb_cv, each of which runs a 3-fold cross-validation.
optimizer = BayesianOptimization(
    f=xgb_cv,
    pbounds=param_bounds,
    random_state=42,
)
optimizer.maximize(n_iter=25, init_points=5)

|   iter    |  target   | colsam... |   gamma   | learni... | max_depth | min_ch... | n_esti... | subsample |
-------------------------------------------------------------------------------------------------------------
| 1         | 0.8487    | 0.6873    | 4.754     | 0.2223    | 7.191     | 2.404     | 120.2     | 0.529     |
| 2         | 0.8187    | 0.9331    | 3.006     | 0.2153    | 3.144     | 9.729     | 424.6     | 0.6062    |
| 3         | 0.8254    | 0.5909    | 0.917     | 0.09823   | 6.673     | 4.888     | 181.1     | 0.8059    |
| 4         | 0.8265    | 0.5697    | 1.461     | 0.1162    | 6.192     | 8.067     | 139.9     | ... [output truncated]

In [24]:
# Final classifier, built from a fixed set of tuned hyperparameters.
# NOTE(review): these values look hand-copied (and rounded) from the Bayesian
# optimisation log; confirm against optimizer.max['params'] if the search
# space or data changes.
best_hyperparams = {
    'colsample_bytree': 0.8089,
    'gamma': 4.872,
    'learning_rate': 0.0763,
    'max_depth': 6,
    'min_child_weight': 1,
    'n_estimators': 108,
    'subsample': 0.5184,
}
best_model = xgb.XGBClassifier(
    objective='binary:logistic',
    eval_metric='logloss',
    random_state=42,
    **best_hyperparams,
)

In [25]:
# Train the tuned classifier on the training split only; the test split stays
# untouched until evaluation.
best_model.fit(X=X_train, y=y_train)

In [31]:
# Predict class labels for the held-out test split using the fitted model
y_pred_class = best_model.predict(X_test)

In [32]:
# Confusion matrix on the test split: rows are the true classes, columns the
# predicted classes.
cnf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred_class)

In [33]:
# Report the raw confusion matrix followed by the overall test accuracy.
test_accuracy = accuracy_score(y_test, y_pred_class)
print('Confusion Matrix:', cnf_matrix, sep='\n')
print('\nAccuracy', test_accuracy)

Confusion Matrix:
[[77  2]
 [12  9]]

Accuracy 0.86


In [34]:
# Wrap the confusion matrix in a DataFrame so that rows (actual class) and
# columns (predicted class) carry readable labels.
actual_labels = ['Non-Churned', 'Churned']
predicted_labels = ['Non_churned(pred)', 'Churned(pred)']
cnf_table = pd.DataFrame(data=cnf_matrix, index=actual_labels, columns=predicted_labels)

In [35]:
# Plain-text rendering of the labelled confusion matrix
# (rows: actual class, columns: predicted class).
print(cnf_table)

             Non_churned(pred)  Churned(pred)
Non-Churned                 77              2
Churned                     12              9


In [36]:
# Per-class precision / recall / F1 on the test split, with readable class names.
report_text = classification_report(
    y_test,
    y_pred_class,
    target_names=['Non Churned', 'Churned'],
)
print(report_text)

              precision    recall  f1-score   support

 Non Churned       0.87      0.97      0.92        79
     Churned       0.82      0.43      0.56        21

    accuracy                           0.86       100
   macro avg       0.84      0.70      0.74       100
weighted avg       0.86      0.86      0.84       100

