## Imports

In [1]:
import pandas as pd
import numpy as np

import abstraction.visualization_functions as VisualizationFunctions
import abstraction.model_functions as ModelFunctions

from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

import warnings
warnings.filterwarnings('ignore')

## Dataset

In [2]:
df_churn_filtered = pd.read_csv('datasets/churn_filtered.csv')

In [3]:
df_churn_filtered.head()

Unnamed: 0,Gender,Age,Married,Number of Dependents,Number of Referrals,Tenure in Months,Offer,Phone Service,Avg Monthly Long Distance Charges,Multiple Lines,...,Contract,Paperless Billing,Payment Method,Monthly Charge,Total Charges,Total Refunds,Total Extra Data Charges,Total Long Distance Charges,Total Revenue,Customer Status
0,Female,37,Yes,0,2,9,No Offer,Yes,42.39,No,...,One Year,Yes,Credit Card,65.6,593.3,0.0,0,381.51,974.81,0
1,Male,46,No,0,0,9,No Offer,Yes,10.69,Yes,...,Month-to-Month,No,Credit Card,-4.0,542.4,38.33,10,96.21,610.28,0
2,Male,50,No,0,0,4,Offer E,Yes,33.65,No,...,Month-to-Month,Yes,Bank Withdrawal,73.9,280.85,0.0,0,134.6,415.45,1
3,Male,78,Yes,0,1,13,Offer D,Yes,27.82,No,...,Month-to-Month,Yes,Bank Withdrawal,98.0,1237.85,0.0,0,361.66,1599.51,1
4,Female,75,Yes,0,3,3,No Offer,Yes,7.38,No,...,Month-to-Month,Yes,Credit Card,83.9,267.4,0.0,0,22.14,289.54,1


In [4]:
df_churn_filtered.columns.tolist()

['Gender',
 'Age',
 'Married',
 'Number of Dependents',
 'Number of Referrals',
 'Tenure in Months',
 'Offer',
 'Phone Service',
 'Avg Monthly Long Distance Charges',
 'Multiple Lines',
 'Internet Service',
 'Internet Type',
 'Avg Monthly GB Download',
 'Online Security',
 'Online Backup',
 'Device Protection Plan',
 'Premium Tech Support',
 'Streaming TV',
 'Streaming Movies',
 'Streaming Music',
 'Unlimited Data',
 'Contract',
 'Paperless Billing',
 'Payment Method',
 'Monthly Charge',
 'Total Charges',
 'Total Refunds',
 'Total Extra Data Charges',
 'Total Long Distance Charges',
 'Total Revenue',
 'Customer Status']

In [5]:
df_churn_filtered.shape

(6589, 31)

## Treinamento

In [6]:
TARGET = "Customer Status"
RANDOM_STATE = 42

In [7]:
X_train, X_test, y_train, y_test = ModelFunctions.model_train_test(df_churn_filtered, TARGET)

In [8]:
print(f'X_train - Proporção da classe 0: {y_train.value_counts(normalize=True)[0]}')
print(f'y_train - Proporção da classe 1: {y_train.value_counts(normalize=True)[1]}')
print(f'X_test - Proporção da classe 0: {y_train.value_counts(normalize=True)[0]}')
print(f'y_test - Proporção da classe 1: {y_train.value_counts(normalize=True)[1]}')

X_train - Proporção da classe 0: 0.7183434518647008
y_train - Proporção da classe 1: 0.2816565481352992
X_test - Proporção da classe 0: 0.7183434518647008
y_test - Proporção da classe 1: 0.2816565481352992


In [9]:
X_train_processed, X_test_processed = ModelFunctions.model_pre_process(df_churn_filtered, TARGET, X_train, X_test)

### Subir mlflow

In [10]:
ModelFunctions.mlflow_up('Predição de Churn agora vai em hd')

2024/11/25 19:55:25 INFO mlflow.tracking.fluent: Experiment with name 'Predição de Churn agora vai em hd' does not exist. Creating a new experiment.


### SVM

In [11]:
svm_model = SVC()

svm_rs_run_id, svm_rs_model = ModelFunctions.supervised_rand_search_cv(svm_model, ModelFunctions.PARAM_GRID_SVM, X_train_processed, X_test_processed, y_train, y_test)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


2024/11/25 20:28:24 INFO mlflow.tracking._tracking_service.client: 🏃 View run Supervised_RandomSearchCV_SVC at: http://127.0.0.1:5000/#/experiments/112013621222109636/runs/0d28831a0b3e4bc4b39f599640a39e60.
2024/11/25 20:28:24 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/112013621222109636.


MLflow Run ID: 0d28831a0b3e4bc4b39f599640a39e60
Melhores parâmetros: {'kernel': 'linear', 'gamma': 'auto', 'degree': 2, 'C': 100}
Precisão (acurácia): 0.8512898330804249


### XGBoost

In [12]:
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')

xgb_rs_run_id, xgb_rs_model = ModelFunctions.supervised_rand_search_cv(xgb_model, ModelFunctions.PARAM_GRID_XGB, X_train_processed, X_test_processed, y_train, y_test)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

MLflow Run ID: 4cdc2b4e3bbf46a2bcba44f4b32d3e65
Melhores parâmetros: {'subsample': 0.6, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.1}
Precisão (acurácia): 0.8710166919575114


### GradientBoost

In [13]:
gradientboost_model = GradientBoostingClassifier()

gradientboost_rs_run_id, gradientboost_rs_model = ModelFunctions.supervised_rand_search_cv(gradientboost_model, ModelFunctions.PARAM_GRID_GRADIENTBOOST, X_train_processed, X_test_processed, y_train, y_test)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


2024/11/25 20:34:08 INFO mlflow.tracking._tracking_service.client: 🏃 View run Supervised_RandomSearchCV_GradientBoostingClassifier at: http://127.0.0.1:5000/#/experiments/112013621222109636/runs/82e8d4e741714c0c940185512b99056d.
2024/11/25 20:34:08 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/112013621222109636.


MLflow Run ID: 82e8d4e741714c0c940185512b99056d
Melhores parâmetros: {'subsample': 0.6, 'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_depth': 3, 'learning_rate': 0.1}
Precisão (acurácia): 0.8689934243803743


### AdaBoost

In [None]:
adaboost_model = AdaBoostClassifier(random_state=RANDOM_STATE)

adaboost_rs_run_id, adaboost_rs_model = ModelFunctions.supervised_rand_search_cv(adaboost_model, ModelFunctions.PARAM_GRID_ADABOOST, X_train_processed, X_test_processed, y_train, y_test)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


2024/11/25 20:52:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run Supervised_RandomSearchCV_AdaBoostClassifier at: http://127.0.0.1:5000/#/experiments/112013621222109636/runs/60840e52ea9543f098ae72487b05f193.
2024/11/25 20:52:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/112013621222109636.


ValueError: 
All the 50 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
50 fits failed with the following error:
Traceback (most recent call last):
  File "/home/marqsleal/Dev/Python/ml-tempalte-poc/venv/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/marqsleal/Dev/Python/ml-tempalte-poc/venv/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/marqsleal/Dev/Python/ml-tempalte-poc/venv/lib/python3.12/site-packages/sklearn/ensemble/_weight_boosting.py", line 149, in fit
    self._validate_estimator()
  File "/home/marqsleal/Dev/Python/ml-tempalte-poc/venv/lib/python3.12/site-packages/sklearn/ensemble/_weight_boosting.py", line 537, in _validate_estimator
    raise ValueError(
ValueError: KNeighborsClassifier doesn't support sample_weight.
