# Training the model


In [1]:
import pandas as pd
from pycaret.classification import *

In [15]:
# Load the cleaned customer dataset from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer='Cleaned_Data.csv')
# Bare last expression: the notebook renders the frame as this cell's output
df

Unnamed: 0,Customer ID,Gender,Age,Married,Number of Dependents,City,Zip Code,Latitude,Longitude,Number of Referrals,...,Contract,Paperless Billing,Payment Method,Monthly Charge,Total Charges,Total Refunds,Total Extra Data Charges,Customer Status,Churn Category,Churn Reason
0,0002-ORFBO,Female,37,Yes,0,Frazier Park,93225,34.827662,-118.999073,2,...,One Year,Yes,Credit Card,65.60,593.30,0.00,0,Stayed,Client currently in the company,.
1,0003-MKNFE,Male,46,No,0,Glendale,91206,34.162515,-118.203869,0,...,Month-to-Month,No,Credit Card,-4.00,542.40,38.33,10,Stayed,Client currently in the company,.
2,0004-TLHLJ,Male,50,No,0,Costa Mesa,92627,33.645672,-117.922613,0,...,Month-to-Month,Yes,Bank Withdrawal,73.90,280.85,0.00,0,Churned,Competitor,Competitor had better devices
3,0011-IGKFF,Male,78,Yes,0,Martinez,94553,38.014457,-122.115432,1,...,Month-to-Month,Yes,Bank Withdrawal,98.00,1237.85,0.00,0,Churned,Dissatisfaction,Product dissatisfaction
4,0013-EXCHZ,Female,75,Yes,0,Camarillo,93010,34.227846,-119.079903,3,...,Month-to-Month,Yes,Credit Card,83.90,267.40,0.00,0,Churned,Dissatisfaction,Network reliability
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,9987-LUTYD,Female,20,No,0,La Mesa,91941,32.759327,-116.997260,0,...,One Year,No,Credit Card,55.15,742.90,0.00,0,Stayed,Client currently in the company,.
7039,9992-RRAMN,Male,40,Yes,0,Riverbank,95367,37.734971,-120.954271,1,...,Month-to-Month,Yes,Bank Withdrawal,85.10,1873.70,0.00,0,Churned,Dissatisfaction,Product dissatisfaction
7040,9992-UJOEL,Male,22,No,0,Elk,95432,39.108252,-123.645121,0,...,Month-to-Month,Yes,Credit Card,50.30,92.75,0.00,0,Stayed,Client currently in the company,.
7041,9993-LHIEB,Male,21,Yes,0,Solana Beach,92075,33.001813,-117.263628,5,...,Two Year,No,Credit Card,67.85,4627.65,0.00,0,Stayed,Client currently in the company,.


In [17]:
# Model training

# Build the modelling frame from `df`.
# 'Customer Status' is the prediction target. 'Customer ID' is only an
# identifier, while 'Churn Category' and 'Churn Reason' describe the outcome
# itself (see the preview above: they are only filled in meaningfully for
# churned customers), so none of the three may be used as a feature.
non_feature_columns = ['Customer ID', 'Churn Category', 'Churn Reason']
data = df.drop(columns=non_feature_columns)
# Count customers whose status is 'Joined' as 'Stayed'
data['Customer Status'] = data['Customer Status'].replace({'Joined': 'Stayed'})

# Set up the PyCaret experiment. `session_id` fixes the random seed;
# 'City' and 'Zip Code' are excluded from the feature set.
clf1 = setup(
    data=data,
    target='Customer Status',
    ignore_features=['City', 'Zip Code'],
    session_id=123,
)

# Train and rank the candidate classifiers, keeping the top-ranked one
best_model = compare_models()

# Produce the final version of the selected model
final_model = finalize_model(estimator=best_model)

# Persist the finalized pipeline + model under the name 'best_churn_model'
save_model(model=final_model, model_name='best_churn_model')

# Show the estimator that compare_models() selected and its hyperparameters
print(best_model)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,Customer Status
2,Target type,Binary
3,Target mapping,"Churned: 0, Stayed: 1"
4,Original data shape,"(7043, 33)"
5,Transformed data shape,"(7043, 41)"
6,Transformed train set shape,"(4930, 41)"
7,Transformed test set shape,"(2113, 41)"
8,Ignore features,2
9,Numeric features,12


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.857,0.9138,0.857,0.8532,0.8539,0.6184,0.621,0.193
lightgbm,Light Gradient Boosting Machine,0.8525,0.9077,0.8525,0.8495,0.8501,0.6102,0.612,0.249
rf,Random Forest Classifier,0.845,0.8971,0.845,0.84,0.8408,0.5825,0.586,0.093
ada,Ada Boost Classifier,0.8442,0.9031,0.8442,0.8414,0.8423,0.5911,0.5921,0.068
lr,Logistic Regression,0.8383,0.8932,0.8383,0.8353,0.8363,0.5753,0.5764,0.226
lda,Linear Discriminant Analysis,0.8345,0.8833,0.8345,0.8308,0.8321,0.5637,0.5649,0.028
et,Extra Trees Classifier,0.8314,0.8852,0.8314,0.8265,0.8279,0.5508,0.5529,0.076
ridge,Ridge Classifier,0.8308,0.8833,0.8308,0.8243,0.8254,0.5407,0.545,0.027
dt,Decision Tree Classifier,0.7864,0.7323,0.7864,0.7894,0.7876,0.4587,0.4592,0.034
nb,Naive Bayes,0.7811,0.869,0.7811,0.8251,0.7916,0.5102,0.5303,0.027


Transformation Pipeline and Model Successfully Saved
GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='log_loss', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_samples_leaf=1,
                           min_samples_split=2, min_weight_fraction_leaf=0.0,
                           n_estimators=100, n_iter_no_change=None,
                           random_state=123, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)


In [28]:
from pycaret.classification import load_model, predict_model

# Load the dataset
file_path = 'Cleaned_Data.csv'
original_data = pd.read_csv(file_path)

# Prepare the data for prediction: drop the identifier/outcome columns that
# were removed before training, plus the two columns setup() was told to ignore
data_for_prediction = original_data.drop(columns=['Customer ID', 'Churn Category', 'Churn Reason', 'City', 'Zip Code'])
# Apply the same target recoding as in training ('Joined' counted as 'Stayed').
# Because the target column is present, predict_model() also prints a metrics
# table. NOTE(review): these rows are the same CSV the model was trained on,
# and finalize_model() refits on the full dataset per the PyCaret docs, so
# those metrics are optimistic and are not a hold-out evaluation.
data_for_prediction['Customer Status'] = original_data['Customer Status'].replace('Joined', 'Stayed')

# Load the saved pipeline + model
model_path = 'best_churn_model'
loaded_model = load_model(model_path)

# Predict using the loaded model
predictions = predict_model(loaded_model, data=data_for_prediction)

# Attach the predicted label and its score to the original dataset.
# 'prediction_score' is the probability of the *predicted* class, whichever
# class that is -- it is NOT the probability of churn.
original_data['Predicted Label'] = predictions['prediction_label']
original_data['Prediction Probability'] = predictions['prediction_score']

# Derive an actual churn probability so the output matches its file name.
# For a binary target: P(churn) = score when the predicted label is 'Churned',
# and 1 - score when the predicted label is 'Stayed'.
churn_label = 'Churned'
unexpected_labels = set(predictions['prediction_label'].unique()) - {churn_label, 'Stayed'}
if unexpected_labels:
    # Guard against encoded or unexpected labels, which would silently make
    # the derived probability wrong
    raise ValueError(f"Unexpected predicted labels: {sorted(map(str, unexpected_labels))}")
predicted_churn = predictions['prediction_label'] == churn_label
original_data['Churn Probability'] = (
    predictions['prediction_score']
    .where(predicted_churn, 1 - predictions['prediction_score'])
    .round(4)  # avoid float artefacts such as 0.12590000000000001
)

# Save the updated dataset
original_data.to_csv('Data_with_Churn_Probability.csv', index=False)

# Display the first few rows of the updated dataset
print(original_data.head())


Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Gradient Boosting Classifier,0.8741,0.9334,0.8741,0.871,0.8716,0.6649,0.667


  Customer ID  Gender  Age Married  Number of Dependents          City  \
0  0002-ORFBO  Female   37     Yes                     0  Frazier Park   
1  0003-MKNFE    Male   46      No                     0      Glendale   
2  0004-TLHLJ    Male   50      No                     0    Costa Mesa   
3  0011-IGKFF    Male   78     Yes                     0      Martinez   
4  0013-EXCHZ  Female   75     Yes                     0     Camarillo   

   Zip Code   Latitude   Longitude  Number of Referrals  ...   Payment Method  \
0     93225  34.827662 -118.999073                    2  ...      Credit Card   
1     91206  34.162515 -118.203869                    0  ...      Credit Card   
2     92627  33.645672 -117.922613                    0  ...  Bank Withdrawal   
3     94553  38.014457 -122.115432                    1  ...  Bank Withdrawal   
4     93010  34.227846 -119.079903                    3  ...      Credit Card   

  Monthly Charge Total Charges  Total Refunds Total Extra Data Charg