In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Chargement des données
data = pd.read_csv('https://raw.githubusercontent.com/RMoulla/MLW/main/customer_churn.csv')


# Exploration des données
print(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Names            900 non-null    object 
 1   Age              900 non-null    float64
 2   Total_Purchase   900 non-null    float64
 3   Account_Manager  900 non-null    int64  
 4   Years            900 non-null    float64
 5   Num_Sites        900 non-null    float64
 6   Onboard_date     900 non-null    object 
 7   Location         900 non-null    object 
 8   Company          900 non-null    object 
 9   Churn            900 non-null    int64  
dtypes: float64(4), int64(2), object(4)
memory usage: 70.4+ KB
None
              Age  Total_Purchase  Account_Manager       Years   Num_Sites  \
count  900.000000      900.000000       900.000000  900.000000  900.000000   
mean    41.816667    10062.824033         0.481111    5.273156    8.587778   
std      6.127560     2408.644532        

In [None]:
print(data.describe())

In [None]:
print(data.head())

In [7]:
data.columns

Index(['Names', 'Age', 'Total_Purchase', 'Account_Manager', 'Years',
       'Num_Sites', 'Onboard_date', 'Location', 'Company', 'Churn'],
      dtype='object')

In [11]:
columns_to_keep = ['Age', 'Total_Purchase', 'Num_Sites', 'Churn']
data = data[columns_to_keep]


X = data.drop('Churn', axis=1)
y = data['Churn']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Modélisation

In [12]:
import statsmodels.api as sm


X_train_sm = sm.add_constant(X_train)

model = sm.Logit(y_train, X_train_sm)
result = model.fit()


print(result.summary())

Optimization terminated successfully.
         Current function value: 0.279134
         Iterations 8
                           Logit Regression Results                           
Dep. Variable:                  Churn   No. Observations:                  720
Model:                          Logit   Df Residuals:                      716
Method:                           MLE   Df Model:                            3
Date:                Mon, 09 Sep 2024   Pseudo R-squ.:                  0.3742
Time:                        19:13:00   Log-Likelihood:                -200.98
converged:                       True   LL-Null:                       -321.17
Covariance Type:            nonrobust   LLR p-value:                 7.888e-52
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
const            -14.9748      1.581     -9.473      0.000     -18.073     -11.877
Age              

In [13]:
X_train_sm

Unnamed: 0,const,Age,Total_Purchase,Num_Sites
10,1.0,30.0,11575.37,8.0
334,1.0,40.0,6773.23,11.0
244,1.0,37.0,15070.32,6.0
678,1.0,43.0,5907.07,8.0
306,1.0,46.0,11538.57,10.0
...,...,...,...,...
106,1.0,43.0,6715.23,8.0
270,1.0,44.0,9613.84,8.0
860,1.0,39.0,13731.45,10.0
435,1.0,36.0,12879.70,9.0


## Evaluation

In [21]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# sur les donnes de train
y_train_pred = result.predict(X_train_sm)
y_train_pred_label = (y_train_pred > 0.5).astype(int)


print("Accuracy: ", accuracy_score(y_train, y_train_pred_label))

Accuracy:  0.8875


In [22]:
print("Confusion Matrix:\n", confusion_matrix(y_train, y_train_pred_label))

Confusion Matrix:
 [[574  28]
 [ 53  65]]


In [23]:
print("Classification Report:\n", classification_report(y_train, y_train_pred_label))

Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.95      0.93       602
           1       0.70      0.55      0.62       118

    accuracy                           0.89       720
   macro avg       0.81      0.75      0.78       720
weighted avg       0.88      0.89      0.88       720



In [18]:
# sur les données de test
X_test_sm = sm.add_constant(X_test)
y_test_pred = result.predict(X_test_sm)

y_test_pred_label = (y_test_pred > 0.5).astype(int)



print("Accuracy: ", accuracy_score(y_test, y_test_pred_label))

Accuracy:  0.8666666666666667


In [19]:
print("Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred_label))

Confusion Matrix:
 [[142   6]
 [ 18  14]]


In [20]:
print("Classification Report:\n", classification_report(y_test, y_test_pred_label))

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.96      0.92       148
           1       0.70      0.44      0.54        32

    accuracy                           0.87       180
   macro avg       0.79      0.70      0.73       180
weighted avg       0.85      0.87      0.85       180



## Save le model

In [24]:
import pickle


with open('logistic_model.pkl', 'wb') as f:
    pickle.dump(result, f)

In [25]:
with open('logistic_model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

# to check
y_test_pred_loaded = loaded_model.predict(X_test_sm)
print("Loaded model test accuracy: ", accuracy_score(y_test, (y_test_pred_loaded > 0.5).astype(int)))


Loaded model test accuracy:  0.8666666666666667


## Flask

In [None]:
from flask import Flask, request, render_template
import pickle
import numpy as np

app = Flask(__name__)


@app.route('/')
def home():
    return render_template('index.html')

@app.route('/predict', methods=['POST'])
def predict():
    # Saisir des données
    features = [float(x) for x in request.form.values()]
    final_features = np.array([features])

    # Prédiction
    prediction = model.predict(sm.add_constant(final_features))
    output = prediction[0]

    return render_template('index.html', prediction_text='Churn Probability: {:.2f}'.format(output))

if __name__ == "__main__":
    app.run(debug=True)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat
