### Importing Libraries

In [44]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from imblearn.combine import SMOTEENN

### Reading csv

In [45]:
df=pd.read_csv("tel_churn.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,0,29.85,29.85,0,1,0,0,1,1,...,0,0,1,0,1,0,0,0,0,0
1,1,0,56.95,1889.5,0,0,1,1,0,1,...,0,0,0,1,0,0,1,0,0,0
2,2,0,53.85,108.15,1,0,1,1,0,1,...,0,0,0,1,1,0,0,0,0,0
3,3,0,42.3,1840.75,0,0,1,1,0,1,...,1,0,0,0,0,0,0,1,0,0
4,4,0,70.7,151.65,1,1,0,1,0,1,...,0,0,1,0,1,0,0,0,0,0


In [46]:
df=df.drop('Unnamed: 0',axis=1)

In [47]:
x=df.drop('Churn',axis=1)
x

Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,29.85,29.85,1,0,0,1,1,0,1,...,0,0,1,0,1,0,0,0,0,0
1,0,56.95,1889.50,0,1,1,0,1,0,0,...,0,0,0,1,0,0,1,0,0,0
2,0,53.85,108.15,0,1,1,0,1,0,0,...,0,0,0,1,1,0,0,0,0,0
3,0,42.30,1840.75,0,1,1,0,1,0,1,...,1,0,0,0,0,0,0,1,0,0
4,0,70.70,151.65,1,0,1,0,1,0,0,...,0,0,1,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7027,0,84.80,1990.50,0,1,0,1,0,1,0,...,0,0,0,1,0,1,0,0,0,0
7028,0,103.20,7362.90,1,0,0,1,0,1,0,...,0,1,0,0,0,0,0,0,0,1
7029,0,29.60,346.45,1,0,0,1,0,1,1,...,0,0,1,0,1,0,0,0,0,0
7030,1,74.40,306.60,0,1,0,1,1,0,0,...,0,0,0,1,1,0,0,0,0,0


In [48]:
y=df['Churn']
y

0       0
1       0
2       1
3       0
4       1
       ..
7027    0
7028    0
7029    0
7030    1
7031    0
Name: Churn, Length: 7032, dtype: int64

### Train Test Split

In [49]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

### Decision Tree Classifier

In [50]:
model_dt=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [51]:
model_dt.fit(x_train,y_train)

In [52]:
y_pred=model_dt.predict(x_test)
y_pred

array([1, 0, 1, ..., 0, 1, 0], dtype=int64)

In [53]:
print(model_dt.score(x_test,y_test))
print(classification_report(y_test, y_pred, labels=[0,1]))

0.7711442786069652
              precision    recall  f1-score   support

           0       0.81      0.88      0.85      1006
           1       0.62      0.49      0.55       401

    accuracy                           0.77      1407
   macro avg       0.72      0.69      0.70      1407
weighted avg       0.76      0.77      0.76      1407



### As you can see that the accuracy is quite low, and as it's an imbalanced dataset, we shouldn't consider Accuracy as our metrics to measure the model, as Accuracy is cursed in imbalanced datasets.
### Hence, we need to check recall, precision & f1 score for the minority class, and it's quite evident that the precision, recall & f1 score is too low for Class 1, i.e. churned customers.
### Hence, moving ahead to call SMOTEENN (UpSampling + ENN)

In [54]:
sm = SMOTEENN()
X_resampled, y_resampled = sm.fit_resample(x,y)

In [55]:
xr_train,xr_test,yr_train,yr_test=train_test_split(X_resampled, y_resampled,test_size=0.2)

In [56]:
model_dt_smote=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [58]:
model_dt_smote.fit(xr_train,yr_train)
yr_predict = model_dt_smote.predict(xr_test)
model_score_r = model_dt_smote.score(xr_test, yr_test)
print(model_score_r)
print(metrics.classification_report(yr_test, yr_predict))

0.926890756302521
              precision    recall  f1-score   support

           0       0.92      0.91      0.92       523
           1       0.93      0.94      0.94       667

    accuracy                           0.93      1190
   macro avg       0.93      0.93      0.93      1190
weighted avg       0.93      0.93      0.93      1190



In [59]:
print(metrics.confusion_matrix(yr_test, yr_predict))

[[477  46]
 [ 41 626]]


### Now we can see quite better results, i.e. Accuracy: 95 %, and a very good recall, precision & f1 score for minority class.
### Let's try with some other classifier.

### Random Forest Classifier

In [60]:
from sklearn.ensemble import RandomForestClassifier

In [61]:
model_rf=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [62]:
model_rf.fit(x_train,y_train)

In [63]:
y_pred=model_rf.predict(x_test)

In [64]:
print(model_rf.score(x_test,y_test))
print(classification_report(y_test, y_pred, labels=[0,1]))

0.7803837953091685
              precision    recall  f1-score   support

           0       0.80      0.92      0.86      1006
           1       0.68      0.44      0.53       401

    accuracy                           0.78      1407
   macro avg       0.74      0.68      0.69      1407
weighted avg       0.77      0.78      0.76      1407



In [65]:
sm = SMOTEENN()
X_resampled1, y_resampled1 = sm.fit_resample(x,y)

In [66]:
xr_train1,xr_test1,yr_train1,yr_test1=train_test_split(X_resampled1, y_resampled1,test_size=0.2)

In [67]:
model_rf_smote=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [68]:
model_rf_smote.fit(xr_train1,yr_train1)

In [69]:
yr_predict1 = model_rf_smote.predict(xr_test1)

In [70]:
model_score_r1 = model_rf_smote.score(xr_test1, yr_test1)

In [71]:
print(model_score_r1)
print(metrics.classification_report(yr_test1, yr_predict1))

0.9377664109121909
              precision    recall  f1-score   support

           0       0.95      0.91      0.93       508
           1       0.93      0.96      0.95       665

    accuracy                           0.94      1173
   macro avg       0.94      0.93      0.94      1173
weighted avg       0.94      0.94      0.94      1173



In [72]:
print(metrics.confusion_matrix(yr_test1, yr_predict1))

[[460  48]
 [ 25 640]]


### Performing PCA

In [73]:
# Applying PCA
from sklearn.decomposition import PCA
pca = PCA(0.9)
xr_train_pca = pca.fit_transform(xr_train1)
xr_test_pca = pca.transform(xr_test1)
explained_variance = pca.explained_variance_ratio_

In [74]:
model=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [75]:
model.fit(xr_train_pca,yr_train1)

In [76]:
yr_predict_pca = model.predict(xr_test_pca)

In [77]:
model_score_r_pca = model.score(xr_test_pca, yr_test1)

In [78]:
print(model_score_r_pca)
print(metrics.classification_report(yr_test1, yr_predict_pca))

0.7297527706734868
              precision    recall  f1-score   support

           0       0.70      0.66      0.68       508
           1       0.75      0.78      0.77       665

    accuracy                           0.73      1173
   macro avg       0.73      0.72      0.72      1173
weighted avg       0.73      0.73      0.73      1173



### With PCA, we couldn't see any better results, hence let's finalise the model which was created by RF Classifier, and save the model so that we can use it in a later stage.

### Pickling the model

In [79]:
import pickle

In [41]:
filename = 'model.sav'

In [80]:
pickle.dump(model_rf_smote, open(filename, 'wb'))

In [81]:
load_model = pickle.load(open(filename, 'rb'))

In [82]:
model_score_r1 = load_model.score(xr_test1, yr_test1)

In [83]:
model_score_r1

0.9377664109121909

In [87]:
import gradio as gr
import pickle
import pandas as pd
import numpy as np

# Load the pickled model
with open('model.sav', 'rb') as f:
    model = pickle.load(f)

# Function to bin tenure into groups
def bin_tenure(tenure):
    if 1 <= tenure <= 12:
        return '1 - 12'
    elif 13 <= tenure <= 24:
        return '13 - 24'
    elif 25 <= tenure <= 36:
        return '25 - 36'
    elif 37 <= tenure <= 48:
        return '37 - 48'
    elif 49 <= tenure <= 60:
        return '49 - 60'
    elif 61 <= tenure <= 72:
        return '61 - 72'
    else:
        return None  # Invalid tenure

# Prediction function
def predict_churn(senior_citizen, monthly_charges, total_charges, gender, partner, dependents, tenure,
                  phone_service, multiple_lines, internet_service, online_security, online_backup,
                  device_protection, tech_support, streaming_tv, streaming_movies, contract,
                  paperless_billing, payment_method):
    
    # Bin the tenure
    tenure_group = bin_tenure(tenure)
    if tenure_group is None:
        return "Invalid tenure value. Must be between 1 and 72."

    # Create a dictionary with all features initialized to 0
    features = {
        'SeniorCitizen': senior_citizen,
        'MonthlyCharges': monthly_charges,
        'TotalCharges': total_charges,
        'gender_Female': 0,
        'gender_Male': 0,
        'Partner_No': 0,
        'Partner_Yes': 0,
        'Dependents_No': 0,
        'Dependents_Yes': 0,
        'PhoneService_No': 0,
        'PhoneService_Yes': 0,
        'MultipleLines_No': 0,
        'MultipleLines_No phone service': 0,
        'MultipleLines_Yes': 0,
        'InternetService_DSL': 0,
        'InternetService_Fiber optic': 0,
        'InternetService_No': 0,
        'OnlineSecurity_No': 0,
        'OnlineSecurity_No internet service': 0,
        'OnlineSecurity_Yes': 0,
        'OnlineBackup_No': 0,
        'OnlineBackup_No internet service': 0,
        'OnlineBackup_Yes': 0,
        'DeviceProtection_No': 0,
        'DeviceProtection_No internet service': 0,
        'DeviceProtection_Yes': 0,
        'TechSupport_No': 0,
        'TechSupport_No internet service': 0,
        'TechSupport_Yes': 0,
        'StreamingTV_No': 0,
        'StreamingTV_No internet service': 0,
        'StreamingTV_Yes': 0,
        'StreamingMovies_No': 0,
        'StreamingMovies_No internet service': 0,
        'StreamingMovies_Yes': 0,
        'Contract_Month-to-month': 0,
        'Contract_One year': 0,
        'Contract_Two year': 0,
        'PaperlessBilling_No': 0,
        'PaperlessBilling_Yes': 0,
        'PaymentMethod_Bank transfer (automatic)': 0,
        'PaymentMethod_Credit card (automatic)': 0,
        'PaymentMethod_Electronic check': 0,
        'PaymentMethod_Mailed check': 0,
        'tenure_group_1 - 12': 0,
        'tenure_group_13 - 24': 0,
        'tenure_group_25 - 36': 0,
        'tenure_group_37 - 48': 0,
        'tenure_group_49 - 60': 0,
        'tenure_group_61 - 72': 0
    }

    # Set the one-hot encoded values based on inputs
    features[f'gender_{gender}'] = 1
    features[f'Partner_{partner}'] = 1
    features[f'Dependents_{dependents}'] = 1
    features[f'PhoneService_{phone_service}'] = 1
    features[f'MultipleLines_{multiple_lines}'] = 1
    features[f'InternetService_{internet_service}'] = 1
    features[f'OnlineSecurity_{online_security}'] = 1
    features[f'OnlineBackup_{online_backup}'] = 1
    features[f'DeviceProtection_{device_protection}'] = 1
    features[f'TechSupport_{tech_support}'] = 1
    features[f'StreamingTV_{streaming_tv}'] = 1
    features[f'StreamingMovies_{streaming_movies}'] = 1
    features[f'Contract_{contract}'] = 1
    features[f'PaperlessBilling_{paperless_billing}'] = 1
    features[f'PaymentMethod_{payment_method}'] = 1
    features[f'tenure_group_{tenure_group}'] = 1

    # Convert to DataFrame for prediction
    input_df = pd.DataFrame([features])

    # Predict
    prediction = model.predict(input_df)[0]
    prob = model.predict_proba(input_df)[0][1]  # Probability of churn (class 1)

    if prediction == 1:
        return f"Customer will churn with probability {prob:.2f}"
    else:
        return f"Customer will not churn with probability {1 - prob:.2f}"

# Gradio Interface
with gr.Blocks() as iface:
    gr.Markdown("# Telecom Customer Churn Prediction")
    gr.Markdown("Enter customer details to predict churn.")

    with gr.Row():
        senior_citizen = gr.Number(label="Senior Citizen (0 or 1)", value=0)
        monthly_charges = gr.Number(label="Monthly Charges", value=0.0)
        total_charges = gr.Number(label="Total Charges", value=0.0)
        tenure = gr.Number(label="Tenure (1-72 months)", value=1)

    with gr.Row():
        gender = gr.Dropdown(choices=["Male", "Female"], label="Gender")
        partner = gr.Dropdown(choices=["Yes", "No"], label="Partner")
        dependents = gr.Dropdown(choices=["Yes", "No"], label="Dependents")
        phone_service = gr.Dropdown(choices=["Yes", "No"], label="Phone Service")

    with gr.Row():
        multiple_lines = gr.Dropdown(choices=["Yes", "No", "No phone service"], label="Multiple Lines")
        internet_service = gr.Dropdown(choices=["DSL", "Fiber optic", "No"], label="Internet Service")

    with gr.Row():
        online_security = gr.Dropdown(choices=["Yes", "No", "No internet service"], label="Online Security")
        online_backup = gr.Dropdown(choices=["Yes", "No", "No internet service"], label="Online Backup")
        device_protection = gr.Dropdown(choices=["Yes", "No", "No internet service"], label="Device Protection")

    with gr.Row():
        tech_support = gr.Dropdown(choices=["Yes", "No", "No internet service"], label="Tech Support")
        streaming_tv = gr.Dropdown(choices=["Yes", "No", "No internet service"], label="Streaming TV")
        streaming_movies = gr.Dropdown(choices=["Yes", "No", "No internet service"], label="Streaming Movies")

    with gr.Row():
        contract = gr.Dropdown(choices=["Month-to-month", "One year", "Two year"], label="Contract")
        paperless_billing = gr.Dropdown(choices=["Yes", "No"], label="Paperless Billing")
        payment_method = gr.Dropdown(choices=["Bank transfer (automatic)", "Credit card (automatic)", "Electronic check", "Mailed check"], label="Payment Method")

    output = gr.Textbox(label="Prediction")

    predict_btn = gr.Button("Predict")
    predict_btn.click(fn=predict_churn, inputs=[senior_citizen, monthly_charges, total_charges, gender, partner, dependents, tenure,
                                                phone_service, multiple_lines, internet_service, online_security, online_backup,
                                                device_protection, tech_support, streaming_tv, streaming_movies, contract,
                                                paperless_billing, payment_method], outputs=output)

iface.launch()

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


