In [103]:
def load_data(filepath):
    df=pd.read_csv(filepath)
    return df

In [104]:
def clean_data_prep(df):
    df = df.drop_duplicates()
    df['TotalCharges']=pd.to_numeric(df['TotalCharges'],errors='coerce')
    df['TotalCharges'].fillna(value=df['TotalCharges'].median(),inplace=True)
    df.drop('customerID',axis=1,inplace=True)
    return df


In [105]:
def feature_engg(df,flag=0):
    
    df = pd.get_dummies(df, drop_first=True)
  

    four_cols = [
    'OnlineSecurity_No internet service',
    'OnlineBackup_No internet service',
    'DeviceProtection_No internet service',
    'TechSupport_No internet service'
    ]   
    
    if flag==1:
        for col in four_cols:
            if col not in df:
                df[col] = 0
        
        df['no_services_count'] = df[four_cols].sum(axis=1)

    else:
        df['no_services_count'] = df[four_cols].sum(axis=1)
    threshold_bill=94.25
    df['high_bill_customer']=(df['MonthlyCharges']>threshold_bill).astype(int)

    return df


In [106]:
def align_columns(df,expected_cols):
    for cols in expected_cols:
        if cols not in df:
            df[cols]=0
    df=df[expected_cols]
    return df

In [107]:
def load_model():
    model=joblib.load("/Users/shinojphilipjohn/Github/data-engg-science-projects/customer_churn_ml/models/random_forest_classifier_tuned.pkl")
    expected_cols=joblib.load("/Users/shinojphilipjohn/Github/data-engg-science-projects/customer_churn_ml/models/trained_columns.pkl")
    print("Model loaded!")

    return model,expected_cols


In [108]:
def predict_single(model,user_dict,expected_cols):
    cleaned_df=clean_data_prep(user_dict)
 
    final_df=feature_engg(cleaned_df,1)

    align_df=align_columns(final_df,expected_cols)
    y_pred_prob=model.predict_proba(align_df)[:,1][0]
    prediction = int(y_pred_prob >= 0.5)
    return prediction,y_pred_prob

In [109]:
def predict_bulk(model,df,expected_cols):
    cleaned_df=clean_data_prep(df)

    final_df=feature_engg(cleaned_df)

    align_df=align_columns(final_df,expected_cols)

    y_pred=model.predict(align_df)
    y_pred_prob=model.predict_proba(align_df)[:,1]
    df['Churn Prediction']=y_pred
    df['Churn Probability']=y_pred_prob
    return df

In [110]:
#test case
example_customer= {
    "customerID": "0001-ABCD",
    "gender": "Male",
    "SeniorCitizen": 0,
    "Partner": "No",
    "Dependents": "No",
    "tenure": 5,
    "PhoneService": "Yes",
    "MultipleLines": "No",
    "InternetService": "Fiber optic",
    "OnlineSecurity": "No",
    "OnlineBackup": "No",
    "DeviceProtection": "No",
    "TechSupport": "No",
    "StreamingTV": "Yes",
    "StreamingMovies": "Yes",
    "Contract": "Month-to-month",
    "PaperlessBilling": "Yes",
    "PaymentMethod": "Electronic check",
    "MonthlyCharges": 92.6,
    "TotalCharges": 463.0
}

import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

import warnings
warnings.filterwarnings("ignore")


filepath="/Users/shinojphilipjohn/Github/data-engg-science-projects/customer_churn_ml/dataset/Telco-Customer-Churn copy.csv"

df=load_data(filepath)
model,expected_cols=load_model()
example_customer=pd.DataFrame(example_customer,index=[0])
prediction,y_pred_prob=predict_single(model,example_customer,expected_cols)
print(f"Single User Churn Prediction: {bool(prediction)} and Probabiltiy: {round(float(y_pred_prob),3)}!")
df_pred=predict_bulk(model,df,expected_cols)
print("\n")
print("Bulk Prediction!")
print(df_pred.head())



Model loaded!
Single User Churn Prediction: True and Probabiltiy: 0.504!


Bulk Prediction!
   customerID  gender  SeniorCitizen Partner Dependents  tenure PhoneService  \
0  7590-VHVEG  Female              0     Yes         No       1           No   
1  5575-GNVDE    Male              0      No         No      34          Yes   
2  3668-QPYBK    Male              0      No         No       2          Yes   
3  7795-CFOCW    Male              0      No         No      45           No   
4  9237-HQITU  Female              0      No         No       2          Yes   

      MultipleLines InternetService OnlineSecurity  ... TechSupport  \
0  No phone service             DSL             No  ...          No   
1                No             DSL            Yes  ...          No   
2                No             DSL            Yes  ...          No   
3  No phone service             DSL            Yes  ...         Yes   
4                No     Fiber optic             No  ...          No   

