In [2]:
# Import libraries:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import preprocessing
from sklearn import model_selection
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

import warnings
warnings.filterwarnings("ignore")
sns.set_style("darkgrid")

In [3]:
path = "/home/sunilmishraji01/sunil/Project/Churn_Prediction/input/Telco-Customer-Churn.csv"
dataset = pd.read_csv(path)
#df = dataset.copy()

In [4]:
dataset.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [5]:
def DataPrepration(path):
    """
    Return dataset after preprocessing.
    
    path >> location of dataset.  type(str)
    
    """
    df = pd.read_csv(path)
    df.drop(columns=["customerID"],inplace=True)
    df.TotalCharges = pd.to_numeric(df.TotalCharges,errors="coerce")
    df = df.dropna(subset=["TotalCharges"])
    ordinal_encoder = preprocessing.OrdinalEncoder()
    df.iloc[:,:-1]=ordinal_encoder.fit_transform(df.iloc[:,:-1])
    label_encoder = preprocessing.LabelEncoder()
    df.iloc[:,-1]= label_encoder.fit_transform(df.iloc[:,-1])
    return df

In [6]:
df = DataPrepration(path)
df.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,142.0,74.0,0
1,1.0,0.0,0.0,0.0,33.0,1.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,3.0,497.0,3624.0,0
2,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,435.0,536.0,1
3,1.0,0.0,0.0,0.0,44.0,0.0,1.0,0.0,2.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,266.0,3570.0,0
4,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,728.0,674.0,1


In [7]:
# Independant & Dependent features:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [8]:
X_train,X_val,y_train,y_val = model_selection.train_test_split(X,y,random_state=42,stratify=y)

In [9]:
# train model
tree_clf = DecisionTreeClassifier(max_depth=2,random_state=42)

tree_clf.fit(X_train,y_train)

In [10]:
# prediction
y_predict = model_selection.cross_val_predict(tree_clf,X_train,y_train,n_jobs=-1)

In [11]:
# Accuracy 
model_selection.cross_val_score(tree_clf,X_train,y_train,n_jobs=-1,scoring="accuracy")

array([0.7478673 , 0.7507109 , 0.7535545 , 0.73175355, 0.73719165])

In [12]:
# F1-score
metrics.f1_score(y_train,y_predict)

0.5905918057663125

In [13]:
# precision score
metrics.precision_score(y_train,y_predict)

0.5139989434759641

In [14]:
# recall or sensitivity
metrics.recall_score(y_train,y_predict)

0.6940085592011412

In [15]:
# Confusion matrix
metrics.confusion_matrix(y_train,y_predict)

array([[2952,  920],
       [ 429,  973]])

In [16]:
print(metrics.classification_report(y_train,y_predict))

              precision    recall  f1-score   support

           0       0.87      0.76      0.81      3872
           1       0.51      0.69      0.59      1402

    accuracy                           0.74      5274
   macro avg       0.69      0.73      0.70      5274
weighted avg       0.78      0.74      0.75      5274



In [17]:
df[0:1].values.shape

(1, 20)

In [18]:
y_train[0]

0

In [19]:
tree_clf.predict(X_train[0:1].values)

array([0])

In [20]:
# pickling the modelfile for deployment
import pickle

In [21]:
# saved model on spacific location
with open("/home/sunilmishraji01/sunil/Project/Churn_Prediction/saved__model/tree_clf_V0", "wb") as f:
    pickle.dump(tree_clf,f)

In [22]:
# load model from spacific location
with open("/home/sunilmishraji01/sunil/Project/Churn_Prediction/saved__model/tree_clf_V0", "rb") as f:
    pickled_model=pickle.load(f)

In [38]:
a = np.array(X_train)
a[0]

array([1.000e+00, 0.000e+00, 1.000e+00, 1.000e+00, 1.800e+01, 1.000e+00,
       0.000e+00, 1.000e+00, 0.000e+00, 2.000e+00, 0.000e+00, 2.000e+00,
       2.000e+00, 2.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 1.280e+03,
       3.623e+03])

In [63]:
pickled_model.predict(X_train[:1])

array([0])

In [60]:
y_train

3687    1
5553    1
6870    0
1874    0
147     1
       ..
5161    0
3451    0
4135    0
4249    0
1232    1
Name: Churn, Length: 5274, dtype: int64

In [30]:
{data =
    "data":{
        "gender":0.0,
        "SeniorCitizen":0.0,
        "Partner":0.0,
        "Dependents":0.0,
        "tenure":2.0,
        "PhoneService":1.0,
        "MultipleLines":2.0,
        "InternetService":1.0,
        "OnlineSecurity":0.0,
        "OnlineBackup":0.0,
        "DeviceProtection":0.0,
        "TechSupport":0.0,
        "StreamingTV":0.0,
        "MonthlyCharges":794.0,
        "TotalCharges": 882.0

    }
}

SyntaxError: invalid syntax (264319612.py, line 1)