In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [4]:
# Load the Telco customer-churn CSV from the working directory and preview it.
DATA_PATH = "WA_Fn-UseC_-Telco-Customer-Churn.csv"
dt = pd.read_csv(DATA_PATH)
dt.head()  # first five rows (the default)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [5]:
# Summary statistics for the columns pandas currently treats as numeric.
dt.describe()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges
count,7043.0,7043.0,7043.0
mean,0.162147,32.371149,64.761692
std,0.368612,24.559481,30.090047
min,0.0,0.0,18.25
25%,0.0,9.0,35.5
50%,0.0,29.0,70.35
75%,0.0,55.0,89.85
max,1.0,72.0,118.75


In [6]:
# Column names, non-null counts and dtypes of the raw frame.
dt.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [7]:
# Distinct gender labels, checked before encoding the column.
dt['gender'].unique()

array(['Female', 'Male'], dtype=object)

In [8]:
# Encode gender as an integer flag: Male -> 0, Female -> 1.
# Series.map yields NaN for any value missing from the dict, so re-running this
# cell on the already-encoded column would blank it out.
gender_codes = {'Male': 0, 'Female': 1}
dt['gender'] = dt['gender'].map(gender_codes)

In [9]:
# Preview the frame after encoding gender.
dt.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,1,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,0,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,0,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,0,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,1,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [10]:
# Distinct Partner labels, checked before encoding the column.
dt['Partner'].unique()

array(['Yes', 'No'], dtype=object)

In [11]:
# Encode Partner as an integer flag: No -> 0, Yes -> 1.
partner_codes = {"Yes": 1, "No": 0}
dt['Partner'] = dt['Partner'].map(partner_codes)

In [12]:
# Preview the frame after encoding Partner.
dt.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,1,0,1,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,0,0,0,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,0,0,0,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,0,0,0,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,1,0,0,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [13]:
# Distinct Dependents labels, checked before encoding the column.
dt.Dependents.unique()

array(['No', 'Yes'], dtype=object)

In [14]:
# Encode Dependents as an integer flag: No -> 0, Yes -> 1.
dependents_codes = {"No": 0, "Yes": 1}
dt["Dependents"] = dt["Dependents"].map(dependents_codes)

In [15]:
# Preview the frame after encoding Dependents.
dt.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,1,0,1,0,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,0,0,0,0,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,0,0,0,0,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,0,0,0,0,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,1,0,0,0,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [16]:
# Distinct PhoneService labels, checked before encoding the column.
dt.PhoneService.unique()

array(['No', 'Yes'], dtype=object)

In [17]:
# Encode PhoneService as an integer flag: No -> 0, Yes -> 1.
phone_service_codes = {'No': 0, "Yes": 1}
dt["PhoneService"] = dt["PhoneService"].map(phone_service_codes)

In [18]:
# Preview the frame after encoding PhoneService.
dt.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,1,0,1,0,1,0,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,0,0,0,0,34,1,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,0,0,0,0,2,1,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,0,0,0,0,45,0,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,1,0,0,0,2,1,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [19]:
# Integer-encode the remaining categorical columns (and the Churn target).
# A single loop replaces the run of copy-pasted one-line cells; the same
# encoder object is re-fitted for each column in the same order as before, so
# after the loop `le` is left fitted on the last column ("Churn").
# LabelEncoder numbers the labels in sorted order, starting from 0.
le = LabelEncoder()
label_encoded_columns = [
    "MultipleLines",
    "OnlineSecurity",
    "OnlineBackup",
    "DeviceProtection",
    "TechSupport",
    "StreamingTV",
    "StreamingMovies",
    "PaperlessBilling",
    "PaymentMethod",
    "Churn",
]
for column in label_encoded_columns:
    dt[column] = le.fit_transform(dt[column])

In [28]:
# Preview the frame after label-encoding the categorical columns.
dt.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,1,0,1,0,1,0,1,DSL,0,...,0,0,0,0,Month-to-month,1,2,29.85,29.85,0
1,5575-GNVDE,0,0,0,0,34,1,0,DSL,2,...,2,0,0,0,One year,0,3,56.95,1889.5,0
2,3668-QPYBK,0,0,0,0,2,1,0,DSL,2,...,0,0,0,0,Month-to-month,1,3,53.85,108.15,1
3,7795-CFOCW,0,0,0,0,45,0,1,DSL,2,...,2,2,0,0,One year,0,0,42.3,1840.75,0
4,9237-HQITU,1,0,0,0,2,1,0,Fiber optic,0,...,0,0,0,0,Month-to-month,1,2,70.7,151.65,1


In [29]:
# Distinct Contract labels, checked before encoding the column.
dt['Contract'].unique()

array(['Month-to-month', 'One year', 'Two year'], dtype=object)

In [30]:
# LabelEncoder assigns codes in sorted label order, so the three contract types
# become Month-to-month -> 0, One year -> 1, Two year -> 2.
contract_codes = le.fit_transform(dt["Contract"])
dt["Contract"] = contract_codes

In [31]:
# Distinct InternetService labels, checked before encoding the column.
dt.InternetService.unique()

array(['DSL', 'Fiber optic', 'No'], dtype=object)

In [32]:
# Hand-written mapping so that "No" internet service gets the lowest code.
internet_service_codes = {'No': 0, 'DSL': 1, 'Fiber optic': 2}
dt["InternetService"] = dt["InternetService"].map(internet_service_codes)

In [33]:
# Round monthly charges to the nearest whole number (the values stay floats).
dt["MonthlyCharges"] = dt["MonthlyCharges"].round()

In [34]:
# Slice with 488:489 (instead of dt.iloc[488]) so the row is shown as a one-row
# table rather than a vertical Series. This row has a blank TotalCharges.
dt.iloc[488:489]

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
488,4472-LVYGI,1,0,1,1,0,0,1,1,2,...,2,2,2,0,2,1,0,53.0,,0


In [35]:
# Turn every cell that holds exactly one space (" ") into a missing value
# (pd.NA), across all columns of the frame.
dt = dt.replace(" ", pd.NA)

In [36]:
# Convert TotalCharges from text to a numeric (float) column.
# errors="coerce" turns any entry that cannot be parsed, such as the blank
# strings present in the raw data, into NaN instead of raising, so this cell
# does not depend on the blank-replacement cell having been run first.
dt["TotalCharges"] = pd.to_numeric(dt["TotalCharges"], errors="coerce")

In [37]:
# Impute missing TotalCharges with the column mean.
# The result is assigned back instead of calling fillna(inplace=True) on
# dt['TotalCharges']: the in-place form acts on an intermediate Series, which
# recent pandas versions warn about and which leaves `dt` unchanged once
# Copy-on-Write is enabled.
dt['TotalCharges'] = dt['TotalCharges'].fillna(dt['TotalCharges'].mean())

In [38]:
# Round TotalCharges to the nearest whole number (this is not a round-up).
dt['TotalCharges'] = dt['TotalCharges'].round()

In [39]:
# Display the cleaned frame (pandas truncates the middle rows and columns).
dt

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,1,0,1,0,1,0,1,1,0,...,0,0,0,0,0,1,2,30.0,30.0,0
1,5575-GNVDE,0,0,0,0,34,1,0,1,2,...,2,0,0,0,1,0,3,57.0,1890.0,0
2,3668-QPYBK,0,0,0,0,2,1,0,1,2,...,0,0,0,0,0,1,3,54.0,108.0,1
3,7795-CFOCW,0,0,0,0,45,0,1,1,2,...,2,2,0,0,1,0,0,42.0,1841.0,0
4,9237-HQITU,1,0,0,0,2,1,0,2,0,...,0,0,0,0,0,1,2,71.0,152.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,0,0,1,1,24,1,2,1,2,...,2,2,2,2,1,1,3,85.0,1990.0,0
7039,2234-XADUH,1,0,1,1,72,1,2,2,0,...,2,0,2,2,1,1,1,103.0,7363.0,0
7040,4801-JZAZL,1,0,1,1,11,0,1,1,2,...,0,0,0,0,0,1,2,30.0,346.0,0
7041,8361-LTMKD,0,1,1,0,4,1,2,2,0,...,0,0,0,0,0,1,3,74.0,307.0,1


In [40]:
# Rescale tenure to the [0, 1] range.
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split performed further down.
scaler = MinMaxScaler()
tenure_scaled = scaler.fit_transform(dt[["tenure"]])
dt['tenure'] = tenure_scaled

In [41]:
# Check the rescaled tenure values.
dt.tenure

0       0.013889
1       0.472222
2       0.027778
3       0.625000
4       0.027778
          ...   
7038    0.333333
7039    1.000000
7040    0.152778
7041    0.055556
7042    0.916667
Name: tenure, Length: 7043, dtype: float64

In [42]:
# Remove the per-customer ID column so it is not used as a model feature.
# errors='ignore' keeps the cell re-runnable: without it a second execution
# raises KeyError because the column is already gone.
dt = dt.drop(columns='customerID', errors='ignore')

In [43]:
# Preview the fully numeric frame that feeds the model.
dt.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,1,0,1,0,0.013889,0,1,1,0,2,0,0,0,0,0,1,2,30.0,30.0,0
1,0,0,0,0,0.472222,1,0,1,2,0,2,0,0,0,1,0,3,57.0,1890.0,0
2,0,0,0,0,0.027778,1,0,1,2,2,0,0,0,0,0,1,3,54.0,108.0,1
3,0,0,0,0,0.625,0,1,1,2,0,2,2,0,0,1,0,0,42.0,1841.0,0
4,1,0,0,0,0.027778,1,0,2,0,0,0,0,0,0,0,1,2,71.0,152.0,1


In [None]:
# Separate the target vector (Churn) from the feature matrix (every other column).
target_column = 'Churn'
y = dt[target_column]
X = dt.drop(columns=target_column)

In [None]:
# Hold out 28% of the rows for testing; the fixed seed makes the split repeatable.
TEST_FRACTION = 0.28
SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SEED
)

# Fit a depth-limited decision tree on the training portion.
# max_depth is a tunable hyperparameter.
model = DecisionTreeClassifier(max_depth=8, random_state=SEED)
model.fit(X_train, y_train)

# Predict churn labels for the held-out rows.
y_pred = model.predict(X_test)


In [None]:
# Fraction of held-out rows whose churn label was predicted correctly.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

Accuracy: 0.7815509376583882
