In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score

print("Libraries imported successfully")

Libraries imported successfully


In [2]:
df = pd.read_csv("customer_churn.csv")

print("Dataset loaded successfully")
print("Shape of dataset:", df.shape)

Dataset loaded successfully
Shape of dataset: (64374, 12)


In [3]:
print("Columns in dataset:")
df.columns


Columns in dataset:


Index(['CustomerID', 'Age', 'Gender', 'Tenure', 'Usage Frequency',
       'Support Calls', 'Payment Delay', 'Subscription Type',
       'Contract Length', 'Total Spend', 'Last Interaction', 'Churn'],
      dtype='object')

In [4]:
print("Missing values before:")
print(df.isnull().sum())

df.fillna(df.mode().iloc[0], inplace=True)

print("\nMissing values after:")
print(df.isnull().sum())


Missing values before:
CustomerID           0
Age                  0
Gender               0
Tenure               0
Usage Frequency      0
Support Calls        0
Payment Delay        0
Subscription Type    0
Contract Length      0
Total Spend          0
Last Interaction     0
Churn                0
dtype: int64

Missing values after:
CustomerID           0
Age                  0
Gender               0
Tenure               0
Usage Frequency      0
Support Calls        0
Payment Delay        0
Subscription Type    0
Contract Length      0
Total Spend          0
Last Interaction     0
Churn                0
dtype: int64


In [5]:
encoder = LabelEncoder()

for col in df.select_dtypes(include="object").columns:
    df[col] = encoder.fit_transform(df[col])

print("Categorical columns encoded")
df.head()


Categorical columns encoded


Unnamed: 0,CustomerID,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Subscription Type,Contract Length,Total Spend,Last Interaction,Churn
0,1,22,0,25,14,4,27,0,1,598,9,1
1,2,41,0,28,28,7,13,2,1,584,20,0
2,3,47,1,27,10,2,29,1,0,757,21,0
3,4,35,1,9,12,5,17,1,2,232,18,0
4,5,53,0,58,24,9,2,2,0,533,18,0


In [6]:
X = df.drop("Churn", axis=1)
y = df["Churn"]

print("Features shape:", X.shape)
print("Target shape:", y.shape)


Features shape: (64374, 11)
Target shape: (64374,)


In [7]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

print("Feature scaling completed")
print("First scaled row:\n", X_scaled[0])

Feature scaling completed
First scaled row:
 [-1.7320239  -1.43420211 -0.93482484 -0.40909964 -0.12252553 -0.44980683
  1.11453793 -1.22518771  0.01104616  0.21840769 -0.75232377]


In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.25, random_state=42
)

print("Training set size:", X_train.shape)
print("Testing set size :", X_test.shape)

Training set size: (48280, 11)
Testing set size : (16094, 11)


In [9]:
nb = GaussianNB()
nb.fit(X_train, y_train)

print("Naïve Bayes model trained")

Naïve Bayes model trained


In [10]:
nb_pred = nb.predict(X_test)

nb_accuracy = accuracy_score(y_test, nb_pred)
nb_precision = precision_score(y_test, nb_pred)
nb_recall = recall_score(y_test, nb_pred)

print("Naïve Bayes Results")
print("Accuracy :", nb_accuracy)
print("Precision:", nb_precision)
print("Recall   :", nb_recall)


Naïve Bayes Results
Accuracy : 0.8697029949049335
Precision: 0.8755609955120359
Recall   : 0.8448818897637795


In [24]:
knn = KNeighborsClassifier(n_neighbors=4)
knn.fit(X_train, y_train)

print("kNN model trained")

kNN model trained


In [25]:
knn_pred = knn.predict(X_test)

knn_accuracy = accuracy_score(y_test, knn_pred)
knn_precision = precision_score(y_test, knn_pred)
knn_recall = recall_score(y_test, knn_pred)

print("kNN Results")
print("Accuracy :", knn_accuracy)
print("Precision:", knn_precision)
print("Recall   :", knn_recall)


kNN Results
Accuracy : 0.9248167018764757
Precision: 0.9555144968732234
Recall   : 0.8822834645669292


In [26]:
comparison = pd.DataFrame({
    "Algorithm": ["Naïve Bayes", "kNN"],
    "Accuracy": [nb_accuracy, knn_accuracy],
    "Precision": [nb_precision, knn_precision],
    "Recall": [nb_recall, knn_recall]
})

comparison


Unnamed: 0,Algorithm,Accuracy,Precision,Recall
0,Naïve Bayes,0.869703,0.875561,0.844882
1,kNN,0.924817,0.955514,0.882283
