In [123]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
df = pd.read_csv('data/customer-churn.csv')
df.head()

In [None]:
categorical_columns = df.select_dtypes(include=['object']).columns.tolist()
numerical_columns = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
numerical_columns, categorical_columns

In [None]:
one_hot_columns = ['MultipleLines', 'InternetService', 'Contract', 'PaymentMethod', 'StreamingTV', 'StreamingMovies', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport']

df = pd.get_dummies(df, columns=one_hot_columns)
df.head(7)

In [None]:
dict_map = {"Yes":1, "No":0, "Male":1, "Female":0, "True":1, "False":0}

map_columns = ['gender', 'Partner', 'Dependents', 'PhoneService', 'PaperlessBilling']
for column in map_columns:
    print(df[column].unique(), column)
    df[column] = df[column].map(dict_map)

df

In [None]:
X = df.drop(columns=['Churn'])
X = X.select_dtypes(include=['int64', 'float64'])
y = df['Churn']

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=42)

std = StandardScaler()
X_train = std.fit_transform(X_train)
X_test = std.transform(X_test)

In [128]:
k = 5
model = KNeighborsClassifier(n_neighbors=k)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

acc = accuracy_score(y_test, y_pred)
con = confusion_matrix(y_test, y_pred)
cls = classification_report(y_test, y_pred)

print("Accuracy:", acc)
print("Confusion Matrix:\n", con)
print("Classification Report:\n", cls)

Accuracy: 0.7686302342086586
Confusion Matrix:
 [[896 140]
 [186 187]]
Classification Report:
               precision    recall  f1-score   support

          No       0.83      0.86      0.85      1036
         Yes       0.57      0.50      0.53       373

    accuracy                           0.77      1409
   macro avg       0.70      0.68      0.69      1409
weighted avg       0.76      0.77      0.76      1409

