<a href="https://colab.research.google.com/github/prince545/ml-learning/blob/main/classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load data
df = pd.read_csv("/content/WA_Fn-UseC_-Telco-Customer-Churn.csv")

# Drop missing values
df.dropna(how='any', inplace=True)

# Convert 'TotalCharges' to numeric (in case of spaces)
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df.dropna(subset=['TotalCharges'], inplace=True)

# Encode target variable
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})

# Drop irrelevant column
df.drop('customerID', axis=1, inplace=True)

# One-hot encode categorical features
X = pd.get_dummies(df.drop('Churn', axis=1), drop_first=True)
y = df['Churn']

# Train-test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardization
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Import classifiers and metrics
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

# Models dictionary
models = {
    "KNN": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(n_estimators=200),
    "Naive Bayes": BernoulliNB(),
    "SVM": SVC(),
    "Logistic Regression": LogisticRegression(max_iter=1000)
}

# Evaluate each model
results = []

for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    acc = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    precision = report['1']['precision']
    recall = report['1']['recall']
    f1 = report['1']['f1-score']

    results.append({
        'Model': name,
        'Accuracy': round(acc * 100, 2),
        'Precision': round(precision * 100, 2),
        'Recall': round(recall * 100, 2),
        'F1-Score': round(f1 * 100, 2)
    })

# Display results
results_df = pd.DataFrame(results)
print(results_df)


                 Model  Accuracy  Precision  Recall  F1-Score
0                  KNN     75.27      53.67   50.80     52.20
1        Decision Tree     71.71      47.00   50.27     48.58
2        Random Forest     78.68      63.21   47.33     54.13
3          Naive Bayes     71.57      47.90   79.14     59.68
4                  SVM     78.11      61.62   46.79     53.19
5  Logistic Regression     78.75      62.06   51.60     56.35
