# In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import io
from google.colab import files

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# 1. Load data
# The CSV is uploaded through the Colab file picker. To read it from disk
# instead, use the line below (update the path if needed):
#df = pd.read_csv('/mnt/data/telecom_customer_churn.csv')
uploaded = files.upload()
if not uploaded:
    # Without this guard an empty upload result (e.g. a cancelled dialog)
    # surfaces as a bare StopIteration from next(iter(...)) below.
    raise ValueError("No file was uploaded; please select the customer churn CSV file.")
# Only the first uploaded entry is read; its content is decoded as UTF-8 text.
file_name = next(iter(uploaded))
df = pd.read_csv(io.StringIO(uploaded[file_name].decode('utf-8')))

# 2. Basic info
print("\nFirst 5 rows:\n", df.head())
print("\nData Info:\n")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so printed a stray "None" afterwards.
df.info()

# 3. Remove the columns that are not used for modelling (dropped in place)
columns_to_drop = [
    'Customer ID',
    'City',
    'Zip Code',
    'Latitude',
    'Longitude',
    'Churn Category',
    'Churn Reason',
]
df.drop(columns=columns_to_drop, inplace=True)

# 4. Keep only the rows whose 'Customer Status' is 'Stayed' or 'Churned'
status_mask = df['Customer Status'].isin(['Stayed', 'Churned'])
df = df[status_mask]

# 5. Fill missing values column by column (in place):
#    - 'Offer' and 'Internet Type' get the placeholder string 'None'
#    - the two 'Avg Monthly ...' columns get their own column median
long_distance_median = df['Avg Monthly Long Distance Charges'].median()
gb_download_median = df['Avg Monthly GB Download'].median()
fill_values = {
    'Offer': 'None',
    'Internet Type': 'None',
    'Avg Monthly Long Distance Charges': long_distance_median,
    'Avg Monthly GB Download': gb_download_median,
}
df.fillna(value=fill_values, inplace=True)

# 6. Label-encode every object-dtype column
# A fresh encoder is fitted per column and stored in `label_encoders`, so the
# integer codes of any column can be inspected (label_encoders[col].classes_)
# or mapped back (label_encoders[col].inverse_transform(...)). Refitting one
# shared encoder would keep only the mapping of the last column processed.
# Note: any object-dtype column is encoded here, which includes the target
# 'Customer Status' if it is still stored as strings.
label_encoders = {}
le = LabelEncoder()  # `le` stays defined; after the loop it is the last fitted encoder
for col in df.select_dtypes(include='object').columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# 7. Separate the target ('Customer Status') from the feature columns
y = df['Customer Status']
X = df.drop(columns='Customer Status')

# 8. Train-test split (20% of the rows held out for testing, fixed seed)
# stratify=y keeps the class proportions of the target the same in the
# training and test subsets, so the held-out metrics are not distorted by
# an uneven draw of the two classes.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 9. Standardise the features: the scaler is fitted on the training split
#    only, then that same fitted transform is applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# 10. Build models
# The tree-based estimators are randomised; seeding them (with the same seed
# used for the train/test split) makes the reported metrics reproducible
# from run to run.
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
}

# 11. Fit every model on the training split and report its test-split results
for name, model in models.items():
    print(f"\n\n----- {name} -----")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Text metrics: overall accuracy first, then the per-class report
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    report = classification_report(y_test, y_pred)
    print("Classification Report:\n", report)

    # Confusion matrix shown as an annotated heatmap on its own figure
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots(figsize=(5, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
    ax.set_title(f"{name} - Confusion Matrix")
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    plt.show()