# Customer Churn Model Training

In [1]:
# churn_model_training_py313.py
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import joblib
import os

# ----------------------------
# 1️⃣ Load Dataset
# ----------------------------
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")

# Convert 'TotalCharges' to numeric safely
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df['TotalCharges'] = df['TotalCharges'].fillna(df['TotalCharges'].median())

# ----------------------------
# 2️⃣ Encode Categorical Columns
# ----------------------------
encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    if col != 'customerID':  # skip ID
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        encoders[col] = le  # save encoder

# Drop customerID column
df = df.drop('customerID', axis=1)

# ----------------------------
# 3️⃣ Split Features & Labels
# ----------------------------
X = df.drop('Churn', axis=1)
y = df['Churn']

# Scale numeric features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train/Test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ----------------------------
# 4️⃣ Train Models
# ----------------------------
models = {
    'log_reg.pkl': LogisticRegression(max_iter=1000),
    'decision_tree.pkl': DecisionTreeClassifier(max_depth=5),
    'random_forest.pkl': RandomForestClassifier(n_estimators=100),
    'svm.pkl': SVC(probability=True)
}

# Ensure models folder exists
os.makedirs("models", exist_ok=True)

for filename, model in models.items():
    model.fit(X_train, y_train)
    joblib.dump(model, f"models/{filename}")
    print(f"✅ Saved {filename}")

# ----------------------------
# 5️⃣ Save Scaler & Encoders
# ----------------------------
joblib.dump(scaler, "models/scaler.pkl")
joblib.dump(encoders, "models/encoders.pkl")
print("✅ Saved scaler.pkl and encoders.pkl")

# ----------------------------
# 6️⃣ Optional: Check Accuracy
# ----------------------------
for name, model in models.items():
    score = model.score(X_test, y_test)
    print(f"{name} test accuracy: {score:.3f}")


✅ Saved log_reg.pkl
✅ Saved decision_tree.pkl
✅ Saved random_forest.pkl
✅ Saved svm.pkl
✅ Saved scaler.pkl and encoders.pkl
log_reg.pkl test accuracy: 0.815
decision_tree.pkl test accuracy: 0.794
random_forest.pkl test accuracy: 0.787
svm.pkl test accuracy: 0.807
