In [None]:
# model_build.py
# 1. Load dataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
import joblib

# Train several classifiers on the customer-churn table, keep the one with the
# highest hold-out accuracy, and persist it together with the preprocessing
# objects (scaler, per-column label encoders, feature order) in one joblib file.
df = pd.read_csv("customer_churn_data.csv")

# 2. Basic Cleaning
df.drop_duplicates(inplace=True)
# `fillna(method="ffill")` is deprecated in recent pandas and emits a
# FutureWarning; `DataFrame.ffill` is the supported equivalent.
# NOTE: forward-fill cannot fill missing values in the very first row(s);
# any such NaNs are still present after this step.
df.ffill(inplace=True)

# 3. Encode categorical variables
# One LabelEncoder per object-typed feature column, kept so the same
# string -> integer mapping can be re-applied later.
label_encoders = {}
for col in df.select_dtypes(include=["object"]).columns:
    if col != "Churn":  # Don't encode target
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le

# 4. Features & Target
X = df.drop("Churn", axis=1)   # Drop target column
y = df["Churn"]

# Save feature names (without Churn) so the column order used for training
# is stored alongside the model.
feature_names = X.columns.tolist()

# 5. Train-Test Split (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 6. Feature Scaling
# Fit on the training split only, then apply the same transform to the test
# split so no test-set statistics enter the scaler.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 7. Train Models
models = {
    "RandomForest": RandomForestClassifier(random_state=42),
    "DecisionTree": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(),
    "SVC": SVC(probability=True, random_state=42),
    "LogisticRegression": LogisticRegression(max_iter=1000, random_state=42),
}

best_model = None
# Start below any achievable accuracy so a model is always selected, even if
# every candidate scores 0.0 (starting at 0 would leave best_model as None).
best_score = -1.0

for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    acc = accuracy_score(y_test, preds)
    print(f"{name} Accuracy: {acc:.4f}")

    # Strict '>' keeps the earliest model in `models` when accuracies tie.
    if acc > best_score:
        best_score = acc
        best_model = model

# NOTE(review): selection uses accuracy on a single hold-out split; if a
# candidate reports a perfect score, check the feature columns for
# identifiers or target leakage before trusting it -- TODO confirm.
print(f"\nBest Model: {best_model.__class__.__name__} with Accuracy: {best_score:.4f}")

# 8. Save everything
# Single artifact holding the fitted model plus everything needed to
# preprocess new rows the same way as the training data.
joblib.dump({
    "model": best_model,
    "scaler": scaler,
    "encoders": label_encoders,
    "features": feature_names
}, "joblib.jb")

print("✅ Model saved as joblib.jb")



  df.fillna(method="ffill", inplace=True)


RandomForest Accuracy: 1.0000
DecisionTree Accuracy: 1.0000
KNN Accuracy: 0.9350
SVC Accuracy: 0.9650
LogisticRegression Accuracy: 0.9150

Best Model: RandomForestClassifier with Accuracy: 1.0000
✅ Model saved as joblib.jb
