In [1]:
import pickle
from pathlib import Path

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, roc_auc_score, precision_score,
    recall_score, f1_score, matthews_corrcoef,
    confusion_matrix, classification_report
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Train a churn classifier end to end: load the CSV, split it, standardize the
# features, fit a logistic regression, print hold-out metrics, and pickle the
# fitted model and scaler under model/.

# Step 1: Load dataset
df = pd.read_csv("Customer Churn.csv")

# Step 2: Features and target
# Every column except "Churn" is used as a feature.
# NOTE(review): StandardScaler needs numeric input, so this assumes all
# remaining columns are already numeric -- confirm against the CSV schema.
X = df.drop("Churn", axis=1)
y = df["Churn"]

# Step 3: Train-test split
# 20% of the rows are held out for evaluation. stratify=y keeps the class
# proportions of the target the same in both parts, and the fixed random_state
# makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 4: Scale features
# The scaler is fitted on the training split only and then applied unchanged
# to the test split, so no test-set statistics leak into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 5: Train logistic regression
# max_iter is raised to 2000 to give the lbfgs solver more iterations to converge.
log_reg = LogisticRegression(max_iter=2000, solver="lbfgs")
log_reg.fit(X_train_scaled, y_train)

# Step 6: Predictions
# y_pred holds hard class labels; y_proba holds the probability of the class
# listed second in log_reg.classes_ and is used only for the AUC.
y_pred = log_reg.predict(X_test_scaled)
y_proba = log_reg.predict_proba(X_test_scaled)[:, 1]

# Step 7: Metrics
# AUC is computed from probabilities; all other metrics use the hard labels.
# NOTE(review): precision/recall/F1 are called with their default positive
# label -- presumably churn is encoded as 1; verify against the data.
accuracy = accuracy_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_proba)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
mcc = matthews_corrcoef(y_test, y_pred)

print("Accuracy:", accuracy)
print("AUC Score:", auc)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Matthews Correlation Coefficient:", mcc)

# Step 8: Save model and scaler
# Create the output directory first: open(..., "wb") does not create missing
# parent directories, so on a fresh checkout the cell would otherwise fail
# with FileNotFoundError after training has already finished.
model_dir = Path("model")
model_dir.mkdir(parents=True, exist_ok=True)

# The scaler is saved alongside the model because inputs must be transformed
# with the same fitted scaler before calling the model at inference time.
with open(model_dir / "logistic_churn_model.pkl", "wb") as f:
    pickle.dump(log_reg, f)

with open(model_dir / "scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

print("✅ Logistic Regression model and scaler saved as pickle files.")


Accuracy: 0.8968253968253969
AUC Score: 0.9207898190949039
Precision: 0.84
Recall: 0.42424242424242425
F1 Score: 0.5637583892617449
Matthews Correlation Coefficient: 0.5509047964827836
✅ Logistic Regression model and scaler saved as pickle files.
