# In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Build a reproducible synthetic customer-churn table.
# NOTE: every draw below consumes the shared global NumPy RNG, so the order of
# these statements determines the generated values -- do not reorder them.
np.random.seed(42)
n_samples = 1000

monthly_charges = np.random.uniform(20, 100, n_samples)
tenure = np.random.randint(1, 60, n_samples)
support_calls = np.random.poisson(2, n_samples)
internet_service = np.random.choice([0, 1], n_samples)
contract_type = np.random.choice([0, 1], n_samples)

# Churn label: a weighted sum of three risk flags plus Gaussian noise,
# thresholded at 0.4.  internet_service and contract_type do not enter the
# label, so they act as pure-noise features.
churn_score = (
    0.3 * (monthly_charges > 80)
    + 0.4 * (tenure < 12)
    + 0.2 * (support_calls > 3)
    + 0.1 * np.random.randn(n_samples)
)
churn = (churn_score > 0.4).astype(int)

# Assemble everything into one frame; column order follows insertion order.
df = pd.DataFrame(
    {
        'monthly_charges': monthly_charges,
        'tenure': tenure,
        'support_calls': support_calls,
        'internet_service': internet_service,
        'contract_type': contract_type,
        'churn': churn,
    }
)

# Feature matrix (every column except the label) and target vector.
X = df.drop(columns='churn')
y = df['churn']

# --- Train/test split and preprocessing -------------------------------------
# Split the raw features FIRST so the scaler never sees held-out rows.
# Fitting StandardScaler on all of X before splitting leaks test-set mean/std
# into training.  The split is unstratified with the same random_state, so the
# row assignment (and therefore y_train / y_test) is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # statistics learned from training rows only
X_test = scaler.transform(X_test_raw)        # reuse the training statistics
# All rows scaled with the training statistics; name kept for any later use.
X_scaled = scaler.transform(X)

# --- Final models, fit on the full training split ---------------------------
# Logistic Regression
lr = LogisticRegression(max_iter=1000, random_state=42)
lr.fit(X_train, y_train)

# Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# --- Cross-validation --------------------------------------------------------
# Cross-validate scaler+model pipelines on the RAW training features so the
# scaler is re-fit inside every fold; scoring pre-scaled data would let each
# validation fold influence its own preprocessing.  cross_val_score clones the
# estimator per fold, so the fitted `lr` / `rf` above are left untouched.
lr_pipeline = make_pipeline(StandardScaler(), lr)
rf_pipeline = make_pipeline(StandardScaler(), rf)

# 5-fold CV for Logistic Regression
cv = KFold(n_splits=5, shuffle=True, random_state=42)
cv_accuracy_scores = cross_val_score(lr_pipeline, X_train_raw, y_train, cv=cv, scoring='accuracy')
cv_accuracy_mean = cv_accuracy_scores.mean()
cv_accuracy_std = cv_accuracy_scores.std()

# Random Forest CV
rf_cv_scores = cross_val_score(rf_pipeline, X_train_raw, y_train, cv=cv, scoring='accuracy')
rf_cv_mean = rf_cv_scores.mean()
rf_cv_std = rf_cv_scores.std()

# Stratified CV for both models (each fold keeps the overall churn rate)
stratified_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
stratified_lr_scores = cross_val_score(lr_pipeline, X_train_raw, y_train, cv=stratified_cv, scoring='accuracy')
stratified_rf_scores = cross_val_score(rf_pipeline, X_train_raw, y_train, cv=stratified_cv, scoring='accuracy')

# --- Held-out evaluation -----------------------------------------------------
# The test split was previously created but never used; score the final models
# on it so the CV estimates can be compared against unseen data.
lr_test_accuracy = accuracy_score(y_test, lr.predict(X_test))
rf_test_accuracy = accuracy_score(y_test, rf.predict(X_test))

print("Logistic Regression CV Scores:", cv_accuracy_scores)
print("Mean:", cv_accuracy_mean, "Std:", cv_accuracy_std)
print("Random Forest CV Scores:", rf_cv_scores)
print("Mean:", rf_cv_mean, "Std:", rf_cv_std)
print("Stratified LR Scores:", stratified_lr_scores)
print("Stratified RF Scores:", stratified_rf_scores)
print("Logistic Regression Test Accuracy:", lr_test_accuracy)
print("Random Forest Test Accuracy:", rf_test_accuracy)
