In [None]:
import pandas as pd 
import numpy as np

# Read the Telco customer-churn CSV (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="WA_Fn-UseC_-Telco-Customer-Churn.csv")

# Plain-text preview of the leading rows, followed by the per-column
# dtype / non-null summary.
print(df.head())
df.info()

In [None]:
# Convert TotalCharges to a numeric dtype. With errors="coerce", any entry that
# cannot be parsed as a number becomes NaN instead of raising.
df["TotalCharges"] = df["TotalCharges"].pipe(pd.to_numeric, errors="coerce")

# Per-column count of missing values, including NaNs produced by the coercion.
print(df.isnull().sum())

In [None]:
from sklearn.impute import SimpleImputer

# Replace missing TotalCharges values with the column mean.
# NOTE(review): the imputer is fit on the whole frame, i.e. before the
# train/test split performed later in this notebook, so the mean is computed
# from rows that end up in the test set as well.
imputer = SimpleImputer(strategy="mean")

# fit_transform returns an (n_rows, 1) array; flatten it to 1-D before writing
# it back into the column.
df["TotalCharges"] = imputer.fit_transform(df[["TotalCharges"]]).ravel()

In [None]:
# Categorical encoding (dummy variables).
# Drop customerID first so it is not passed to get_dummies. The drop is
# non-mutating and uses errors="ignore", so re-running this cell after the
# column is already gone no longer raises KeyError.
df = df.drop(columns="customerID", errors="ignore")

# Encode the remaining non-numeric columns as indicator columns; drop_first=True
# omits the first level of each encoded column.
df_prepared = pd.get_dummies(df, drop_first=True)

print(df_prepared.head())

In [None]:
from sklearn.model_selection import train_test_split

# Separate the features from the target column "Churn_Yes" (presumably the
# indicator get_dummies created from a Churn column -- confirm against the
# encoded frame).
# The feature matrix is cast to float64 explicitly: pandas >= 2.0 emits boolean
# dummy columns, and mixing those with numeric columns makes `.values` return
# an object-dtype array. The explicit cast gives a numeric matrix on every
# pandas version.
X = df_prepared.drop(columns="Churn_Yes").to_numpy(dtype="float64")
y = df_prepared["Churn_Yes"].values

# Hold out 20% of the rows for testing; stratify=y keeps the class proportions
# the same in both partitions, and random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Train set dimensions: {X_train.shape}")
print(f"Test set dimensions: {X_test.shape}")

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, KFold

# Two-stage model: standardise the features, then fit a logistic regression.
# The step names ("scaler", "logreg") are the prefixes used when addressing
# step parameters, e.g. "logreg__C".
steps = [("scaler", StandardScaler()), ("logreg", LogisticRegression())]

pipeline = Pipeline(steps=steps)


In [None]:
# 5-fold cross-validation on the training partition only.
# NOTE(review): the folds are shuffled but not stratified by class, unlike the
# stratified train/test split earlier in this notebook.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Accuracy is requested explicitly so the metric matches the label printed below.
cv_results = cross_val_score(pipeline, X_train, y_train, cv=kf, scoring="accuracy")

print(f"Cross-Validation Scores: {cv_results}")
print(f"Average Success (Accuracy): {cv_results.mean():.4f}")

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Fit the pipeline on the training partition, then predict labels for the
# held-out test partition (fit returns the fitted pipeline itself).
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

# Print the detailed per-class report.
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score
import matplotlib.pyplot as plt

# Scores for the ROC analysis: column 1 of predict_proba, i.e. the probability
# assigned to the second class.
y_pred_probs = pipeline.predict_proba(X_test)[:, 1]

fpr, tpr, thresholds = roc_curve(y_test, y_pred_probs)

# Dashed black diagonal as a reference line, then the model's ROC curve.
plt.plot((0, 1), (0, 1), "k--")
plt.plot(fpr, tpr)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Logistic Regression ROC Curve")
plt.show()

print(f"ROC AUC Score: {roc_auc_score(y_test, y_pred_probs):.4f}")

In [None]:
from sklearn.model_selection import GridSearchCV
# Low C = strong regularization.
# Keys use the "<step>__<param>" form to reach the "logreg" pipeline step.
param_grid = dict(
    logreg__C=[0.001, 0.01, 0.1, 1, 10, 100],
    logreg__solver=["liblinear", "lbfgs"],
)

# Search every C/solver combination, scoring each by ROC AUC over the same
# folds (`kf`) used for the earlier cross-validation.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=kf,
    scoring="roc_auc",
)

grid_search.fit(X_train, y_train)

print(f"Best Parameter: {grid_search.best_params_}")
print(f"Best AUC Score: {grid_search.best_score_:.4f}")

In [None]:
from sklearn.metrics import classification_report, roc_auc_score

# Best pipeline found by the grid search.
best_model = grid_search.best_estimator_

# Hard labels for the classification report, and column-1 probabilities for AUC.
y_pred_final = best_model.predict(X_test)
y_pred_probs_final = best_model.predict_proba(X_test)[:, 1]

print("--- FINAL MODEL REPORT ---")
print(classification_report(y_test, y_pred_final))
print(f"Final ROC AUC Score: {roc_auc_score(y_test, y_pred_probs_final):.4f}")