In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
import joblib

In [2]:

# -----------------------------
# 1. Load the dataset
# -----------------------------
df = pd.read_csv("../data/churn_dataset_300.csv")

# The "Churn" column is the prediction target. The feature frame is every
# other column except "CustomerID".
y = df["Churn"]
X = df.drop(columns=["CustomerID", "Churn"])

In [3]:

# -----------------------------
# 2. Preprocessing
# -----------------------------
# Column groups; each group is routed to its own transformer below.
cat_features = ["Contract"]
num_features = ["Tenure", "MonthlyCharges", "Complaints", "PaymentLate"]

# One-hot encode the categorical column, dropping the first level.
# NOTE(review): the encoder keeps scikit-learn's default handling of unknown
# categories, so a "Contract" value not seen during fit is expected to raise
# at predict time -- confirm that this is the intended behaviour.
contract_encoder = OneHotEncoder(drop="first")

# Numeric columns are forwarded unchanged ("passthrough").
column_steps = [
    ("cat", contract_encoder, cat_features),
    ("num", "passthrough", num_features),
]
preprocessor = ColumnTransformer(transformers=column_steps)

# -----------------------------
# 3. Pipeline with the model
# -----------------------------
# Random forest of 200 trees with a fixed seed for reproducibility;
# class_weight="balanced" asks scikit-learn to weight the classes.
forest = RandomForestClassifier(
    n_estimators=200,
    class_weight="balanced",
    random_state=42,
)

# Chain preprocessing and the classifier so both are fitted (and later
# persisted) as a single object.
pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", forest),
    ]
)

# -----------------------------
# 4. Train/test split + training
# -----------------------------
# Hold out 30% of the rows for evaluation. The split is stratified on the
# target and uses a fixed seed so it is repeatable.
split_parts = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

# Fit preprocessing and classifier together on the training portion only.
pipeline.fit(X_train, y_train)

# -----------------------------
# 5. Evaluation
# -----------------------------
# Hard class predictions feed the classification report.
y_pred = pipeline.predict(X_test)

# ROC-AUC is scored on the second column of predict_proba.
class_probabilities = pipeline.predict_proba(X_test)
y_proba = class_probabilities[:, 1]

auc = roc_auc_score(y_test, y_proba)
report = classification_report(y_test, y_pred)

print("ROC-AUC:", auc)
print(report)

ROC-AUC: 0.9993990384615384
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        64
           1       1.00      0.92      0.96        26

    accuracy                           0.98        90
   macro avg       0.98      0.96      0.97        90
weighted avg       0.98      0.98      0.98        90



In [None]:

# -----------------------------
# 6. Save the trained model
# -----------------------------
# Refit the same pipeline on the full dataset (X, y) before persisting it.
# Any metrics computed from the earlier train/test split were obtained
# before this refit, not from the model that is saved here.
final_model = pipeline.fit(X, y)  # fit() returns the pipeline itself
joblib.dump(final_model, "../data/churn_model.pkl")
print("✅ Modelo guardado en ../data/churn_model.pkl")

✅ Modelo guardado en ../data/churn_model.pkl


In [None]:

# -----------------------------
# 7. Example: load the saved model and predict
# -----------------------------
# The example below is commented out; uncomment it to load the pipeline
# written to ../data/churn_model.pkl and score a single hand-built customer row.
# NOTE(review): the "Contract" value must match a category present in the
# training data -- verify the exact spelling against the dataset.
# pipeline_loaded = joblib.load("../data/churn_model.pkl")
# nuevo_cliente = pd.DataFrame([{
#     "Tenure": 5,
#     "MonthlyCharges": 95,
#     "Contract": "Month-to-Month",
#     "Complaints": 2,
#     "PaymentLate": 1
# }])
# print("Predicción churn:", pipeline_loaded.predict(nuevo_cliente))
# print("Probabilidad churn:", pipeline_loaded.predict_proba(nuevo_cliente))
