In [None]:
# ================================
# 1. Importar librerías necesarias
# ================================
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot theme: white-grid background with the "muted" color palette
sns.set(palette="muted", style="whitegrid")

# ================================
# 2. Extract data
# ================================
# Build a small in-memory dataset of 20 customers. Columns that follow a
# repeating pattern are produced by tiling a short list (2 x 10 or 5 x 4).
data = dict(
    customerID=range(1, 21),
    gender=["Male", "Female"] * 10,
    SeniorCitizen=[0, 1] * 10,
    Partner=["Yes", "No"] * 10,
    Dependents=["No", "Yes"] * 10,
    tenure=[
        1, 12, 24, 36, 48, 60, 5, 15, 25, 35,
        2, 22, 32, 45, 50, 3, 13, 28, 40, 60,
    ],
    PhoneService=["Yes"] * 20,
    InternetService=["DSL", "Fiber optic", "No", "DSL", "Fiber optic"] * 4,
    Contract=[
        "Month-to-month", "One year", "Two year", "Month-to-month", "One year",
    ] * 4,
    PaymentMethod=[
        "Electronic check", "Mailed check", "Bank transfer", "Credit card",
        "Electronic check",
    ] * 4,
    MonthlyCharges=[29.85, 56.95, 53.85, 42.30, 70.70] * 4,
    TotalCharges=[29.85, 682.00, 1889.50, 1840.75, 151.65] * 4,
    Churn=["Yes", "No", "No", "Yes", "No"] * 4,
)

df = pd.DataFrame(data=data)

# Show the header line followed by the first rows of the frame.
print("Primeras filas del dataset:", df.head(), sep="\n")

# ================================
# 3. Transformation
# ================================
# Convert TotalCharges to numeric first. errors="coerce" turns any value that
# cannot be parsed into NaN, so this step has to run before the null report
# below; otherwise those coerced values would never be counted.
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"], errors="coerce")

# Report nulls per column, now including any NaN produced by the coercion.
print("\nValores nulos por columna:")
print(df.isnull().sum())

# New column: average spend per month of tenure. A tenure of 0 is replaced by
# 1 so the division never divides by zero.
df["AvgMonthly"] = df["TotalCharges"] / df["tenure"].replace(0, 1)

# ================================
# 4. Load the clean data
# ================================
# Write the transformed frame to CSV, leaving the row index out of the file.
df.to_csv(path_or_buf="telecomx_clean.csv", index=False)

# ================================
# 5. Exploratory analysis
# ================================

# One count plot per entry: (figure size, x column, hue column, title,
# x tick label rotation). A hue of None draws plain counts; a rotation of
# None leaves the tick labels untouched.
churn_plots = [
    ((5, 4), "Churn", None, "Distribución de clientes churn vs no churn", None),
    ((6, 4), "Contract", "Churn", "Churn por tipo de contrato", 45),
    ((6, 4), "PaymentMethod", "Churn", "Churn por método de pago", 45),
]

for fig_size, x_col, hue_col, plot_title, tick_rotation in churn_plots:
    plt.figure(figsize=fig_size)
    sns.countplot(x=x_col, hue=hue_col, data=df)
    plt.title(plot_title)
    if tick_rotation is not None:
        plt.xticks(rotation=tick_rotation)
    plt.show()

# ================================
# 6. Conclusions
# ================================
# The conclusions are computed from df rather than hard-coded. The previous
# fixed text named 'Electronic check' as the payment method with the highest
# churn, which the dataset built in this script does not support (there,
# 'Credit card' churns more often than 'Electronic check').
churned = df["Churn"].eq("Yes")

print("\nConclusiones preliminares:")
for description, column in (
    ("tipo de contrato", "Contract"),
    ("método de pago", "PaymentMethod"),
):
    # Share of churned customers per category, highest first. groupby sorts
    # the categories alphabetically and the stable sort keeps that order
    # among categories with equal rates.
    churn_rate = churned.groupby(df[column]).mean().sort_values(
        ascending=False, kind="mergesort"
    )
    print(f"- Tasa de churn por {description}:")
    for category, rate in churn_rate.items():
        print(f"    {category}: {rate:.0%}")
