<a href="https://colab.research.google.com/github/pablocelva/challenge-telecom-x/blob/main/TelecomX_LATAM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#📌 Extracción

✅ Cargamos los datos directamente desde la API utilizando Python.

✅ Convertimos los datos a un DataFrame de Pandas para facilitar su manipulación.

In [1]:
import pandas as pd
import numpy as np

In [2]:
url = 'https://raw.githubusercontent.com/ingridcristh/challenge2-data-science-LATAM/refs/heads/main/TelecomX_Data.json'
df = pd.read_json(url)
df.head()

Unnamed: 0,customerID,Churn,customer,phone,internet,account
0,0002-ORFBO,No,"{'gender': 'Female', 'SeniorCitizen': 0, 'Part...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'DSL', 'OnlineSecurity': '...","{'Contract': 'One year', 'PaperlessBilling': '..."
1,0003-MKNFE,No,"{'gender': 'Male', 'SeniorCitizen': 0, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'Yes'}","{'InternetService': 'DSL', 'OnlineSecurity': '...","{'Contract': 'Month-to-month', 'PaperlessBilli..."
2,0004-TLHLJ,Yes,"{'gender': 'Male', 'SeniorCitizen': 0, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli..."
3,0011-IGKFF,Yes,"{'gender': 'Male', 'SeniorCitizen': 1, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli..."
4,0013-EXCHZ,Yes,"{'gender': 'Female', 'SeniorCitizen': 1, 'Part...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli..."


#🔧 Transformación

✅ Exploramos las columnas del dataset y verificamos sus tipos de datos.

✅ Identificamos las columnas más relevantes para el análisis de evasión.

📌 Tips:
🔗 Documentación de DataFrame.info()
🔗 Documentación de DataFrame.dtypes

In [3]:
df.shape

(7267, 6)

In [4]:
df.columns

Index(['customerID', 'Churn', 'customer', 'phone', 'internet', 'account'], dtype='object')

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7267 entries, 0 to 7266
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   customerID  7267 non-null   object
 1   Churn       7267 non-null   object
 2   customer    7267 non-null   object
 3   phone       7267 non-null   object
 4   internet    7267 non-null   object
 5   account     7267 non-null   object
dtypes: object(6)
memory usage: 340.8+ KB


In [6]:
df.dtypes

Unnamed: 0,0
customerID,object
Churn,object
customer,object
phone,object
internet,object
account,object


In [7]:
if 'customerID' in df.columns:
 df.drop('customerID', axis=1, inplace=True)
df.head()

Unnamed: 0,Churn,customer,phone,internet,account
0,No,"{'gender': 'Female', 'SeniorCitizen': 0, 'Part...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'DSL', 'OnlineSecurity': '...","{'Contract': 'One year', 'PaperlessBilling': '..."
1,No,"{'gender': 'Male', 'SeniorCitizen': 0, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'Yes'}","{'InternetService': 'DSL', 'OnlineSecurity': '...","{'Contract': 'Month-to-month', 'PaperlessBilli..."
2,Yes,"{'gender': 'Male', 'SeniorCitizen': 0, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli..."
3,Yes,"{'gender': 'Male', 'SeniorCitizen': 1, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli..."
4,Yes,"{'gender': 'Female', 'SeniorCitizen': 1, 'Part...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli..."


✅ Comprobamos incoherencias e inconsistencias en los datos, verificando si hay problemas en los datos que puedan afectar el análisis.

✅ Prestamos atención a valores ausentes, duplicados, errores de formato e inconsistencias en las categorías.

📌 Tips:
🔗 Documentación de pandas.unique()
🔗 Documentación de pandas.Series.dt.normalize()

In [8]:
df['Churn'].sample(2)

Unnamed: 0,Churn
3807,Yes
2673,No


In [9]:
df['account'].sample(2)

Unnamed: 0,account
2171,"{'Contract': 'Month-to-month', 'PaperlessBilli..."
4522,"{'Contract': 'Month-to-month', 'PaperlessBilli..."


In [10]:
df['account'][1261]

{'Contract': 'Two year',
 'PaperlessBilling': 'No',
 'PaymentMethod': 'Bank transfer (automatic)',
 'Charges': {'Monthly': 80.45, 'Total': '5737.6'}}

In [11]:
df['customer'].sample(2)

Unnamed: 0,customer
3346,"{'gender': 'Female', 'SeniorCitizen': 1, 'Part..."
4396,"{'gender': 'Female', 'SeniorCitizen': 1, 'Part..."


In [12]:
df['customer'][1546]

{'gender': 'Male',
 'SeniorCitizen': 0,
 'Partner': 'No',
 'Dependents': 'No',
 'tenure': 56}

In [13]:
pd.unique(df['Churn'])

array(['No', 'Yes', ''], dtype=object)

In [14]:
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0, ' ': 1})

In [15]:
df['Churn'] = df['Churn'].fillna(0)

In [16]:
df['Churn'] = df['Churn'].astype(int)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7267 entries, 0 to 7266
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Churn     7267 non-null   int64 
 1   customer  7267 non-null   object
 2   phone     7267 non-null   object
 3   internet  7267 non-null   object
 4   account   7267 non-null   object
dtypes: int64(1), object(4)
memory usage: 284.0+ KB


In [17]:
pd.unique(df['Churn'])

array([0, 1])

In [19]:
df['Churn'].isna().sum()

np.int64(0)

In [20]:
df['Churn'].isnull().sum()

np.int64(0)

In [21]:
pd.json_normalize(df['customer']).sample(2)

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure
2007,Male,0,No,No,71
2359,Male,1,No,No,11


In [22]:
df = pd.concat([df.drop(['customer'], axis=1), pd.json_normalize(df['customer'])], axis=1)
df.head()

Unnamed: 0,Churn,phone,internet,account,gender,SeniorCitizen,Partner,Dependents,tenure
0,0,"{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'DSL', 'OnlineSecurity': '...","{'Contract': 'One year', 'PaperlessBilling': '...",Female,0,Yes,Yes,9
1,0,"{'PhoneService': 'Yes', 'MultipleLines': 'Yes'}","{'InternetService': 'DSL', 'OnlineSecurity': '...","{'Contract': 'Month-to-month', 'PaperlessBilli...",Male,0,No,No,9
2,1,"{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli...",Male,0,No,No,4
3,1,"{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli...",Male,1,Yes,No,13
4,1,"{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli...",Female,1,Yes,No,3


In [23]:
pd.unique(df['tenure'])

array([ 9,  4, 13,  3, 71, 63,  7, 65, 54, 72,  5, 56, 34,  1, 45, 50, 23,
       55, 26, 69, 11, 37, 49, 66, 67, 20, 43, 59, 12, 27,  2, 25, 29, 14,
       35, 64, 39, 40,  6, 30, 70, 57, 58, 16, 32, 33, 10, 21, 61, 15, 44,
       22, 24, 19, 47, 62, 46, 52,  8, 60, 48, 28, 41, 53, 68, 51, 31, 36,
       17, 18, 38, 42,  0])

In [24]:
pd.json_normalize(df['account']).sample(2)

Unnamed: 0,Contract,PaperlessBilling,PaymentMethod,Charges.Monthly,Charges.Total
515,Two year,No,Credit card (automatic),90.3,6287.3
118,Two year,Yes,Bank transfer (automatic),102.4,6471.85


In [25]:
df = pd.concat([df.drop(['account'], axis=1), pd.json_normalize(df['account'], sep='_')], axis=1)
df.head()

Unnamed: 0,Churn,phone,internet,gender,SeniorCitizen,Partner,Dependents,tenure,Contract,PaperlessBilling,PaymentMethod,Charges_Monthly,Charges_Total
0,0,"{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'DSL', 'OnlineSecurity': '...",Female,0,Yes,Yes,9,One year,Yes,Mailed check,65.6,593.3
1,0,"{'PhoneService': 'Yes', 'MultipleLines': 'Yes'}","{'InternetService': 'DSL', 'OnlineSecurity': '...",Male,0,No,No,9,Month-to-month,No,Mailed check,59.9,542.4
2,1,"{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...",Male,0,No,No,4,Month-to-month,Yes,Electronic check,73.9,280.85
3,1,"{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...",Male,1,Yes,No,13,Month-to-month,Yes,Electronic check,98.0,1237.85
4,1,"{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...",Female,1,Yes,No,3,Month-to-month,Yes,Mailed check,83.9,267.4


In [26]:
pd.unique(df['Charges_Total'])

array(['593.3', '542.4', '280.85', ..., '742.9', '4627.65', '3707.6'],
      dtype=object)

In [27]:
pd.json_normalize(df['phone']).sample(2)

Unnamed: 0,PhoneService,MultipleLines
4525,Yes,Yes
687,Yes,No


In [28]:
df = pd.concat([df.drop(['phone'], axis=1), pd.json_normalize(df['phone'])], axis=1)
df.head()

Unnamed: 0,Churn,internet,gender,SeniorCitizen,Partner,Dependents,tenure,Contract,PaperlessBilling,PaymentMethod,Charges_Monthly,Charges_Total,PhoneService,MultipleLines
0,0,"{'InternetService': 'DSL', 'OnlineSecurity': '...",Female,0,Yes,Yes,9,One year,Yes,Mailed check,65.6,593.3,Yes,No
1,0,"{'InternetService': 'DSL', 'OnlineSecurity': '...",Male,0,No,No,9,Month-to-month,No,Mailed check,59.9,542.4,Yes,Yes
2,1,"{'InternetService': 'Fiber optic', 'OnlineSecu...",Male,0,No,No,4,Month-to-month,Yes,Electronic check,73.9,280.85,Yes,No
3,1,"{'InternetService': 'Fiber optic', 'OnlineSecu...",Male,1,Yes,No,13,Month-to-month,Yes,Electronic check,98.0,1237.85,Yes,No
4,1,"{'InternetService': 'Fiber optic', 'OnlineSecu...",Female,1,Yes,No,3,Month-to-month,Yes,Mailed check,83.9,267.4,Yes,No


In [29]:
pd.unique(df['MultipleLines'])

array(['No', 'Yes', 'No phone service'], dtype=object)

In [30]:
pd.json_normalize(df['internet']).sample(2)

Unnamed: 0,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies
6480,DSL,No,No,No,No,No,No
5443,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service


In [31]:
df = pd.concat([df.drop(['internet'], axis=1), pd.json_normalize(df['internet'])], axis=1)
df.head()

Unnamed: 0,Churn,gender,SeniorCitizen,Partner,Dependents,tenure,Contract,PaperlessBilling,PaymentMethod,Charges_Monthly,Charges_Total,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies
0,0,Female,0,Yes,Yes,9,One year,Yes,Mailed check,65.6,593.3,Yes,No,DSL,No,Yes,No,Yes,Yes,No
1,0,Male,0,No,No,9,Month-to-month,No,Mailed check,59.9,542.4,Yes,Yes,DSL,No,No,No,No,No,Yes
2,1,Male,0,No,No,4,Month-to-month,Yes,Electronic check,73.9,280.85,Yes,No,Fiber optic,No,No,Yes,No,No,No
3,1,Male,1,Yes,No,13,Month-to-month,Yes,Electronic check,98.0,1237.85,Yes,No,Fiber optic,No,Yes,Yes,No,Yes,Yes
4,1,Female,1,Yes,No,3,Month-to-month,Yes,Mailed check,83.9,267.4,Yes,No,Fiber optic,No,No,No,Yes,Yes,No


In [32]:
pd.unique(df['StreamingMovies'])

array(['No', 'Yes', 'No internet service'], dtype=object)

In [33]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7267 entries, 0 to 7266
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Churn             7267 non-null   int64  
 1   gender            7267 non-null   object 
 2   SeniorCitizen     7267 non-null   int64  
 3   Partner           7267 non-null   object 
 4   Dependents        7267 non-null   object 
 5   tenure            7267 non-null   int64  
 6   Contract          7267 non-null   object 
 7   PaperlessBilling  7267 non-null   object 
 8   PaymentMethod     7267 non-null   object 
 9   Charges_Monthly   7267 non-null   float64
 10  Charges_Total     7267 non-null   object 
 11  PhoneService      7267 non-null   object 
 12  MultipleLines     7267 non-null   object 
 13  InternetService   7267 non-null   object 
 14  OnlineSecurity    7267 non-null   object 
 15  OnlineBackup      7267 non-null   object 
 16  DeviceProtection  7267 non-null   object 


In [34]:
df.shape

(7267, 20)

In [35]:
df['Charges_Total'].isnull().sum()

np.int64(0)

In [36]:
df['Charges_Total'].isna().sum()

np.int64(0)

In [37]:
df['Charges_Total'].sample(2)

Unnamed: 0,Charges_Total
554,1108.8
2501,3282.75


In [38]:
df['Charges_Total'] = df['Charges_Total'].replace(' ', np.nan)

In [39]:
df['Charges_Total'] = df['Charges_Total'].astype(np.float64)

In [40]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7267 entries, 0 to 7266
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Churn             7267 non-null   int64  
 1   gender            7267 non-null   object 
 2   SeniorCitizen     7267 non-null   int64  
 3   Partner           7267 non-null   object 
 4   Dependents        7267 non-null   object 
 5   tenure            7267 non-null   int64  
 6   Contract          7267 non-null   object 
 7   PaperlessBilling  7267 non-null   object 
 8   PaymentMethod     7267 non-null   object 
 9   Charges_Monthly   7267 non-null   float64
 10  Charges_Total     7256 non-null   float64
 11  PhoneService      7267 non-null   object 
 12  MultipleLines     7267 non-null   object 
 13  InternetService   7267 non-null   object 
 14  OnlineSecurity    7267 non-null   object 
 15  OnlineBackup      7267 non-null   object 
 16  DeviceProtection  7267 non-null   object 


✅ Ahora que los datos están limpios, es momento de crear la columna "Cuentas_Diarias". Utilizamos la facturación mensual para calcular el valor diario, proporcionando una visión más detallada del comportamiento de los clientes a lo largo del tiempo.

In [41]:
df['Cuentas_Diarias'] = df['Charges_Monthly'] / 30
df.head()

Unnamed: 0,Churn,gender,SeniorCitizen,Partner,Dependents,tenure,Contract,PaperlessBilling,PaymentMethod,Charges_Monthly,...,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Cuentas_Diarias
0,0,Female,0,Yes,Yes,9,One year,Yes,Mailed check,65.6,...,Yes,No,DSL,No,Yes,No,Yes,Yes,No,2.186667
1,0,Male,0,No,No,9,Month-to-month,No,Mailed check,59.9,...,Yes,Yes,DSL,No,No,No,No,No,Yes,1.996667
2,1,Male,0,No,No,4,Month-to-month,Yes,Electronic check,73.9,...,Yes,No,Fiber optic,No,No,Yes,No,No,No,2.463333
3,1,Male,1,Yes,No,13,Month-to-month,Yes,Electronic check,98.0,...,Yes,No,Fiber optic,No,Yes,Yes,No,Yes,Yes,3.266667
4,1,Female,1,Yes,No,3,Month-to-month,Yes,Mailed check,83.9,...,Yes,No,Fiber optic,No,No,No,Yes,Yes,No,2.796667


✅ La estandarización y transformación de datos busca hacer que la información sea más consistente, comprensible y adecuada para el análisis. Durante esta fase convertimos valores textuales como "Sí" y "No" en valores binarios (1 y 0), lo que facilita el procesamiento matemático y la aplicación de modelos analíticos.

✅ Además, traducir o renombrar columnas y datos hace que la información sea más accesible y fácil de entender, especialmente cuando se trabaja con fuentes externas o términos técnicos.

In [42]:
df.sample(3)

Unnamed: 0,Churn,gender,SeniorCitizen,Partner,Dependents,tenure,Contract,PaperlessBilling,PaymentMethod,Charges_Monthly,...,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Cuentas_Diarias
4985,0,Female,1,Yes,No,72,Two year,No,Bank transfer (automatic),24.3,...,Yes,Yes,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,0.81
1323,0,Female,0,Yes,No,36,Month-to-month,No,Electronic check,80.4,...,Yes,No,Fiber optic,No,No,No,No,No,Yes,2.68
5203,0,Male,0,No,Yes,15,One year,No,Mailed check,64.85,...,Yes,No,DSL,Yes,No,Yes,No,No,Yes,2.161667


In [43]:
df['PhoneService'] = df['PhoneService'].map({'Yes': 1, 'No': 0})
df.sample(3)

Unnamed: 0,Churn,gender,SeniorCitizen,Partner,Dependents,tenure,Contract,PaperlessBilling,PaymentMethod,Charges_Monthly,...,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Cuentas_Diarias
1285,1,Female,1,No,No,1,Month-to-month,Yes,Electronic check,70.15,...,1,No,Fiber optic,No,No,No,No,No,No,2.338333
6778,0,Male,0,Yes,No,38,One year,Yes,Electronic check,110.7,...,1,Yes,Fiber optic,Yes,No,Yes,Yes,Yes,Yes,3.69
793,0,Female,0,No,Yes,44,One year,Yes,Mailed check,77.55,...,1,Yes,DSL,No,Yes,No,No,Yes,Yes,2.585


In [44]:
df['PaperlessBilling'] = df['PaperlessBilling'].map({'Yes': 1, 'No': 0})
df.sample(3)

Unnamed: 0,Churn,gender,SeniorCitizen,Partner,Dependents,tenure,Contract,PaperlessBilling,PaymentMethod,Charges_Monthly,...,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Cuentas_Diarias
4868,0,Male,0,No,No,50,One year,0,Credit card (automatic),114.35,...,1,Yes,Fiber optic,Yes,Yes,Yes,Yes,Yes,Yes,3.811667
3265,1,Male,1,Yes,No,49,Month-to-month,0,Credit card (automatic),90.05,...,1,Yes,Fiber optic,No,Yes,No,No,No,Yes,3.001667
380,0,Female,0,Yes,Yes,14,Month-to-month,0,Mailed check,25.55,...,1,Yes,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,0.851667


In [45]:
df['Partner'] = df['Partner'].map({'Yes': 1, 'No': 0})
df.sample(3)

Unnamed: 0,Churn,gender,SeniorCitizen,Partner,Dependents,tenure,Contract,PaperlessBilling,PaymentMethod,Charges_Monthly,...,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Cuentas_Diarias
3382,1,Male,0,1,Yes,18,Month-to-month,0,Electronic check,57.45,...,1,No,DSL,No,No,Yes,No,No,Yes,1.915
2186,0,Male,0,0,No,14,Month-to-month,0,Mailed check,19.9,...,1,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,0.663333
1300,1,Male,1,0,No,41,Month-to-month,1,Electronic check,98.4,...,1,Yes,Fiber optic,No,No,Yes,No,Yes,Yes,3.28


In [46]:
df['Dependents'] = df['Dependents'].map({'Yes': 1, 'No': 0})
df.sample(3)

Unnamed: 0,Churn,gender,SeniorCitizen,Partner,Dependents,tenure,Contract,PaperlessBilling,PaymentMethod,Charges_Monthly,...,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Cuentas_Diarias
1143,1,Female,0,1,0,42,One year,0,Mailed check,33.55,...,0,No phone service,DSL,No,Yes,No,Yes,No,No,1.118333
2864,0,Male,0,0,0,71,Two year,1,Credit card (automatic),19.9,...,1,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,0.663333
4865,0,Female,0,0,0,11,Month-to-month,1,Electronic check,64.9,...,1,No,DSL,No,No,No,No,Yes,Yes,2.163333


In [47]:
df['Dependents'].isnull().sum()

np.int64(0)

In [48]:
columns_to_map = ['MultipleLines', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies']

for col in columns_to_map:
    df[col] = df[col].map({'Yes': 1, 'No': 0, 'No phone service': 0, 'No internet service': 0})

df.head()

Unnamed: 0,Churn,gender,SeniorCitizen,Partner,Dependents,tenure,Contract,PaperlessBilling,PaymentMethod,Charges_Monthly,...,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Cuentas_Diarias
0,0,Female,0,1,1,9,One year,1,Mailed check,65.6,...,1,0,DSL,0,1,0,1,1,0,2.186667
1,0,Male,0,0,0,9,Month-to-month,0,Mailed check,59.9,...,1,1,DSL,0,0,0,0,0,1,1.996667
2,1,Male,0,0,0,4,Month-to-month,1,Electronic check,73.9,...,1,0,Fiber optic,0,0,1,0,0,0,2.463333
3,1,Male,1,1,0,13,Month-to-month,1,Electronic check,98.0,...,1,0,Fiber optic,0,1,1,0,1,1,3.266667
4,1,Female,1,1,0,3,Month-to-month,1,Mailed check,83.9,...,1,0,Fiber optic,0,0,0,1,1,0,2.796667


In [51]:
df = df.rename(columns={'Charges_Monthly': 'Cuenta_Mensual'})

In [54]:
df = df.rename(columns={'Charges_Total': 'Cuenta_Total'})

In [55]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7267 entries, 0 to 7266
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Churn             7267 non-null   int64  
 1   gender            7267 non-null   object 
 2   SeniorCitizen     7267 non-null   int64  
 3   Partner           7267 non-null   int64  
 4   Dependents        7267 non-null   int64  
 5   tenure            7267 non-null   int64  
 6   Contract          7267 non-null   object 
 7   PaperlessBilling  7267 non-null   int64  
 8   PaymentMethod     7267 non-null   object 
 9   Cuenta_Mensual    7267 non-null   float64
 10  Cuenta_Total      7256 non-null   float64
 11  PhoneService      7267 non-null   int64  
 12  MultipleLines     7267 non-null   int64  
 13  InternetService   7267 non-null   object 
 14  OnlineSecurity    7267 non-null   int64  
 15  OnlineBackup      7267 non-null   int64  
 16  DeviceProtection  7267 non-null   int64  


#📊 Carga y análisis

#📄Informe final