# Fase 1: Comprender el Negocio

Contexto del negocio: El banco Dormammu ha adquirido a Monopoly y necesita entender el comportamiento
de sus clientes para preparar una estrategia adecuada de abordaje.
Objetivos:
1. Conocer a los clientes a través de sus patrones de comportamiento.
2. Identificar qué productos financieros usan más frecuentemente.
3. Proponer una estrategia de marketing basada en el análisis de datos.


# Preguntas planteadas:
#### 1. ¿Cuáles son los productos financieros más utilizados?
#### 2. ¿Qué tipo de clientes tienen más propensión a solicitar créditos?
#### 3. ¿Existe un grupo de clientes que tienda a permanecer más tiempo con productos bancarios específicos?

# Carga de librerias


In [404]:
# Importar las librerías necesarias
import pandas as pd # type: ignore
import numpy as np # type: ignore
import seaborn as sns # type: ignore
import matplotlib.pyplot as plt # type: ignore
from sklearn.preprocessing import StandardScaler # type: ignore

# Convertir DataSet Xlsx a CSV y guardar en ruta establecida 

In [405]:
# Ruta al archivo Excel y CSV
file_excel = "dataSet/Base_clientes_Monopoly.xlsx"
file_csv = "dataSet/Base_clientes_Monopoly.csv"

# Intentar abrir el archivo CSV
try:
    # Intentar leer el archivo CSV
    with open(file_csv, 'r') as f:
        print(f"El archivo '{file_csv}' ya existe. No se creará nuevamente.")
except FileNotFoundError:
    # Si el archivo no existe, leer el archivo Excel y convertirlo a CSV
    df = pd.read_excel(file_excel, sheet_name="Transición de Negocio")
    df.to_csv(file_csv, index=False)
    print("Archivo convertido a CSV con éxito")


El archivo 'dataSet/Base_clientes_Monopoly.csv' ya existe. No se creará nuevamente.


# Carga Inicial del Data Set

In [406]:
# Leer el archivo CSV (convertido previamente desde Excel)
file_path = "dataSet/Base_clientes_Monopoly.csv"
df = pd.read_csv(file_path)

  df = pd.read_csv(file_path)


# Fase 2: Comprender los Datos

In [407]:
# Visualizamos los primeros datos show
df.head(10)

Unnamed: 0,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,...,1.565,1.566,1.567,1.568,1.569,1.570,1.571,1.572,1.573,574
0,Id,Subsegmento,Sexo,Region,Edad,Renta,Antiguedad,Internauta,Adicional,Dualidad,...,PagoNac_T01,PagoInt_T01,EeccNac_T01,EeccInt_T01,UsoL1_T01,UsoL2_T01,UsoLI_T01,IndRev_T01,target,
1,1,160,M,13,43,,130,1,1,0,...,33000,0,1099866,0,1099866,15080,0,R,0,
2,2,160,H,13,46,143640,69,1,0,0,...,300000,0,214592,0,214592,83596,0,R,0,
3,3,170,H,13,45,929106,24,1,1,0,...,216676,0,0,0,7400,0,0,T,0,
4,4,151,H,13,46,172447,134,0,1,0,...,60000,0,272762,0,272762,10591,0,R,0,
5,5,170,H,13,46,805250,116,0,1,1,...,272925,0,249562,0,75339,377782,0,R,0,
6,6,170,H,13,47,707664,67,1,1,0,...,35800,0,35800,0,0,51197,0,R,0,
7,7,811,H,13,48,1022833,21,1,0,1,...,9391,0,8818,0,8818,0,0,T,0,
8,8,170,H,13,46,,69,0,1,1,...,6000,0,283520,0,283520,0,0,R,0,
9,9,170,H,13,49,1171066,33,0,0,0,...,60000,0,507629,0,507629,0,0,R,1,


### Usamos el argumento 'header=1' al leer el archivo para omitir la primera fila incorrecta

In [408]:
# Releer el archivo saltando la primera fila
df = pd.read_csv('dataSet/Base_clientes_Monopoly.csv', header=1)

In [409]:
# Visualizamos los primeros datos show
df.head(10)

Unnamed: 0,Id,Subsegmento,Sexo,Region,Edad,Renta,Antiguedad,Internauta,Adicional,Dualidad,...,PagoNac_T01,PagoInt_T01,EeccNac_T01,EeccInt_T01,UsoL1_T01,UsoL2_T01,UsoLI_T01,IndRev_T01,target,Unnamed: 574
0,1,160,M,13.0,43,,130,1,1,0,...,33000,0.0,1099866.0,0.0,1099866.0,15080,0.0,R,0,
1,2,160,H,13.0,46,143640.0,69,1,0,0,...,300000,0.0,214592.0,0.0,214592.0,83596,0.0,R,0,
2,3,170,H,13.0,45,929106.0,24,1,1,0,...,216676,0.0,0.0,0.0,7400.0,0,0.0,T,0,
3,4,151,H,13.0,46,172447.0,134,0,1,0,...,60000,0.0,272762.0,0.0,272762.0,10591,0.0,R,0,
4,5,170,H,13.0,46,805250.0,116,0,1,1,...,272925,0.0,249562.0,0.0,75339.0,377782,0.0,R,0,
5,6,170,H,13.0,47,707664.0,67,1,1,0,...,35800,0.0,35800.0,0.0,0.0,51197,0.0,R,0,
6,7,811,H,13.0,48,1022833.0,21,1,0,1,...,9391,0.0,8818.0,0.0,8818.0,0,0.0,T,0,
7,8,170,H,13.0,46,,69,0,1,1,...,6000,0.0,283520.0,0.0,283520.0,0,0.0,R,0,
8,9,170,H,13.0,49,1171066.0,33,0,0,0,...,60000,0.0,507629.0,0.0,507629.0,0,0.0,R,1,
9,10,170,M,13.0,44,964387.0,23,1,1,0,...,92583,0.0,65487.0,0.0,65487.0,12084,0.0,R,0,


In [410]:
# Exploración de los datos con estadística descriptiva
df.describe()

Unnamed: 0,Id,Subsegmento,Region,Edad,Renta,Antiguedad,Internauta,Adicional,Dualidad,Monoproducto,...,ColMx_T01,PagoNac_T01,PagoInt_T01,EeccNac_T01,EeccInt_T01,UsoL1_T01,UsoL2_T01,UsoLI_T01,target,Unnamed: 574
count,51124.0,51124.0,51071.0,51124.0,37759.0,51124.0,51124.0,51124.0,51124.0,51124.0,...,51124.0,51124.0,51124.0,51124.0,51124.0,51124.0,51124.0,51124.0,51124.0,0.0
mean,25562.5,182.024274,10.82822,38.702879,663077.1,38.896154,0.684199,0.256181,0.381347,0.063141,...,5237.914,76375.53,1734.93,193948.8,7.323155,184092.3,136032.7,8.294372,0.089977,
std,14758.371918,29.276596,3.392703,13.302573,409279.5,35.672549,0.464839,0.436527,0.485722,0.243218,...,48528.71,149025.6,42353.68,288498.0,108.161194,289173.1,432538.9,113.215624,0.286152,
min,1.0,151.0,1.0,9.0,1.0,6.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-1861866.0,-7886.76,-3763997.0,-4.0,-7785.12,0.0,
25%,12781.75,160.0,9.0,28.0,419999.0,14.0,0.0,0.0,0.0,0.0,...,0.0,5000.0,0.0,31981.0,0.0,17707.5,0.0,0.0,0.0,
50%,25562.5,170.0,13.0,35.0,567012.0,25.0,1.0,0.0,0.0,0.0,...,0.0,34001.5,0.0,92230.5,0.0,81129.0,29646.5,0.0,0.0,
75%,38343.25,210.0,13.0,46.0,814903.5,54.0,1.0,1.0,1.0,0.0,...,0.0,92000.0,0.0,235978.0,0.0,227814.5,102102.2,0.0,0.0,
max,51124.0,959.0,13.0,104.0,13089330.0,324.0,1.0,1.0,1.0,1.0,...,2072818.0,8697782.0,4219680.0,6911556.0,3929.81,6911556.0,11785490.0,3929.81,1.0,


In [411]:
# Exploración de los datos con estadística descriptiva
df.describe().round()

Unnamed: 0,Id,Subsegmento,Region,Edad,Renta,Antiguedad,Internauta,Adicional,Dualidad,Monoproducto,...,ColMx_T01,PagoNac_T01,PagoInt_T01,EeccNac_T01,EeccInt_T01,UsoL1_T01,UsoL2_T01,UsoLI_T01,target,Unnamed: 574
count,51124.0,51124.0,51071.0,51124.0,37759.0,51124.0,51124.0,51124.0,51124.0,51124.0,...,51124.0,51124.0,51124.0,51124.0,51124.0,51124.0,51124.0,51124.0,51124.0,0.0
mean,25562.0,182.0,11.0,39.0,663077.0,39.0,1.0,0.0,0.0,0.0,...,5238.0,76376.0,1735.0,193949.0,7.0,184092.0,136033.0,8.0,0.0,
std,14758.0,29.0,3.0,13.0,409279.0,36.0,0.0,0.0,0.0,0.0,...,48529.0,149026.0,42354.0,288498.0,108.0,289173.0,432539.0,113.0,0.0,
min,1.0,151.0,1.0,9.0,1.0,6.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-1861866.0,-7887.0,-3763997.0,-4.0,-7785.0,0.0,
25%,12782.0,160.0,9.0,28.0,419999.0,14.0,0.0,0.0,0.0,0.0,...,0.0,5000.0,0.0,31981.0,0.0,17708.0,0.0,0.0,0.0,
50%,25562.0,170.0,13.0,35.0,567012.0,25.0,1.0,0.0,0.0,0.0,...,0.0,34002.0,0.0,92230.0,0.0,81129.0,29646.0,0.0,0.0,
75%,38343.0,210.0,13.0,46.0,814904.0,54.0,1.0,1.0,1.0,0.0,...,0.0,92000.0,0.0,235978.0,0.0,227814.0,102102.0,0.0,0.0,
max,51124.0,959.0,13.0,104.0,13089327.0,324.0,1.0,1.0,1.0,1.0,...,2072818.0,8697782.0,4219680.0,6911556.0,3930.0,6911556.0,11785487.0,3930.0,1.0,


In [412]:
# Verificación de valores faltantes
print("\nValores faltantes por columna:")
df.isnull().sum()


Valores faltantes por columna:


Id                  0
Subsegmento         0
Sexo                1
Region             53
Edad                0
                ...  
UsoL2_T01           0
UsoLI_T01           0
IndRev_T01          0
target              0
Unnamed: 574    51124
Length: 575, dtype: int64

In [413]:
# Revisar los tipos de datos que contiene la columna Sexo en el DataFrame
df['Sexo'].value_counts()

Sexo
H    27410
M    23713
Name: count, dtype: int64

### Cambiaremos los nombres de atributos a un nombre más fácil de entender en español y que explique por sí solo el contenido de el target

In [414]:
# Diccionario para cambiar los nombres de los atributos
nuevos_nombres = {
    'Id': 'ID_Cliente',
    'Subsegmento': 'Subsegmento_Cliente',
    'Sexo': 'Genero',
    'Region': 'Region_Residencia',
    'Edad': 'Edad_Cliente',
    'Renta': 'Renta_Cliente',
    'Antiguedad': 'Antiguedad_Cliente',
    'Internauta': 'Usa_Web_Banco',
    'Adicional': 'Tiene_TC_Adicional',
    'Dualidad': 'Tiene_Multiples_TC',
    'Monoproducto': 'Usa_Solo_TC',
    'Ctacte': 'Tiene_Cuenta_Corriente',
    'Consumo': 'Tiene_Credito_Consumo',
    'Hipotecario': 'Tiene_Credito_Hipotecario',
    'Debito': 'Tiene_Tarjeta_Debito',
    'CambioPin': 'Cambio_Clave_Tarjeta',
    'Cuentas': 'Numero_Cuentas',
    'TC': 'Numero_TC',
    'CUPO_L1': 'Cupo_Comp_Nacional_TC',
    'CUPO_L2': 'Cupo_Avances_Cuotas_TC',
    'CUPO_MX': 'Cupo_Comp_Internacional_TC',
    'FlgAct_T12': 'Actividad_TC_Mes12',
    'Fac_T12': 'Monto_Facturado_TC_Mes12',
    'Txs_T12': 'Num_Transacciones_TC_Mes12',
    'PagoNac_T12': 'Monto_Pagos_Nacionales_Mes12',
    'PagoInt_T12': 'Monto_Pagos_Internacionales_Mes12',
    'EeccNac_T12': 'Monto_Exigido_Nacional_Mes12',
    'EeccInt_T12': 'Monto_Exigido_Internacional_Mes12',
    'UsoL1_T12': 'Deuda_Comp_Nacional_TC_Mes12',
    'UsoL2_T12': 'Deuda_Avances_Cuotas_TC_Mes12',
    'UsoLI_T12': 'Deuda_Comp_Internacional_TC_Mes12',
    'IndRev_T12': 'Indicador_Revolvencia_Mes12'
    # Puedes seguir añadiendo más columnas si es necesario
}

# Renombrar las columnas en el DataFrame
df_renombrado = df.rename(columns=nuevos_nombres)

# Ver los nuevos nombres de las columnas
df_renombrado.columns


Index(['ID_Cliente', 'Subsegmento_Cliente', 'Genero', 'Region_Residencia',
       'Edad_Cliente', 'Renta_Cliente', 'Antiguedad_Cliente', 'Usa_Web_Banco',
       'Tiene_TC_Adicional', 'Tiene_Multiples_TC',
       ...
       'PagoNac_T01', 'PagoInt_T01', 'EeccNac_T01', 'EeccInt_T01', 'UsoL1_T01',
       'UsoL2_T01', 'UsoLI_T01', 'IndRev_T01', 'target', 'Unnamed: 574'],
      dtype='object', length=575)

In [415]:
# Seleccionar solo las columnas numéricas del DataFrame
numerical_columns = df.select_dtypes(include=[np.number])


In [416]:

# Matriz de correlación entre las columnas numéricas

# Fase 3: Preparación de los Datos

In [417]:
# Copiar el DataFrame original
df_filled = df_renombrado.copy()
# Seleccionar solo las columnas numéricas
numerical_columns = df_filled.select_dtypes(include=[np.number])

In [418]:
# Calcular el porcentaje de valores faltantes por columna antes de reemplazar NaN
missing_percentage = df_filled.isnull().mean() * 100
print("Porcentaje de valores faltantes por columna:")
missing_percentage

Porcentaje de valores faltantes por columna:


ID_Cliente               0.000000
Subsegmento_Cliente      0.000000
Genero                   0.001956
Region_Residencia        0.103670
Edad_Cliente             0.000000
                          ...    
UsoL2_T01                0.000000
UsoLI_T01                0.000000
IndRev_T01               0.000000
target                   0.000000
Unnamed: 574           100.000000
Length: 575, dtype: float64

In [419]:
# Muestra de datos antes de reemplazar NaN
print("\nDatos antes de reemplazar NaN por 0:")
df_filled.head()


Datos antes de reemplazar NaN por 0:


Unnamed: 0,ID_Cliente,Subsegmento_Cliente,Genero,Region_Residencia,Edad_Cliente,Renta_Cliente,Antiguedad_Cliente,Usa_Web_Banco,Tiene_TC_Adicional,Tiene_Multiples_TC,...,PagoNac_T01,PagoInt_T01,EeccNac_T01,EeccInt_T01,UsoL1_T01,UsoL2_T01,UsoLI_T01,IndRev_T01,target,Unnamed: 574
0,1,160,M,13.0,43,,130,1,1,0,...,33000,0.0,1099866.0,0.0,1099866.0,15080,0.0,R,0,
1,2,160,H,13.0,46,143640.0,69,1,0,0,...,300000,0.0,214592.0,0.0,214592.0,83596,0.0,R,0,
2,3,170,H,13.0,45,929106.0,24,1,1,0,...,216676,0.0,0.0,0.0,7400.0,0,0.0,T,0,
3,4,151,H,13.0,46,172447.0,134,0,1,0,...,60000,0.0,272762.0,0.0,272762.0,10591,0.0,R,0,
4,5,170,H,13.0,46,805250.0,116,0,1,1,...,272925,0.0,249562.0,0.0,75339.0,377782,0.0,R,0,


In [420]:
# Reemplazar valores infinitos y NaN por 0
df_filled = df_filled.replace([np.inf, -np.inf], np.nan).fillna(0)

# Verificar los primeros datos después de reemplazar los NaN por 0
print("\nDatos después de reemplazar NaN por 0:")
df_filled.head()


Datos después de reemplazar NaN por 0:


Unnamed: 0,ID_Cliente,Subsegmento_Cliente,Genero,Region_Residencia,Edad_Cliente,Renta_Cliente,Antiguedad_Cliente,Usa_Web_Banco,Tiene_TC_Adicional,Tiene_Multiples_TC,...,PagoNac_T01,PagoInt_T01,EeccNac_T01,EeccInt_T01,UsoL1_T01,UsoL2_T01,UsoLI_T01,IndRev_T01,target,Unnamed: 574
0,1,160,M,13.0,43,0.0,130,1,1,0,...,33000,0.0,1099866.0,0.0,1099866.0,15080,0.0,R,0,0.0
1,2,160,H,13.0,46,143640.0,69,1,0,0,...,300000,0.0,214592.0,0.0,214592.0,83596,0.0,R,0,0.0
2,3,170,H,13.0,45,929106.0,24,1,1,0,...,216676,0.0,0.0,0.0,7400.0,0,0.0,T,0,0.0
3,4,151,H,13.0,46,172447.0,134,0,1,0,...,60000,0.0,272762.0,0.0,272762.0,10591,0.0,R,0,0.0
4,5,170,H,13.0,46,805250.0,116,0,1,1,...,272925,0.0,249562.0,0.0,75339.0,377782,0.0,R,0,0.0


In [421]:
# Comprobar los valores nulos en las columnas numéricas
print("Valores faltantes después del reemplazo de infinitos:")
df_filled[numerical_columns.columns].isnull().sum()

Valores faltantes después del reemplazo de infinitos:


ID_Cliente             0
Subsegmento_Cliente    0
Region_Residencia      0
Edad_Cliente           0
Renta_Cliente          0
                      ..
UsoL1_T01              0
UsoL2_T01              0
UsoLI_T01              0
target                 0
Unnamed: 574           0
Length: 562, dtype: int64

In [422]:
# convertir la columna Sexo a valores numéricos (0 y 1)
df_filled['Genero'] = df_filled['Genero'].map({'H': 1, 'M': 0})

In [423]:
# Verificar los primeros datos después de reemplazar el genero por valores numéricos
print("\nDatos después de reemplazar el genero por valores numéricos:")
df_filled.head()


Datos después de reemplazar el genero por valores numéricos:


Unnamed: 0,ID_Cliente,Subsegmento_Cliente,Genero,Region_Residencia,Edad_Cliente,Renta_Cliente,Antiguedad_Cliente,Usa_Web_Banco,Tiene_TC_Adicional,Tiene_Multiples_TC,...,PagoNac_T01,PagoInt_T01,EeccNac_T01,EeccInt_T01,UsoL1_T01,UsoL2_T01,UsoLI_T01,IndRev_T01,target,Unnamed: 574
0,1,160,0.0,13.0,43,0.0,130,1,1,0,...,33000,0.0,1099866.0,0.0,1099866.0,15080,0.0,R,0,0.0
1,2,160,1.0,13.0,46,143640.0,69,1,0,0,...,300000,0.0,214592.0,0.0,214592.0,83596,0.0,R,0,0.0
2,3,170,1.0,13.0,45,929106.0,24,1,1,0,...,216676,0.0,0.0,0.0,7400.0,0,0.0,T,0,0.0
3,4,151,1.0,13.0,46,172447.0,134,0,1,0,...,60000,0.0,272762.0,0.0,272762.0,10591,0.0,R,0,0.0
4,5,170,1.0,13.0,46,805250.0,116,0,1,1,...,272925,0.0,249562.0,0.0,75339.0,377782,0.0,R,0,0.0


In [424]:
# Verificar el tipo de datos de cada columna antes de la normalización
df_filled.dtypes

ID_Cliente               int64
Subsegmento_Cliente      int64
Genero                 float64
Region_Residencia      float64
Edad_Cliente             int64
                        ...   
UsoL2_T01                int64
UsoLI_T01              float64
IndRev_T01              object
target                   int64
Unnamed: 574           float64
Length: 575, dtype: object

In [425]:
# Convertir solo las columnas numéricas (float o int) a enteros
for col in df_filled.columns:
    if pd.api.types.is_numeric_dtype(df_filled[col]):
        df_filled[col] = df_filled[col].fillna(0).astype(int)


In [426]:
# Verificar el tipo de datos de cada columna después de la conversión
df_filled.dtypes

ID_Cliente              int32
Subsegmento_Cliente     int32
Genero                  int32
Region_Residencia       int32
Edad_Cliente            int32
                        ...  
UsoL2_T01               int32
UsoLI_T01               int32
IndRev_T01             object
target                  int32
Unnamed: 574            int32
Length: 575, dtype: object

In [427]:
# Verificar los primeros datos después de reemplazar los valores Float por Int
print("\nDatos después de reemplazar los valores Float por Int:")
df_filled.head()


Datos después de reemplazar los valores Float por Int:


Unnamed: 0,ID_Cliente,Subsegmento_Cliente,Genero,Region_Residencia,Edad_Cliente,Renta_Cliente,Antiguedad_Cliente,Usa_Web_Banco,Tiene_TC_Adicional,Tiene_Multiples_TC,...,PagoNac_T01,PagoInt_T01,EeccNac_T01,EeccInt_T01,UsoL1_T01,UsoL2_T01,UsoLI_T01,IndRev_T01,target,Unnamed: 574
0,1,160,0,13,43,0,130,1,1,0,...,33000,0,1099866,0,1099866,15080,0,R,0,0
1,2,160,1,13,46,143640,69,1,0,0,...,300000,0,214592,0,214592,83596,0,R,0,0
2,3,170,1,13,45,929106,24,1,1,0,...,216676,0,0,0,7400,0,0,T,0,0
3,4,151,1,13,46,172447,134,0,1,0,...,60000,0,272762,0,272762,10591,0,R,0,0
4,5,170,1,13,46,805250,116,0,1,1,...,272925,0,249562,0,75339,377782,0,R,0,0


In [428]:
# Eliminar columnas que tienen más del 50% de valores faltantes
threshold = 50  # Puedes ajustar este umbral según sea necesario
columns_to_drop = missing_percentage[missing_percentage > threshold].index

df_filled = df_filled.drop(columns=columns_to_drop)
print(f"Columnas eliminadas: {columns_to_drop}")

Columnas eliminadas: Index(['Unnamed: 574'], dtype='object')


In [429]:
# Eliminar duplicados si existen
df_filled = df_filled.drop_duplicates()

In [430]:
# Revisar las columnas numéricas presentes en el DataFrame
numerical_columns = df_filled.select_dtypes(include=[np.number])

# Verificar si las columnas numéricas están presentes en el DataFrame
print("Columnas numéricas presentes:")
numerical_columns.columns


Columnas numéricas presentes:


Index(['ID_Cliente', 'Subsegmento_Cliente', 'Genero', 'Region_Residencia',
       'Edad_Cliente', 'Renta_Cliente', 'Antiguedad_Cliente', 'Usa_Web_Banco',
       'Tiene_TC_Adicional', 'Tiene_Multiples_TC',
       ...
       'ColL2CC_T01', 'ColMx_T01', 'PagoNac_T01', 'PagoInt_T01', 'EeccNac_T01',
       'EeccInt_T01', 'UsoL1_T01', 'UsoL2_T01', 'UsoLI_T01', 'target'],
      dtype='object', length=562)

In [431]:
# Revisa si el DataFrame está vacío después de eliminar filas con valores no válidos
if df_filled.empty:
    print("El DataFrame está vacío después de eliminar filas con valores no válidos.")
else:
    print("El DataFrame tiene {} filas y {} columnas después de eliminar filas con valores no válidos.".format(
        df_filled.shape[0], df_filled.shape[1]))


El DataFrame tiene 51124 filas y 574 columnas después de eliminar filas con valores no válidos.


In [432]:
# Normalización de los datos numéricos
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df_filled[numerical_columns.columns])

In [433]:
# Convertir los datos normalizados a un nuevo DataFrame
df_scaled = pd.DataFrame(scaled_data, columns=numerical_columns.columns)

In [434]:
# Mostrar las primeras filas del DataFrame normalizado
print("\nDatos normalizados (primeras filas):")
df_scaled.head()


Datos normalizados (primeras filas):


Unnamed: 0,ID_Cliente,Subsegmento_Cliente,Genero,Region_Residencia,Edad_Cliente,Renta_Cliente,Antiguedad_Cliente,Usa_Web_Banco,Tiene_TC_Adicional,Tiene_Multiples_TC,...,ColL2CC_T01,ColMx_T01,PagoNac_T01,PagoInt_T01,EeccNac_T01,EeccInt_T01,UsoL1_T01,UsoL2_T01,UsoLI_T01,target
0,-1.732017,-0.75229,-1.075108,0.640409,0.323032,-1.072246,2.553917,0.679384,1.703963,-0.785122,...,-0.136611,-0.107935,-0.291064,-0.040963,3.140147,-0.067675,3.166901,-0.279637,-0.073229,-0.314442
1,-1.731949,-0.75229,0.930139,0.640409,0.548555,-0.757754,0.843902,0.679384,-0.586867,-0.785122,...,-0.40851,-0.107935,1.500592,-0.040963,0.071555,-0.067675,0.105473,-0.121231,-0.073229,-0.314442
2,-1.731881,-0.410717,0.930139,0.640409,0.473381,0.961984,-0.417584,0.679384,1.703963,-0.785122,...,-0.40851,-0.107935,0.941461,-0.040963,-0.672277,-0.067675,-0.611032,-0.314501,-0.073229,-0.314442
3,-1.731814,-1.059706,0.930139,0.640409,0.548555,-0.694682,2.666049,-1.471922,1.703963,-0.785122,...,-0.40851,-0.107935,-0.109885,-0.040963,0.273187,-0.067675,0.306635,-0.290015,-0.073229,-0.314442
4,-1.731746,-0.410717,0.930139,0.640409,0.548555,0.690808,2.161455,-1.471922,1.703963,1.273688,...,-0.112266,-0.107935,1.31891,-0.040963,0.19277,-0.067675,-0.376087,0.558913,-0.073229,-0.314442


In [435]:
# Verificar si hay outliers (valores atípicos)