In [4]:
import pandas as pd
import numpy as np

# Generating synthetic data for the dataset
np.random.seed(42)

# Number of samples
num_samples = 1000

# Creating the data
data = {
    'edad': np.random.randint(18, 70, num_samples),
    'ingreso_anual': np.random.normal(50, 15, num_samples).round(2), # in thousands
    'saldo_promedio': np.random.normal(20, 10, num_samples).round(2), # in thousands
    'gastos_mensuales': np.random.normal(3, 1.5, num_samples).round(2), # in thousands
    'historial_crediticio': np.random.uniform(0, 1, num_samples).round(2),
}

# Converting data into DataFrame
df = pd.DataFrame(data)

# Generating 'ingresos_futuros' with some randomness
df['ingresos_futuros'] = (
    0.5 * df['ingreso_anual'] +
    0.3 * df['saldo_promedio'] -
    0.2 * df['gastos_mensuales'] +
    15 * df['historial_crediticio'] +
    np.random.normal(0, 5, num_samples)
).round(2)

# Generating 'rentable' based on a logistic function of 'ingreso_anual', 'saldo_promedio', and other features
log_odds = (
    0.02 * df['ingreso_anual'] +
    0.03 * df['saldo_promedio'] -
    0.01 * df['gastos_mensuales'] +
    0.4 * df['historial_crediticio']
)
prob_rentable = 1 / (1 + np.exp(-log_odds))
df['rentable'] = (prob_rentable > 0.5).astype(int)


In [5]:
df

Unnamed: 0,edad,ingreso_anual,saldo_promedio,gastos_mensuales,historial_crediticio,ingresos_futuros,rentable
0,56,25.90,37.27,5.99,0.42,22.44,1
1,69,53.05,16.00,1.85,0.02,25.86,1
2,46,38.65,22.25,2.18,0.34,20.56,1
3,32,28.67,29.33,4.29,0.18,24.47,1
4,60,40.30,5.82,2.42,0.10,19.92,1
...,...,...,...,...,...,...,...
995,60,56.29,34.53,-0.16,0.27,39.85,1
996,64,36.69,6.61,4.73,0.74,23.80,1
997,62,43.44,26.93,5.08,0.05,33.15,1
998,35,60.84,13.94,2.55,0.05,41.30,1


In [6]:
csv_path = 'dataset_rentabilidad_clientes.csv'
df.to_csv(csv_path, index=False)