In [2]:
import pandas as pd
import numpy as np

# Número de amostras
num_samples = 5000

# --- CÓDIGO CORRIGIDO ---

# 1. Gerar dados com colunas independentes primeiro
data = {
    'CustomerID': [f'CUST-{i:04d}' for i in range(num_samples)],
    'TenureMonths': np.random.randint(1, 72, size=num_samples),
    'ContractType': np.random.choice(['Month-to-month', 'One year', 'Two year'], size=num_samples, p=[0.6, 0.3, 0.1]),
    'MonthlyCharges': np.random.normal(75, 25, size=num_samples),
    'SupportTickets': np.random.randint(0, 10, size=num_samples),
    'FeatureUsageScore': np.random.randint(20, 100, size=num_samples)
}
df = pd.DataFrame(data)

# 2. Agora, criar as colunas calculadas e aplicar os limites
df['TotalCharges'] = df['TenureMonths'] * df['MonthlyCharges'] * np.random.normal(1, 0.1, size=num_samples)
df['MonthlyCharges'] = df['MonthlyCharges'].clip(lower=20)
df['TotalCharges'] = df['TotalCharges'].clip(lower=20)

# 3. Criar a variável alvo (Churn) com base em regras lógicas
churn_probability = (
    -0.1 * (df['TenureMonths'] / 72) +
    0.3 * (df['MonthlyCharges'] / df['MonthlyCharges'].max()) +
    -0.3 * (df['ContractType'].map({'Month-to-month': 0.8, 'One year': 0.4, 'Two year': 0.1})) +
    0.2 * (df['SupportTickets'] / 10) +
    -0.2 * (df['FeatureUsageScore'] / 100)
)
churn_probability = (churn_probability - churn_probability.min()) / (churn_probability.max() - churn_probability.min())
df['Churn'] = (churn_probability > np.random.normal(0.6, 0.1, size=num_samples)).astype(int)

# 4. Salvar no CSV
# Verifique se a pasta 'data' existe. Se não, crie-a.
import os
os.makedirs('../data', exist_ok=True)
df.to_csv('../data/customers.csv', index=False)

# 5. Imprimir confirmação e resultados
print("Arquivo 'customers.csv' gerado com sucesso!")
print(df.head())
print(f"\nDistribuição de Churn:\n{df['Churn'].value_counts(normalize=True)}")

Arquivo 'customers.csv' gerado com sucesso!
  CustomerID  TenureMonths    ContractType  MonthlyCharges  SupportTickets  \
0  CUST-0000            43        One year      102.791679               3   
1  CUST-0001            66  Month-to-month       86.388938               0   
2  CUST-0002             3  Month-to-month      108.995329               6   
3  CUST-0003            45        One year       51.087282               5   
4  CUST-0004            17        One year       64.689449               0   

   FeatureUsageScore  TotalCharges  Churn  
0                 40   4852.709454      0  
1                 55   6031.000057      0  
2                 63    304.863230      0  
3                 29   1849.294654      0  
4                 88   1008.802445      0  

Distribuição de Churn:
Churn
0    0.7908
1    0.2092
Name: proportion, dtype: float64
