Etapa 1: Carregando os modelos e bibliotecas

In [59]:
import pandas as pd
import numpy as np
from IPython.display import display
import joblib
import pickle

In [60]:
model_joblib = joblib.load('notebooks/models/modelo_random_forest_joblib.pkl')
model_pickle = pickle.load(open('notebooks/models/modelo_random_forest_pickle.pkl','rb'))

Etapa 2: Gera√ß√£o dos arquivos para simula√ß√£o como modelo salvo

In [61]:
# Gerar dados simulados
np.random.seed(42)
n_samples = 5

data_teste = pd.DataFrame({
    'gender': np.random.choice(['Male', 'Female'], n_samples),
    'SeniorCitizen': np.random.choice([0, 1], n_samples),
    'Partner': np.random.choice(['Yes', 'No'], n_samples),
    'Dependents': np.random.choice(['Yes', 'No'], n_samples),
    'tenure': np.random.randint(0, 72, n_samples),
    'PhoneService': np.random.choice(['Yes', 'No'], n_samples),
    'MultipleLines': np.random.choice(['Yes', 'No', 'No phone service'], n_samples),
    'InternetService': np.random.choice(['DSL', 'Fiber optic', 'No'], n_samples),
    'OnlineSecurity': np.random.choice(['Yes', 'No', 'No internet service'], n_samples),
    'OnlineBackup': np.random.choice(['Yes', 'No', 'No internet service'], n_samples),
    'DeviceProtection': np.random.choice(['Yes', 'No', 'No internet service'], n_samples),
    'TechSupport': np.random.choice(['Yes', 'No', 'No internet service'], n_samples),
    'StreamingTV': np.random.choice(['Yes', 'No', 'No internet service'], n_samples),
    'StreamingMovies': np.random.choice(['Yes', 'No', 'No internet service'], n_samples),
    'Contract': np.random.choice(['Month-to-month', 'One year', 'Two year'], n_samples),
    'PaperlessBilling': np.random.choice(['Yes', 'No'], n_samples),
    'PaymentMethod': np.random.choice(['Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)'], n_samples),
    'MonthlyCharges': np.round(np.random.uniform(20, 120, n_samples), 2),
})

In [62]:
# A coluna 'TotalCharges' deve ser calculada ap√≥s o DataFrame ser criado,
# pois usa outras colunas como base.
data_teste['TotalCharges'] = np.round(data_teste['MonthlyCharges'] * data_teste['tenure'] + np.random.uniform(-20, 20, n_samples), 2)

In [63]:
# Visualizar o DataFrame usando a fun√ß√£o 'display' do IPython
print("--- Dados Simulados para Previs√£o ---")
display(data_teste)

--- Dados Simulados para Previs√£o ---


Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges
0,Male,1,Yes,Yes,21,No,No,No,No,No internet service,No internet service,Yes,No,Yes,One year,No,Mailed check,109.48,2280.89
1,Female,0,Yes,No,52,No,No,No,No,Yes,Yes,No,Yes,No internet service,One year,No,Mailed check,79.79,4142.09
2,Male,0,Yes,No,1,No,Yes,No,No internet service,No internet service,Yes,No,No,No internet service,One year,No,Credit card (automatic),112.19,107.74
3,Male,0,Yes,No,29,Yes,Yes,Fiber optic,No,Yes,No internet service,No,No internet service,No,One year,Yes,Mailed check,28.85,827.5
4,Male,1,No,Yes,37,Yes,Yes,No,No internet service,No internet service,No,Yes,No internet service,Yes,One year,Yes,Mailed check,39.6,1478.35


In [64]:
# C√≥pia de seguran√ßa
df_simulado_proc = data_teste.copy()

# 1. Remover colunas desnecess√°rias
colunas_remover = ['customerID', 'Churn'] if 'customerID' in df_simulado_proc.columns else ['Churn']
df_simulado_proc = df_simulado_proc.drop(columns=colunas_remover, errors='ignore')

# 2. Corrigir TotalCharges (caso exista)
if 'TotalCharges' in df_simulado_proc.columns:
    df_simulado_proc['TotalCharges'] = pd.to_numeric(df_simulado_proc['TotalCharges'], errors='coerce')
    df_simulado_proc['TotalCharges'] = df_simulado_proc['TotalCharges'].fillna(df_simulado_proc['TotalCharges'].median())

# 3. Vari√°veis bin√°rias como 0 e 1
variaveis_binarias = ['Partner', 'Dependents', 'PhoneService', 'PaperlessBilling']
for col in variaveis_binarias:
    if col in df_simulado_proc.columns:
        df_simulado_proc[col] = df_simulado_proc[col].map({'Yes': 1, 'No': 0})

# 4. Vari√°veis com "No internet service" ou "No phone service"
substituir_no_service = [
    'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
    'TechSupport', 'StreamingTV', 'StreamingMovies', 'MultipleLines'
]
for col in substituir_no_service:
    if col in df_simulado_proc.columns:
        df_simulado_proc[col] = df_simulado_proc[col].replace({'No internet service': 'No', 'No phone service': 'No'})

# 5. Codificar vari√°veis categ√≥ricas
colunas_categoricas = df_simulado_proc.select_dtypes(include='object').columns.tolist()
df_simulado_proc = pd.get_dummies(df_simulado_proc, columns=colunas_categoricas, drop_first=True)

# 6. Garantir que n√£o h√° valores nulos
df_simulado_proc = df_simulado_proc.fillna(0)

# 7. Verificar formato final
print(f"Shape final do dataset processado: {df_simulado_proc.shape}")
df_simulado_proc.head()


Shape final do dataset processado: (5, 17)


Unnamed: 0,SeniorCitizen,Partner,Dependents,tenure,PhoneService,PaperlessBilling,MonthlyCharges,TotalCharges,gender_Male,MultipleLines_Yes,InternetService_No,OnlineBackup_Yes,DeviceProtection_Yes,TechSupport_Yes,StreamingTV_Yes,StreamingMovies_Yes,PaymentMethod_Mailed check
0,1,1,1,21,0,0,109.48,2280.89,True,False,True,False,False,True,False,True,True
1,0,1,0,52,0,0,79.79,4142.09,False,False,True,True,True,False,True,False,True
2,0,1,0,1,0,0,112.19,107.74,True,True,True,False,True,False,False,False,False
3,0,1,0,29,1,1,28.85,827.5,True,True,False,True,False,False,False,False,True
4,1,0,1,37,1,1,39.6,1478.35,True,True,True,False,False,True,False,True,True


In [65]:
# Converter valores booleanos para inteiros (True ‚Üí 1, False ‚Üí 0)
df_simulado_proc = df_simulado_proc.astype(int)

# Verificando resultado
df_simulado_proc.head()


Unnamed: 0,SeniorCitizen,Partner,Dependents,tenure,PhoneService,PaperlessBilling,MonthlyCharges,TotalCharges,gender_Male,MultipleLines_Yes,InternetService_No,OnlineBackup_Yes,DeviceProtection_Yes,TechSupport_Yes,StreamingTV_Yes,StreamingMovies_Yes,PaymentMethod_Mailed check
0,1,1,1,21,0,0,109,2280,1,0,1,0,0,1,0,1,1
1,0,1,0,52,0,0,79,4142,0,0,1,1,1,0,1,0,1
2,0,1,0,1,0,0,112,107,1,1,1,0,1,0,0,0,0
3,0,1,0,29,1,1,28,827,1,1,0,1,0,0,0,0,1
4,1,0,1,37,1,1,39,1478,1,1,1,0,0,1,0,1,1


In [66]:
df_proc = df_simulado_proc.copy()

# exemplo de convers√£o de bools
cols_bool = [col for col in df_proc.columns if df_proc[col].dtype == bool]
df_proc[cols_bool] = df_proc[cols_bool].astype(int)


In [69]:
y_pred = model_joblib.predict(df_proc)
y_proba = model_joblib.predict_proba(df_proc)[:,1]

df_result = df_simulado_proc.copy()
df_result['Churn_Previsto'] = y_pred
df_result['Probabilidade_Churn'] = y_proba
display(df_result)


ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- Dependents
- DeviceProtection_Yes
- InternetService_No
- MonthlyCharges
- MultipleLines_Yes
- ...
Feature names seen at fit time, yet now missing:
- 0
- 1
- 10
- 11
- 12
- ...


Etapa 3: Carregar o modelo treinado

In [68]:
# Usando joblib
modelo_rf = joblib.load('notebooks/models/modelo_random_forest_joblib.pkl')

# Ou, se preferir usar pickle:
# with open('/content/modelo_random_forest_pickle.pkl', 'rb') as f:
#     modelo_rf = pickle.load(f)

Etapa 4: Fazer previs√µes

In [None]:
# Previs√£o
previsoes = modelo_rf.predict(df_simulado_proc)


# Resultado
print("üîÆ Previs√µes:", previsoes)


ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- Dependents
- DeviceProtection_Yes
- InternetService_No
- MonthlyCharges
- MultipleLines_Yes
- ...
Feature names seen at fit time, yet now missing:
- 0
- 1
- 10
- 11
- 12
- ...


In [None]:
# Previs√£o bin√°ria
previsoes = modelo_rf.predict(df_simulado_proc)

# Probabilidade de churn (classe 1)
probabilidades = modelo_rf.predict_proba(df_simulado_proc)[:, 1]

# Juntar os resultados em um DataFrame
resultado = df_simulado_proc.copy()
resultado['Churn_Previsto'] = previsoes
resultado['Probabilidade_Churn'] = probabilidades


ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- Dependents
- DeviceProtection_Yes
- InternetService_No
- MonthlyCharges
- MultipleLines_Yes
- ...
Feature names seen at fit time, yet now missing:
- 0
- 1
- 10
- 11
- 12
- ...


Etapa 5: Exibir os resultados

In [None]:
pd.set_option('display.float_format', lambda x: '%.2f' % x)
display(resultado[['Churn_Previsto', 'Probabilidade_Churn'] + list(df_simulado.columns)])

NameError: name 'resultado' is not defined

In [72]:
## Etapa 1: Carregamento de Modelos e Bibliotecas
# Importar bibliotecas necess√°rias
import pandas as pd
import numpy as np
from IPython.display import display
import joblib

# Carregar o modelo treinado usando joblib
try:
    modelo_rf = joblib.load('notebooks/models/modelo_random_forest_joblib.pkl')
    print("‚úÖ Modelo carregado com sucesso!")
except FileNotFoundError:
    print("‚ùå Erro: Arquivo do modelo n√£o encontrado. Verifique o caminho.")
    # Se o modelo n√£o for encontrado, o script pode parar ou seguir sem ele
    modelo_rf = None



## Etapa 2: Gera√ß√£o e Pr√©-processamento dos Dados de Simula√ß√£o
# Definir um seed para reprodutibilidade
np.random.seed(42)
n_samples = 5

# Gerar dados simulados para previs√£o
data_teste = pd.DataFrame({
    'gender': np.random.choice(['Male', 'Female'], n_samples),
    'SeniorCitizen': np.random.choice([0, 1], n_samples),
    'Partner': np.random.choice(['Yes', 'No'], n_samples),
    'Dependents': np.random.choice(['Yes', 'No'], n_samples),
    'tenure': np.random.randint(0, 72, n_samples),
    'PhoneService': np.random.choice(['Yes', 'No'], n_samples),
    'MultipleLines': np.random.choice(['Yes', 'No', 'No phone service'], n_samples),
    'InternetService': np.random.choice(['DSL', 'Fiber optic', 'No'], n_samples),
    'OnlineSecurity': np.random.choice(['Yes', 'No', 'No internet service'], n_samples),
    'OnlineBackup': np.random.choice(['Yes', 'No', 'No internet service'], n_samples),
    'DeviceProtection': np.random.choice(['Yes', 'No', 'No internet service'], n_samples),
    'TechSupport': np.random.choice(['Yes', 'No', 'No internet service'], n_samples),
    'StreamingTV': np.random.choice(['Yes', 'No', 'No internet service'], n_samples),
    'StreamingMovies': np.random.choice(['Yes', 'No', 'No internet service'], n_samples),
    'Contract': np.random.choice(['Month-to-month', 'One year', 'Two year'], n_samples),
    'PaperlessBilling': np.random.choice(['Yes', 'No'], n_samples),
    'PaymentMethod': np.random.choice(['Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)'], n_samples),
    'MonthlyCharges': np.round(np.random.uniform(20, 120, n_samples), 2),
})

# Calcular a coluna 'TotalCharges' ap√≥s a cria√ß√£o do DataFrame
data_teste['TotalCharges'] = np.round(data_teste['MonthlyCharges'] * data_teste['tenure'] + np.random.uniform(-20, 20, n_samples), 2)

# Exibir os dados simulados
print("--- Dados Simulados para Previs√£o ---")
display(data_teste)

# Copiar o DataFrame para o pr√©-processamento
df_proc = data_teste.copy()

### Pr√©-processamento dos dados
# 1. Corrigir e preencher a coluna 'TotalCharges'
if 'TotalCharges' in df_proc.columns:
    df_proc['TotalCharges'] = pd.to_numeric(df_proc['TotalCharges'], errors='coerce')
    df_proc['TotalCharges'] = df_proc['TotalCharges'].fillna(df_proc['TotalCharges'].median())

# 2. Mapear vari√°veis bin√°rias ('Yes'/'No') para 1/0
variaveis_binarias = ['Partner', 'Dependents', 'PhoneService', 'PaperlessBilling']
for col in variaveis_binarias:
    if col in df_proc.columns:
        df_proc[col] = df_proc[col].map({'Yes': 1, 'No': 0})

# 3. Mapear valores de "no service" para 'No'
substituir_no_service = [
    'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
    'StreamingTV', 'StreamingMovies', 'MultipleLines'
]
for col in substituir_no_service:
    if col in df_proc.columns:
        df_proc[col] = df_proc[col].replace({'No internet service': 'No', 'No phone service': 'No'})

# 4. Converter vari√°veis categ√≥ricas restantes para vari√°veis dummy
colunas_categoricas = df_proc.select_dtypes(include='object').columns.tolist()
df_proc = pd.get_dummies(df_proc, columns=colunas_categoricas, drop_first=True)

# 5. Garantir que n√£o h√° valores nulos
df_proc = df_proc.fillna(0)

# 6. Converter booleanos gerados por get_dummies para inteiros (0 ou 1)
df_proc = df_proc.astype(int)

print("\n--- Dados Processados para Previs√£o ---")
print(f"Shape final do dataset: {df_proc.shape}")
display(df_proc.head())



## Etapa 3: Fazer Previs√µes e Exibir os Resultados
if modelo_rf is not None:
    # Fazer previs√µes bin√°rias
    previsoes = modelo_rf.predict(df_proc)

    # Calcular a probabilidade de Churn (classe 1)
    probabilidades = modelo_rf.predict_proba(df_proc)[:, 1]

    # Juntar os resultados com os dados de entrada
    resultado_final = data_teste.copy()
    resultado_final['Churn_Previsto'] = previsoes
    resultado_final['Probabilidade_Churn'] = probabilidades

    # Configurar a exibi√ß√£o de floats para duas casas decimais
    pd.set_option('display.float_format', lambda x: '%.2f' % x)

    print("\n--- Resultados Finais da Previs√£o ---")
    display(resultado_final[['Churn_Previsto', 'Probabilidade_Churn'] + list(data_teste.columns)])
else:
    print("\n‚ö†Ô∏è O modelo n√£o foi carregado. N√£o foi poss√≠vel fazer previs√µes.")

‚úÖ Modelo carregado com sucesso!
--- Dados Simulados para Previs√£o ---


Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges
0,Male,1,Yes,Yes,21,No,No,No,No,No internet service,No internet service,Yes,No,Yes,One year,No,Mailed check,109.48,2280.89
1,Female,0,Yes,No,52,No,No,No,No,Yes,Yes,No,Yes,No internet service,One year,No,Mailed check,79.79,4142.09
2,Male,0,Yes,No,1,No,Yes,No,No internet service,No internet service,Yes,No,No,No internet service,One year,No,Credit card (automatic),112.19,107.74
3,Male,0,Yes,No,29,Yes,Yes,Fiber optic,No,Yes,No internet service,No,No internet service,No,One year,Yes,Mailed check,28.85,827.5
4,Male,1,No,Yes,37,Yes,Yes,No,No internet service,No internet service,No,Yes,No internet service,Yes,One year,Yes,Mailed check,39.6,1478.35



--- Dados Processados para Previs√£o ---
Shape final do dataset: (5, 17)


Unnamed: 0,SeniorCitizen,Partner,Dependents,tenure,PhoneService,PaperlessBilling,MonthlyCharges,TotalCharges,gender_Male,MultipleLines_Yes,InternetService_No,OnlineBackup_Yes,DeviceProtection_Yes,TechSupport_Yes,StreamingTV_Yes,StreamingMovies_Yes,PaymentMethod_Mailed check
0,1,1,1,21,0,0,109,2280,1,0,1,0,0,1,0,1,1
1,0,1,0,52,0,0,79,4142,0,0,1,1,1,0,1,0,1
2,0,1,0,1,0,0,112,107,1,1,1,0,1,0,0,0,0
3,0,1,0,29,1,1,28,827,1,1,0,1,0,0,0,0,1
4,1,0,1,37,1,1,39,1478,1,1,1,0,0,1,0,1,1


ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- Dependents
- DeviceProtection_Yes
- InternetService_No
- MonthlyCharges
- MultipleLines_Yes
- ...
Feature names seen at fit time, yet now missing:
- 0
- 1
- 10
- 11
- 12
- ...
