In [1]:
# Importando bibliotecas necessárias
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import joblib  # Para salvar os modelos


In [6]:
# Load the daily modelling base from CSV (comma-separated, the pandas default)
# and preview the first five rows as the cell output.
df = pd.read_csv("base_modelo_diario.csv")
df.head(n=5)

Unnamed: 0,Data,MRR_Total,Churn_Total,MRR_Essencial,MRR_Controle,MRR_Avancado,MRR_Performance,Churn_Essencial,Churn_Controle,Churn_Avancado,...,Trials,Conversao_Trial_Pago_%,Engajamento_Medio,%_Base_<30_dias,%_Base_Ativa,%_Ativacao_Features,Engajamento_Essencial,Engajamento_Controle,Engajamento_Avancado,Engajamento_Performance
0,2022-06-01,4559605.7,6.0,1334351.48,1007471.35,889040.72,1328742.15,6.11,6.06,5.83,...,75,39.38,0.116,22.47,76.18,43.63,0.087,0.096,0.149,0.148
1,2022-06-02,4484408.28,6.57,1346620.24,1093487.86,950287.78,1094012.4,6.77,6.46,6.33,...,67,35.88,0.401,22.2,65.31,49.17,0.381,0.425,0.429,0.443
2,2022-06-03,4579722.62,4.3,1289938.02,1165017.79,891651.98,1233114.84,4.49,4.43,4.1,...,88,44.84,0.115,21.6,74.45,51.76,0.087,0.115,0.139,0.141
3,2022-06-04,4685763.58,3.18,1339817.85,1200189.69,859747.13,1286008.92,3.41,3.2,3.07,...,71,37.18,0.055,22.66,71.55,50.28,0.04,0.051,0.081,0.096
4,2022-06-05,4475901.6,6.36,1429894.82,1139683.7,905346.87,1000976.21,6.57,6.42,6.03,...,71,33.42,0.089,20.75,71.85,56.6,0.083,0.088,0.12,0.121


In [None]:
# Build the modelling frame: everything from `df` except the date column.
# `df` itself is left untouched, so this cell can be re-run safely.
df_model = df.drop("Data", axis="columns")


In [8]:
# Names of the two columns the models are trained to predict.
target_mrr = 'MRR_Total'
target_churn = 'Churn_Total'

# Feature frame: every remaining column of `df_model` once both targets are removed.
# NOTE(review): in the rows previewed by df.head(), MRR_Total equals (to rounding)
# MRR_Essencial + MRR_Controle + MRR_Avancado + MRR_Performance, and those
# plan-level columns stay in `features`. That looks like target leakage for the
# MRR model (and possibly the Churn_* columns for the churn model) — confirm
# whether keeping them is intentional before trusting the reported scores.
features = df_model.drop(labels=[target_mrr, target_churn], axis="columns")


In [9]:
# Standardise the features with a default StandardScaler: learn the statistics
# from `features`, then apply them to the same frame.
# NOTE(review): the scaler is fitted here on every row, before any train/test
# split takes place, so rows later held out for testing also influence the
# statistics computed in this cell.
scaler = StandardScaler().fit(features)
X_scaled = scaler.transform(features)


In [10]:
# Pull each target column out of the modelling frame as its own Series.
y_mrr, y_churn = df_model[target_mrr], df_model[target_churn]


In [11]:
# Hold out 20% of the rows for evaluation. Both targets are split in one call,
# so the MRR and churn models share exactly the same train/test rows (the row
# selection depends only on the number of samples and the fixed seed).
X_train_raw, X_test_raw, y_train_mrr, y_test_mrr, y_train_churn, y_test_churn = train_test_split(
    features, y_mrr, y_churn, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, so the held-out rows do not
# contribute to the mean/variance used for standardisation. This rebinds
# `scaler`, replacing the instance that was fitted on the full dataset, which
# means the scaler persisted later carries training-only statistics.
scaler = StandardScaler().fit(X_train_raw)

# Same standardised matrices for both models; only the targets differ.
X_train_mrr = X_train_churn = scaler.transform(X_train_raw)
X_test_mrr = X_test_churn = scaler.transform(X_test_raw)


In [12]:
# Both regressors are built from one shared hyperparameter set; they differ
# only in the target each one is fitted on afterwards.
gbr_params = {
    "n_estimators": 150,
    "learning_rate": 0.1,
    "max_depth": 4,
    "random_state": 42,
}
model_mrr = GradientBoostingRegressor(**gbr_params)
model_churn = GradientBoostingRegressor(**gbr_params)


In [13]:
# Fit each regressor on the training partition of its own target.
model_mrr.fit(X=X_train_mrr, y=y_train_mrr)
model_churn.fit(X=X_train_churn, y=y_train_churn)


In [15]:
# Score both regressors on their held-out partitions.
y_pred_mrr = model_mrr.predict(X_test_mrr)
y_pred_churn = model_churn.predict(X_test_churn)

# Compute the metrics first, then report them.
mae_mrr = mean_absolute_error(y_test_mrr, y_pred_mrr)
r2_mrr = r2_score(y_test_mrr, y_pred_mrr)
mae_churn = mean_absolute_error(y_test_churn, y_pred_churn)
r2_churn = r2_score(y_test_churn, y_pred_churn)

# MRR report: MAE is printed with an "R$" prefix and thousands separators.
print("📈 Modelo MRR")
print(f"MAE: R${mae_mrr:,.2f}")
print(f"R²: {r2_mrr:.3f}")

# Churn report: MAE is printed with a "p.p." suffix.
print("\n📉 Modelo Churn")
print(f"MAE: {mae_churn:.3f} p.p.")
print(f"R²: {r2_churn:.3f}")


📈 Modelo MRR
MAE: R$29,122.59
R²: 0.988

📉 Modelo Churn
MAE: 0.049 p.p.
R²: 0.995


In [16]:
# Persist the two fitted regressors and the scaler with joblib, for use by the
# Streamlit app. The scaler dump stays as the final expression so the cell
# still echoes the list of written files.
joblib.dump(value=model_mrr, filename="modelo_mrr.pkl")
joblib.dump(value=model_churn, filename="modelo_churn.pkl")
joblib.dump(value=scaler, filename="scaler.pkl")

['scaler.pkl']