## Notebook de exemplo do curso da Alura

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import mlflow
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor    

### Lendo base de dados e separando em treino e teste

In [2]:
# lendo base de dados
df = pd.read_csv("../data/casas.csv")

In [3]:
df.head()

Unnamed: 0,tamanho,ano,garagem,preco
0,159.0,2003,2,208500
1,117.0,1976,2,181500
2,166.0,2001,2,223500
3,160.0,1915,3,140000
4,204.0,2000,3,250000


In [4]:
# Seoparando features e target
X = df.drop("preco", axis=1)
y = df["preco"].copy()

In [5]:
# Separando dados de treino e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Precisar rodar o comando mlflow ui no terminal dentro da pasta  notebooks

In [6]:
# Definindo experimento no MLflow
mlflow.set_experiment("projeto_casas_alura")

2025/12/30 15:41:38 INFO mlflow.tracking.fluent: Experiment with name 'projeto_casas_alura' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///c:/Users/Mariana%20Soares%20Sa/Documents/GitHub/estudo_mlflow/notebooks/mlruns/929796341540647291', creation_time=1767120098164, experiment_id='929796341540647291', last_update_time=1767120098164, lifecycle_stage='active', name='projeto_casas_alura', tags={}>

## Testando regressão linear

In [7]:
# Efetuando o treinamento do modelo
lr = LinearRegression()
lr.fit(X_train, y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [8]:
# Fazendo predict no conjunto de teste
y_pred_lr = lr.predict(X_test)

In [9]:
# Checando os parâmetros do modelo
intercpto = lr.intercept_
b_tamanho = lr.coef_[0]
b_ano = lr.coef_[1]
b_garagem = lr.coef_[2]

In [10]:
# Checando se o predict bate com a fórmula
intercpto + X_test.iloc[0][0]*b_tamanho + X_test.iloc[0][1]*b_ano + X_test.iloc[0][2]*b_garagem

  intercpto + X_test.iloc[0][0]*b_tamanho + X_test.iloc[0][1]*b_ano + X_test.iloc[0][2]*b_garagem


np.float64(120015.36085249473)

In [11]:
y_pred_lr[0]

np.float64(120015.36085249484)

Métricas de Erro

In [12]:
mse = mean_squared_error(y_test, y_pred_lr)
r2 = r2_score(y_test, y_pred_lr)
rmse = mse ** 0.5


In [13]:
# Logando os artefatos no MLflow
with mlflow.start_run():
    # Log do modelo
    mlflow.sklearn.log_model(sk_model=lr, name="linear_regression_model")
    # Log das métricas
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)



## Testando XGBoostRegressor

In [14]:
# Com os parametros default
xgb_reg_default= XGBRegressor()
xgb_reg_default.fit(X_train, y_train)

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


Métricas de Erro

In [15]:
y_pred_xgb_default = xgb_reg_default.predict(X_test)
mse_default = mean_squared_error(y_test, y_pred_xgb_default)
r2_default = r2_score(y_test, y_pred_xgb_default)
rmse_default = mse ** 0.5

In [16]:
# Logando os artefatos no MLflow
with mlflow.start_run():
    # Log do modelo
    mlflow.xgboost.log_model(xgb_reg_default, name="xgboost_regressor_model_default")
    # Log das métricas
    mlflow.log_metric("mse", mse_default)
    mlflow.log_metric("rmse", rmse_default)
    mlflow.log_metric("r2", r2_default)

  self.get_booster().save_model(fname)


Fazendo XGB com outros parâmetros

In [17]:
xgb_params = {
    "learning_rate": 0.15,
    "n_estimators": 100,
    "max_depth": 5,
    "random_state": 42
}

In [18]:
xgb_reg = XGBRegressor(**xgb_params)
xgb_reg.fit(X_train, y_train)

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [19]:
y_pred_xgb = xgb_reg.predict(X_test)
mse_xgb = mean_squared_error(y_test, y_pred_xgb)
r2_xgb = r2_score(y_test, y_pred_xgb)
rmse_xgb = mse_xgb ** 0.5

In [20]:
# Logando os artefatos no MLflow
# Log do modelo
with mlflow.start_run():
    mlflow.xgboost.log_model(xgb_reg, name="xgboost_regressor_model")
    # Log das métricas
    mlflow.log_metric("mse", mse_xgb)
    mlflow.log_metric("rmse", rmse_xgb)
    mlflow.log_metric("r2", r2_xgb)

  self.get_booster().save_model(fname)
