In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("/home/mborges/estudos/mlflow/data/processed/casas.csv")

In [3]:
df.head()

Unnamed: 0,tamanho,ano,garagem,preco
0,159.0,2003,2,208500
1,117.0,1976,2,181500
2,166.0,2001,2,223500
3,160.0,1915,3,140000
4,204.0,2000,3,250000


In [4]:
X = df.drop("preco", axis=1)
y = df["preco"].copy()

In [5]:
X.head()

Unnamed: 0,tamanho,ano,garagem
0,159.0,2003,2
1,117.0,1976,2
2,166.0,2001,2
3,160.0,1915,3
4,204.0,2000,3


In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")

X_train shape: (1022, 3)
X_test shape: (438, 3)


### Linear Regression

In [8]:
import mlflow
mlflow.set_experiment("casas-regression")

2026/01/07 17:30:30 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/01/07 17:30:30 INFO mlflow.store.db.utils: Updating database tables
2026/01/07 17:30:30 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/07 17:30:30 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2026/01/07 17:30:30 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/07 17:30:30 INFO alembic.runtime.migration: Will assume non-transactional DDL.


<Experiment: artifact_location='/home/mborges/estudos/mlflow/notebooks/mlruns/1', creation_time=1767806017430, experiment_id='1', last_update_time=1767806017430, lifecycle_stage='active', name='casas-regression', tags={'mlflow.experimentKind': 'custom_model_development'}>

In [9]:
mlflow.start_run(run_name="linear-regression-run")

<ActiveRun: >

In [10]:
from sklearn.linear_model import LinearRegression
lr=LinearRegression()
lr.fit(X_train, y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [None]:
mlflow.sklearn.log_model(lr, name="linear-regression-model")



<mlflow.models.model.ModelInfo at 0xf268f4c82ec0>

In [12]:
lr_predict = lr.predict(X_test)

In [13]:
from sklearn.metrics import mean_squared_error, r2_score
import math

In [14]:
mse = mean_squared_error(y_test, lr_predict)
rmse = math.sqrt(mse)
r2 = r2_score(y_test, lr_predict)

mlflow.log_metric("mse_linear_regression", mse)
mlflow.log_metric("rmse_linear_regression", rmse)
mlflow.log_metric("r2_linear_regression", r2)

In [15]:
print(f"mse linear regression: {mse}")
print(f"rmse linear regression: {rmse}")
print(f"r2 linear regression: {r2}")

mse linear regression: 2078666917.9289913
rmse linear regression: 45592.399782518485
r2 linear regression: 0.7021153642898048


In [16]:
mlflow.end_run()

### XGBoost

In [18]:
from xgboost import XGBRegressor, XGBRFRegressor

xgb_params = {
    'learning_rate': 0.2,
    'n_estimators': 50,
    'random_state': 42
}

with mlflow.start_run():
    xgb = XGBRegressor(**xgb_params)
    xgb.fit(X_train, y_train)
    mlflow.sklearn.log_model(xgb, name="xgboost-regression-model")
    xgb_predicted = xgb.predict(X_test)
    mse_xgb = mean_squared_error(y_test, xgb_predicted)
    rmse_xgb = math.sqrt(mse_xgb)
    r2_xgb = r2_score(y_test, xgb_predicted)
    mlflow.log_metric("mse_xgboost_regression", mse_xgb)
    mlflow.log_metric("rmse_xgboost_regression", rmse_xgb)
    mlflow.log_metric("r2_xgboost_regression", r2_xgb)

In [19]:
print(f"mse XGBoost: {mse_xgb}")
print(f"rmse XGBoost: {rmse_xgb}")
print(f"r2 XGBoost: {r2_xgb}")

mse XGBoost: 1405983360.0
rmse XGBoost: 37496.444631458064
r2 XGBoost: 0.798514723777771


In [20]:
mlflow.get_experiment_by_name("casas-regression")

<Experiment: artifact_location='/home/mborges/estudos/mlflow/notebooks/mlruns/1', creation_time=1767806017430, experiment_id='1', last_update_time=1767806017430, lifecycle_stage='active', name='casas-regression', tags={'mlflow.experimentKind': 'custom_model_development'}>

In [24]:
# List run information for experiment with ID "1"
runs_df = mlflow.search_runs(experiment_ids=["1"])
display(runs_df)

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.r2_xgboost_regression,metrics.rmse_xgboost_regression,metrics.mse_xgboost_regression,metrics.r2_linear_regression,metrics.rmse_linear_regression,metrics.mse_linear_regression,tags.mlflow.runName,tags.mlflow.user,tags.mlflow.source.type,tags.mlflow.source.name
0,55dad82a82f747998911c65d46b2fb6f,1,FINISHED,/home/mborges/estudos/mlflow/notebooks/mlruns/...,2026-01-07 17:44:40.752000+00:00,2026-01-07 17:44:43.074000+00:00,0.798515,37496.444631,1405983000.0,,,,popular-fly-873,mborges,LOCAL,/home/mborges/estudos/venv/lib/python3.10/site...
1,3eb797b9b0c14de29a6a7a60786267c7,1,FINISHED,/home/mborges/estudos/mlflow/notebooks/mlruns/...,2026-01-07 17:35:23.908000+00:00,2026-01-07 17:35:26.227000+00:00,0.774704,39650.177503,1572137000.0,,,,xgboost-regression-run,mborges,LOCAL,/home/mborges/estudos/venv/lib/python3.10/site...
2,6762349478e54d078febc36aa03e2d50,1,FINISHED,/home/mborges/estudos/mlflow/notebooks/mlruns/...,2026-01-07 17:30:31.092000+00:00,2026-01-07 17:33:30.133000+00:00,,,,0.702115,45592.399783,2078667000.0,linear-regression-run,mborges,LOCAL,/home/mborges/estudos/venv/lib/python3.10/site...


In [25]:
mlflow.get_run("55dad82a82f747998911c65d46b2fb6f")

<Run: data=<RunData: metrics={'mse_xgboost_regression': 1405983360.0,
 'r2_xgboost_regression': 0.798514723777771,
 'rmse_xgboost_regression': 37496.444631458064}, params={}, tags={'mlflow.runName': 'popular-fly-873',
 'mlflow.source.name': '/home/mborges/estudos/venv/lib/python3.10/site-packages/ipykernel_launcher.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'mborges'}>, info=<RunInfo: artifact_uri='/home/mborges/estudos/mlflow/notebooks/mlruns/1/55dad82a82f747998911c65d46b2fb6f/artifacts', end_time=1767807883074, experiment_id='1', lifecycle_stage='active', run_id='55dad82a82f747998911c65d46b2fb6f', run_name='popular-fly-873', start_time=1767807880752, status='FINISHED', user_id='mborges'>, inputs=<RunInputs: dataset_inputs=[], model_inputs=[]>, outputs=<RunOutputs: model_outputs=[<LoggedModelOutput: model_id='m-43c2ee71b3114629864c6139ed77ce20', step=0>]>>