In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('../data/processed/casas.csv')

In [3]:
df.head()

Unnamed: 0,tamanho,ano,garagem,preco
0,159.0,2003,2,208500
1,117.0,1976,2,181500
2,166.0,2001,2,223500
3,160.0,1915,3,140000
4,204.0,2000,3,250000


In [4]:
X = df.drop('preco', axis=1)
y = df['preco'].copy()

In [5]:
X.head()

Unnamed: 0,tamanho,ano,garagem
0,159.0,2003,2
1,117.0,1976,2
2,166.0,2001,2
3,160.0,1915,3
4,204.0,2000,3


In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
X_train.shape

(1022, 3)

In [8]:
X_test.shape

(438, 3)

In [9]:
import mlflow

In [10]:
mlflow.set_experiment('house-prices-eda')

# Linear Regression

In [11]:
mlflow.start_run()

<ActiveRun: >

In [12]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(X_train, y_train)

LinearRegression()

In [13]:
mlflow.sklearn.log_model(lr,'lr')

In [14]:
lr_predicted = lr.predict(X_test)

In [15]:
len(lr_predicted)

438

In [16]:
X_test.iloc[0]

tamanho      99.0
ano        1963.0
garagem       1.0
Name: 892, dtype: float64

In [17]:
y_test

892     154500
1105    325000
413     115000
522     159000
1036    315500
         ...  
331     139000
323     126175
650     205950
439     110000
798     485000
Name: preco, Length: 438, dtype: int64

In [18]:
from sklearn.metrics import mean_squared_error, r2_score

In [19]:
import math

In [20]:
mse = mean_squared_error(y_test, lr_predicted)
rmse =  math.sqrt(mse)
r2 = r2_score(y_test, lr_predicted)
mlflow.log_metric('mse',mse)
mlflow.log_metric('rmse',rmse)
mlflow.log_metric('r2',r2)

In [21]:
print('Metrics: MSE = {0}, RMSE = {1}, R2 = {2}'.format(mse, rmse, r2))

Metrics: MSE = 2078666917.9289908, RMSE = 45592.39978251848, R2 = 0.7021153642898048


In [22]:
mlflow.end_run()

In [23]:
from xgboost import XGBRFRegressor, XGBRegressor

In [24]:
xgb_params = {
    'learning_rate':0.2,
    'n_estimators': 50,
    'random_state':42
}

with mlflow.start_run():
    xgb = XGBRegressor(**xgb_params)
    xgb.fit(X_train, y_train)
    mlflow.xgboost.log_model(xgb,'xgboost')
    xgb_predicted = xgb.predict(X_test)
    mse = mean_squared_error(y_test, xgb_predicted)
    rmse =  math.sqrt(mse)
    r2 = r2_score(y_test, xgb_predicted)
    mlflow.log_metric('mse',mse)
    mlflow.log_metric('rmse',rmse)
    mlflow.log_metric('r2',r2)

In [25]:
print('Metrics: MSE = {0}, RMSE = {1}, R2 = {2}'.format(mse, rmse, r2))

Metrics: MSE = 1386727460.1346002, RMSE = 37238.789724353286, R2 = 0.8012741720529797


In [26]:
mlflow.get_experiment_by_name('house-prices-eda')

<Experiment: artifact_location='file:///Users/marinaramalhetedesouza/Documents/MLflow/mlflow_example/mlflow/notebooks/mlruns/1', experiment_id='1', lifecycle_stage='active', name='house-prices-eda', tags={}>

In [27]:
mlflow.list_run_infos('1')

[<RunInfo: artifact_uri='file:///Users/marinaramalhetedesouza/Documents/MLflow/mlflow_example/mlflow/notebooks/mlruns/1/3eb7700937874ed2aed7e5b1bff9cbae/artifacts', end_time=1641851528658, experiment_id='1', lifecycle_stage='active', run_id='3eb7700937874ed2aed7e5b1bff9cbae', run_uuid='3eb7700937874ed2aed7e5b1bff9cbae', start_time=1641851528499, status='FINISHED', user_id='marinaramalhetedesouza'>,
 <RunInfo: artifact_uri='file:///Users/marinaramalhetedesouza/Documents/MLflow/mlflow_example/mlflow/notebooks/mlruns/1/603c5becbeb047a4ad891724eeaf030e/artifacts', end_time=1641851527405, experiment_id='1', lifecycle_stage='active', run_id='603c5becbeb047a4ad891724eeaf030e', run_uuid='603c5becbeb047a4ad891724eeaf030e', start_time=1641851522372, status='FINISHED', user_id='marinaramalhetedesouza'>,
 <RunInfo: artifact_uri='file:///Users/marinaramalhetedesouza/Documents/MLflow/mlflow_example/mlflow/notebooks/mlruns/1/7991c3d91b16437d8927109ce37e41a3/artifacts', end_time=1641851334541, experim

In [28]:
mlflow.get_run('3eb7700937874ed2aed7e5b1bff9cbae')

<Run: data=<RunData: metrics={'mse': 1386727460.1346002,
 'r2': 0.8012741720529797,
 'rmse': 37238.789724353286}, params={}, tags={'mlflow.log-model.history': '[{"run_id": "3eb7700937874ed2aed7e5b1bff9cbae", '
                             '"artifact_path": "xgboost", "utc_time_created": '
                             '"2022-01-10 21:52:08.625572", "flavors": '
                             '{"python_function": {"loader_module": '
                             '"mlflow.xgboost", "python_version": "3.8.12", '
                             '"data": "model.xgb", "env": "conda.yaml"}, '
                             '"xgboost": {"xgb_version": "1.5.1", "data": '
                             '"model.xgb"}}}]',
 'mlflow.source.name': '/Users/marinaramalhetedesouza/opt/anaconda3/envs/mlflow/lib/python3.8/site-packages/ipykernel_launcher.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'marinaramalhetedesouza'}>, info=<RunInfo: artifact_uri='file:///Users/marinaramalhetedesouza/Documents/MLflow