# Model tracking with MLflow

MLflow is an open-source platform for managing the end-to-end machine learning lifecycle. It covers four primary functions:

* Tracking experiments to compare parameters and results (MLflow Tracking)
* Versioning models (MLflow Model Registry)
* Packaging models so they can be served (MLflow Models)
* Packaging ML code so it can be reproduced (MLflow Projects)

## Documentation
* Documentation: https://mlflow.org/docs/latest/index.html
* Python API reference (includes the tracking functions used below): https://mlflow.org/docs/latest/python_api/index.html


In [None]:
import os
import shutil

import mlflow
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
# Scratch directory that this notebook removes before and after the demo.
# NOTE(review): nothing in the visible cells points MLflow at this directory
# (no tracking URI is set), so runs presumably land in MLflow's default
# location instead -- confirm whether a `mlflow.set_tracking_uri(...)` call
# was intended.
mlflow_path = "mlflow_path"

# Start from a clean slate. `ignore_errors=True` already makes this a no-op
# when the directory does not exist, so no separate existence check is needed.
shutil.rmtree(mlflow_path, ignore_errors=True)

In [None]:
# Open an MLflow run. It is not used as a context manager here; the run is
# closed explicitly by the `mlflow.end_run()` cell further down, so the
# parameter, metric and model logging cells in between all execute while this
# run is open.
mlflow.start_run()

In [None]:
# Random-forest hyperparameters used for this run.
n_estimators, max_depth = 100, 10

# Record each hyperparameter with MLflow under the same name the estimator
# uses, so the logged values match what the model is built with below.
mlflow.log_param("n_estimators", n_estimators)
mlflow.log_param("max_depth", max_depth)

In [None]:
# Load the scikit-learn diabetes dataset and split it into train/test parts
# using train_test_split's default settings (no fixed seed, so the split
# differs between executions).
db = load_diabetes()
features, target = db.data, db.target
X_train, X_test, y_train, y_test = train_test_split(features, target)

# Quick sanity check: array shapes, then the first training sample and its
# target value.
print(X_train.shape, y_train.shape)
first_sample, first_target = X_train[0, :], y_train[0]
print(first_sample)
print(first_target)

In [None]:
# Build the regressor from the hyperparameters that were logged above, then
# fit it on the training split.
forest_kwargs = {"n_estimators": n_estimators, "max_depth": max_depth}
model = RandomForestRegressor(**forest_kwargs)
model.fit(X_train, y_train)

In [None]:
# Predict on the held-out split and show the shape of the prediction array.
pred = model.predict(X_test)
print(np.shape(pred))

In [None]:
# Evaluate on the test split: RMSE is the square root of the mean squared
# error; MAE is the mean absolute error.
mse = mean_squared_error(y_test, pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, pred)

# Print one value per line (rmse first, then mae).
print(rmse, mae, sep="\n")

# Record both metrics with MLflow, in the same order as they were printed.
for metric_name, metric_value in (("rmse", rmse), ("mae", mae)):
    mlflow.log_metric(metric_name, metric_value)

In [None]:
# Log the fitted scikit-learn model with MLflow. "model" is passed as the
# second positional argument.
# NOTE(review): that argument is presumably the artifact path/name the model
# is stored under -- confirm against the installed MLflow version.
mlflow.sklearn.log_model(model, "model")

In [None]:
# Close the run that was opened by the earlier `mlflow.start_run()` cell.
mlflow.end_run()

In [None]:
# URI of the model to load back in the next cell.
# An empty string cannot be resolved by `mlflow.sklearn.load_model`, so build
# the URI from the run that was just finished instead of leaving a blank
# placeholder. "model" matches the name passed to `log_model` above.
# To load a model from a different run, replace this with its URI
# (for example "runs:/<run_id>/model", copied from the MLflow UI).
last_run = mlflow.last_active_run()
logged_model = f"runs:/{last_run.info.run_id}/model"

In [None]:
# Load the model back from the URI in `logged_model` and score it on the same
# test split, so the printed values can be compared with the rmse/mae
# computed for the in-memory model.
model2 = mlflow.sklearn.load_model(logged_model)
pred2 = model2.predict(X_test)

mse2 = mean_squared_error(y_test, pred2)
rmse2 = np.sqrt(mse2)
mae2 = mean_absolute_error(y_test, pred2)

# One value per line: rmse2 first, then mae2.
print(rmse2, mae2, sep="\n")

In [None]:
# Run the `mlflow ui` command through the notebook's shell escape (`!`).
# NOTE(review): this presumably starts a local web server and keeps the cell
# busy until it is interrupted, so the cleanup cell below would only run
# after stopping it -- confirm.
!mlflow ui

In [None]:
# Final cleanup: remove the scratch directory named by `mlflow_path`.
# `ignore_errors=True` makes this a no-op when the directory is absent, so no
# separate existence check is needed.
shutil.rmtree(mlflow_path, ignore_errors=True)