In [None]:
import xgboost as xgb

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [None]:
import mlflow
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [None]:
data = load_breast_cancer()
features = StandardScaler().fit_transform(data.data)
target = data.target

In [None]:
X_train, X_valid, y_train, y_valid = train_test_split(
            features, target, train_size=0.8, shuffle=True, random_state=1)

In [None]:
print(X_train.shape, y_train.shape)
print(X_valid.shape, y_valid.shape)

In [None]:
mlflow.set_tracking_uri('sqlite:///database/mlflow.db')
mlflow.set_experiment('breast-cancer-hyperopt')

In [None]:
def objective(params):
    with mlflow.start_run():
        mlflow.set_tag('Developer', 'Mikhail Stasyuk')
        mlflow.sklearn.autolog()
        
        lasso = Lasso(params['alpha'])
        lasso.fit(X_train, y_train)

        y_pred = lasso.predict(X_train)
        rmse_train = mean_squared_error(y_train, y_pred, squared=False)
        y_pred = lasso.predict(X_valid)
        rmse_valid = mean_squared_error(y_valid, y_pred, squared=False)
        
        print(f'train rmse:{rmse_train} | valid rmse: {rmse_valid}')
        
        return {'loss': rmse_valid, 'status': STATUS_OK}


In [None]:
search_space = {
    'max_iter': 100000,
    'alpha': hp.uniform('alpha', 0.001, 0.005)}

In [None]:
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

In [None]:
import xgboost as xgb

In [None]:
train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_valid, label=y_valid)

In [100]:
mlflow.set_experiment('breast-cancer-hyperopt-xgb')

<Experiment: artifact_location='/home/rhuubarb/git/mlops-zoomcamp-mikhail-stasyuk/cohorts/2023/02-experiment-tracking/homework/mlruns/5', creation_time=1685464770217, experiment_id='5', last_update_time=1685464770217, lifecycle_stage='active', name='breast-cancer-hyperopt-xgb', tags={}>

In [None]:
def objective(params):
    with mlflow.start_run():
        mlflow.set_tag('Developer', 'Mikhail Stasyuk')
        mlflow.xgboost.autolog()
        
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=[(valid, 'validation')],
            early_stopping_rounds=50
        )

        y_pred = booster.predict(valid)
        rmse = mean_squared_error(y_valid, y_pred, squared=False)
        
        return {'loss': rmse, 'status': STATUS_OK}

In [None]:
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    'objective': 'reg:squarederror',
    'seed': 42
}

In [None]:
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

In [None]:
logged_model = 'runs:/07acd9cff4bf4a0cbf46a27305b358d3/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)
loaded_model

In [None]:
import pandas as pd
# Predict on a Pandas DataFrame.
y_pred = loaded_model.predict(pd.DataFrame(features))
y_pred

In [99]:
from mlflow.tracking import MlflowClient

MLFLOW_TRACKING_URI = 'sqlite:///database/mlflow.db'

client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [139]:
from mlflow.entities import ViewType

runs = client.search_runs(
    experiment_ids='5',
    filter_string="",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=""
)

In [140]:
for run in runs:
    print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['validation-rmse']:.4f}")

run id: fd1662bfd0f247979cba488b9ec81226, rmse: 0.2029
run id: b3c9ae23f99e4113a1212275aad77e6e, rmse: 0.2294
run id: 18d3ce85feea45f098d6f34a986af200, rmse: 0.2180
run id: 1d2522c548e64cb396770eeb0f25c939, rmse: 0.2185
run id: 6184ed0f78db4a8a9e58c2edac21ae5a, rmse: 0.2819


In [141]:
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [142]:
run_id = 'fd1662bfd0f247979cba488b9ec81226'
model_uri = f'runs:/{run_id}/model'
mlflow.register_model(model_uri=model_uri, name='breast-cancer-regressor')

Registered model 'breast-cancer-regressor' already exists. Creating a new version of this model...
2023/05/30 23:13:03 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: breast-cancer-regressor, version 3
Created version '3' of model 'breast-cancer-regressor'.


<ModelVersion: aliases=[], creation_timestamp=1685473983041, current_stage='None', description=None, last_updated_timestamp=1685473983041, name='breast-cancer-regressor', run_id='fd1662bfd0f247979cba488b9ec81226', run_link=None, source='/home/rhuubarb/git/mlops-zoomcamp-mikhail-stasyuk/cohorts/2023/02-experiment-tracking/homework/mlruns/5/fd1662bfd0f247979cba488b9ec81226/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [149]:
model_name = 'breast-cancer-regressor'
latest_versions = client.get_latest_versions(name=model_name)

for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 2, stage: Staging
version: 3, stage: None


In [150]:
client.transition_model_version_stage(
    name=model_name,
    version=3,
    stage='Staging',
    archive_existing_versions=False
)

<ModelVersion: aliases=[], creation_timestamp=1685473983041, current_stage='Staging', description=None, last_updated_timestamp=1685474554532, name='breast-cancer-regressor', run_id='fd1662bfd0f247979cba488b9ec81226', run_link=None, source='/home/rhuubarb/git/mlops-zoomcamp-mikhail-stasyuk/cohorts/2023/02-experiment-tracking/homework/mlruns/5/fd1662bfd0f247979cba488b9ec81226/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [155]:
new_stage = 'Production'
model_version = 1

In [156]:
client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=False
)

<ModelVersion: aliases=[], creation_timestamp=1685465891122, current_stage='Production', description='', last_updated_timestamp=1685474842660, name='breast-cancer-regressor', run_id='07acd9cff4bf4a0cbf46a27305b358d3', run_link='', source='/home/rhuubarb/git/mlops-zoomcamp-mikhail-stasyuk/cohorts/2023/02-experiment-tracking/homework/mlruns/5/07acd9cff4bf4a0cbf46a27305b358d3/artifacts/model', status='READY', status_message=None, tags={'model': 'xgboostingregressor'}, user_id=None, version=1>

In [158]:
from datetime import datetime

date = datetime.today().date()
client.update_model_version(
    name=model_name,
    version=model_version,
    description=f"The model version {model_version} was transitioned to {new_stage} on {date}"
)

<ModelVersion: aliases=[], creation_timestamp=1685465891122, current_stage='Production', description='The model version 1 was transitioned to Production on 2023-05-30', last_updated_timestamp=1685474935312, name='breast-cancer-regressor', run_id='07acd9cff4bf4a0cbf46a27305b358d3', run_link='', source='/home/rhuubarb/git/mlops-zoomcamp-mikhail-stasyuk/cohorts/2023/02-experiment-tracking/homework/mlruns/5/07acd9cff4bf4a0cbf46a27305b358d3/artifacts/model', status='READY', status_message=None, tags={'model': 'xgboostingregressor'}, user_id=None, version=1>