In [1]:
import os
import pickle
import click
import mlflow
from joblib import load, dump
from google.cloud import storage
from mlflow.pyfunc import load_model

from mlflow.entities import ViewType
from mlflow.tracking import MlflowClient
from catboost import Pool, CatBoostRegressor
from sklearn.metrics import mean_squared_error

In [2]:
EXPERIMENT_NAME = "catboost-stack-overflow-train"
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='gs://rh-mlflow-cb-stack-overflow/1', creation_time=1690306440261, experiment_id='1', last_update_time=1690306440261, lifecycle_stage='active', name='catboost-stack-overflow-train', tags={}>

In [3]:
client = MlflowClient()

In [4]:
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
best_run = client.search_runs(
    experiment_ids=experiment.experiment_id,
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=10,
    order_by=["metrics.test_rmse ASC"],
)[0]

In [5]:
best_run

<Run: data=<RunData: metrics={'rmse_bs_test': 59.11909494549302, 'rmse_test': 34.34276309658149}, params={'depth': '4',
 'iterations': '957',
 'l2_leaf_reg': '17',
 'learning_rate': '0.18094505491408697',
 'loss_function': 'RMSE',
 'random_seed': '14',
 'verbose': '0'}, tags={'mlflow.runName': 'beautiful-bat-446',
 'mlflow.source.name': 'experiment-tracking/train.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'ranga'}>, info=<RunInfo: artifact_uri='gs://rh-mlflow-cb-stack-overflow/1/713593712ad44598aae9993971aa1918/artifacts', end_time=1691788604281, experiment_id='1', lifecycle_stage='active', run_id='713593712ad44598aae9993971aa1918', run_name='beautiful-bat-446', run_uuid='713593712ad44598aae9993971aa1918', start_time=1691788542812, status='FINISHED', user_id='ranga'>, inputs=<RunInputs: dataset_inputs=[]>>

In [6]:
run_id = best_run.info.run_id

In [7]:
run_id

'713593712ad44598aae9993971aa1918'

In [8]:
bucket_name = 'rh-mlflow-cb-stack-overflow'

model_uri = f"gs://{bucket_name}/1/{run_id}/artifacts/model/"
local_model_path = "model/"

# Create the local directory if it doesn't exist
os.makedirs(local_model_path, exist_ok=True)

def download_blob(bucket_name, source_blob_name, destination_file_name):
    """Downloads a blob from the bucket."""
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(source_blob_name)
    blob.download_to_filename(destination_file_name)
    print(f"Blob {source_blob_name} downloaded to {destination_file_name}")

download_blob(bucket_name, f"1/{run_id}/artifacts/model/sf_catboost.bin", f"{local_model_path}sf_catboost.bin")

Blob 1/713593712ad44598aae9993971aa1918/artifacts/model/sf_catboost.bin downloaded to model/sf_catboost.bin


In [None]:
# # load the model into memory
# loaded_model = load('model/sf_catboost.bin')

In [12]:
mlflow.register_model(model_uri, name="catboost-best-model")

Registered model 'catboost-best-model' already exists. Creating a new version of this model...
2023/08/12 00:55:23 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: catboost-best-model, version 3
Created version '3' of model 'catboost-best-model'.


<ModelVersion: aliases=[], creation_timestamp=1691801723735, current_stage='None', description='', last_updated_timestamp=1691801723735, name='catboost-best-model', run_id='', run_link='', source='gs://rh-mlflow-cb-stack-overflow/1/713593712ad44598aae9993971aa1918/artifacts/model/', status='READY', status_message='', tags={}, user_id='', version='3'>