# Exercise in importing MLFlow into ML-Metadata

Takes an MLFlow model
and demonstrates a possible way to store it, along with its metadata,
inside an ml-metadata server.

Set up the environment, including a local ml-metadata DB backed by SQLite.

In [19]:
import os
import mlflow
from ml_metadata.metadata_store import metadata_store
from ml_metadata.proto import metadata_store_pb2
from pprint import pprint
from mlflow import MlflowClient

# Name of the ML-Metadata artifact type under which MLflow models are stored.
MLFLOW_MODEL_TYPE_NAME = "mlflow.model"

# Configure a SQLite-backed ML-Metadata store; the DB file "mlmddb" is placed in
# the current working directory.
connection_config = metadata_store_pb2.ConnectionConfig()
connection_config.sqlite.filename_uri = os.path.join(os.getcwd(), "mlmddb")
# Use the named enum member rather than the bare literal 3 (same value):
# open read-write and create the DB file if it does not exist yet.
connection_config.sqlite.connection_mode = (
    metadata_store_pb2.SqliteMetadataSourceConfig.READWRITE_OPENCREATE
)
store = metadata_store.MetadataStore(connection_config)

# Reuse the artifact type if it is already registered, otherwise register it,
# so this cell can be re-run against an existing DB.
try:
    artifact_type_id = store.get_artifact_type(MLFLOW_MODEL_TYPE_NAME).id
    print("artifact_type_id was found in DB")
except metadata_store.errors.NotFoundError:
    print("artifact_type_id was not found in DB, creating...")
    new_type = metadata_store_pb2.ArtifactType()
    new_type.name = MLFLOW_MODEL_TYPE_NAME
    artifact_type_id = store.put_artifact_type(new_type)

print(f"Using {artifact_type_id} for storing artifacts")

def fetch_logged_data(run_id, artifact_path="model", client=None):
    """Collect the data MLflow recorded for a single run.

    Args:
        run_id: ID of the MLflow run to read.
        artifact_path: Run-relative artifact directory whose entries are
            listed. Defaults to ``"model"``.
        client: Optional object exposing ``get_run(run_id)`` and
            ``list_artifacts(run_id, path)``. When omitted, a new
            ``MlflowClient()`` is created.

    Returns:
        A 5-tuple ``(params, metrics, tags, artifacts, run_info)``:
        ``params`` and ``metrics`` are returned as stored on the run's data,
        ``tags`` is a dict without the keys starting with ``"mlflow."``,
        ``artifacts`` is a list of the ``path`` of each listed artifact, and
        ``run_info`` is ``dict(run.info)``.
    """
    if client is None:
        client = MlflowClient()
    run = client.get_run(run_id)
    data = run.data
    # Keep only user-defined tags; keys in the "mlflow." namespace are dropped.
    tags = {k: v for k, v in data.tags.items() if not k.startswith("mlflow.")}
    artifacts = [f.path for f in client.list_artifacts(run_id, artifact_path)]
    # dict() relies on the run info object being iterable as (key, value) pairs.
    return data.params, data.metrics, tags, artifacts, dict(run.info)

artifact_type_id was not found in DB, creating...
Using 10 for storing artifacts


## Loading and displaying MLFlow

In [20]:
# Single definition of the MLflow run being imported, so the model URI and the
# metadata lookup below cannot drift apart.
RUN_ID = "577dc065392a42cfbab7838321b5b3f8"

model = mlflow.sklearn.load_model(f"runs:/{RUN_ID}/model")
params, metrics, tags, artifacts, run_info = fetch_logged_data(RUN_ID)

# Bundle everything fetched from MLflow under one dict, keyed by section name.
my_mlflow_data = {"params": params, "metrics": metrics, "tags": tags, "artifacts": artifacts, "run_info": run_info}

pprint(my_mlflow_data)

{'artifacts': ['model/MLmodel',
               'model/conda.yaml',
               'model/model.pkl',
               'model/python_env.yaml',
               'model/requirements.txt'],
 'metrics': {'training_mean_absolute_error': 30.73385403912852,
             'training_mean_squared_error': 1322.8173712632986,
             'training_r2_score': 0.7688950285834599,
             'training_root_mean_squared_error': 36.370556378247755,
             'training_score': 0.7688950285834599},
 'params': {'bootstrap': 'True',
            'ccp_alpha': '0.0',
            'criterion': 'squared_error',
            'max_depth': '6',
            'max_features': '3',
            'max_leaf_nodes': 'None',
            'max_samples': 'None',
            'min_impurity_decrease': '0.0',
            'min_samples_leaf': '1',
            'min_samples_split': '2',
            'min_weight_fraction_leaf': '0.0',
            'n_estimators': '100',
            'n_jobs': 'None',
            'oob_score': 'False',
      

In [21]:
# Build one ML-Metadata artifact for the MLflow run: its URI is the run's
# artifact URI, and each section of my_mlflow_data becomes a custom property
# holding a struct value.
artifact = metadata_store_pb2.Artifact()
artifact.type_id = artifact_type_id
artifact.uri = my_mlflow_data["run_info"]["artifact_uri"]
for k, v in my_mlflow_data.items():
    # isinstance() is the idiomatic type check and also accepts list subclasses.
    if isinstance(v, list):
        # The struct value is keyed by name, so a list section is stored as a
        # single field named after the section itself.
        artifact.custom_properties[k].struct_value.get_or_create_list(k).extend(v)
    else:
        # Mapping sections are copied into the struct value key by key.
        artifact.custom_properties[k].struct_value.update(v)

# Exactly one artifact is written, so exactly one id is unpacked.
[model_artifact_id] = store.put_artifacts([artifact])

## Loading and displaying ML-Metadata

In [22]:
# Read the stored artifact back by id; one id is requested, so the result is
# unpacked into a single artifact before printing it.
(my_mlmd_data,) = store.get_artifacts_by_id(
    [model_artifact_id],
)
pprint(my_mlmd_data)

id: 1
type_id: 10
uri: "file:///Users/mmortari/git/exercise20230916/mlruns/0/577dc065392a42cfbab7838321b5b3f8/artifacts"
custom_properties {
  key: "artifacts"
  value {
    struct_value {
      fields {
        key: "artifacts"
        value {
          list_value {
            values {
              string_value: "model/MLmodel"
            }
            values {
              string_value: "model/conda.yaml"
            }
            values {
              string_value: "model/model.pkl"
            }
            values {
              string_value: "model/python_env.yaml"
            }
            values {
              string_value: "model/requirements.txt"
            }
          }
        }
      }
    }
  }
}
custom_properties {
  key: "metrics"
  value {
    struct_value {
      fields {
        key: "training_mean_absolute_error"
        value {
          number_value: 30.73385403912852
        }
      }
      fields {
        key: "training_mean_squared_error"
        value 