In [67]:
import datetime
import json
import os
import pprint

import ml_metadata
import pandas as pd

from ml_metadata import metadata_store
from ml_metadata.proto import metadata_store_pb2

In [2]:
# Show the installed ml_metadata version so the outputs below can be tied to it.
print(ml_metadata.__version__)

0.29.0


In [33]:
# Shared pretty-printer (4-space indent) used to dump nested dicts in later cells.
pp = pprint.PrettyPrinter(indent=4)

In [3]:
# Connection settings for the MySQL-backed metadata store.
# Each value can be overridden through an environment variable so that
# credentials do not have to live in the notebook; the defaults are the
# values this notebook was originally written against.
connection_config = metadata_store_pb2.ConnectionConfig(mysql=metadata_store_pb2.MySQLDatabaseConfig(
    database=os.environ.get('MLMD_DB_NAME', 'mlmd'),
    host=os.environ.get('MLMD_DB_HOST', 'mlmddb'),
    user=os.environ.get('MLMD_DB_USER', 'mysql'),
    password=os.environ.get('MLMD_DB_PASSWORD', 'mysql'),
    # Environment variables are strings; the port is converted with int().
    port=int(os.environ.get('MLMD_DB_PORT', '3306')),
))
# Handle used by every query in the cells below.
store = metadata_store.MetadataStore(connection_config)

In [8]:
def _parse_custom_value(value):
    if value.HasField('int_value'):
        return value.int_value
    elif value.HasField('double_value'):
        return value.double_value
    else:
        return _py_value(value.string_value)
    
    
def _py_value(val):
    try:
        return json.loads(val.lower())
    except ValueError:
        return val

In [69]:
def _get_run_info_from_artifact(store, identifier):
    executions_to_artifact = {}
    events = store.get_events_by_artifact_ids([identifier])
    for event in events:
        executions_to_artifact[event.execution_id] = event.artifact_id
    
    artifact_to_run_info = {}
    executions = store.get_executions_by_id(list(executions_to_artifact.keys()))
    for execution in executions:
        artifact_id = executions_to_artifact[execution.id]
        artifact_to_run_info[artifact_id] = {
            'started_at': datetime.datetime.fromtimestamp(execution.create_time_since_epoch // 1000),
            'last_update': datetime.datetime.fromtimestamp(execution.last_update_time_since_epoch // 1000),
        }
    return artifact_to_run_info

In [77]:
# Build one flat record per "io.markow.TrackingResult" artifact, keyed by
# artifact id, from the artifact's custom properties plus run timestamps.
artifacts = store.get_artifacts_by_type("io.markow.TrackingResult")

metrics = {}
property_ids = set()

for artifact in artifacts:
    custom_values = {}

    for k, v in artifact.custom_properties.items():
        parsed = _parse_custom_value(v)
        custom_values[k] = parsed
        if k == 'name' and isinstance(parsed, str):
            # Judging by the recorded output, names look like
            # '<pipeline>:<run_id>:<component>.<model>:...'. Names that do
            # not have that shape are kept as-is without derived columns.
            namespaces = parsed.split(':')
            if len(namespaces) >= 3:
                custom_values['pipeline'] = namespaces[0]
                custom_values['run_id'] = namespaces[1]
                component_parts = namespaces[2].split('.')
                if len(component_parts) >= 2:
                    custom_values['model'] = component_parts[1]

    # Recorded before the run timestamps are merged in, so property_ids
    # does not contain 'started_at' / 'last_update'.
    property_ids.update(custom_values)

    run_info = _get_run_info_from_artifact(store, artifact.id)
    # An artifact without any recorded execution has no run info; its
    # timestamp columns are then simply left empty.
    custom_values.update(run_info.get(artifact.id, {}))

    metrics[artifact.id] = custom_values

# This cell runs at top level, so the results stay available as the
# `metrics` and `property_ids` variables (a `return` here is a SyntaxError).
pp.pprint(metrics)

pd.DataFrame(list(metrics.values()))

{   11: {   'example_count': 2.0,
            'last_update': datetime.datetime(2021, 5, 3, 8, 42, 28),
            'loss': 2.803909,
            'mae': 2.803909,
            'mean_absolute_error': 2.803909,
            'model': 'regression_simple',
            'name': 'auto-mpg-postgres:20210503-084041.665048:TrackingPublisher.regression_simple:tracking_results:0',
            'pipeline': 'auto-mpg-postgres',
            'run_id': '20210503-084041.665048',
            'started_at': datetime.datetime(2021, 5, 3, 8, 42, 27),
            'tfx_version': '0.29.0',
            'weighted_example_count': 2.0},
    23: {   'example_count': 4.0,
            'last_update': datetime.datetime(2021, 5, 3, 8, 45, 35),
            'loss': 0.938006,
            'mae': 0.938006,
            'mean_absolute_error': 0.938006,
            'model': 'regression_dnn',
            'name': 'auto-mpg-postgres:20210503-084342.683215:TrackingPublisher.regression_dnn:tracking_results:0',
            'pipeline': 'aut

Unnamed: 0,mae,weighted_example_count,example_count,tfx_version,loss,mean_absolute_error,name,pipeline,run_id,model,started_at,last_update
0,2.803909,2.0,2.0,0.29.0,2.803909,2.803909,auto-mpg-postgres:20210503-084041.665048:Track...,auto-mpg-postgres,20210503-084041.665048,regression_simple,2021-05-03 08:42:27,2021-05-03 08:42:28
1,0.938006,4.0,4.0,0.29.0,0.938006,0.938006,auto-mpg-postgres:20210503-084342.683215:Track...,auto-mpg-postgres,20210503-084342.683215,regression_dnn,2021-05-03 08:45:33,2021-05-03 08:45:35
2,1.399343,2.0,2.0,0.29.0,1.399343,1.399343,auto-mpg-postgres:20210503-085031.896911:Track...,auto-mpg-postgres,20210503-085031.896911,regression_dnn,2021-05-03 08:52:32,2021-05-03 08:52:34
3,2.203678,77.0,77.0,0.29.0,2.203678,2.203678,auto-mpg-postgres:20210503-095139.719179:Track...,auto-mpg-postgres,20210503-095139.719179,regression_dnn,2021-05-03 09:53:27,2021-05-03 09:53:29
4,1.179773,2.0,2.0,0.29.0,1.179773,1.179773,auto-mpg-postgres:20210503-095732.394489:Track...,auto-mpg-postgres,20210503-095732.394489,regression_dnn,2021-05-03 09:59:37,2021-05-03 09:59:38
5,1.702325,73.0,73.0,0.29.0,1.702325,1.702325,auto-mpg-postgres:20210503-100010.050209:Track...,auto-mpg-postgres,20210503-100010.050209,regression_dnn,2021-05-03 10:02:08,2021-05-03 10:02:10
6,2.332426,1.0,1.0,0.29.0,2.332426,2.332426,auto-mpg-postgres:20210503-100241.363067:Track...,auto-mpg-postgres,20210503-100241.363067,regression_dnn,2021-05-03 10:04:34,2021-05-03 10:04:36
7,3.773273,1.0,1.0,0.29.0,3.773273,3.773273,auto-mpg-postgres:20210503-100803.860371:Track...,auto-mpg-postgres,20210503-100803.860371,regression_dnn,2021-05-03 10:10:00,2021-05-03 10:10:01
8,1.009556,4.0,4.0,0.29.0,1.009556,1.009556,auto-mpg-postgres:20210503-101218.554732:Track...,auto-mpg-postgres,20210503-101218.554732,regression_dnn,2021-05-03 10:14:20,2021-05-03 10:14:21
9,,,,0.29.0,1.930636,1.930636,auto-mpg-postgres:20210503-102124.944080:Track...,auto-mpg-postgres,20210503-102124.944080,regression_dnn,2021-05-03 10:23:19,2021-05-03 10:23:21
