In [None]:
!pip install pandas

### Install the _kfmd_ library

In [86]:
# use local `kfmd` library
import sys
sys.path
sys.path.append('/home/jovyan/work/sdk/python/kfmd')

# To use the latest publish `kfmd` library, run
# !pip install kfmd

In [87]:
import metadata
import pandas
from datetime import datetime

### Create a workspace

In [88]:
ws1 = metadata.Workspace(
    backend_url_prefix="127.0.0.1:8080",
    name="ws1",
    description="a workspace for testing",
    labels={"n1": "v1"})

### Create a run in a workspace

In [89]:
r = metadata.Run(
    workspace=ws1,
    name="run-" + datetime.utcnow().isoformat("T") ,
    description="a run in ws_1",
)

### Create an execution in a run

In [90]:
exec = metadata.Execution(
    name = "execution" + datetime.utcnow().isoformat("T") ,
    workspace=ws1,
    run=r,
    description="execution example",
)
print("An execution is create with id %s" % exec.id)

An execution is create with id 5


### Log a data set

In [91]:
data_set = exec.log_input(
        metadata.DataSet(
            description="an example data",
            name="mytable-dump",
            owner="owner@my-company.org",
            uri="file://path/to/dataset",
            version="v1.0.0",
            query="SELECT * FROM mytable"))
assert data_set.id
print("data set id is %s" % data_set.id)

data set id is 13


### Log a model

In [92]:
model = exec.log_output(
    metadata.Model(
            name="MNIST",
            description="model to recognize handwritten digits",
            owner="someone@kubeflow.org",
            uri="gcs://my-bucket/mnist",
            model_type="neural network",
            training_framework={
                "name": "tensorflow",
                "version": "v1.0"
            },
            hyperparameters={
                "learning_rate": 0.5,
                "layers": [10, 3, 1],
                "early_stop": True
            },
            version="v0.0.1",
            labels={"mylabel": "l1"}))
assert model.id
print("model id is %s" % model.id)

model id is 14


### Log an evaluation of a model

In [93]:
metrics = exec.log_output(
    metadata.Metrics(
            name="MNIST-evaluation",
            description="validating the MNIST model to recognize handwritten digits",
            owner="someone@kubeflow.org",
            uri="gcs://my-bucket/mnist-eval.csv",
            data_set_id=data_set.id,
            model_id=model.id,
            metrics_type=metadata.Metrics.VALIDATION,
            values={"accuracy": 0.95},
            labels={"mylabel": "l1"}))
assert metrics.id
print("metrics id is %s" % model.id)

metrics id is 14


### List all models in the workspace

In [94]:
pandas.DataFrame.from_dict(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME))

Unnamed: 0,create_time,description,hyperparameters,id,labels,model_type,name,owner,run,training_framework,uri,version,workspace
0,2019-06-27T00:14:51.086151Z,model to recognize handwritten digits,"{'learning_rate': 0.5, 'layers': [10, 3, 1], '...",2,{'mylabel': 'l1'},neural network,MNIST,someone@kubeflow.org,run-2019-06-27T00:14:51.019588,"{'name': 'tensorflow', 'version': 'v1.0'}",gcs://my-bucket/mnist,v0.0.1,ws1
1,2019-06-27T00:15:19.060262Z,model to recognize handwritten digits,"{'learning_rate': 0.5, 'layers': [10, 3, 1], '...",5,{'mylabel': 'l1'},neural network,MNIST,someone@kubeflow.org,run-2019-06-27T00:15:18.988422,"{'name': 'tensorflow', 'version': 'v1.0'}",gcs://my-bucket/mnist,v0.0.1,ws1
2,2019-06-27T00:15:25.167876Z,model to recognize handwritten digits,"{'learning_rate': 0.5, 'layers': [10, 3, 1], '...",8,{'mylabel': 'l1'},neural network,MNIST,someone@kubeflow.org,run-2019-06-27T00:15:25.097153,"{'name': 'tensorflow', 'version': 'v1.0'}",gcs://my-bucket/mnist,v0.0.1,ws1
3,2019-06-27T00:16:41.696274Z,model to recognize handwritten digits,"{'learning_rate': 0.5, 'layers': [10, 3, 1], '...",14,{'mylabel': 'l1'},neural network,MNIST,someone@kubeflow.org,run-2019-06-27T00:16:41.641078,"{'name': 'tensorflow', 'version': 'v1.0'}",gcs://my-bucket/mnist,v0.0.1,ws1


### Basic Lineage Tracking

In [95]:
print("model id is %s\n" % model.id)
    
# Find the execution that produces this model.
output_events = ws1._client.search_events2(model.id).events
assert len(output_events) == 1
execution_id = output_events[0].execution_id

# Find all events related to that execution.
all_events = ws1._client.search_events(execution_id).events
assert len(all_events) == 3

print("\nAll events related to this model:")
pandas.DataFrame.from_dict([e.to_dict() for e in all_events])

model id is 14


All events related to this model:


Unnamed: 0,artifact_id,execution_id,milliseconds_since_epoch,path,type
0,13,5,1561594601677,,INPUT
1,14,5,1561594601720,,OUTPUT
2,15,5,1561594601756,,OUTPUT
