In [60]:
import os
import pandas as pd
import mlflow
from mlflow.tracking import MlflowClient

# Set the MLFLOW_TRACKING_URI environment variable for this kernel process.
# NOTE(review): presumably this is how the mlflow calls below locate the
# tracking server -- confirm the host/port is reachable from where this runs.
os.environ['MLFLOW_TRACKING_URI'] = 'http://mlflow.threethirds.ai:30000'

In [83]:
def load_runs(experiment_id, query):
    """Return an index of the MLflow runs whose run name matches ``query``.

    Args:
        experiment_id: Experiment to search; forwarded unchanged as the first
            argument of ``mlflow.search_runs``.
        query: Pattern placed inside a ``tags.mlflow.runName like "..."``
            filter string.

    Returns:
        A DataFrame with the columns ``id`` (run id), ``run`` (run name, made
        unique) and ``env`` (``params.env_id`` with the ``DmLab-`` prefix
        rewritten to ``dmlab_``), sorted by ``run``. When no run matches, an
        empty DataFrame with those three columns is returned.
    """
    df = mlflow.search_runs(experiment_id, f'tags.mlflow.runName like "{query}"')
    if df.empty:
        # With no matching runs the tag/param columns read below may not
        # exist; return a well-formed empty index instead of raising KeyError.
        return pd.DataFrame(columns=["id", "run", "env"])

    df["id"] = df["run_id"]
    df["run"] = df["tags.mlflow.runName"]

    # Deduplicate run names: the first occurrence keeps its name, repeats get
    # "_1", "_2", ... appended. str.cat returns a new Series, so the result
    # has to be assigned back (it was previously discarded).
    suffix = df.groupby("run").cumcount().apply(lambda i: '' if i == 0 else f'_{i}')
    df["run"] = df["run"].str.cat(suffix)

    # The prefix is a literal string, not a regular expression.
    df["env"] = df["params.env_id"].str.replace("DmLab-", "dmlab_", regex=False)
    df = df[["id", "run", "env"]].sort_values("run")
    return df

def mlflow_get_metrics(run_id, metric, value_column='y', x_bucket=1):
    """Fetch the logged history of one metric of a run as a two-column frame.

    Args:
        run_id: Run whose metric history is requested.
        metric: Metric key passed to ``MlflowClient.get_metric_history``.
        value_column: Name of the output column holding the metric values.
        x_bucket: Bucket width for the step axis.

    Returns:
        A DataFrame with one row per history entry: ``x`` is the entry's step
        mapped to ``(step // x_bucket + 1) * x_bucket`` (the upper edge of its
        bucket), and ``value_column`` is the entry's value.
    """
    history = MlflowClient().get_metric_history(run_id, metric)

    bucket_edges = []
    values = []
    for point in history:
        bucket_edges.append((point.step // x_bucket + 1) * x_bucket)
        values.append(point.value)

    return pd.DataFrame({'x': bucket_edges, value_column: values})

def load_run_metrics(run, x_bucket, env_steps_per_agent_step=4):
    """Collect the bucketed metrics of one run into a single frame.

    Args:
        run: Mapping-like row with the keys ``id``, ``env`` and ``run``
            (e.g. one row of the frame returned by ``load_runs``).
        x_bucket: Bucket width on the step axis, forwarded to
            ``mlflow_get_metrics``; values falling in the same bucket are
            averaged.
        env_steps_per_agent_step: Factor applied to ``agent_steps`` to obtain
            ``env_steps``. Defaults to 4, the value that used to be
            hard-coded here.
            NOTE(review): presumably the environment's action repeat --
            confirm before using another value.

    Returns:
        A DataFrame with one row per step bucket and the columns
        ``train_steps`` (bucket edge), ``agent_steps``, ``return``,
        ``return_eval``, ``env_steps``, ``env`` and ``run``. Buckets in which
        a metric was not logged hold NaN for that metric.
    """
    run_id = run['id']
    # Output column name -> MLflow metric key.
    metrics = {
        'return': 'agent/return',
        'return_eval': 'agent/return_eval',
        'agent_steps': 'train/data_steps',
    }
    df = None
    for column, metric_key in metrics.items():
        # Average all values logged within the same step bucket.
        bucketed = mlflow_get_metrics(run_id, metric_key, column, x_bucket).groupby('x').mean()
        # combine_first takes the union of buckets and columns, so metrics
        # logged at different steps still line up on a shared 'x' index.
        df = bucketed if df is None else df.combine_first(bucketed)

    df = df.reset_index().rename(columns={'x': 'train_steps'})
    df['env_steps'] = df['agent_steps'] * env_steps_per_agent_step
    df['env'] = run['env']
    df['run'] = run['run']
    return df

In [89]:
# Export the bucketed metric history of every matching run, one CSV per run.
output_dir = 'online'
# DataFrame.to_csv does not create missing parent directories, so make sure
# the target directory exists before the first write.
os.makedirs(output_dir, exist_ok=True)

data_runs = df = load_runs(45, 'dmlab_%')
for i, run in data_runs.iterrows():
    # `df` is rebound to the current run's metrics on every iteration; after
    # the loop it holds the last run's frame (or the run index if no run matched).
    df = load_run_metrics(run, 5000)
    # The run name is the file name, so runs sharing a name would overwrite
    # each other's CSV.
    df.to_csv(f'{output_dir}/{run["run"]}.csv', index=False)
