# Convert MLflow and WandB log files to CSV files

In [None]:
import os

import mlflow
import wandb
import pandas as pd
import numpy as np

## MLflow

In [None]:
# Move the working directory up one level so that the relative paths used below
# (e.g. "data/test/...") resolve from the parent folder.
# NOTE(review): re-running this cell moves up another level each time.
os.chdir(os.pardir)

In [None]:
# Set this to the ID(s) of the MLflow experiment(s) you want to export.
experiment_ids = ["1"]

# Overview table of the matching runs; "pandas" is MLflow's default output format.
runs_df = mlflow.search_runs(experiment_ids=experiment_ids, output_format="pandas")
runs_df

In [None]:
# Query the same experiment(s) again, this time as a list of run objects so that
# each run's metrics and params can be read directly.
runs = mlflow.search_runs(experiment_ids=experiment_ids, output_format="list")

# Metric names are read from the first run only.
# NOTE(review): this assumes every run logged the same metrics, and it raises
# IndexError when the search returned no runs.
first_run = runs[0]
keys = first_run.data.metrics.keys()

# Client used for the per-metric history lookups.
client = mlflow.tracking.MlflowClient()

In [None]:
# Run parameters that are copied onto every exported row. Algorithm-specific
# additions go into `extra_keys`; they are appended after the common ones.
extra_keys = ["algorithm.target_kl"]
param_keys = ["experiment.seed", "algorithm.name", "env.name", *extra_keys]

In [None]:
def convert_metric_history_to_dataframe(run):
    """Collect the logged metric histories of one MLflow run into a DataFrame.

    Every metric named in the module-level ``keys`` becomes one column holding the
    values returned by ``client.get_metric_history``, in the order they are
    returned. Histories of different lengths are aligned on their positional index,
    so shorter ones are padded with NaN. The run id and the parameters listed in
    the module-level ``param_keys`` are then broadcast to every row.

    Args:
        run: Run object exposing ``info.run_id`` and ``data.params``.

    Returns:
        pandas.DataFrame with one column per metric key, a ``run_id`` column and
        one column per entry of ``param_keys``.

    Raises:
        KeyError: If the run has no value for one of the ``param_keys``.
    """
    run_id = run.info.run_id
    metric_frames = [
        pd.DataFrame(
            {key: [m.value for m in client.get_metric_history(run_id=run_id, key=key)]}
        )
        for key in keys
    ]
    # Concatenate all metric columns in one call: growing the frame inside the loop
    # re-copies every earlier column on each iteration. pd.concat rejects an empty
    # list, so fall back to an empty frame when there are no metric keys.
    df = pd.concat(metric_frames, axis=1) if metric_frames else pd.DataFrame()
    df["run_id"] = run_id
    for param_key in param_keys:
        df[param_key] = run.data.params[param_key]
    return df

In [None]:
# Build one frame per run, then stack them row-wise into a single table.
df_list = [convert_metric_history_to_dataframe(run) for run in runs]
df = pd.concat(df_list)

In [None]:
# Show the combined table as the cell output for a quick visual check.
df

In [None]:
# Final-score table: only the rows that carry a best-model demo reward, restricted
# to the score columns and the run parameters.
final_score_columns = [
    "demo/episode_reward_best_model",
    "demo/episode_reward_final_model",
    "time/iterations",
    "experiment.seed",
    "algorithm.name",
    "env.name",
    "algorithm.target_kl",
]
rows_with_demo_reward = df.dropna(subset=["demo/episode_reward_best_model"])
df_final_score = rows_with_demo_reward[final_score_columns]
df_final_score

In [None]:
# Training table: only the rows that carry an evaluation reward, restricted to the
# training metrics and the run parameters.
train_columns = [
    "eval/mean_reward",
    "train/n_updates",
    "train/s_over_gamma",
    "train/estimated_snr",
    "experiment.seed",
    "algorithm.name",
    "env.name",
    "algorithm.target_kl",
]
rows_with_eval_reward = df.dropna(subset=["eval/mean_reward"])
df_train = rows_with_eval_reward[train_columns]
df_train

In [None]:
# Write both tables to CSV; the paths are relative to the current working
# directory, and the target folder must already exist.
for table, csv_path in (
    (df_final_score, "data/test/final_score.csv"),
    (df_train, "data/test/train.csv"),
):
    table.to_csv(csv_path)

## WandB

In [None]:
# Timeout handed to the W&B API client (presumably in seconds; confirm against
# the wandb documentation).
WANDB_API_TIMEOUT = 29
api = wandb.Api(timeout=WANDB_API_TIMEOUT)

In [None]:
from collections.abc import MutableMapping

def flatten(dictionary, parent_key='', separator='.'):
    """Collapse a nested mapping into a single-level dict with joined keys.

    Keys of nested mappings are prefixed with the key path leading to them, joined
    by ``separator``, so ``{"a": {"b": 1}}`` becomes ``{"a.b": 1}``. Only values
    that are ``MutableMapping`` instances are descended into; other containers are
    kept as values, and an empty nested mapping contributes no entry.

    Args:
        dictionary: Mapping to flatten.
        parent_key: Prefix for every key at this level; empty at the top level.
        separator: String placed between the parts of a joined key.

    Returns:
        A new flat ``dict``; entries keep the order in which they were encountered.
    """
    flat = {}
    for key, value in dictionary.items():
        full_key = key if not parent_key else parent_key + separator + key
        if not isinstance(value, MutableMapping):
            flat[full_key] = value
            continue
        # Nested mapping: merge its flattened entries under the extended prefix.
        flat.update(flatten(value, full_key, separator=separator))
    return flat

In [None]:
# Fill in your own W&B entity and project name in place of the placeholders.
entity = "<entity>"
project = "<project>"

In [None]:
# Environment names whose runs are exported; the run filter built from this list
# matches them against the `config.env.name` field.
all_tasks = [
    "hopper",
    "walker2d",
    "ant",
    "halfcheetah",
    "swimmer",
    "reacher",
    "inverted_double_pendulum",
    "pusher",
]

In [None]:
# Match any run whose config.env.name equals one of the names in all_tasks.
env_name_clauses = [{"config.env.name": task} for task in all_tasks]
filters = {"$or": env_name_clauses}

In [None]:
# Materialise the query result into a plain list so it can be iterated more than
# once, then show how many runs matched.
runs = list(api.runs(path=f"{entity}/{project}", filters=filters))
len(runs)

In [None]:
# Per-step training metrics requested from each run's history.
train_metric_keys = [
    "eval/mean_reward",
    "train/target_kl",
    "train/s_over_gamma",
    "train/estimated_snr",
]

# Demo-episode reward keys: the best-model key, the final-model key, and the
# alternative name used as a fallback when the final-model key has no data.
demo_best_metric_key = "demo/episode_reward_best_model"
demo_metric_key = "demo/episode_reward_final_model"
demo_metric_key_old = "demo/episode_reward"

In [None]:
# Training curves from W&B: one row per scanned history record of every run,
# tagged with the run's environment, seed and algorithm settings.
train_frames = []
for run in runs:
    # Drop config entries whose name starts with "_" and flatten the rest into
    # dotted keys such as "env.name".
    config = flatten({k: v for k, v in run.config.items() if not k.startswith("_")})
    run_dict = {
        "env": config["env.name"],
        "seed": config["experiment.seed"],
        "algorithm": config["algorithm.name"],
        "init_target_kl": config["algorithm.target_kl"],
    }
    if config["algorithm.name"] == "adaptive-trpo":
        # These settings are only read for adaptive-trpo runs; for other
        # algorithms the columns end up as NaN after the concat.
        run_dict.update(
            target_snr=config["algorithm.kl_update_ratio_func.target_snr"],
            update_strength=config["algorithm.kl_update_ratio_func.kl_update_strength_func.const"],
            smoothing_coeff=config["algorithm.smoothing_coeff_func.const"],
        )
    # n_updates is the 1-based position of the record within the scanned history.
    rows = [
        {**run_dict, **row, "n_updates": i + 1}
        for i, row in enumerate(run.scan_history(keys=train_metric_keys))
    ]
    train_frames.append(pd.DataFrame(rows))

# Concatenate once at the end: growing the frame inside the loop re-copies all
# earlier rows on every iteration and seeds the result with an empty DataFrame,
# which newer pandas versions may warn about. pd.concat rejects an empty list, so
# keep the empty-frame result when there are no runs.
df_train = pd.concat(train_frames) if train_frames else pd.DataFrame()
df_train

In [None]:
# Final demo scores from W&B: one row per logged demo reward of every run, tagged
# with the run's environment, seed and algorithm settings.
final_score_frames = []
for run in runs:
    # Drop config entries whose name starts with "_" and flatten the rest into
    # dotted keys such as "env.name".
    config = flatten({k: v for k, v in run.config.items() if not k.startswith("_")})

    rewards = [row[demo_metric_key] for row in run.scan_history(keys=[demo_metric_key])]
    if not rewards:
        # Nothing logged under the current key: fall back to the alternative key
        # (presumably the name used by older runs; confirm).
        rewards = [row[demo_metric_key_old] for row in run.scan_history(keys=[demo_metric_key_old])]
    best_rewards = [row.get(demo_best_metric_key) for row in run.scan_history(keys=[demo_best_metric_key])]

    # pd.DataFrame raises ValueError when its list columns differ in length, e.g.
    # for a run that logged final-model rewards but no best-model rewards. Pad the
    # shorter list with NaN so such runs are exported instead of aborting the loop.
    # NOTE(review): values are paired by position; confirm that is the intended
    # alignment when the two histories have different lengths.
    n_rows = max(len(rewards), len(best_rewards))
    rewards = rewards + [float("nan")] * (n_rows - len(rewards))
    best_rewards = best_rewards + [float("nan")] * (n_rows - len(best_rewards))

    run_dict = {
        "env": config["env.name"],
        "seed": config["experiment.seed"],
        "algorithm": config["algorithm.name"],
        "init_target_kl": config["algorithm.target_kl"],
        "rewards": rewards,
        "rewards_best_model": best_rewards,
    }
    if config["algorithm.name"] == "adaptive-trpo":
        # These settings are only read for adaptive-trpo runs; for other
        # algorithms the columns end up as NaN after the concat.
        run_dict.update(
            target_snr=config["algorithm.kl_update_ratio_func.target_snr"],
            update_strength=config["algorithm.kl_update_ratio_func.kl_update_strength_func.const"],
            smoothing_coeff=config["algorithm.smoothing_coeff_func.const"],
        )
    final_score_frames.append(pd.DataFrame(run_dict))

# Concatenate once at the end instead of re-copying the growing frame per run.
# pd.concat rejects an empty list, so keep the empty-frame result when there are
# no runs.
df_final_score = pd.concat(final_score_frames) if final_score_frames else pd.DataFrame()
df_final_score

In [None]:
# Reduce the training table to the columns that are written to the CSV file.
train_export_columns = [
    "env",
    "seed",
    "algorithm",
    "init_target_kl",
    "eval/mean_reward",
    "train/s_over_gamma",
    "train/estimated_snr",
    "n_updates",
]
df_train = df_train[train_export_columns]
df_train

In [None]:
# Write both tables to CSV; the paths are relative to the current working
# directory, and the target folder must already exist.
# NOTE(review): the MLflow section calls os.chdir("..") and writes to
# "data/test/..." without the "../" prefix. If that cell ran in the same session,
# these paths resolve one directory higher than the MLflow ones; confirm.
for table, csv_path in (
    (df_final_score, "../data/test/final_score.csv"),
    (df_train, "../data/test/train.csv"),
):
    table.to_csv(csv_path)