In [1]:
import wandb
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import dill
import copy
import numpy as np

# Drop the horizontal padding matplotlib normally adds around the data, so
# plotted lines start and end exactly at the first/last x value.
plt.rcParams["axes.xmargin"] = 0

In [2]:
def get_sweeps(project):
    """Return the ids of all sweeps in ``project``, in reversed iteration order.

    Args:
        project: iterable of objects exposing an ``id`` attribute (here, the
            sweeps collection of a W&B project).

    Returns:
        list: the ``id`` of every element, last-yielded element first.
    """
    sweep_ids = [entry.id for entry in project]
    # The caller wants the opposite of the order in which the sweeps are yielded.
    sweep_ids.reverse()
    return sweep_ids


def get_run_links(sweeps, project_name, entity="lucacorbucci", api=None):
    """Collect the run ids of every sweep, grouped per sweep.

    Args:
        sweeps: iterable of sweep ids belonging to ``project_name``.
        project_name: name of the W&B project that owns the sweeps.
        entity: W&B entity (user or team) that owns the project. Defaults to
            the entity that used to be hard-coded in the path.
        api: optional client exposing ``sweep(path)``. When omitted, a single
            ``wandb.Api()`` is created and reused for every sweep instead of
            building a new client on each iteration.

    Returns:
        list[list]: one inner list per sweep, holding the ``id`` of each run in
        the reverse of the order in which ``sweep.runs`` yields them.
    """
    if api is None:
        api = wandb.Api()

    runs = []
    for sweep_id in sweeps:
        sweep = api.sweep(f"{entity}/{project_name}/{sweep_id}")
        run_ids = [run.id for run in sweep.runs]
        # Same reversal as get_sweeps applies to the sweep ids.
        runs.append(run_ids[::-1])
    return runs


def get_run_data(run_links_per_sweep, project_name, entity="lucacorbucci", api=None):
    """Download the logged history of every run as a DataFrame, grouped per sweep.

    Args:
        run_links_per_sweep: iterable of iterables of run ids (one inner
            iterable per sweep), e.g. the output of ``get_run_links``.
        project_name: name of the W&B project that owns the runs.
        entity: W&B entity (user or team) that owns the project. Defaults to
            the entity that used to be hard-coded in the path.
        api: optional client exposing ``run(path)``. When omitted, a single
            ``wandb.Api()`` is created and reused for every run instead of
            building a new client per run.

    Returns:
        list[list[pandas.DataFrame]]: same nesting as ``run_links_per_sweep``;
        each DataFrame is built from the records yielded by
        ``run.scan_history()``.
    """
    if api is None:
        api = wandb.Api()

    run_data = []
    for sweep_run_ids in run_links_per_sweep:
        sweep_frames = []
        for run_id in sweep_run_ids:
            run = api.run(f"{entity}/{project_name}/{run_id}")
            sweep_frames.append(pd.DataFrame(run.scan_history()))
        run_data.append(sweep_frames)
    return run_data


def remove_nan(column_names, dataframe):
    """Return the requested columns with NaNs dropped independently per column.

    Each column is compacted on its own and all columns are then truncated to
    the length of the shortest one. Rows are therefore NOT kept aligned: row
    ``i`` of the result may combine values that came from different rows of
    ``dataframe``.

    Args:
        column_names: names of the columns to extract; names that are not
            present in ``dataframe`` are silently ignored.
        dataframe: source ``pandas.DataFrame``.

    Returns:
        pandas.DataFrame: one column per requested name that exists in
        ``dataframe``, with a fresh 0-based index. An empty DataFrame is
        returned when none of the requested columns exist (previously this
        raised ``ValueError`` from ``min()`` on an empty list).
    """
    present = [name for name in column_names if name in dataframe.columns]

    # Drop the NaN entries of every column separately.
    compacted = {name: list(dataframe[name].dropna()) for name in present}

    # The columns may now differ in length; cut all of them to the shortest so
    # they fit in one frame. ``default=0`` covers the "no column found" case.
    shortest = min((len(values) for values in compacted.values()), default=0)

    return pd.DataFrame(
        {name: values[:shortest] for name, values in compacted.items()}
    )


def prepare_pareto_frontier(data_lists):
    """Return the running maximum of a flat series of numbers.

    Args:
        data_lists: iterable of numbers (a single series, not a list of lists).

    Returns:
        list: same length as the input; element ``i`` is the largest value seen
        in positions ``0..i``, never lower than the starting baseline of 0.
    """
    custom_metrics = []
    # Baseline of 0: values below it never lower the frontier.
    # NOTE(review): presumably the metric is non-negative - confirm.
    current_max = 0
    for value in data_lists:
        if value >= current_max:
            current_max = value
        custom_metrics.append(current_max)

    return custom_metrics


def create_avg_dataset(new_list):
    """Build a mean/std summary table across several series.

    This function used to be defined twice in the notebook with identical
    bodies (the second definition silently shadowed the first); only one
    definition is kept.

    Args:
        new_list: array-like reduced with ``np.mean`` / ``np.std`` along
            axis 0, i.e. the first axis enumerates the series to average.

    Returns:
        pandas.DataFrame with columns:
            - "mean":  position-wise mean over axis 0.
            - "dots":  the mean wherever it strictly exceeds every previous
              mean (and the starting baseline of 0), ``None`` elsewhere.
            - "std":   position-wise standard deviation over axis 0.
            - "index": 0-based position.
    """
    mean = np.mean(new_list, axis=0)
    std = np.std(new_list, axis=0)

    # Mark only the positions where a new maximum is reached.
    # NOTE(review): the baseline of 0 means non-positive means never get a
    # marker - presumably the metric is non-negative; confirm.
    dots = []
    current_max = 0
    for value in mean:
        if value > current_max:
            current_max = value
            dots.append(current_max)
        else:
            dots.append(None)

    df = pd.DataFrame()
    df["mean"] = mean
    df["dots"] = dots
    df["std"] = std
    df["index"] = list(range(0, len(mean)))
    return df


def extract_last_custom_metrics(sweep):
    """Return the last non-NaN "Custom_metric" value of every run, per sweep.

    Args:
        sweep: iterable of sweeps, each an iterable of run-history DataFrames
            (the nesting produced by ``get_run_data``).

    Returns:
        list[list]: one inner list per sweep, holding for each run the final
        entry of its "Custom_metric" column after NaNs were removed.
    """
    return [
        [
            # remove_nan yields a one-column frame; take its last row's value.
            remove_nan(["Custom_metric"], run_df).values.tolist()[-1][0]
            for run_df in runs_in_sweep
        ]
        for runs_in_sweep in sweep
    ]

In [3]:
# Download everything logged for the "Dutch_Baseline_05" W&B project:
# list its sweeps, resolve the run ids of each sweep, then fetch the history
# of every run as a DataFrame (grouped per sweep) into `data`.
Dutch_Baseline_05_project_name = "Dutch_Baseline_05"
# NOTE(review): no entity is passed to project() here, while the run/sweep
# helpers address paths under "lucacorbucci" - confirm both resolve to the
# same account.
Dutch_Baseline_05_project = wandb.Api().project(Dutch_Baseline_05_project_name).sweeps()
Dutch_Baseline_05_sweeps = get_sweeps(Dutch_Baseline_05_project)
Dutch_Baseline_05_run_links = get_run_links(
    Dutch_Baseline_05_sweeps, Dutch_Baseline_05_project_name
)
# One remote request per run: this is the slow step of the notebook.
data = get_run_data(Dutch_Baseline_05_run_links, Dutch_Baseline_05_project_name)

In [None]:
# One list per sweep, holding the last non-NaN "Custom_metric" of each run.
custom_metrics = extract_last_custom_metrics(data)

In [None]:
# custom_metrics holds one list of numbers per sweep, while
# prepare_pareto_frontier works on a single flat series: passing the nested
# list directly compares a list with an int and raises TypeError. Compute the
# running maximum sweep by sweep instead.
pareto_list = [
    prepare_pareto_frontier(sweep_metrics) for sweep_metrics in custom_metrics
]

In [None]:
# Summarise pareto_list into a table with "mean", "dots", "std" and "index"
# columns (statistics are taken along axis 0 of pareto_list).
df_pareto = create_avg_dataset(pareto_list)