In [None]:
import json
import os
from copy import deepcopy

import numpy as np
import pandas as pd
import wandb
import matplotlib.pyplot as plt

In [None]:
import matplotlib

# Select the PGF backend and route all text through LaTeX so exported figures
# match the fonts of the surrounding LaTeX document.
matplotlib.use("pgf")

pgf_rc_overrides = {
    "pgf.texsystem": "pdflatex",
    "font.family": "serif",
    "text.usetex": True,
    "pgf.rcfonts": False,
}
matplotlib.rcParams.update(pgf_rc_overrides)

In [None]:
# NOTE(review): this runs after matplotlib.use("pgf") in the previous cell; presumably it
# switches display to the inline backend while the rcParams set there stay active — confirm.
%matplotlib inline

## Aggregate triplets of experiments, then generate plots

### Data Preparation

- Use WandB API to get the list of runs from a sweep
- Use the names of the runs to get the folder on the disk
- Extract the config and data for each run from the disk
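- Group the triplets (runs whose configs differ only in `global_seed`), then create the `_averaged` and `_std` metrics by taking the mean and standard deviation across each triplet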

### Generate plots

- Specify the config value which shall be grouped (e.g. brain.type)
- Specify the metric that shall be used for that plot
- Use the prepared data, group by the specified value
- Take the mean, and stddev for each group
- Plot the mean as a line plot, and the stddev as a shaded region
- X axis always "Generation"
- Y axis the metric
    - Maybe add a parameter `y_axis_label`, because the metric might be named differently in the data than what we want to present

In [None]:
# Weights & Biases API client; used in the next cell to look up the runs of a sweep.
api = wandb.Api()

In [None]:
# Identifier of the WandB sweep whose runs are aggregated in this notebook.
sweep_id = "oh6v8v81"

# Fetch the runs that belong to the sweep.
sweep_runs = api.sweep(f"neuroevolution-fzi/AST2023/{sweep_id}").runs

# Each run's results folder on disk is named after the last "/"-separated
# component of the run name and lives below "results".
folder_paths = [
    os.path.join("results", run.name.split("/")[-1])
    for run in sweep_runs
]

In [None]:
# Collect one (configuration, log DataFrame) pair per run folder.
raw_data = []
# Number of log rows of the first run; every other run must match it.
data_length = None

for run_folder in folder_paths:
    log_path = os.path.join(run_folder, "Log.json")
    cfg_path = os.path.join(run_folder, "Configuration.json")

    log_df = pd.read_json(log_path)
    # Discard the final log row and the columns that are not aggregated.
    log_df = log_df.drop(log_df.index[-1], axis=0)
    log_df = log_df.drop(columns=["elapsed_time_training", "cpu"])

    # All runs must have the same number of rows so they can be aligned
    # row by row when they are averaged later.
    if data_length is None:
        data_length = len(log_df)
    assert len(log_df) == data_length

    with open(cfg_path, "r") as cfg_file:
        run_cfg = json.load(cfg_file)

    raw_data.append((run_cfg, log_df))

In [None]:
def _without_seed(cfg):
    """Return a shallow copy of ``cfg`` without its ``"global_seed"`` entry.

    The input dict is left untouched so the entries of ``raw_data`` stay intact
    and this cell can be re-run without reloading the data. A config that has
    no ``"global_seed"`` key is returned as a plain copy.
    """
    return {key: value for key, value in cfg.items() if key != "global_seed"}


# --- Group runs that differ only in their seed -------------------------------
# grouped_triplets maps the raw_data index of the first run of a group to the
# list of (cfg, df) pairs of all runs sharing the same seed-free configuration.
already_processed_indices = []
grouped_triplets = {}
# Seed-free configuration of every group, keyed like grouped_triplets.
group_cfgs = {}

for i, (cfg, df) in enumerate(raw_data):
    seedless_cfg = _without_seed(cfg)

    # Configs are nested dicts (not hashable), so compare against the known
    # groups with == instead of using them as dictionary keys.
    for first_index, group_cfg in group_cfgs.items():
        if group_cfg == seedless_cfg:
            grouped_triplets[first_index].append((cfg, df))
            break
    else:
        # No existing group matched: this run starts a new group.
        group_cfgs[i] = seedless_cfg
        grouped_triplets[i] = [(cfg, df)]

    # Kept so that anything inspecting this list still finds every run index.
    already_processed_indices.append(i)

assert len(grouped_triplets) * 3 == len(raw_data), (
    f"expected {len(raw_data)} runs to form {len(raw_data) / 3} triplets, "
    f"got {len(grouped_triplets)} groups"
)

# --- Aggregate every triplet --------------------------------------------------
# prepared_data holds one (seed-free cfg, DataFrame) pair per triplet; the
# DataFrame has "<col>_averaged" and "<col>_std" columns computed row-wise
# across the three runs.
prepared_data = []

for first_index, triplet in grouped_triplets.items():
    assert len(triplet) == 3, (
        f"group starting at run index {first_index} has {len(triplet)} runs, expected 3"
    )

    # Deep copy so later changes to the aggregated config cannot leak back
    # into the configs stored in raw_data.
    triplet_cfg = deepcopy(group_cfgs[first_index])

    data = [run_df for _, run_df in triplet]

    # Rows with the same index label across the runs are aggregated together.
    grouped_data = pd.concat(data).groupby(level=0)
    mean_data = grouped_data.mean().rename(columns=lambda x: f"{x}_averaged")
    std_data = grouped_data.std().rename(columns=lambda x: f"{x}_std")

    prepared_data.append((triplet_cfg, pd.concat([mean_data, std_data], axis=1)))

In [None]:
# Dotted path of the configuration value by which the triplets are grouped.
group_by = "brain.type"
group_by_splitted = group_by.split(".")

# Maps each distinct config value to the list of aggregated triplet DataFrames.
grouped_data = {}

for cfg, data in prepared_data:
    # Walk down the nested configuration along the dotted path.
    current_cfg = cfg
    for cfg_index in group_by_splitted:
        current_cfg = current_cfg[cfg_index]

    # The path has to end at a leaf value, not at a nested section.
    assert not isinstance(current_cfg, dict)

    grouped_data.setdefault(current_cfg, []).append(data)

# For every config value: row-wise mean and standard deviation across all
# triplets that share that value.
grouped_data_averaged = {}

for cfg_key, data_list in grouped_data.items():
    grouped = pd.concat(data_list).groupby(level=0)
    group_mean = grouped.mean()
    group_std = grouped.std()

    grouped_data_averaged[cfg_key] = {"mean": group_mean, "std": group_std}

In [None]:
# Maps a metric column name to the label shown on the y axis.
# NOTE(review): "text.usetex" is enabled in the rcParams cell above; labels
# containing underscores may need escaping for LaTeX — confirm rendering.
y_label_mapper = {
    "mean_train_averaged": "mean_rew_averaged",
    "best_averaged": "best_averaged",
}

metric_to_plot = "mean_train_averaged"
# Fall back to the raw metric name when no presentation label is registered.
y_label = y_label_mapper.get(metric_to_plot, metric_to_plot)
x_label = "Generation"
# Opacity of the shaded standard-deviation band.
alpha = 0.15

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 4))

for cfg_key, data in grouped_data_averaged.items():
    mean_data = data["mean"]
    std_data = data["std"]

    # Use the same x values for the line and the band so both stay aligned
    # even when the DataFrame index differs from the generation numbers.
    generations = mean_data["gen_averaged"]
    metric_data = mean_data[metric_to_plot]
    metric_std = std_data[metric_to_plot]

    (line,) = ax.plot(generations, metric_data, label=cfg_key)
    ax.fill_between(
        generations,
        metric_data - metric_std,
        metric_data + metric_std,
        alpha=alpha,
        color=line.get_color(),  # band always matches its line
    )

ax.set_xlabel(x_label)
ax.set_ylabel(y_label)
# Trailing semicolon suppresses the Legend repr in the cell output.
ax.legend(loc="upper left");