In [None]:
%matplotlib inline

import pandas as pd
import wandb
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Identifier of the WandB sweep whose runs are grouped and averaged in this notebook.
SWEEP_ID = "xstzetj9"
# Path handed to the WandB API to look the sweep up: a fixed prefix followed by the sweep id.
SWEEP_PATH = f"neuroevolution-fzi/AST2023/{SWEEP_ID}"
# Expected number of runs per configuration that differ only in their "global_seed".
NUMBER_OF_DIFFERENT_RANDOM_SEEDS = 3

In [None]:
# WandB API client used to query already existing runs.
api = wandb.Api()

# Handle of the sweep addressed by SWEEP_PATH; its runs are iterated when loading the run data.
sweep_data = api.sweep(SWEEP_PATH)

In [None]:
# For every run of the sweep, keep a pair of (logged history as DataFrame, run config).
run_data = [
    (pd.DataFrame(sweep_run.scan_history()), sweep_run.config)
    for sweep_run in sweep_data.runs
]

print(f"Number runs: {len(run_data)}")

In [None]:
# Configs with the "global_seed" entry removed, one per run and in the same order as
# run_data. Two runs belong to the same group when these seed-less configs are equal.
seedless_configs = [
    {key: value for key, value in run_config.items() if key != "global_seed"}
    for _, run_config in run_data
]

# duplicates: index of the first run of a group -> indices of the other runs of that group.
# excludes:   indices that were already assigned to a group and must not start a new one.
duplicates = {}
excludes = []
for i, reference_config in enumerate(seedless_configs):
    # This run is already a member of an earlier group
    if i in excludes:
        continue

    for j, candidate_config in enumerate(seedless_configs):
        # Skip the run itself, keep only runs with an identical seed-less config
        if i != j and reference_config == candidate_config:
            duplicates.setdefault(i, []).append(j)

            # Run j is handled now, so the outer loop does not need to look at it again
            excludes.append(j)

In [None]:
# A bare expression is only displayed when it is the last statement of a cell, so the
# number of groups is printed explicitly.
print(f"Number of groups of runs with identical configs: {len(duplicates)}")

# Runs whose config matches no other run never enter `duplicates` and would therefore be
# left out of the averaging without any notice. Report them so this is visible.
grouped_run_ids = set(duplicates)
for sibling_ids in duplicates.values():
    grouped_run_ids.update(sibling_ids)

ungrouped_run_ids = [run_id for run_id in range(len(run_data)) if run_id not in grouped_run_ids]
if ungrouped_run_ids:
    print(f"WARNING: runs without a matching config are ignored: {ungrouped_run_ids}")

# NUMBER_OF_DIFFERENT_RANDOM_SEEDS - 1 because the key is the first run of a group and the
# list holds the remaining runs that are identical except for the random seed.
expected_siblings = NUMBER_OF_DIFFERENT_RANDOM_SEEDS - 1
incomplete_groups = {
    first_id: sibling_ids
    for first_id, sibling_ids in duplicates.items()
    if len(sibling_ids) != expected_siblings
}
assert not incomplete_groups, (
    f"Expected {expected_siblings} additional runs per config, "
    f"but these groups differ: {incomplete_groups}"
)

In [None]:
# Metrics that are averaged over all runs of a group
columns_to_extract = ["min_train", "mean_train", "max_train", "min_val", "mean_val", "max_val", "best", "elapsed_time"]

# One entry per group: (DataFrame with "<metric>_average" and "<metric>_std" columns, seed-less config)
data_of_all_runs = []
for first_run_id, sibling_run_ids in duplicates.items():
    first_config = run_data[first_run_id][1]

    # Reference config of the group; the runs of a group may only differ in "global_seed"
    exemplary_config = {
        key: value for key, value in first_config.items() if key != "global_seed"
    }

    group_histories = []
    for run_id in [first_run_id] + sibling_run_ids:
        history, member_config = run_data[run_id]

        seedless_member_config = {
            key: value for key, value in member_config.items() if key != "global_seed"
        }

        # Safety net: refuse to average runs whose configs are not identical
        if seedless_member_config != exemplary_config:
            raise RuntimeError(f"Runs that are averaged have not the same config! run_id: {run_id}")

        group_histories.append(pd.DataFrame(history[columns_to_extract]))

    # Stack the histories and group rows by their index label, so that rows carrying the
    # same index in different runs are aggregated together.
    rows_by_index = pd.concat(group_histories).groupby(level=0)

    # Mean and std per index label; the suffix marks which statistic a column holds
    averaged_data = rows_by_index.mean().add_suffix("_average")
    std_data = rows_by_index.std().add_suffix("_std")

    data_of_all_runs.append((pd.concat([averaged_data, std_data], axis=1), exemplary_config))

In [None]:
# Create one new WandB run per group of identical experiments where only the global_seed
# was different, and upload the averaged metrics of that group to it.
for averaged_data, config in data_of_all_runs:
    # Using the run as a context manager finishes it before the next wandb.init() call.
    # A single wandb.finish() after the loop would only explicitly close the last run and
    # leave the earlier ones to wandb's handling of repeated init() calls.
    with wandb.init(
        entity="neuroevolution-fzi",
        project="AST2023",
        config=config,
        tags=["averaged-run", "sweep-" + SWEEP_ID],
    ) as averaged_run:
        for i, row in averaged_data.iterrows():
            # One log call per row; the row's index label is stored as "gen"
            log_values = row.to_dict()
            log_values["gen"] = i
            averaged_run.log(log_values)