In [None]:
import os
import math
import pandas as pd
from tbparse import SummaryReader

# Directory that SummaryReader scans for logged runs.
# Alternative location:
# log_dir = os.path.expanduser("~/lis-cluster/emergent_communication/emergent_communication/lightning_logs")
log_dir = os.path.expanduser(
    "~/PhD/emergent_communication/emergent_communication/lightning_logs_cluster"
)

# `extra_columns={"dir_name"}` requests a `dir_name` column on the resulting
# tables; the following cells use it as the per-run identifier.
reader = SummaryReader(log_dir, pivot=True, extra_columns={"dir_name"})
df = reader.scalars

In [None]:
# Rows that never logged a best validation accuracy carry no usable result.
df.dropna(subset=["best_val_acc_no_noise"], inplace=True)

# Cells holding a list instead of a single number are treated as
# unfinished/duplicate runs: report them, then exclude them.
is_list_valued = df["best_val_acc_no_noise"].apply(isinstance, args=(list,))
unfinished_runs = df[is_list_valued]
print(f"Unfinished/Duplicate runs: {unfinished_runs.dir_name.unique()}")
df = df[~is_list_valued].copy()

# Keep only the row (step) with the highest accuracy for each run.
df["best_val_acc_no_noise"] = df["best_val_acc_no_noise"].astype(float)
accuracy_per_run = df.groupby(["dir_name"])["best_val_acc_no_noise"]
indices_best_steps = accuracy_per_run.idxmax()
df = df.loc[indices_best_steps]
df

In [None]:
# Hyper-parameter table of all runs; rows whose `num_values` entry is a list
# (rather than a single value) are discarded.
hp = reader.hparams
has_list_num_values = hp["num_values"].apply(isinstance, args=(list,))
hp = hp.loc[~has_list_num_values].copy()
hp

In [None]:
# Index both tables by run directory, then attach each run's hyper-parameters
# to its scalar results. A left join keeps runs that have no hparams entry.
df = df.set_index("dir_name")
hp = hp.set_index("dir_name")
df = df.join(hp, how="left")

In [None]:
# Select, for every experimental configuration, the runs that used the entropy
# coefficient with the highest mean `best_val_acc_no_noise`, and collect them
# in `data`.

# Runs without these hyper-parameters cannot be assigned to a configuration.
df.dropna(subset=["num_attributes", "num_values"], inplace=True)

# The analysis below uses a single entropy coefficient and a single agent
# count per run, so sender and receiver settings must agree everywhere.
assert (df.sender_entropy_coeff == df.receiver_entropy_coeff).all()
assert (df.num_senders == df.num_receivers).all()

df["entropy_coeff"] = df["sender_entropy_coeff"]
df["num_agents"] = df["num_senders"]

runs_best_entropy = []

# Combined "<num_attributes>_<num_values>" key, e.g. "4_4".
df["attr_val"] = df["num_attributes"].map(int).map(str) + "_" + df["num_values"].map(int).map(str)
attr_val_combinations = df["attr_val"].unique()

# `df` is not modified inside the loops, so the value sets are computed once.
num_agents_values = df["num_agents"].unique()
length_cost_values = df["length_cost"].unique()
noise_values = df["noise"].unique()

for num_agents in num_agents_values:
    print(f"\nNum agents: {num_agents}")

    for attr_val in attr_val_combinations:
        # `attr_val` was built from integer strings above, so int() suffices.
        n_attributes, n_values = (int(part) for part in attr_val.split("_"))
        print(f"\n\t\tAttr: {n_attributes} Values: {n_values}")

        for length_cost in length_cost_values:
            print(f"\t\tLength cost: {length_cost}")

            for noise in noise_values:
                print(f"\t\t\t\tNoise: {noise}")

                for feedback in (0, 1):
                    print("\t\t\t\t\tFeedback" if feedback else "\t\t\t\t\tBaseline")

                    df_config = df[
                        (df.attr_val == attr_val)
                        & (df.length_cost == length_cost)
                        & (df.feedback == feedback)
                        & (df.num_agents == num_agents)
                        & (df.noise == noise)
                    ]

                    print(f"\t\t\t\t\tFound {len(df_config)} runs for config") #: {df_config}

                    if len(df_config) == 0:
                        continue

                    # Mean accuracy per entropy coefficient. Grouping uses the same
                    # column as the filter below (`entropy_coeff` is a copy of
                    # `sender_entropy_coeff`).
                    avg_val_accs = df_config.groupby("entropy_coeff").aggregate({"best_val_acc_no_noise": "mean"})
                    print("\t\t\t\t\t", avg_val_accs.to_dict())

                    # Select the column by label: positional `[0]` on the result
                    # of `DataFrame.max()` is deprecated integer-key indexing.
                    mean_accs = avg_val_accs["best_val_acc_no_noise"]
                    # Take highest entropy coeff in case of tie (groupby sorts its
                    # keys ascending, so the last matching index is the largest).
                    best_entropy_coeff = mean_accs[mean_accs == mean_accs.max()].index[-1]

                    print("\t\t\t\t\tbest entropy coeff: ", best_entropy_coeff)
                    df_best_entropy = df_config[df_config.entropy_coeff == best_entropy_coeff]

                    runs_best_entropy.append(df_best_entropy)

# Note: `ignore_index=True` replaces the existing index with a fresh RangeIndex.
data = pd.concat(runs_best_entropy, ignore_index=True)


In [None]:
def calc_capacity(row):
    """Return ``row.num_values`` raised to the power ``row.num_attributes``.

    ``row`` is any object exposing both attributes (e.g. a DataFrame row).
    The result is computed with ``math.pow`` and is therefore always a float.
    """
    base, exponent = row.num_values, row.num_attributes
    return math.pow(base, exponent)

# Compute the capacity of every run row by row, store it as a new column and
# order the table by it (ascending, in place).
capacity_per_run = data.apply(calc_capacity, axis=1)
data["capacity"] = capacity_per_run
data.sort_values(by="capacity", inplace=True)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt


# Label each run with its experimental condition, built from the noise level,
# the length cost and the feedback flag (e.g. "noise_0.1_length_cost_0.001_feedback").
data["condition"] = (
    data["noise"].map(lambda x: f"noise_{x}" if x > 0 else "baseline")
    + data["length_cost"].map(lambda x: f"_length_cost_{x}" if x > 0 else "")
    + data["feedback"].map(lambda x: "_feedback" if x else "")
)

# Exclude bad noise setting
target_data = data[~(data.noise == 0.3)]

ATTR_VAL = "4_4"
target_data = target_data[(target_data["attr_val"] == ATTR_VAL)]

# target_hparam = "length_cost"
target_hparam = "attr_val"

_, axes = plt.subplots(3, 2, figsize=(50, 30))

# hue_order = None
# hue_order = ["baseline", "noise_0.1", "noise_0.1_length_cost_0.001", "noise_0.1_length_cost_0.001_feedback"]
hue_order = ["baseline", "noise_0.1_feedback", "noise_0.1_length_cost_0.001_feedback", "noise_0.1_length_cost_0.01_feedback"]

metrics = ["val_acc_no_noise", "test_acc_no_noise", "topsim", "posdis", "bosdis"]

# One panel per metric, filled row by row.
for metric, axis in zip(metrics, axes.ravel()):
    sns.boxplot(ax=axis, data=target_data, x=target_hparam, y=metric, hue="condition", hue_order=hue_order, boxprops=dict(alpha=.5), showfliers = False)
    # The swarm overlay must use the same `hue_order` as the boxes; otherwise
    # its colours/dodge positions follow data order and may not line up with
    # the boxes, and conditions outside `hue_order` would still be drawn.
    ax = sns.swarmplot(ax=axis, data=target_data, x=target_hparam, y=metric, hue="condition", hue_order=hue_order, dodge=True)
    # Both layers add legend entries; keep only the first set to avoid duplicates.
    handles, labels = ax.get_legend_handles_labels()
    num_conditions = int(len(handles)/2) if not hue_order else len(hue_order)
    ax.legend(handles[:num_conditions], labels[:num_conditions])

# The grid has more panels than metrics; hide the ones left empty.
for unused_axis in axes.ravel()[len(metrics):]:
    unused_axis.set_visible(False)

# Name the figure after the log directory; normpath drops a trailing slash
# that would otherwise produce an empty name.
name = os.path.basename(os.path.normpath(log_dir))
# savefig does not create missing directories.
os.makedirs("plots", exist_ok=True)
plt.savefig("plots/"+name+".pdf", dpi=300)

