In [1]:
# Import necessary libraries
import re
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os


In [2]:
# Define function to parse logs
def parse_logs(log_file):
    """Parse a training log into one row per logged epoch.

    The file is scanned line by line. "in comm round:<n>" and
    "Training network <n>" lines update the current round / network,
    "Global Model Test accuracy Top1/Top5: <x>" lines update the most recently
    seen accuracies, and every "Epoch: <e> Loss: <x>" line emits a row that
    carries whatever values are current at that point.

    Args:
        log_file: Path of the log file to read.

    Returns:
        pandas.DataFrame with the columns comm_round, net, epch, loss,
        g_acc@1 and g_acc@5. Fields that have not been seen yet when an epoch
        line is read are missing in that row. A log without any epoch line
        (including an empty file) yields an empty frame with the same columns.
    """
    columns = ['comm_round', 'net', 'epch', 'loss', 'g_acc@1', 'g_acc@5']

    # Initialize variables
    data = []
    current_round = None
    current_network = None
    last_global_top1 = None
    last_global_top5 = None

    with open(log_file, 'r') as file:
        for line in file:
            # Patterns are tried in a fixed priority order and at most one is
            # applied per line; later patterns are skipped once one matches.
            round_match = re.search(r"in comm round:(\d+)", line)
            if round_match:
                current_round = int(round_match.group(1))
                continue

            network_match = re.search(r"Training network (\d+)", line)
            if network_match:
                current_network = int(network_match.group(1))
                continue

            epoch_match = re.search(r"Epoch: (\d+) Loss: (-?\d+\.\d+)", line)
            if epoch_match:
                epoch = int(epoch_match.group(1))
                loss = float(epoch_match.group(2))
                # NOTE(review): the accuracies stored here are the ones logged
                # *before* this epoch line. If the log prints accuracy after
                # each round's training, rows carry the previous round's
                # accuracy and the final evaluation is never stored - confirm.
                data.append([current_round, current_network, epoch, loss, last_global_top1, last_global_top5])
                continue

            global_top1_match = re.search(r"Global Model Test accuracy Top1: (\d+\.\d+)", line)
            if global_top1_match:
                last_global_top1 = float(global_top1_match.group(1))
                continue

            global_top5_match = re.search(r"Global Model Test accuracy Top5: (\d+\.\d+)", line)
            if global_top5_match:
                last_global_top5 = float(global_top5_match.group(1))

    # Build the frame once, after the loop. Doing it per line repeated the
    # whole conversion for every line read and left the result undefined
    # (UnboundLocalError) when the file had no lines at all.
    return pd.DataFrame(data, columns=columns)


In [3]:
# Function to extract metadata from log filename
def extract_metadata(filename):
    """Read the run settings encoded in a log file name.

    The name is matched from its start against the layout
    dataset-portion-method-batch_size-n_parties-temperature-tt-ts-epochs_log-timestamp;
    anything following the timestamp is ignored.

    Args:
        filename: File name (without directory) to decode.

    Returns:
        Dict mapping each field name to the matched text (all values are
        strings), or an empty dict when the name does not fit the layout.
    """
    pattern = r"(?P<dataset>[^-]+)-(?P<portion>[^-]+)-(?P<method>[^-]+)-(?P<batch_size>[^-]+)-(?P<n_parties>[^-]+)-(?P<temperature>[^-]+)-(?P<tt>[^-]+)-(?P<ts>[^-]+)-(?P<epochs>[^_]+)_log-(?P<timestamp>\d{4}-\d{2}-\d{2}-\d{2}\d{2}-\d{2})"
    parsed = re.match(pattern, filename)
    return parsed.groupdict() if parsed else {}


In [4]:
# Collect every *.log file that sits directly inside the logs folder
log_folder = "./logs"
log_files = []
for entry in os.listdir(log_folder):
    if entry.endswith(".log"):
        log_files.append(os.path.join(log_folder, entry))

# Parse each log and tag all of its rows with the settings encoded in the
# file name; results are keyed by the bare file name.
dataframes = {}
for log_path in log_files:
    log_name = os.path.basename(log_path)
    run_settings = extract_metadata(log_name)
    run_df = parse_logs(log_path)
    for column, setting in run_settings.items():
        run_df[column] = setting
    dataframes[log_name] = run_df
  

In [5]:
# Combine dataframes
combined_df = pd.concat(dataframes.values(), ignore_index=True)

# Split method into method and rel_loss (text before / after the first "_").
# With expand=True the split result has a single column when no method name
# contains "_", and assigning that to two columns raises ValueError; reindex
# guarantees both columns exist, leaving rel_loss missing in that case.
method_parts = (
    combined_df['method']
    .str.split('_', n=1, expand=True)
    .reindex(columns=[0, 1])
)
combined_df['method'] = method_parts[0]
combined_df['rel_loss'] = method_parts[1]

# Normalize losses to compare them: min-max scaling within each method.
# A method whose losses are all equal has a zero range and ends up as NaN.
combined_df['normalized_loss'] = combined_df.groupby('method')['loss'].transform(
    lambda x: (x - x.min()) / (x.max() - x.min())
)

# Seaborn gridstyle setting
sns.set_style("darkgrid")

In [None]:
# Plot Losses: normalized loss per communication round, one panel per dataset
g = sns.relplot(
    data=combined_df,
    x="comm_round",
    y="normalized_loss",
    kind="line",
    hue="method",
    style="rel_loss",
    marker="o",
    col="dataset",
    height=4,
    aspect=1.2
)
plt.subplots_adjust(top=0.85)

# Set axes, titles and legend
g.set_axis_labels("Communication Round", "Normalized Loss")
title_map = {"cifar10": "CIFAR-10", "svhn": "SVHN"}
for dataset_val, ax in g.axes_dict.items():
    # Fall back to the raw dataset name so a dataset missing from title_map
    # does not abort the cell with a KeyError.
    ax.set_title(title_map.get(dataset_val, dataset_val))
# NOTE(review): labels are assigned by position, so they rely on the order in
# which seaborn lists the legend entries - confirm it matches this list.
new_labels = ["Method", "Default", "SimSiam", "Variant", "w/o RLoss", "w RLoss"]
for text_obj, new_label in zip(g._legend.texts, new_labels):
    text_obj.set_text(new_label)

# savefig does not create missing directories
os.makedirs('./figure', exist_ok=True)
plt.savefig('./figure/loss.png')
plt.show()

In [None]:
# Plot Top-1 Accuracy per communication round, one panel per dataset
g = sns.relplot(
    data=combined_df,
    x="comm_round",
    y="g_acc@1",
    kind="line",
    hue="method",
    style="rel_loss",
    marker="o",
    col="dataset",
    height=4,
    aspect=1.2
)
plt.subplots_adjust(top=0.85)

# Set axes, titles and legend
g.set_axis_labels("Communication Round", "Top-1 Accuracy")
title_map = {"cifar10": "CIFAR-10", "svhn": "SVHN"}
for dataset_val, ax in g.axes_dict.items():
    # Fall back to the raw dataset name so a dataset missing from title_map
    # does not abort the cell with a KeyError.
    ax.set_title(title_map.get(dataset_val, dataset_val))
# NOTE(review): labels are assigned by position, so they rely on the order in
# which seaborn lists the legend entries - confirm it matches this list.
new_labels = ["Method", "Default", "SimSiam", "Variant", "w/o RLoss", "w RLoss"]
for text_obj, new_label in zip(g._legend.texts, new_labels):
    text_obj.set_text(new_label)

# savefig does not create missing directories
os.makedirs('./figure', exist_ok=True)
plt.savefig('./figure/top-1.png')
plt.show()

In [None]:
# Plot Top-5 Accuracy per communication round, one panel per dataset
g = sns.relplot(
    data=combined_df,
    x="comm_round",
    y="g_acc@5",
    kind="line",
    hue="method",
    style="rel_loss",
    marker="o",
    col="dataset",
    height=4,
    aspect=1.2
)
plt.subplots_adjust(top=0.85)

# Set axes, titles and legend
g.set_axis_labels("Communication Round", "Top-5 Accuracy")
title_map = {"cifar10": "CIFAR-10", "svhn": "SVHN"}
for dataset_val, ax in g.axes_dict.items():
    # Fall back to the raw dataset name so a dataset missing from title_map
    # does not abort the cell with a KeyError.
    ax.set_title(title_map.get(dataset_val, dataset_val))
# NOTE(review): labels are assigned by position, so they rely on the order in
# which seaborn lists the legend entries - confirm it matches this list.
new_labels = ["Method", "Default", "SimSiam", "Variant", "w/o RLoss", "w RLoss"]
for text_obj, new_label in zip(g._legend.texts, new_labels):
    text_obj.set_text(new_label)

# savefig does not create missing directories
os.makedirs('./figure', exist_ok=True)
plt.savefig('./figure/top-5.png')
plt.show()

In [None]:
# Create a copy of the original DataFrame.
dfcopy = combined_df.copy()

# Overwrite method with a display label: rows flagged with rel_loss get
# "<method> + rel_loss", all other rows keep the plain method name.
# Done column-wise rather than with a row-by-row apply, which is slow on long
# logs and does not return a Series when the frame has no rows.
has_rel_loss = dfcopy['rel_loss'] == "rel_loss"
dfcopy['method'] = dfcopy['method'].mask(has_rel_loss, dfcopy['method'] + " + rel_loss")

def compute_metrics(sub_df):
    """Summarise top-5 accuracy for one group of rows.

    Args:
        sub_df: Frame with at least the columns "comm_round" and "g_acc@5".

    Returns:
        pandas.Series with "Last Acc Top5" (the g_acc@5 value of the row that
        comes last after ordering by comm_round) and "Best Acc Top5" (the
        maximum g_acc@5 in the group).
    """
    top5_by_round = sub_df.sort_values("comm_round")["g_acc@5"]
    summary = {
        "Last Acc Top5": top5_by_round.iloc[-1],
        "Best Acc Top5": top5_by_round.max(),
    }
    return pd.Series(summary)

# Compute the metrics once per (method, dataset) pair.
grouped_runs = dfcopy.groupby(["method", "dataset"], group_keys=False)
agg_df = grouped_runs.apply(compute_metrics, include_groups=False).reset_index()

# Reshape so that each dataset is an outer column holding the two metric
# subcolumns, with one row per method.
metric_columns = ["Last Acc Top5", "Best Acc Top5"]
indexed_metrics = agg_df.set_index(["method", "dataset"])[metric_columns]
pivot_df = (
    indexed_metrics
    .unstack("dataset")
    .swaplevel(axis=1)
    .sort_index(axis=1, level=0)
    .reset_index()
)

# Show the final table as plain text, leaving out the DataFrame index.
print(pivot_df.to_string(index=False))


In [None]:
# Render the pivoted accuracy table as LaTeX source (index column omitted,
# multirow enabled) and print it.
latex_table = pivot_df.to_latex(index=False, multirow=True)
print(latex_table)
  