In [6]:
import csv
import re
from pathlib import Path

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import hvplot.pandas # noqa

import ipywidgets as w


In [7]:
# Directory holding the experiment exports, and the CSV analysed by this notebook.
# DATASET is derived from DATASET_ROOT so that relocating the data only requires
# changing the root.
DATASET_ROOT = Path("dataset")
DATASET = DATASET_ROOT / "2025-03-31-experiments.csv"

In [8]:
# Flat records accumulated while parsing. They are (re)initialised here so that
# re-running the cell starts from empty lists instead of appending duplicates.
classification_records = []
analysis_records = []

# HEADER_KEYS: columns copied as-is into every record.
# ANALYSIS_KEYS: columns converted to float and stored in analysis_records.
# CLASSIFICATION_PPRINT: prefix of the "<prefix> size <arity>" columns mapped to
# the label stored in the "classification" field.
HEADER_KEYS = ["platform", "nbInitiators", "nbTargets"]
ANALYSIS_KEYS = ["analysisTime", "semanticsSize", "graphReduction", "semanticsReduction"]
CLASSIFICATION_PPRINT = {"itf": "ITF", "red": "Redundant", "free": "Free"}

# Parse the CSV into the two record lists.
# newline="" is what the csv module requires when it is handed a file object.
with DATASET.open(newline="") as dataset_file:
    for entry in csv.DictReader(dataset_file):
        # Cleanup csv keys
        entry = {k.strip(): v for k, v in entry.items()}
        # Ignore analysis failure
        if any(entry[k].strip() == "none" for k in ANALYSIS_KEYS):
            continue
        # Define shared header record fields
        record_header = {k: entry[k] for k in HEADER_KEYS}
        # Collect analysis statistics
        analysis_records.append({
            **record_header,
            **{k: float(entry[k]) for k in ANALYSIS_KEYS}
        })
        # Collect statistics per arity.
        # The arity suffixes are converted to int *before* taking the maximum:
        # comparing them as strings is lexicographic ("9" > "10") and would
        # silently drop every arity of 10 or more. default=0 makes a row without
        # any "sem size N" column yield no classification record instead of
        # raising on an empty max().
        arity_max = max(
            (int(k.rsplit(" ", 1)[1]) for k in entry if "sem size" in k),
            default=0,
        )
        for i in range(2, arity_max + 1):
            # Ignore if no transaction at arity (checked once per arity, it does
            # not depend on the classification)
            sem_size = entry.get(f"sem size {i}", "none").strip()
            if sem_size == "none" or int(sem_size) <= 0:
                continue
            for c, classification in CLASSIFICATION_PPRINT.items():
                # Cleanup count: a missing column or "none" counts as zero
                count = entry.get(f"{c} size {i}", "0").strip()
                if count == "none":
                    count = 0
                # Record classification
                classification_records.append({
                    **record_header,
                    "classification": classification,
                    "arity": str(i),
                    "count": int(count),
                })

classification_df = pd.DataFrame.from_records(classification_records)
analysis_df = pd.DataFrame.from_records(analysis_records)

In [9]:
# Largest count seen for each (platform, arity, classification) triple,
# with the index sorted in descending order.
classification_counts = (
    classification_df
    .groupby(["platform", "arity", "classification"])["count"]
    .max()
    .sort_index(ascending=False)
)

# Stacked horizontal bars of count per arity, split by classification,
# with a selector widget (placed at the bottom) to pick the platform.
classification_counts.hvplot(
    x="arity",
    y="count",
    by="classification",
    groupby="platform",
    kind="barh",
    stacked=True,
    color=["#246EB9", "#FF9000", "#97CC04"],
    legend="bottom_right",
    widget_location="bottom",
)

In [24]:
# Analysis time against semantics size on log-log axes,
# one series per value of nbInitiators.
analysis_df.hvplot(
    x="semanticsSize",
    y="analysisTime",
    kind="points",
    by=["nbInitiators"],
    logx=True,
    logy=True,
    legend="bottom_right",
    widget_location="bottom",
)


In [11]:
# Build hvPlot's explorer on the analysis records. The handle is kept in `hve`
# and left as the last expression of the cell so the notebook displays it.
hve = analysis_df.hvplot.explorer()
hve