In [None]:
import os
import itertools
import math

from collections import defaultdict
from itertools import product

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.path import Path
import matplotlib.patches as patches

import seaborn as sns

# Apply the "science" plotting style to every figure created below.
# NOTE(review): "science" is not a built-in matplotlib style; presumably it is
# registered by an external style package — confirm it is installed.
plt.style.use("science")

In [None]:
# Root folder with one sub-folder per model; each holds "<dataset>_<frequency>.csv" files.
results_folder = "../results/timeseries/"
# Models are fixed explicitly (and in plotting order) rather than discovered from disk.
models = ["MLP", "LSTM", "CNN", "TCN"]
# Values used as the "<frequency>" part of the result file names.
frequencies = [50, 100, 200]
# Dataset names found on disk for the first model. Strip only the trailing
# "_<frequency>.csv" part so names containing "_" stay intact, and sort so the
# report below is identical on every run (set ordering is not stable).
datasets = sorted(
    {name.rsplit("_", 1)[0] for name in os.listdir(results_folder + models[0])}
)
print(
    f"{len(models)} models and {len(datasets)} datasets\n",
    f'\tmodels: {", ".join(models)}\n',
    f'\tdatasets: {", ".join(datasets)}',
)
# Datasets included in the analysis. This assignment replaces any earlier value
# of `datasets`; the order is kept by every table and plot that iterates over it.
datasets = [
    "TwoPatterns", "CinCECGtorso", "TwoLeadECG", "Wafer", "pendigits",
    "FacesUCR", "Mallat", "FaceAll", "Symbols", "ItalyPowerDemand",
    "ECG5000", "MoteStrain",
    "NonInvasiveFetalECGThorax1", "NonInvasiveFetalECGThorax2",
    "SwedishLeaf", "FordA", "Yoga", "UWaveGestureLibraryX", "FordB",
    "ElectricDevices", "UWaveGestureLibraryY", "UWaveGestureLibraryZ",
    "HandOutlines", "InsectWingbeatSound", "ShapesAll", "MedicalImages",
    "PhalangesOutlinesCorrect", "ChlorineConcentration", "Phoneme",
]

# Fixed palette; each model takes the colour at its own position in `models`.
colors_list = ["#0051a2", "#ffd44f", "#f4777f", "#93003a", "#97964a"]
colors = dict(
    (model_name, colors_list[position]) for position, model_name in enumerate(models)
)

# Nested lookup of result tables: data[dataset][model][frequency] -> DataFrame
# read from "<results_folder>/<model>/<dataset>_<frequency>.csv".
data = defaultdict(lambda: defaultdict(dict))
for d in datasets:
    for m in models:
        for f in frequencies:
            csv_path = f"{results_folder}/{m}/{d}_{f}.csv"
            df = pd.read_csv(csv_path)
            data[d][m][f] = df.copy()


# Summary table at a single frequency: one row per dataset, one column per model.
# Each cell is the mean of the `metric` column with its first 10 entries skipped.
f = 200  # ms
means = {d: {m: data[d][m][f].metric[10:].mean() for m in models} for d in datasets}

df = pd.DataFrame(means).T
df.to_csv("table.csv")  # exported before the summary rows are appended

# Every summary row is computed from the per-dataset rows only. Calling
# df.std() / df.rank() on the frame after "MEAN" (and "STD") were appended would
# fold those summary rows into the later statistics.
per_dataset = df.copy()
df.loc["MEAN"] = per_dataset.mean()
df.loc["STD"] = per_dataset.std()
# Rank the models within each dataset (1 = highest metric), then average per model.
df.loc["RANK"] = per_dataset.rank(axis=1, ascending=False).mean()

display(df)

In [None]:
# Wide table: one row per dataset, one column per (model, frequency) pair.
# Each cell is the mean of the `metric` column with its first 10 entries skipped.
means = {}
for d in datasets:
    means[d] = {
        (model_name, freq): data[d][model_name][freq].metric[10:].mean()
        for model_name in models
        for freq in frequencies
    }
pd.DataFrame(means).T

In [None]:
# Long-format table: one row per (dataset, model, frequency) holding the mean metric.
# "speed" is 1000 / f, i.e. instances per second if f is a period in milliseconds
# (presumably, given the "# ms" note on the frequency used for the summary table).
# NOTE(review): this averages the full metric series (`[:]`), while the summary
# tables earlier in the notebook skip the first 10 entries (`[10:]`) — confirm
# the difference is intended.
means = [
    [d, m, int(1000 / f), data[d][m][f].metric[:].mean()]
    for d, m, f in product(datasets, models, frequencies)
]
means_df = pd.DataFrame(means, columns=["dataset", "Model", "speed", "kappa"])

# Global setting: stays in effect for every figure drawn after this cell.
plt.rcParams.update({"font.size": 7})

# Figure geometry derived from the LaTeX column width.
fig_width_pt = 347.12354 * 1.0  # Get this from LaTeX using \showthe\columnwidth
inches_per_pt = 1.0 / 72.27  # Convert pt to inches
golden_mean = (math.sqrt(5) - 1.0) / 2.0  # Aesthetic ratio
fig_width = 0.6 * fig_width_pt * inches_per_pt  # width in inches
fig_height = 1 * fig_width * golden_mean  # height in inches
fig_size = [fig_width, fig_height]

# Point plot of kappa against speed, one line per model; error bars are 95%
# confidence intervals over the datasets.
with plt.rc_context({"lines.linewidth": 0.75}):
    g = sns.catplot(
        x="speed",
        y="kappa",
        hue="Model",
        dodge=0.1,
        ci=95,
        join=True,
        capsize=0.05,
        errwidth=0.9,
        palette=colors,
        height=fig_height,
        aspect=1 / golden_mean,
        kind="point",
        data=means_df,
        linestyles=":",
        linewidth=0.001,
    )
g.despine(left=True)
plt.xlabel("Speed (instances per second)")

# Create the output folder first so saving works on a fresh checkout.
os.makedirs("images", exist_ok=True)
plt.savefig("images/stream_speed.pdf", dpi=300)
plt.savefig("images/stream_speed.png", dpi=300)

In [None]:
# Table behind the parallel-coordinates plot: one row per (model, dataset).
# "Dataset" is the dataset's vertical position, (n - index) / n, so the first
# dataset gets 1.0; it is followed by one mean-metric column per frequency.
n_datasets = len(datasets)
means = [
    [
        (n_datasets - position) / n_datasets,
        dataset_name,
        model_name,
        *(data[dataset_name][model_name][freq].metric[:].mean() for freq in frequencies),
    ]
    for model_name in models
    for position, dataset_name in enumerate(datasets)
]
means_df = pd.DataFrame(
    means, columns=["Dataset", "dataset_name", "model", *map(str, frequencies)]
)
means_df

# Parallel-coordinates plot: the first axis lists the datasets, followed by one
# axis per frequency; every (model, dataset) row is drawn as one curve coloured
# by its model.
ynames = ["Dataset"] + [str(f) for f in frequencies]
ys = means_df[ynames].values
n_axes = ys.shape[1]

# Per-axis value range with 5% padding below and above.
ymins = ys.min(axis=0)
ymaxs = ys.max(axis=0)
dys = ymaxs - ymins
ymins -= dys * 0.05
ymaxs += dys * 0.05
dys = ymaxs - ymins

# All curves are drawn on the host axis, so every other column is rescaled
# from its own [ymin, ymax] range into the host's range. Without this step the
# curves would not line up with the tick values shown on the twin axes.
spans = np.where(dys > 0, dys, 1.0)  # avoid 0/0 for a constant column
zs = np.zeros_like(ys)
zs[:, 0] = ys[:, 0]
zs[:, 1:] = (ys[:, 1:] - ymins[1:]) / spans[1:] * dys[0] + ymins[0]


fig, host = plt.subplots(figsize=(10, 7))

# One twin axis per additional column, each showing that column's own scale.
axes = [host] + [host.twinx() for _ in range(n_axes - 1)]
for i, ax in enumerate(axes):
    ax.set_ylim(ymins[i], ymaxs[i])
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    if ax is not host:
        ax.spines["left"].set_visible(False)
        ax.yaxis.set_ticks_position("right")
        # Spread the twin axes evenly between the left and right edge.
        ax.spines["right"].set_position(("axes", i / (n_axes - 1)))

# The host axis is labelled with dataset names at the positions stored in the
# "Dataset" column.
n_datasets = len(datasets)
host.set_yticks([(n_datasets - k) / n_datasets for k in range(n_datasets)])
host.set_yticklabels(datasets)

host.set_xlim(0, n_axes - 1)
host.set_xticks(range(n_axes))
host.set_xticklabels(ynames, fontsize=14)
host.tick_params(axis="x", which="major", pad=7)
host.spines["right"].set_visible(False)
host.xaxis.tick_top()
host.set_title("Parallel Coordinates Plot — ADLStream", fontsize=18, pad=12)

# Bezier control points: one vertex on every axis plus two in between each pair
# of neighbouring axes. The x positions and path codes are the same for every
# row, so they are built once from the number of axes.
control_xs = np.linspace(0, n_axes - 1, n_axes * 3 - 2, endpoint=True)
codes = [Path.MOVETO] + [Path.CURVE4] * (len(control_xs) - 1)

row_models = means_df["model"].tolist()
legend_handles = [None] * len(models)  # one representative curve per model
for j in range(ys.shape[0]):
    # Repeat each y value three times (the two end points only twice) so the
    # curve leaves and enters every axis horizontally.
    verts = list(zip(control_xs, np.repeat(zs[j, :], 3)[1:-1]))
    path = Path(verts, codes)
    patch = patches.PathPatch(
        path, facecolor="none", lw=2, alpha=0.7, edgecolor=colors[row_models[j]]
    )
    legend_handles[models.index(row_models[j])] = patch
    host.add_patch(patch)
host.legend(
    legend_handles,
    models,
    loc="lower center",
    bbox_to_anchor=(0.5, -0.18),
    ncol=len(models),
    fancybox=True,
    shadow=True,
)
plt.tight_layout()
plt.show()