In [1]:
import itertools
import pathlib

import yaml
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.transforms
from sklearn.metrics import accuracy_score

import lab.metrics
import lab.analytics

# Appearance shared by every figure drawn in this notebook: seaborn's "white"
# style, and the same size (8) for body text, axes titles and legend entries.
sns.set_style("white")
plt.rcParams.update(dict.fromkeys(
    ["font.size", "axes.titlesize", "legend.fontsize"], 8
))

# Optional LaTeX text rendering with Helvetica; kept here but disabled.
# plt.rcParams.update({
#     "text.usetex": True,
#     "font.family": "sans-serif",
#     "font.sans-serif": ["Helvetica"]})

# Module-level switch, off by default (not referenced in the cells shown here).
WITH_GRID = False

In [2]:
# Stand-alone defaults, used when the notebook is run by hand: one prediction
# CSV per (classifier, defence) pair, iterated classifier-major.
INPUTS = [
    f"../../results/ml-eval-mconn/{path}/predict/{classifier}.csv"
    for classifier, path in itertools.product(
        ["kfp-0", "dfnet-0", "varcnn-0"],
        ["tamaraw", "undefended", "front"],
    )
]
OUTPUT_FILE = pathlib.Path("/tmp/plot.png")
WITH_LEGEND, WITH_SIMULATED = True, True

# When a `snakemake` object has been injected into the notebook globals, its
# inputs, first output and params replace the defaults above. Note that the
# fallback for a missing "with_legend" param is False here, unlike the
# stand-alone default.
if "snakemake" in globals():
    _params = snakemake.params
    INPUTS = list(snakemake.input)
    OUTPUT_FILE = pathlib.Path(snakemake.output[0])
    WITH_LEGEND = _params.get("with_legend", False)
    WITH_SIMULATED = _params.get("with_simulated", True)

In [3]:
def _load_data(inputs):
    data = (pd.concat([
                pd.read_csv(fname, header=0) for fname in inputs
            ], keys=inputs, names=["path"])
            .fillna(0)
            .rename(columns=lambda c: int(c) if c != "y_true" else c)
            .astype({"y_true": int})
            .assign(
                is_simulated=lambda x: x.index.get_level_values("path").str.contains("simulated"),
                defence=lambda x: x.index.get_level_values("path").map(
                    lambda s: next(defence for defence in ["front", "tamaraw", "undefended"] if defence in s)
                ),
                classifier=lambda x: x.index.get_level_values("path").map(
                    lambda s: next(classifier for classifier in ["kfp", "dfnet", "varcnn"] if classifier in s)
                ))
            .set_index(["defence", "classifier", "is_simulated"])
            .rename(index={"front": "FRONT", "tamaraw": "Tamaraw", "undefended": "Undefended",
                           "kfp": "$k$-FP", "dfnet": "DF", "varcnn": "Var-CNN"})
           )
    return data

# Load every file listed in INPUTS into one frame; the bare `data` expression
# below makes the frame the displayed output of this cell.
data = _load_data(INPUTS)
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,y_true,0,1,2,3,4,5,6,7,8,...,90,91,92,93,94,95,96,97,98,99
defence,classifier,is_simulated,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
Tamaraw,$k$-FP,False,0,1.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000
Tamaraw,$k$-FP,False,0,1.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000
Tamaraw,$k$-FP,False,0,1.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000
Tamaraw,$k$-FP,False,0,1.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000
Tamaraw,$k$-FP,False,0,1.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FRONT,Var-CNN,False,99,1.449788e-10,7.112789e-12,2.015747e-15,1.762092e-12,1.522975e-09,2.173863e-09,2.596373e-17,1.526077e-08,3.679536e-09,...,9.411934e-09,1.202298e-14,6.879830e-09,1.787998e-07,1.912591e-10,3.878535e-10,6.870694e-11,1.163111e-10,2.649622e-09,0.997271
FRONT,Var-CNN,False,99,4.184397e-06,5.538006e-07,7.704123e-08,4.434974e-06,5.755274e-07,4.778302e-04,3.051396e-10,1.209225e-04,3.595397e-05,...,2.680891e-05,4.091941e-08,3.303506e-08,4.095121e-06,1.506543e-05,6.450801e-05,9.378601e-07,6.227106e-06,8.413748e-05,0.935937
FRONT,Var-CNN,False,99,4.292675e-07,2.905551e-07,5.897025e-10,2.606692e-07,1.317687e-07,3.341492e-05,2.672154e-11,3.227483e-06,9.541577e-06,...,2.475094e-06,1.667479e-09,1.385141e-08,6.552798e-06,5.853087e-07,1.419683e-05,7.520061e-08,3.688011e-07,2.846932e-06,0.977473
FRONT,Var-CNN,False,99,4.570857e-07,1.488485e-06,3.778928e-09,1.307442e-06,2.673777e-06,7.911517e-06,3.941725e-10,1.430952e-04,3.136531e-05,...,1.814584e-05,3.501298e-09,9.733274e-07,9.460396e-05,2.783329e-06,6.626171e-06,2.099983e-06,2.441269e-06,1.718505e-05,0.793141


In [5]:
def _group_accuracy(group):
    """Return the accuracy of the arg-max prediction for one group of rows."""
    # Select the per-class columns by name rather than by position, so the
    # result does not depend on `y_true` being the first column.
    scores = group.drop(columns="y_true")
    # The column index mixes "y_true" with integer labels, so idxmax may hand
    # back object-dtype labels; cast so they compare cleanly with `y_true`.
    y_pred = scores.idxmax(axis=1).astype(int)
    return accuracy_score(group["y_true"], y_pred)


# One accuracy per (defence, is_simulated) row and classifier column.
(data.groupby(["defence", "classifier", "is_simulated"])
 .apply(_group_accuracy)
 .unstack("classifier"))

Unnamed: 0_level_0,classifier,$k$-FP,DF,Var-CNN
defence,is_simulated,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
FRONT,False,0.7,0.879,0.918
Tamaraw,False,0.891,0.8525,0.8705
Undefended,False,0.9895,0.988,0.9925
