# Lets-Plot


## Initialization


### Imports


In [1]:
import os
import re
from typing import Callable

import pandas as pd
import scipy
from lets_plot import (
    LetsPlot,
    aes,
    element_line,
    element_rect,
    element_text,
    facet_grid,
    flavor_darcula,
    geom_boxplot,
    geom_density,
    geom_hline,
    geom_line,
    geom_point,
    geom_pointrange,
    geom_violin,
    geom_vline,
    gggrid,
    ggplot,
    ggsize,
    ggtitle,
    guide_legend,
    layer_tooltips,
    margin,
    scale_color_discrete,
    scale_size,
    theme,
    ylim,
)
from lets_plot.mapping import as_discrete

from config.config_type import ConfigAll

In [2]:
LetsPlot.setup_html()

### General Functions


In [3]:
# (name, dataframe) pairs, one entry per loaded CSV.
DataframeItemList = list[tuple[str, pd.DataFrame]]
# NOTE: despite the alias name, the functions in this notebook build and unpack
# this tuple in the order (path_list, name_list).
NamePathList = tuple[list[str], list[str]]
# Maps a dataframe name to the (start, stop) bounds used for ``DataFrame.iloc``
# slicing; ``None`` leaves that bound open.
DataSliceDict = dict[str, tuple[int | None, int | None]]
# Maps a dataframe name to the index of the config file to read for it.
ConfigIndexDict = dict[str, int]

In [4]:
sparsity_modes = ["point", "grid", "contour", "skeleton", "region"]

In [5]:
def read_config(ref_path: str, config_index: int = 0) -> ConfigAll:
    """Load the JSON config file that sits in the same directory as ``ref_path``.

    Args:
        ref_path: Path of any file inside the experiment directory; only its
            directory part is used.
        config_index: ``0`` selects ``config.json``; any other value selects
            ``config_<config_index>.json``.

    Returns:
        The parsed JSON document.

    Raises:
        FileNotFoundError: If the selected config file does not exist.
    """
    import json

    filename = "config.json" if config_index == 0 else f"config_{config_index}.json"
    path = os.path.join(os.path.dirname(ref_path), filename)
    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # encoding, which would mis-decode non-ASCII text on some systems.
    with open(path, encoding="utf-8") as f:
        return json.load(f)

In [6]:
def get_paths_and_names(
    filter_func: Callable[[str], bool],
    filename: str,
    replace_name_func: Callable[[str], str],
    path: str = "outputs",
) -> NamePathList:
    """List the entries of ``path`` accepted by ``filter_func``.

    Args:
        filter_func: Predicate applied to each entry name found in ``path``.
        filename: File name appended to every selected entry to build its path.
        replace_name_func: Maps an entry name to the name returned for it.
        path: Directory to scan.

    Returns:
        ``(path_list, name_list)``: two parallel lists, sorted by entry name,
        where ``path_list[i]`` is ``path/<entry>/<filename>`` and
        ``name_list[i]`` is ``replace_name_func(<entry>)``.
    """
    # os.listdir() returns entries in arbitrary order; sort them so results
    # (and everything derived from their order) are reproducible.
    exp_name_list = sorted(filter(filter_func, os.listdir(path)))
    path_list = [os.path.join(path, exp_name, filename) for exp_name in exp_name_list]
    name_list = [replace_name_func(exp_name) for exp_name in exp_name_list]
    return path_list, name_list

In [7]:
def get_dataframes(
    path_list: list[str],
    name_list: list[str],
    data_slice_dict: DataSliceDict | None = None,
    config_index_dict: ConfigIndexDict | None = None,
) -> DataframeItemList:
    """Read one CSV per (path, name) pair and attach the run's batch size.

    Args:
        path_list: CSV paths to read.
        name_list: Names paired positionally with ``path_list``.
        data_slice_dict: Optional per-name ``(start, stop)`` row bounds applied
            with ``iloc``; names without an entry keep all rows.
        config_index_dict: Optional per-name config index passed to
            ``read_config``; names without an entry use ``0``.

    Returns:
        A list of ``(name, dataframe)`` tuples. Every dataframe gets a
        ``batch_size`` column taken from ``config["data"]["batch_size"]``.
    """
    df_items = []
    for path, name in zip(path_list, name_list):
        df = pd.read_csv(path)
        bounds = data_slice_dict.get(name) if data_slice_dict is not None else None
        if bounds is not None:
            # Copy the slice so the column assignment below writes to an
            # independent frame (avoids pandas' SettingWithCopyWarning).
            df = df.iloc[bounds[0] : bounds[1]].copy()
        config_index = (
            config_index_dict.get(name, 0) if config_index_dict is not None else 0
        )
        config = read_config(path, config_index)
        df["batch_size"] = config["data"]["batch_size"]
        df_items.append((name, df))
    return df_items

In [8]:
def combine_dataframes(
    df_item_list: DataframeItemList, new_column: str
) -> pd.DataFrame:
    """Stack the given frames vertically, labelling each row with its source.

    Every frame is copied and receives a column ``new_column`` holding the
    name it was paired with; the inputs are left untouched. The original row
    indices are kept, so the result's index may contain duplicates.
    """
    labelled_frames = [
        frame.assign(**{new_column: label}) for label, frame in df_item_list
    ]
    return pd.concat(labelled_frames)

In [9]:
def combine_columns(
    df: pd.DataFrame, column_list: list[str], new_column: str
) -> pd.DataFrame:
    """Reshape several value columns into one long-format value column.

    For each column in ``column_list`` a copy of ``df`` is made in which the
    other listed columns are dropped, the kept column is renamed to
    ``<new_column>_value`` and a column ``new_column`` records which source
    column the values came from. The copies are concatenated in list order;
    ``df`` itself is not modified.
    """
    value_column = new_column + "_value"
    long_parts = []
    for kept in column_list:
        others = [candidate for candidate in column_list if candidate != kept]
        part = df.drop(columns=others).rename(columns={kept: value_column})
        part[new_column] = kept
        long_parts.append(part)
    return pd.concat(long_parts)

### Data Functions


In [10]:
def replace_method_name(name: str) -> str:
    """Expand the ``"WS "`` / ``"PS "`` abbreviations to ``"weasel "`` / ``"protoseg "``."""
    for short, full in (("WS ", "weasel "), ("PS ", "protoseg ")):
        name = name.replace(short, full)
    return name


def replace_scenario_name(name: str) -> str:
    """Replace every occurrence of a known sparsity mode in ``name`` with ``"separated"``."""
    mode_pattern = "(" + "|".join(sparsity_modes) + ")"
    return re.sub(mode_pattern, "separated", name)

In [11]:
def get_paths_and_names_with(
    prefix_text: str, suffix_list: list[str] | None, filename: str
) -> tuple[list[str], list[str]]:
    """Select experiments by name prefix and (optionally) by last name token.

    An entry is kept when it starts with ``prefix_text``, does not contain
    ``"dummy"`` and, if ``suffix_list`` is given, its last space-separated
    token is in ``suffix_list``. Returned names have ``prefix_text + " "``
    removed and the method abbreviation expanded by ``replace_method_name``.
    """

    def is_selected(exp_name: str) -> bool:
        if not exp_name.startswith(prefix_text):
            return False
        if "dummy" in exp_name:
            return False
        if suffix_list is None:
            return True
        return exp_name.split(" ")[-1] in suffix_list

    def to_display_name(exp_name: str) -> str:
        return replace_method_name(exp_name.replace(prefix_text + " ", ""))

    return get_paths_and_names(is_selected, filename, to_display_name)

In [12]:
def get_meta_dataframe(name_path_list: NamePathList, **kwargs) -> pd.DataFrame:
    """Build one combined dataframe from the given ``(paths, names)`` pair.

    Extra keyword arguments are forwarded to ``get_dataframes``. In the
    result, ``duration`` is divided by 1000 (presumably ms -> s; confirm
    against the logger) and each frame's name is split on a space into the
    ``method`` and ``scenario`` columns.
    """
    paths, names = name_path_list
    merged = combine_dataframes(get_dataframes(paths, names, **kwargs), "method")
    merged["duration"] = merged["duration"].div(1000)
    method_and_scenario = merged["method"].str.split(" ", expand=True)
    merged[["method", "scenario"]] = method_and_scenario
    return merged

In [13]:
def get_tune_dataframe(name_path_list: NamePathList, **kwargs) -> pd.DataFrame:
    """Build one combined tuning-score dataframe from a ``(paths, names)`` pair.

    Extra keyword arguments are forwarded to ``get_dataframes``. In the
    result, ``duration`` is divided by 1000, each frame's name is split on a
    space into ``method`` and ``scenario``, sparsity-mode names inside
    ``scenario`` are rewritten by ``replace_scenario_name``, and a
    ``sparsity`` column of the form ``"<sparsity_mode>=<sparsity_value>"`` is
    added.
    """
    paths, names = name_path_list
    merged = combine_dataframes(get_dataframes(paths, names, **kwargs), "method")
    merged["duration"] = merged["duration"].div(1000)
    method_and_scenario = merged["method"].str.split(" ", expand=True)
    merged[["method", "scenario"]] = method_and_scenario
    merged["scenario"] = merged["scenario"].map(replace_scenario_name)
    value_text = merged["sparsity_value"].astype(str)
    merged["sparsity"] = merged["sparsity_mode"] + "=" + value_text
    return merged

In [14]:
def get_weasel_tune_dataframe(name_path_list: NamePathList, **kwargs) -> pd.DataFrame:
    """Build one combined dataframe of tuning scores keyed by scenario.

    Extra keyword arguments are forwarded to ``get_dataframes``. Each frame's
    name is stored in ``scenario`` (then rewritten by
    ``replace_scenario_name``), ``test_duration`` is divided by 1000 and a
    ``sparsity`` column of the form ``"<sparsity_mode>=<sparsity_value>"`` is
    added.
    """
    paths, names = name_path_list
    merged = combine_dataframes(get_dataframes(paths, names, **kwargs), "scenario")
    merged["test_duration"] = merged["test_duration"].div(1000)
    merged["scenario"] = merged["scenario"].map(replace_scenario_name)
    value_text = merged["sparsity_value"].astype(str)
    merged["sparsity"] = merged["sparsity_mode"] + "=" + value_text
    return merged

### Chart Functions


In [15]:
def plot_meta_loss(dataframe: pd.DataFrame):
    """Plot loss against epoch, one facet row per method, coloured by scenario.

    The plot is 1200 wide and 400 high per distinct ``method`` value.
    """
    method_count = len(dataframe["method"].unique())
    loss_lines = geom_line(aes(x="epoch", y="loss", color="scenario"))
    method_rows = facet_grid(y="method", scales="free_y", y_order=0)
    canvas = ggsize(1200, 400 * method_count)
    return ggplot(dataframe) + loss_lines + method_rows + canvas + flavor_darcula()

In [16]:
def plot_meta_loss_multiple(dataframe: pd.DataFrame):
    """Plot loss against epoch on a method (columns) x scenario (rows) facet grid.

    Width is 600 per distinct method; height is 200 per distinct scenario
    plus 50.
    """
    method_count = len(dataframe["method"].unique())
    scenario_count = len(dataframe["scenario"].unique())
    width, height = 600 * method_count, 200 * scenario_count + 50

    loss_lines = geom_line(aes(x="epoch", y="loss", color="scenario"))
    grid = facet_grid(x="method", y="scenario", scales="free", x_order=0, y_order=0)
    styling = theme(panel_border=element_rect(size=1), legend_position="bottom")

    return (
        ggplot(dataframe)
        + loss_lines
        + grid
        + ggsize(width, height)
        + flavor_darcula()
        + styling
    )

In [17]:
def plot_meta_metric(dataframe: pd.DataFrame, size_col: str = ""):
    """Plot ``duration`` and ``post_gpu_percent`` against epoch.

    The two metrics are first stacked into long format; facets are method
    (columns) x metric (rows). When ``size_col`` is non-empty, line size is
    mapped to that column as a discrete variable.
    """
    long_df = combine_columns(dataframe, ["duration", "post_gpu_percent"], "metric")
    method_count = len(long_df["method"].unique())
    size_mapping = None if size_col == "" else as_discrete(size_col, order=1)

    metric_lines = geom_line(
        aes(x="epoch", y="metric_value", color="scenario", size=size_mapping)
    )
    grid = facet_grid(x="method", y="metric", scales="free_y", x_order=0, y_order=0)
    styling = theme(panel_border=element_rect(size=1), legend_position="bottom")

    return (
        ggplot(long_df)
        + metric_lines
        + grid
        + ggsize(600 * method_count, 800)
        + flavor_darcula()
        + scale_size(range=[0.5, 1])
        + styling
    )

In [18]:
def plot_tune_score_by_scenario(dataframe: pd.DataFrame, size_col: str = ""):
    """Plot ``iou_od`` / ``iou_oc`` against epoch, faceted sparsity x IoU kind.

    Lines are coloured by scenario and use one line type per method. When
    ``size_col`` is non-empty, line size is mapped to that column as a
    discrete variable. Width is 200 per distinct ``sparsity`` value.
    """
    long_df = combine_columns(dataframe, ["iou_od", "iou_oc"], "iou")
    sparsity_count = len(long_df["sparsity"].unique())
    size_mapping = None if size_col == "" else as_discrete(size_col, order=1)

    mapping = aes(
        x=as_discrete("epoch"),
        y="iou_value",
        color="scenario",
        linetype="method",
        size=size_mapping,
    )
    tips = layer_tooltips().line("@method @scenario").line("@iou_value")
    grid = facet_grid(x="sparsity", y="iou", scales="fixed", x_order=1, y_order=0)
    styling = theme(panel_border=element_rect(size=1), legend_position="top")

    return (
        ggplot(long_df)
        + geom_line(mapping, tooltips=tips)
        + grid
        + ggsize(200 * sparsity_count, 600)
        + flavor_darcula()
        + scale_size(range=[0.5, 1])
        + styling
    )

In [19]:
def plot_tune_metric_by_scenario(dataframe: pd.DataFrame, size_col: str = ""):
    """Plot ``duration`` / ``post_gpu_percent`` against epoch, faceted sparsity x metric.

    Lines are coloured by scenario and use one line type per method; the
    tooltip shows the metric value with two decimals. When ``size_col`` is
    non-empty, line size is mapped to that column as a discrete variable.
    """
    long_df = combine_columns(dataframe, ["duration", "post_gpu_percent"], "metric")
    sparsity_count = len(long_df["sparsity"].unique())
    size_mapping = None if size_col == "" else as_discrete(size_col, order=1)

    mapping = aes(
        x=as_discrete("epoch"),
        y="metric_value",
        color="scenario",
        linetype="method",
        size=size_mapping,
    )
    tips = (
        layer_tooltips()
        .line("@method @scenario")
        .format("@metric_value", ".2f")
        .line("@metric_value")
    )
    grid = facet_grid(x="sparsity", y="metric", scales="free_y", x_order=1, y_order=0)
    styling = theme(panel_border=element_rect(size=1), legend_position="top")

    return (
        ggplot(long_df)
        + geom_line(mapping, tooltips=tips)
        + grid
        + ggsize(200 * sparsity_count, 600)
        + flavor_darcula()
        + scale_size(range=[0.5, 1])
        + styling
    )

In [20]:
def plot_weasel_tune_score_by_scenario(
    dataframe: pd.DataFrame, value_col: str, size_col: str = ""
):
    """Plot ``value_col`` against ``tune_epoch``, faceted epoch x sparsity.

    Lines are coloured by scenario. When ``size_col`` is non-empty, line size
    is mapped to that column as a discrete variable. Width is 250 per distinct
    epoch; height is 200 per distinct sparsity plus 50.
    """
    epoch_count = len(dataframe["epoch"].unique())
    sparsity_count = len(dataframe["sparsity"].unique())
    size_mapping = None if size_col == "" else as_discrete(size_col, order=1)

    mapping = aes(
        x=as_discrete("tune_epoch"),
        y=value_col,
        color="scenario",
        size=size_mapping,
    )
    grid = facet_grid(x="epoch", y="sparsity", scales="fixed", x_order=1, y_order=1)
    styling = theme(panel_border=element_rect(size=1), legend_position="top")

    return (
        ggplot(dataframe)
        + geom_line(mapping)
        + grid
        + ggsize(250 * epoch_count, 200 * sparsity_count + 50)
        + flavor_darcula()
        + scale_size(range=[0.5, 1])
        + styling
    )

In [21]:
def plot_tune_score_by_sparsity(dataframe: pd.DataFrame):
    """Draw one IoU-vs-``sparsity_value`` panel per sparsity mode in a 2-column grid.

    Each panel is faceted ``n_shots`` x IoU kind with the y axis fixed to
    [0, 1]. Only the panels in the last grid row show a legend (at the
    bottom); that row is given a relative height of 1.25 against 1 for the
    other rows.
    """
    long_df = combine_columns(dataframe, ["iou_od", "iou_oc"], "iou")
    modes = sorted(long_df["sparsity_mode"].unique())
    mode_count = len(modes)

    panels = []
    for position, mode in enumerate(modes):
        mode_rows = long_df[long_df["sparsity_mode"] == mode]
        mapping = aes(
            x="sparsity_value",
            y="iou_value",
            color="method",
            linetype=as_discrete("epoch", order=-1),
        )
        tips = layer_tooltips().line("@method ep=@epoch").line("@iou_value")
        panel = (
            ggplot(mode_rows)
            + geom_line(mapping, tooltips=tips)
            + ggtitle(mode)
            + facet_grid(x="n_shots", y="iou", scales="free_x", x_order=0, y_order=0)
            + ylim(0, 1)
            + flavor_darcula()
            + theme(panel_border=element_rect(size=1), title=element_text(hjust=0.5))
        )
        # The last panel is always in the last row; the one before it is too
        # when the number of panels is even.
        in_last_row = position == mode_count - 1 or (
            mode_count % 2 == 0 and position == mode_count - 2
        )
        panel += theme(legend_position="bottom" if in_last_row else "none")
        panels.append(panel)

    row_count = (mode_count + 1) // 2
    row_heights = [1] * (row_count - 1) + [1.25]
    grid = gggrid(panels, ncol=2, vspace=30, heights=row_heights)
    return grid + ggsize(1200, 400 * row_count) + flavor_darcula()

In [22]:
def plot_tune_metric_by_sparsity(dataframe: pd.DataFrame, size_col: str = ""):
    """Plot ``duration`` / ``post_gpu_percent`` against ``n_shots``, faceted sparsity mode x metric.

    Lines are coloured by the combination ``"<method> s=<sparsity_value>"``
    and use one line type per epoch. When ``size_col`` is non-empty, line size
    is mapped to that column as a discrete variable.
    """
    long_df = combine_columns(dataframe, ["duration", "post_gpu_percent"], "metric")
    value_text = long_df["sparsity_value"].astype(str)
    long_df["method__sparsity_value"] = long_df["method"] + " s=" + value_text
    mode_count = len(long_df["sparsity_mode"].unique())
    size_mapping = None if size_col == "" else as_discrete(size_col, order=1)

    mapping = aes(
        x="n_shots",
        y="metric_value",
        color="method__sparsity_value",
        linetype=as_discrete("epoch", order=-1),
        size=size_mapping,
    )
    tips = (
        layer_tooltips()
        .line("@method ep=@epoch s=@sparsity_value")
        .format("@metric_value", ".2f")
        .line("@metric_value")
    )
    grid = facet_grid(
        x="sparsity_mode", y="metric", scales="free_y", x_order=1, y_order=0
    )
    styling = theme(panel_border=element_rect(size=1), legend_position="top")

    return (
        ggplot(long_df)
        + geom_line(mapping, tooltips=tips)
        + grid
        + ggsize(200 * mode_count, 700)
        + flavor_darcula()
        + scale_size(range=[0.5, 1])
        + scale_color_discrete(guide=guide_legend(ncol=6))
        + styling
    )

## Exp - Scenario


In [None]:
prefix = "v3 RO-DR L"
suffixes = ["all"] + sparsity_modes + list(map(lambda x: x + "-var", sparsity_modes))
slice_dict = {"weasel all": (0, 70), "protoseg all": (0, 25)}

### Meta


In [None]:
paths, names = get_paths_and_names_with(prefix, suffixes, "train_loss.csv")
combined_df = get_meta_dataframe((paths, names))

print(names)
combined_df

In [None]:
plot_meta_loss(combined_df)

-   The "weasel" method is less stable than the "protoseg" method, but it is better at optimizing the "all" scenarios and the others.
-   The "protoseg" method has trouble optimizing the "all" scenarios and the "contour" scenario.
-   The "all" scenarios are less stable than the "separated" scenarios, especially for the "protoseg" method.
-   The "all-more-embeds" scenario for the "protoseg" method is not significantly different from the "all" scenario.


In [None]:
plot_meta_metric(combined_df, "batch_size")

-   The "weasel" method is slower than the "protoseg" method, but its GPU usage is a little lower; note that the "protoseg" method uses a much larger batch size.
-   Order of scenarios from slowest to fastest: "region" > "all" = "skeleton" > "contour" > "point" > "grid".
-   Note that the "all" scenarios include "point_old" and "grid_old".
-   The "region" scenario is significantly slower than the others.
-   The "all-more-embeds" scenario takes the same amount of time as the "all" scenario, but uses more GPU.
-   The "point" scenarios have significantly higher GPU usage than the others.


### Tune


In [None]:
paths, names = get_paths_and_names_with(prefix, suffixes, "tuned_score.csv")
combined_df = get_tune_dataframe((paths, names), data_slice_dict=slice_dict)
combined_df_full = combined_df.copy()
# combined_df = combined_df[combined_df['n_shots'] == 10]

print(names)
combined_df

In [None]:
plot_tune_score_by_scenario(combined_df, "n_shots")

In [None]:
# Long-format IoU scores, restricted to 10-shot rows of the "all" scenario
# for the point/grid sparsity modes (old and new variants).
combined_df_flat = combine_columns(combined_df, ["iou_od", "iou_oc"], "iou")
combined_df_flat = combined_df_flat[combined_df_flat["n_shots"] == 10]
point_grid_df_flat = combined_df_flat[combined_df_flat["scenario"] == "all"]
# .copy() detaches the selection so the column assignments below write to an
# independent frame (avoids pandas' SettingWithCopyWarning).
point_grid_df_flat = point_grid_df_flat[
    point_grid_df_flat["sparsity_mode"].isin(["point", "grid", "point_old", "grid_old"])
].copy()
# Split the "_old" marker out of the mode name into its own column.
point_grid_df_flat["sparsity_newness"] = point_grid_df_flat["sparsity_mode"].apply(
    lambda x: "new" if "old" not in x else "old"
)
point_grid_df_flat["sparsity_mode"] = point_grid_df_flat["sparsity_mode"].apply(
    lambda x: x.replace("_old", "")
)
point_grid_df_flat["method__sparsity_mode"] = (
    point_grid_df_flat["method"] + " " + point_grid_df_flat["sparsity_mode"]
)

(
    ggplot(point_grid_df_flat)
    + geom_line(aes(x=as_discrete("epoch"), y="iou_value", color="sparsity_newness"))
    + facet_grid(
        x="method__sparsity_mode", y="iou", scales="fixed", x_order=0, y_order=0
    )
    + ggsize(1200, 600)
    + flavor_darcula()
    + theme(panel_border=element_rect(size=1), legend_position="top")
)

In [None]:
plot_tune_metric_by_scenario(combined_df_full, "n_shots")

### Weasel Tune


In [None]:
paths, names = get_paths_and_names_with("v3 RO-DR L WS", suffixes, "tuning_score.csv")

combined_df = get_weasel_tune_dataframe(
    (paths, names), data_slice_dict={"all": (0, 70)}
)
combined_df = combined_df[combined_df["n_shots"] == 10]

print(names)
combined_df

In [None]:
plot_weasel_tune_score_by_scenario(combined_df, "iou_oc")

## Exp - Loss - Short


In [None]:
prefix = "v3 RO-DR S"
suffixes = ["all", "all-bce", "all-bce_2", "all-iou", "all-iou_bce"]

### Meta


In [None]:
paths, names = get_paths_and_names_with(prefix, suffixes, "train_loss.csv")
combined_df = get_meta_dataframe((paths, names))

print(names)
combined_df

In [None]:
plot_meta_loss_multiple(combined_df)

In [None]:
plot_meta_metric(combined_df)

### Tune


In [None]:
paths, names = get_paths_and_names_with(prefix, suffixes, "tuned_score.csv")
combined_df = get_tune_dataframe((paths, names))

print(names)
combined_df

In [None]:
plot_tune_score_by_scenario(combined_df)

In [None]:
plot_tune_metric_by_scenario(combined_df)

### Weasel Tune


In [None]:
paths, names = get_paths_and_names_with("v3 RO-DR S WS", suffixes, "tuning_score.csv")
combined_df = get_weasel_tune_dataframe((paths, names))

print(names)
combined_df

In [None]:
plot_weasel_tune_score_by_scenario(combined_df, "iou_od")

## Exp - Loss - Long


In [None]:
prefix = "v3 RO-DR L"
suffixes = ["all", "all-iou", "all-poor"]
slice_dict = {"weasel all": (0, 70), "protoseg all": (0, 25)}

### Meta


In [None]:
paths, names = get_paths_and_names_with(prefix, suffixes, "train_loss.csv")
combined_df = get_meta_dataframe((paths, names))

print(names)
combined_df

In [None]:
plot_meta_loss_multiple(combined_df)

In [None]:
plot_meta_metric(combined_df, "batch_size")

### Tune


In [None]:
paths, names = get_paths_and_names_with(prefix, suffixes, "tuned_score.csv")
combined_df = get_tune_dataframe((paths, names), data_slice_dict=slice_dict)

print(names)
combined_df

In [None]:
plot_tune_score_by_scenario(combined_df, "n_shots")

In [None]:
plot_tune_metric_by_scenario(combined_df, "n_shots")

### Weasel Tune


In [None]:
paths, names = get_paths_and_names_with("v3 RO-DR L WS", suffixes, "tuning_score.csv")
combined_df = get_weasel_tune_dataframe(
    (paths, names), data_slice_dict={"all": (0, 350)}
)

print(names)
combined_df

In [None]:
plot_weasel_tune_score_by_scenario(combined_df, "iou_od", "n_shots")

## Exp - Sparsity Value


In [45]:
prefix = "v3 RO-DR L"
suffixes = ["all"]
slice_dict = {"weasel all": (70, None), "protoseg all": (25, None)}

### Tune


In [46]:
paths, names = get_paths_and_names_with(prefix, suffixes, "tuned_score.csv")
combined_df = get_tune_dataframe((paths, names), data_slice_dict=slice_dict)

print(names)
combined_df

['protoseg all', 'weasel all']


Unnamed: 0,epoch,n_shots,sparsity_mode,sparsity_value,duration,post_gpu_percent,iou_oc,iou_od,batch_size,method,scenario,sparsity
25,40,1,point,1.0,4.047205,3.509689,0.773336,0.327372,36,protoseg,all,point=1.0
26,40,1,point,5.0,1.424403,3.509689,0.794257,0.908571,36,protoseg,all,point=5.0
27,40,1,point,10.0,1.512380,3.509689,0.834834,0.932715,36,protoseg,all,point=10.0
28,40,1,point,20.0,1.476392,3.509689,0.840129,0.933665,36,protoseg,all,point=20.0
29,40,1,grid,10.0,1.318656,3.509689,0.841222,0.926853,36,protoseg,all,grid=10.0
...,...,...,...,...,...,...,...,...,...,...,...,...
289,200,20,point_old,20.0,90.402893,10.815134,0.810732,0.886886,14,weasel,all,point_old=20.0
290,200,20,grid_old,10.0,88.985579,10.815134,0.812201,0.903297,14,weasel,all,grid_old=10.0
291,200,20,grid_old,20.0,88.878786,10.815134,0.795676,0.893416,14,weasel,all,grid_old=20.0
292,200,20,grid_old,30.0,89.554446,10.815134,0.803821,0.905232,14,weasel,all,grid_old=30.0


In [47]:
plot_tune_score_by_sparsity(
    combined_df[(combined_df["method"] == "weasel") & (combined_df["epoch"] == 200)]
)

In [None]:
plot_tune_metric_by_sparsity(combined_df, "batch_size")

### Tune Distribution


In [128]:
iou_od_low, iou_oc_low = 0.8647, 0.5202
iou_od_high, iou_oc_high = 0.9386, 0.8382

In [129]:
combined_df_2 = combine_columns(combined_df, ["iou_od", "iou_oc"], "iou")
combined_df_2["iou_low"] = combined_df_2.apply(
    lambda x: iou_od_low if x["iou"] == "iou_od" else iou_oc_low, axis=1
)
combined_df_2["iou_high"] = combined_df_2.apply(
    lambda x: iou_od_high if x["iou"] == "iou_od" else iou_oc_high, axis=1
)

In [130]:
(
    ggplot(combined_df_2[combined_df_2["sparsity_mode"] != "skeleton"])
    + geom_density(
        aes(x="iou_value", group="method", color="method", fill="method"), alpha=0.3
    )
    + geom_vline(aes(xintercept="iou_low"), size=0.5, linetype="longdash")
    + geom_vline(aes(xintercept="iou_high"), size=0.5, linetype="longdash")
    + facet_grid(x="iou", scales="fixed")
    + ggsize(1200, 400)
    + flavor_darcula()
    + theme(legend_position="top")
)

In [131]:
(
    ggplot(combined_df_2)
    + geom_density(
        aes(x="iou_value", group="method", color="method", fill="method"), alpha=0.3
    )
    + geom_vline(aes(xintercept="iou_low"), size=0.5, linetype="longdash")
    + geom_vline(aes(xintercept="iou_high"), size=0.5, linetype="longdash")
    + facet_grid(x="iou", y="sparsity_mode", scales="fixed")
    + ggsize(1200, 1400)
    + flavor_darcula()
    + theme(legend_position="top")
)

In [132]:
def get_t_value(n, alpha=0.05):
    """Return the two-sided Student-t critical value for a sample of size ``n``.

    Args:
        n: Sample size; ``n - 1`` degrees of freedom are used.
        alpha: Significance level; the ``1 - alpha / 2`` quantile is returned.

    Returns:
        The ``1 - alpha / 2`` quantile of the t distribution with ``n - 1``
        degrees of freedom.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.stats`` is loaded on every SciPy version.
    from scipy import stats

    return stats.t.ppf(1 - alpha / 2, n - 1)


# Per (method, sparsity_mode) group: row count plus mean and standard
# deviation of both IoU columns.
combined_df_3 = combined_df.groupby(["method", "sparsity_mode"])[
    ["iou_od", "iou_oc"]
].agg(
    count=("iou_od", "count"),
    iou_od_mean=("iou_od", "mean"),
    iou_oc_mean=("iou_oc", "mean"),
    iou_od_std=("iou_od", "std"),
    iou_oc_std=("iou_oc", "std"),
)

# Reshape to long format: one copy of the table per IoU kind, each with
# generic "mean_value"/"std_value" columns and an "iou" label, then stacked.
combined_df_3_od = combined_df_3.drop(columns=["iou_oc_mean", "iou_oc_std"])
combined_df_3_od = combined_df_3_od.rename(
    columns={"iou_od_mean": "mean_value", "iou_od_std": "std_value"}
)
combined_df_3_od["iou"] = "iou_od"
combined_df_3_oc = combined_df_3.drop(columns=["iou_od_mean", "iou_od_std"])
combined_df_3_oc = combined_df_3_oc.rename(
    columns={"iou_oc_mean": "mean_value", "iou_oc_std": "std_value"}
)
combined_df_3_oc["iou"] = "iou_oc"
combined_df_3 = pd.concat([combined_df_3_od, combined_df_3_oc])

# Attach the low/high reference values that match each row's IoU kind.
combined_df_3["iou_low"] = combined_df_3.apply(
    lambda x: iou_od_low if x["iou"] == "iou_od" else iou_oc_low, axis=1
)
combined_df_3["iou_high"] = combined_df_3.apply(
    lambda x: iou_od_high if x["iou"] == "iou_od" else iou_oc_high, axis=1
)

# Half-width of the confidence interval of the mean: t * std / sqrt(count),
# with t taken from get_t_value at its default alpha.
combined_df_3["mean_error"] = combined_df_3.apply(
    lambda x: get_t_value(x["count"]) * x["std_value"] / x["count"] ** 0.5, axis=1
)
combined_df_3["mean_low_limit"] = (
    combined_df_3["mean_value"] - combined_df_3["mean_error"]
)
combined_df_3["mean_high_limit"] = (
    combined_df_3["mean_value"] + combined_df_3["mean_error"]
)

# Turn the (method, sparsity_mode) index levels back into ordinary columns.
combined_df_3.reset_index(inplace=True)
combined_df_3

Unnamed: 0,method,sparsity_mode,count,mean_value,std_value,iou,iou_low,iou_high,mean_error,mean_low_limit,mean_high_limit
0,protoseg,contour,10,0.821112,0.030346,iou_od,0.8647,0.9386,0.021708,0.799404,0.84282
1,protoseg,grid,10,0.834981,0.032015,iou_od,0.8647,0.9386,0.022902,0.812079,0.857883
2,protoseg,point,10,0.827942,0.04961,iou_od,0.8647,0.9386,0.035489,0.792453,0.863431
3,protoseg,region,10,0.851275,0.026295,iou_od,0.8647,0.9386,0.01881,0.832465,0.870086
4,protoseg,skeleton,10,0.834441,0.032362,iou_od,0.8647,0.9386,0.02315,0.81129,0.857591
5,weasel,contour,15,0.859466,0.038638,iou_od,0.8647,0.9386,0.021397,0.838069,0.880863
6,weasel,grid,15,0.873642,0.037376,iou_od,0.8647,0.9386,0.020698,0.852944,0.89434
7,weasel,point,15,0.835535,0.038513,iou_od,0.8647,0.9386,0.021328,0.814207,0.856863
8,weasel,region,15,0.885734,0.016894,iou_od,0.8647,0.9386,0.009355,0.876378,0.895089
9,weasel,skeleton,15,0.773191,0.04922,iou_od,0.8647,0.9386,0.027257,0.745934,0.800447


In [133]:
(
    ggplot(combined_df_3)
    + geom_pointrange(
        aes(
            x="sparsity_mode",
            y="mean_value",
            ymin="mean_low_limit",
            ymax="mean_high_limit",
            color="sparsity_mode",
        )
    )
    + geom_hline(aes(yintercept="iou_low"), size=0.5, linetype="longdash")
    + geom_hline(aes(yintercept="iou_high"), size=0.5, linetype="longdash")
    + facet_grid(x="method", y="iou", scales="fixed")
    + ggsize(1200, 800)
    + flavor_darcula()
)

## Exploration - All Exp


In [None]:
prefix = "v3 RO-DR L"
suffixes = None

In [None]:
paths, names = get_paths_and_names_with(prefix, suffixes, "tuned_score.csv")
combined_df = get_tune_dataframe((paths, names))

print(names)
combined_df

In [None]:
# threshold_od, threshold_oc = 0.94, 0.84
threshold_od, threshold_oc = 0.9, 0.8
score_good_df = combined_df[
    (combined_df["iou_od"] > threshold_od) & (combined_df["iou_oc"] > threshold_oc)
]

print(score_good_df["iou_od"].mean(), score_good_df["iou_oc"].mean())

score_good_df.groupby(["method", "sparsity_mode"])[["iou_od", "iou_oc"]].agg(
    count=("iou_od", "count"),
    iou_od_mean=("iou_od", "mean"),
    iou_oc_mean=("iou_oc", "mean"),
)

In [None]:
scenario_all_df = combined_df[combined_df["scenario"] == "all"]
scenario_all_df = scenario_all_df[
    (scenario_all_df["iou_od"] > 0.5) & (scenario_all_df["iou_oc"] > 0.5)
]

scenario_all_df.groupby(["method", "sparsity_mode"])[["iou_od", "iou_oc"]].agg(
    count=("iou_od", "count"),
    iou_od_mean=("iou_od", "mean"),
    iou_oc_mean=("iou_oc", "mean"),
)

In [None]:
scenario_all_df = combined_df[combined_df["scenario"] == "all"]

scenario_all_df.groupby(["method", "epoch"])[["iou_od", "iou_oc"]].agg(
    count=("iou_od", "count"),
    iou_od_mean=("iou_od", "mean"),
    iou_oc_mean=("iou_oc", "mean"),
)

## Exploration - Plot


In [None]:
weasel_train_loss = pd.read_csv("outputs/v1 RO-DR L WS/train_loss.csv")
protoseg_train_loss = pd.read_csv("outputs/v1 RO-DR L PS/train_loss.csv")

train_loss = combine_dataframes(
    [("weasel", weasel_train_loss), ("protoseg", protoseg_train_loss)], "method"
)

In [None]:
(
    ggplot(train_loss)
    + geom_line(aes(x="epoch", y="duration", color="method"))
    + facet_grid("method", scales="free")
    + ggsize(800, 300)
    + flavor_darcula()
)

In [None]:
(
    ggplot(train_loss)
    + geom_line(aes(x="epoch", y="duration", color="method"))
    + facet_grid("method", scales="free")
    + ggsize(800, 300)
    + flavor_darcula()
    + theme(
        legend_position="left",
        line=element_line(size=2),
        # rect=element_rect(size=10),
        text=element_text(size=10),
        axis_text=element_text(size=5),
        axis_title=element_text(size=20),
        axis_line_y=element_line(size=2),
        panel_border=element_rect(size=2, color="yellow"),
        plot_margin=margin(0.5, 0.5, 0.5, 0.5),
        strip_text="blank",
    )
)

## Publication


### Meta: v1 vs v3


In [44]:
new_df = get_tune_dataframe(
    (["outputs/v3 RO-DR L WS all/tuned_score.csv", "outputs/v3 RO-DR L PS all/tuned_score.csv"], ["weasel new", "protoseg new"]),
    data_slice_dict={}
)

new_df = new_df[(new_df["n_shots"] == 20) & new_df["sparsity"].isin(["point=20.0", "grid=10.0", "contour=1.0", "skeleton=1.0", "region=1.0"])]
new_df = new_df[["epoch", "sparsity_mode", "method", "scenario", "iou_oc", "iou_od"]]
# new_df = new_df.groupby(["epoch", "sparsity_mode", "method", "scenario"], as_index=False).mean()

In [45]:
# Scores of the original (v1) runs, labelled as the "old" training type.
ori_df = combine_dataframes(
    [
        ("weasel", pd.read_csv("outputs/v1 RO-DR L WS/tuned_score.csv")),
        ("protoseg", pd.read_csv("outputs/v1 RO-DR L PS/tuned_score.csv")),
    ],
    "method",
)
ori_df["scenario"] = "old"
# Exclude the "dense" rows and the duration column. The drop is chained
# (not in-place) so nothing mutates a boolean-mask selection, which pandas
# flags with SettingWithCopyWarning.
ori_df = ori_df[ori_df["sparsity_mode"] != "dense"].drop(columns="duration")

In [46]:
combined_df = pd.concat([new_df, ori_df])
combined_df.rename(columns={"scenario": "train_type"}, inplace=True)
combined_df["iou_mean"] = combined_df[["iou_oc", "iou_od"]].mean(axis=1)

plot_df = combined_df.sort_values("iou_mean").drop_duplicates(["sparsity_mode", "method", "train_type"], keep="last").sort_values(["sparsity_mode", "method", "train_type"], ascending=[True, True, False])

In [48]:
(
    ggplot(plot_df)
    + geom_point(aes(x="sparsity_mode", y="iou_mean", color="train_type", size="epoch"))
    + facet_grid(x="method")
    + ggsize(700, 250)
    + scale_size(range=[2, 6])
    + theme(legend_position="right")
)

Meta Params:

-   random

Tune Params:

-   shot=20
-   point=20
-   grid=10
-   contour=1.0
-   skeleton=1.0
-   region=1.0


### Point & Grid: old vs new


In [49]:
# Long-format IoU scores of the "weasel all" run (first 70 rows), restricted
# to 10-shot rows of the point/grid sparsity modes (old and new variants).
point_grid_df = get_tune_dataframe(
    (["outputs/v3 RO-DR L WS all/tuned_score.csv"], ["weasel all"]),
    data_slice_dict={"weasel all": (0, 70)},
)
point_grid_df = combine_columns(point_grid_df, ["iou_od", "iou_oc"], "iou")
point_grid_df = point_grid_df[point_grid_df["n_shots"] == 10]
# .copy() detaches the selection so the column assignments below write to an
# independent frame (avoids pandas' SettingWithCopyWarning).
point_grid_df = point_grid_df[
    point_grid_df["sparsity_mode"].isin(["point", "grid", "point_old", "grid_old"])
].copy()
# Split the "_old" marker out of the mode name into its own column.
point_grid_df["sparsity_type"] = point_grid_df["sparsity_mode"].apply(
    lambda x: "new" if "old" not in x else "old"
)
point_grid_df["sparsity_mode"] = point_grid_df["sparsity_mode"].apply(
    lambda x: x.replace("_old", "")
)

In [51]:
plot_df = point_grid_df.sort_values("iou_value").drop_duplicates(["sparsity_mode", "sparsity_type", "method", "iou"], keep="last").sort_values(["sparsity_mode", "sparsity_type"], ascending=[True, False])

((
    ggplot(plot_df)
    + geom_point(aes(x="sparsity_mode", y="iou_value", color="sparsity_type", size="epoch"))
    + facet_grid(
        x="iou", scales="fixed"
    )
    + ggsize(500, 200)
    + scale_size(range=[2, 6])
))

Meta Params:

-   random

Tune Params:

-   shot=10
-   point=10
-   grid=25


### Meta: multi-scenario multi-sparsity


In [52]:
prefix = "v3 RO-DR L"
suffixes = ["all"] + sparsity_modes + list(map(lambda x: x + "-var", sparsity_modes))
slice_dict = {"weasel all": (0, 70)}

In [53]:
paths, names = get_paths_and_names_with(prefix, suffixes, "train_loss.csv")
# Filter paths and names TOGETHER. Filtering only the names leaves the two
# lists misaligned, so files of other methods would be loaded under "weasel"
# names when the lists are paired positionally.
weasel_pairs = [(p, n) for p, n in zip(paths, names) if "weasel" in n]
weasel_df = get_meta_dataframe(
    ([p for p, _ in weasel_pairs], [n for _, n in weasel_pairs])
)
# "sparsity" is the scenario name without its "-var" suffix; "scenario" is
# then collapsed to comb / sep / sep-var.
weasel_df["sparsity"] = weasel_df["scenario"].apply(lambda x: x.split("-")[0])
weasel_df["scenario"] = weasel_df["scenario"].apply(
    lambda x: "comb" if x == "all" else "sep-var" if "var" in x else "sep"
)
# Cap the loss at 17 (the upper y-limit of the plot) and keep epochs <= 100.
weasel_df["loss"] = weasel_df["loss"].clip(upper=17)
weasel_df = weasel_df[weasel_df["epoch"] <= 100]

In [54]:
# Loss against epoch: one colour per sparsity, one line type per scenario
# kind. (The previously computed ``num_methods`` was never used and has been
# removed.)
(
    ggplot(weasel_df)
    + geom_line(
        aes(
            x="epoch",
            y="loss",
            color="sparsity",
            linetype=as_discrete("scenario", order=-1),
        )
    )
    + ylim(1, 17)
    + ggsize(600, 600)
    + theme(legend_position="bottom")
)

Meta Params:

-   comb_all: random
-   contour_sep: 1.0
-   grid_sep: 25
-   point_sep: 10
-   region_sep: 1.0
-   skeleton_sep: 1.0
-   contour_sep-var: (0.2, 1.0)
-   grid_sep-var: (15, 50)
-   point_sep-var: (1, 15)
-   region_sep-var: (0.2, 1.0)
-   skeleton_sep-var: (0.2, 1.0)


### Tune: multi-scenario


In [55]:
# iou_od_ref, iou_oc_ref = 0.9386, 0.8382

In [23]:
prefix = "v3 RO-DR L"
suffixes = ["all"] + sparsity_modes + list(map(lambda x: x + "-var", sparsity_modes))
slice_dict = {"weasel all": (0, 70), "protoseg all": (0, 25)}

In [24]:
paths, names = get_paths_and_names_with(prefix, suffixes, "tuned_score.csv")
combined_df = get_tune_dataframe((paths, names), data_slice_dict=slice_dict)
combined_df_full = combined_df.copy()
# Keep the 10-shot rows, excluding the "point_old"/"grid_old" modes. The
# selection is copied so the relabelling below writes to an independent
# frame (avoids pandas' SettingWithCopyWarning).
keep_rows = (
    (combined_df["n_shots"] == 10)
    & (combined_df["sparsity_mode"] != "point_old")
    & (combined_df["sparsity_mode"] != "grid_old")
)
combined_df = combined_df[keep_rows].copy()
# Collapse the scenario names to comb / sep / sep-var.
combined_df["scenario"] = combined_df["scenario"].apply(
    lambda x: "comb" if x == "all" else "sep-var" if "var" in x else "sep"
)

print(names)
combined_df

['protoseg all', 'protoseg contour', 'protoseg grid', 'protoseg point', 'protoseg region', 'protoseg skeleton', 'weasel all', 'weasel contour', 'weasel contour-var', 'weasel grid', 'weasel grid-var', 'weasel point', 'weasel point-var', 'weasel region', 'weasel region-var', 'weasel skeleton', 'weasel skeleton-var']


Unnamed: 0,epoch,n_shots,sparsity_mode,sparsity_value,duration,post_gpu_percent,iou_oc,iou_od,batch_size,method,scenario,sparsity
0,40,10,point,10.0,3.033257,48.529683,0.800315,0.774793,36,protoseg,comb,point=10.0
1,40,10,grid,25.0,3.235956,48.529683,0.807800,0.758554,36,protoseg,comb,grid=25.0
2,40,10,contour,1.0,3.358578,48.529683,0.724775,0.797796,36,protoseg,comb,contour=1.0
3,40,10,skeleton,1.0,3.644581,48.529683,0.710997,0.773589,36,protoseg,comb,skeleton=1.0
4,40,10,region,1.0,4.842137,48.529683,0.781460,0.784772,36,protoseg,comb,region=1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
0,40,10,skeleton,1.0,80.986999,42.989849,0.494626,0.738394,13,weasel,sep-var,skeleton=1.0
1,80,10,skeleton,1.0,80.798745,42.989849,0.539148,0.823689,13,weasel,sep-var,skeleton=1.0
2,120,10,skeleton,1.0,81.377792,42.989849,0.558740,0.849713,13,weasel,sep-var,skeleton=1.0
3,160,10,skeleton,1.0,81.812356,42.989849,0.447846,0.783044,13,weasel,sep-var,skeleton=1.0


In [25]:
# Long-format table: presumably "iou" holds the metric name and "iou_value" the
# score (matches the columns used below; confirm against combine_columns).
plot_df = combine_columns(combined_df, ["iou_od", "iou_oc"], "iou")
# Reference values are disabled for this section:
# plot_df["iou_ref"] = plot_df.apply(
#     lambda x: iou_od_ref if x["iou"] == "iou_od" else iou_oc_ref, axis=1
# )
# Ascending sort + keep="last" leaves the highest-scoring row of each group.
ranked_by_iou = plot_df.sort_values("iou_value")
plot_df = ranked_by_iou.drop_duplicates(
    ["sparsity", "method", "scenario", "iou"],
    keep="last",
)

In [26]:
# Count of distinct sparsity settings (not referenced again in this cell).
num_sparsity = len(plot_df["sparsity"].unique())

# One point per sparsity mode and scenario; point size encodes the epoch of the kept row.
score_mapping = aes(
    x=as_discrete("sparsity_mode", order=1),
    y="iou_value",
    color=as_discrete("scenario", order=1),
    size="epoch",
)
score_tooltips = layer_tooltips().line("@method @scenario").line("@iou_value")
(
    ggplot(plot_df)
    + geom_point(score_mapping, tooltips=score_tooltips)
    # + geom_hline(aes(yintercept="iou_ref"), size=0.5, linetype="longdash")
    + facet_grid(x="method", y="iou", scales="fixed")
    + ggsize(600, 600)
    + theme(panel_border=element_rect(size=1), legend_position="bottom")
    + scale_size(range=[2, 6])
)

Meta Params:

-   comb_all: random
-   contour_sep: 1.0
-   grid_sep: 25
-   point_sep: 10
-   region_sep: 1.0
-   skeleton_sep: 1.0
-   contour_sep-var: (0.2, 1.0)
-   grid_sep-var: (15, 50)
-   point_sep-var: (1, 15)
-   region_sep-var: (0.2, 1.0)
-   skeleton_sep-var: (0.2, 1.0)

Tune Params:

-   shot=10
-   point=10
-   grid=25
-   contour=1.0
-   skeleton=1.0
-   region=1.0


### Tune: multi-sparsity


In [23]:
# Reference IoU scores (od / oc); used below to filter the runs and to draw
# the dashed horizontal lines.
iou_od_ref = 0.9386
iou_oc_ref = 0.8382

In [24]:
# Only the combined "all" runs are loaded in this section.
prefix = "v3 RO-DR L"
suffixes = ["all"]
# Passed to get_tune_dataframe as data_slice_dict; the open-ended bounds start
# where the multi-scenario section's (0, 70) / (0, 25) bounds end.
slice_dict = {
    "weasel all": (70, None),
    "protoseg all": (25, None),
}

In [25]:
paths, names = get_paths_and_names_with(prefix, suffixes, "tuned_score.csv")
combined_df = get_tune_dataframe((paths, names), data_slice_dict=slice_dict)
# Drop the "point_old" / "grid_old" sparsity modes.
old_modes = ["point_old", "grid_old"]
combined_df = combined_df[~combined_df["sparsity_mode"].isin(old_modes)]

In [26]:
# Rows that beat both reference scores, ranked by the mean of the two IoUs.
beats_od_ref = combined_df["iou_od"] > iou_od_ref
beats_oc_ref = combined_df["iou_oc"] > iou_oc_ref
best = combined_df[beats_od_ref & beats_oc_ref].copy()
best["iou_mean"] = best[["iou_od", "iou_oc"]].mean(axis=1)
summary_columns = ["method", "n_shots", "sparsity", "iou_oc", "iou_od"]
best.sort_values("iou_mean", ascending=False)[summary_columns]

Unnamed: 0,method,n_shots,sparsity,iou_oc,iou_od
146,protoseg,1,contour=0.25,0.918903,0.951097
147,protoseg,1,contour=0.5,0.916169,0.945818
148,protoseg,1,contour=1.0,0.914939,0.943498
144,protoseg,1,grid=40.0,0.895542,0.954206
140,protoseg,1,point=20.0,0.888948,0.954288
143,protoseg,1,grid=30.0,0.885804,0.953248
216,weasel,5,grid=30.0,0.898386,0.940016
142,protoseg,1,grid=20.0,0.880598,0.951541
141,protoseg,1,grid=10.0,0.871619,0.950946
139,protoseg,1,point=10.0,0.867029,0.951639


In [27]:
# Long-format table with one row per (run, metric); the "iou" column names the
# metric ("iou_od" / "iou_oc") and "iou_value" holds its score.
plot_df = combine_columns(combined_df, ["iou_od", "iou_oc"], "iou")
# Attach the matching reference score to every row. Only the "iou" column is
# needed, so map over that column instead of a row-wise apply(axis=1): it is
# faster and still yields a Series when the frame is empty.
plot_df["iou_ref"] = plot_df["iou"].map(
    lambda metric: iou_od_ref if metric == "iou_od" else iou_oc_ref
)

plot_df

Unnamed: 0,epoch,n_shots,sparsity_mode,sparsity_value,duration,post_gpu_percent,iou_value,batch_size,method,scenario,sparsity,iou,iou_ref
25,40,1,point,1.00,4.047205,3.509689,0.327372,36,protoseg,all,point=1.0,iou_od,0.9386
26,40,1,point,5.00,1.424403,3.509689,0.908571,36,protoseg,all,point=5.0,iou_od,0.9386
27,40,1,point,10.00,1.512380,3.509689,0.932715,36,protoseg,all,point=10.0,iou_od,0.9386
28,40,1,point,20.00,1.476392,3.509689,0.933665,36,protoseg,all,point=20.0,iou_od,0.9386
29,40,1,grid,10.00,1.318656,3.509689,0.926853,36,protoseg,all,grid=10.0,iou_od,0.9386
...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,200,20,skeleton,1.00,115.501045,10.815134,0.611465,14,weasel,all,skeleton=1.0,iou_oc,0.8382
282,200,20,region,0.10,178.258303,10.815134,0.756437,14,weasel,all,region=0.1,iou_oc,0.8382
283,200,20,region,0.25,177.988953,10.815134,0.767461,14,weasel,all,region=0.25,iou_oc,0.8382
284,200,20,region,0.50,178.825597,10.815134,0.755744,14,weasel,all,region=0.5,iou_oc,0.8382


In [28]:
# Distribution of IoU per sparsity mode: a violin with a narrow box plot on
# top, plus the dashed reference line, faceted by method and metric.
violin_mapping = aes(
    x="sparsity_mode",
    y="iou_value",
    color="sparsity_mode",
    fill="sparsity_mode",
)
box_mapping = aes(x="sparsity_mode", y="iou_value", fill="sparsity_mode")
reference_line = geom_hline(aes(yintercept="iou_ref"), size=0.2, linetype="longdash")
(
    ggplot(plot_df)
    + geom_violin(violin_mapping, size=0.8, alpha=0.5)
    + geom_boxplot(box_mapping, width=0.2, size=0.5)
    + reference_line
    + facet_grid(x="method", y="iou", scales="fixed")
    + ggsize(700, 600)
    + theme(panel_border=element_rect(size=1), legend_position="bottom")
)

In [32]:
# Keep the highest-scoring row per (shots, mode, value, method, metric) and
# expose the sparsity value under the axis label used by the plot.
dedupe_keys = ["n_shots", "sparsity_mode", "sparsity_value", "method", "iou"]
new_plot_df = (
    plot_df.sort_values("iou_value")
    .drop_duplicates(dedupe_keys, keep="last")
    .rename(columns={"sparsity_value": "region density"})
)
# new_plot_df["n_shots_str"] = new_plot_df["n_shots"].apply(lambda x: str(x)+"-shots")

# Only the "region" sparsity mode is plotted here.
region_df = new_plot_df[new_plot_df["sparsity_mode"] == "region"]
region_mapping = aes(
    x=as_discrete("region density", order=1),
    y="iou_value",
    color="method",
)
(
    ggplot(region_df)
    + geom_line(region_mapping)
    + geom_hline(aes(yintercept="iou_ref"), size=0.2, linetype="longdash")
    + facet_grid(x="n_shots", y="iou", scales="free_x", x_order=1, y_order=1)
    + ggsize(500, 400)
    + ylim(0.0, 1.0)
    + theme(panel_border=element_rect(size=1), legend_position="bottom")
)

Meta Params:

-   random

Tune Params:

-   shot=[1,5,10,20]
-   contour=[0.1,0.25,0.5,1.0]
-   grid=[10,20,30,40]
-   point=[1,5,10,20]
-   region=[0.1,0.25,0.5,1.0]
-   skeleton=[0.1,0.25,0.5,1.0]


# Matplotlib


## Initialization


### Imports


In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Switch matplotlib to its "dark_background" style sheet for the plots in this section.
plt.style.use("dark_background")

## Meta


### Loss & Duration


In [None]:
# NOTE(review): both frames are read from the same "WS" output directory, so the
# "protoseg" curves are identical to the weasel ones; confirm the protoseg path.
weasel_train_loss = pd.read_csv("outputs/v1 RO-DR L WS/train_loss.csv")
protoseg_train_loss = pd.read_csv("outputs/v1 RO-DR L WS/train_loss.csv")

_, axs = plt.subplots(2, 2, figsize=(14, 10))
train_loss_by_method = {
    "weasel": weasel_train_loss,
    "protoseg": protoseg_train_loss,
}

# Top row: one loss curve per method.
for loss_ax, (method_name, loss_df) in zip(axs[0], train_loss_by_method.items()):
    loss_ax.set_title(f"{method_name} - epoch vs loss")
    loss_ax.plot(loss_df["epoch"], loss_df["loss"])

# Bottom-left: both duration curves on one set of axes (axs[1][1] is left empty).
duration_ax = axs[1][0]
duration_ax.set_title("epoch vs duration in ms")
for method_name, loss_df in train_loss_by_method.items():
    duration_ax.plot(loss_df["epoch"], loss_df["duration"], label=method_name)
duration_ax.legend()

## Tune


### Score


In [None]:
# Tuned-score curves per sparsity mode: left column weasel, right column
# protoseg; rows are optic disc IoU, optic cup IoU and duration.
# NOTE(review): both frames are read from the same "WS" output directory, so the
# two columns show identical data; confirm the intended protoseg path.
weasel_tuned_score = pd.read_csv("outputs/v1 RO-DR L WS/tuned_score.csv")
protoseg_tuned_score = pd.read_csv("outputs/v1 RO-DR L WS/tuned_score.csv")

# NOTE(review): this rebinds the notebook-level `sparsity_modes` (defined near
# the top of the notebook without "dense"); cells run afterwards see this list.
sparsity_modes = ["point", "grid", "contour", "skeleton", "region", "dense"]

_, axs = plt.subplots(3, 2, figsize=(12, 15))

for sm in sparsity_modes:
    # NOTE(review): `weasel_df` is also the name of the loss table in the
    # Lets-Plot section; it is overwritten here.
    weasel_df = weasel_tuned_score[weasel_tuned_score["sparsity_mode"] == sm]
    # Epochs are converted to strings before being used as x values.
    weasel_epochs = [str(ep) for ep in weasel_df["epoch"]]
    axs[0][0].plot(weasel_epochs, weasel_df["iou_od"], label=sm)
    axs[1][0].plot(weasel_epochs, weasel_df["iou_oc"], label=sm)
    axs[2][0].plot(weasel_epochs, weasel_df["duration"], label=sm)

    protoseg_df = protoseg_tuned_score[protoseg_tuned_score["sparsity_mode"] == sm]
    protoseg_epochs = [str(ep) for ep in protoseg_df["epoch"]]
    axs[0][1].plot(protoseg_epochs, protoseg_df["iou_od"], label=sm)
    axs[1][1].plot(protoseg_epochs, protoseg_df["iou_oc"], label=sm)
    axs[2][1].plot(protoseg_epochs, protoseg_df["duration"], label=sm)

# Titles and legends for all six panels; only the two IoU rows get a fixed
# [0, 1] y-range, the duration row keeps matplotlib's automatic limits.
axs[0][0].set_title("weasel - epoch vs optic disc IoU")
axs[1][0].set_title("weasel - epoch vs optic cup IoU")
axs[2][0].set_title("weasel - epoch vs duration")
axs[0][0].legend()
axs[1][0].legend()
axs[2][0].legend()
axs[0][0].set_ylim([0, 1])
axs[1][0].set_ylim([0, 1])
axs[0][1].set_title("protoseg - epoch vs optic disc IoU")
axs[1][1].set_title("protoseg - epoch vs optic cup IoU")
axs[2][1].set_title("protoseg - epoch vs duration")
axs[0][1].legend()
axs[1][1].legend()
axs[2][1].legend()
axs[0][1].set_ylim([0, 1])
axs[1][1].set_ylim([0, 1])

## Weasel Tune


### Score


In [None]:
# Per-epoch tuning curves for weasel: one subplot row per training epoch, with
# optic disc IoU on the left and optic cup IoU on the right, one line per
# sparsity mode.
weasel_tuning_score = pd.read_csv("outputs/v1 RO-DR L WS/tuning_score.csv")
weasel_epochs = weasel_tuning_score["epoch"].unique()

# NOTE(review): this rebinds the notebook-level `sparsity_modes` (defined near
# the top of the notebook without "dense"); cells run afterwards see this list.
sparsity_modes = ["point", "grid", "contour", "skeleton", "region", "dense"]

# squeeze=False keeps `axs` two-dimensional even when the CSV holds a single
# epoch; without it plt.subplots(1, 2) returns a 1-D array and axs[i][0] fails.
_, axs = plt.subplots(
    len(weasel_epochs),
    2,
    figsize=(12, 5 * len(weasel_epochs)),
    squeeze=False,
)

for i, ep in enumerate(weasel_epochs):
    for sm in sparsity_modes:
        weasel_df = weasel_tuning_score[
            (weasel_tuning_score["sparsity_mode"] == sm)
            & (weasel_tuning_score["epoch"] == ep)
        ]
        # Tune epochs are converted to strings before being used as x values.
        weasel_tune_epochs = [str(tep) for tep in weasel_df["tune_epoch"]]
        axs[i][0].plot(weasel_tune_epochs, weasel_df["iou_od"], label=sm)
        axs[i][1].plot(weasel_tune_epochs, weasel_df["iou_oc"], label=sm)
        # axs[i][2].plot(weasel_tune_epochs, weasel_df['test_duration'], label = sm)

    axs[i][0].set_title(f"weasel ep-{ep} - tune epoch vs optic disc IoU")
    axs[i][1].set_title(f"weasel ep-{ep} - tune epoch vs optic cup IoU")
    # axs[i][2].set_title(f'weasel ep-{ep} - tune epoch vs test duration')
    axs[i][0].legend()
    axs[i][1].legend()
    # axs[i][2].legend()
    # IoU is bounded, so both panels share a fixed [0, 1] y-range.
    axs[i][0].set_ylim([0, 1])
    axs[i][1].set_ylim([0, 1])

# Other
