In [None]:
import dill
import numpy as np
import pandas as pd

from sklearn.metrics import precision_score, roc_auc_score, recall_score, f1_score

import matplotlib
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

path = "../runner/results/"

import pathlib

path = pathlib.Path(path)


exp20 = {}
exp30 = {}


#  TOY FS Eval

## noisy data

In [None]:
results = []
for filepath in list(path.glob("res_all_noise05*")):
    print(filepath)
    with open(filepath,"rb") as file:
        content = dill.load(file=file)
        results.append(content)

toy = {}

for d in results:
    for set_and_model, results in d.items():
        if len(results) > 0:
            toy.setdefault(set_and_model, []).append(results[:30])

In [6]:
sim_set_names = {
    "Set1": "Set 1",
    "Set2": "Set 2",
    "Set3": "Set 3",
    "Set4": "Set 4",
    "Set5": "Set 5",
    "Set6": "Set 6",
    "Set7": "Set 7"
    }


toy_set_params = {
            "Set1": {"n": 150, "strong": 6, "weak": 0, "irr": 6},
            "Set2": {"n": 150, "strong": 0, "weak": 6, "irr": 6},
            "Set3": {"n": 150, "strong": 3, "weak": 4, "irr": 3},
            "Set4": {"n": 256, "strong": 6, "weak": 6, "irr": 6},
            "Set5": {"n": 512, "strong": 1, "weak": 2, "irr": 11},
            "Set6": {"n": 200, "strong": 1, "weak": 20, "irr": 0},
            "Set7": {"n": 200, "strong": 1, "weak": 20, "irr": 20},
        }

def get_truth(params):
    strong=params["strong"]
    weak=params["weak"]
    irrel=params["irr"]
    truth = [True] * (strong + weak) + [False] * irrel
    return truth

def get_sim_scores(stability_res):

    toyframe = stability_res.iloc[
        :, stability_res.columns.get_level_values(0).str.contains("Set")
    ]

    def get_score_of_series(series, scorefnc):
        setname = series.name[0]

        def get_score(result):
            featset = result["features"]
            featset[featset==2] = 1
            truth_set = get_truth(toy_set_params[setname])
            return scorefnc(truth_set, featset)

        prec_vec = map(get_score, series)
        return list(prec_vec)

    toy_precision = toyframe.apply(get_score_of_series, axis=0, args=[precision_score])
    toy_recall = toyframe.apply(get_score_of_series, axis=0, args=[recall_score])
    toy_f1 = toyframe.apply(get_score_of_series, axis=0, args=[f1_score])

    toy_precision = (
        toy_precision.T.stack()
        .reset_index()
        .drop("level_2", 1)
        .rename(columns={"level_0": "data", 0: "score", "level_1": "model"})
    )
    toy_precision["type"] = "precision"
    toy_recall = (
        toy_recall.T.stack()
        .reset_index()
        .drop("level_2", 1)
        .rename(columns={"level_0": "data", 0: "score", "level_1": "model"})
    )
    toy_recall["type"] = "recall"
    toy_f1 = (
        toy_f1.T.stack()
        .reset_index()
        .drop("level_2", 1)
        .rename(columns={"level_0": "data", 0: "score", "level_1": "model"})
    )
    toy_f1["type"] = "f1"

    toy_scores = pd.concat([toy_precision, toy_recall, toy_f1])

    # toy_f1.groupby(["model", "data"]).mean().unstack()

    grouped_toy_scores = (
        toy_scores.groupby(["model", "data", "type"]).mean().unstack(level="type")
    )
    # grouped_toy_scores = grouped_toy_scores.unstack("data")

    renamed_toy_scores = (
        grouped_toy_scores.round(decimals=2).unstack(1).rename(columns=sim_set_names)
    )
    renamed_toy_scores = renamed_toy_scores.sort_index(axis=1)

    return renamed_toy_scores

index = pd.MultiIndex.from_tuples(toy.keys())
list_df = pd.DataFrame(
    [pd.Series(value[0]) for value in toy.values()], index=index)

toy_pd = list_df.T  # Drop invalid results

table = get_sim_scores(toy_pd).T
noisy = table
print(table.to_latex())

\begin{tabular}{lllrrr}
\toprule
      &        & model &  ElasticNet &  FRI\_exc &  FRI\_imp \\
{} & type & data &             &          &          \\
\midrule
score & f1 & Set 1 &        0.92 &     0.95 &     0.98 \\
      &        & Set 2 &        0.89 &     0.97 &     0.98 \\
      &        & Set 3 &        0.85 &     0.97 &     0.96 \\
      &        & Set 4 &        0.80 &     0.96 &     0.97 \\
      &        & Set 5 &        0.86 &     1.00 &     1.00 \\
      &        & Set 6 &        0.56 &     0.94 &     0.94 \\
      &        & Set 7 &        0.46 &     0.90 &     0.91 \\
      & precision & Set 1 &        0.87 &     1.00 &     1.00 \\
      &        & Set 2 &        0.86 &     1.00 &     1.00 \\
      &        & Set 3 &        0.90 &     1.00 &     1.00 \\
      &        & Set 4 &        0.91 &     1.00 &     1.00 \\
      &        & Set 5 &        0.81 &     1.00 &     1.00 \\
      &        & Set 6 &        1.00 &     1.00 &     1.00 \\
      &        & Set 7 &        0

## clean data

In [25]:
results = []
for filepath in list(path.glob("res_all_noise00.*")):
    print(filepath)
    with open(filepath,"rb") as file:
        content = dill.load(file=file)
        results.append(content)

toy = {}

for d in results:
    for set_and_model, results in d.items():
        if len(results) > 0:
            toy.setdefault(set_and_model, []).append(results[:30])

../pipeline/results/res_all_noise00.dat


In [26]:
sim_set_names = {
    "Set1": "Set 1",
    "Set2": "Set 2",
    "Set3": "Set 3",
    "Set4": "Set 4",
    "Set5": "Set 5",
    "Set6": "Set 6",
    "Set7": "Set 7"
    }


toy_set_params = {
            "Set1": {"n": 150, "strong": 6, "weak": 0, "irr": 6},
            "Set2": {"n": 150, "strong": 0, "weak": 6, "irr": 6},
            "Set3": {"n": 150, "strong": 3, "weak": 4, "irr": 3},
            "Set4": {"n": 256, "strong": 6, "weak": 6, "irr": 6},
            "Set5": {"n": 512, "strong": 1, "weak": 2, "irr": 11},
            "Set6": {"n": 200, "strong": 1, "weak": 20, "irr": 0},
            "Set7": {"n": 200, "strong": 1, "weak": 20, "irr": 20},
        }

def get_truth(params):
    strong=params["strong"]
    weak=params["weak"]
    irrel=params["irr"]
    truth = [True] * (strong + weak) + [False] * irrel
    return truth

def get_sim_scores(stability_res):

    toyframe = stability_res.iloc[
        :, stability_res.columns.get_level_values(0).str.contains("Set")
    ]

    def get_score_of_series(series, scorefnc):
        setname = series.name[0]

        def get_score(result):
            featset = result["features"]
            featset[featset==2] = 1
            truth_set = get_truth(toy_set_params[setname])
            return scorefnc(truth_set, featset)

        prec_vec = map(get_score, series)
        return list(prec_vec)

    toy_precision = toyframe.apply(get_score_of_series, axis=0, args=[precision_score])
    toy_recall = toyframe.apply(get_score_of_series, axis=0, args=[recall_score])
    toy_f1 = toyframe.apply(get_score_of_series, axis=0, args=[f1_score])

    toy_precision = (
        toy_precision.T.stack()
        .reset_index()
        .drop("level_2", 1)
        .rename(columns={"level_0": "data", 0: "score", "level_1": "model"})
    )
    toy_precision["type"] = "precision"
    toy_recall = (
        toy_recall.T.stack()
        .reset_index()
        .drop("level_2", 1)
        .rename(columns={"level_0": "data", 0: "score", "level_1": "model"})
    )
    toy_recall["type"] = "recall"
    toy_f1 = (
        toy_f1.T.stack()
        .reset_index()
        .drop("level_2", 1)
        .rename(columns={"level_0": "data", 0: "score", "level_1": "model"})
    )
    toy_f1["type"] = "f1"

    toy_scores = pd.concat([toy_precision, toy_recall, toy_f1])

    # toy_f1.groupby(["model", "data"]).mean().unstack()

    grouped_toy_scores = (
        toy_scores.groupby(["model", "data", "type"]).mean().unstack(level="type")
    )
    # grouped_toy_scores = grouped_toy_scores.unstack("data")

    renamed_toy_scores = (
        grouped_toy_scores.round(decimals=2).unstack(1).rename(columns=sim_set_names)
    )
    renamed_toy_scores = renamed_toy_scores.sort_index(axis=1)

    return renamed_toy_scores

index = pd.MultiIndex.from_tuples(toy.keys())
list_df = pd.DataFrame(
    [pd.Series(value[0]) for value in toy.values()], index=index)

toy_pd = list_df.T  # Drop invalid results

table = get_sim_scores(toy_pd).T
clean = table
print(table.to_latex())

\begin{tabular}{lllrrr}
\toprule
      &        & model &  ElasticNet &  FRI\_exc &  FRI\_imp \\
{} & type & data &             &          &          \\
\midrule
score & f1 & Set 1 &        0.94 &      1.0 &      1.0 \\
      &        & Set 2 &        0.79 &      1.0 &      1.0 \\
      &        & Set 3 &        0.81 &      1.0 &      1.0 \\
      &        & Set 4 &        0.83 &      1.0 &      1.0 \\
      &        & Set 5 &        0.83 &      1.0 &      1.0 \\
      &        & Set 6 &        0.25 &      1.0 &      1.0 \\
      &        & Set 7 &        0.49 &      1.0 &      1.0 \\
      & precision & Set 1 &        0.90 &      1.0 &      1.0 \\
      &        & Set 2 &        0.86 &      1.0 &      1.0 \\
      &        & Set 3 &        0.95 &      1.0 &      1.0 \\
      &        & Set 4 &        0.95 &      1.0 &      1.0 \\
      &        & Set 5 &        0.89 &      1.0 &      1.0 \\
      &        & Set 6 &        1.00 &      1.0 &      1.0 \\
      &        & Set 7 &        0

## Combined

In [27]:
combined = pd.concat([clean,noisy],axis=1)

In [28]:
print(combined.to_latex())

\begin{tabular}{lllrrrrrr}
\toprule
      &        & model &  ElasticNet &  FRI\_exc &  FRI\_imp &  ElasticNet &  FRI\_exc &  FRI\_imp \\
{} & type & data &             &          &          &             &          &          \\
\midrule
score & f1 & Set 1 &        0.94 &      1.0 &      1.0 &        0.89 &     0.70 &     0.72 \\
      &        & Set 2 &        0.79 &      1.0 &      1.0 &        0.86 &     0.97 &     0.95 \\
      &        & Set 3 &        0.81 &      1.0 &      1.0 &        0.87 &     0.85 &     0.84 \\
      &        & Set 4 &        0.83 &      1.0 &      1.0 &        0.82 &     0.70 &     0.71 \\
      &        & Set 5 &        0.83 &      1.0 &      1.0 &        0.91 &     1.00 &     1.00 \\
      &        & Set 6 &        0.25 &      1.0 &      1.0 &        0.64 &     0.78 &     0.67 \\
      &        & Set 7 &        0.49 &      1.0 &      1.0 &        0.53 &     0.66 &     0.51 \\
      & precision & Set 1 &        0.90 &      1.0 &      1.0 &        0.85 &  

In [29]:
combined

Unnamed: 0_level_0,Unnamed: 1_level_0,model,ElasticNet,FRI_exc,FRI_imp,ElasticNet,FRI_exc,FRI_imp
Unnamed: 0_level_1,type,data,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
score,f1,Set 1,0.94,1.0,1.0,0.89,0.7,0.72
score,f1,Set 2,0.79,1.0,1.0,0.86,0.97,0.95
score,f1,Set 3,0.81,1.0,1.0,0.87,0.85,0.84
score,f1,Set 4,0.83,1.0,1.0,0.82,0.7,0.71
score,f1,Set 5,0.83,1.0,1.0,0.91,1.0,1.0
score,f1,Set 6,0.25,1.0,1.0,0.64,0.78,0.67
score,f1,Set 7,0.49,1.0,1.0,0.53,0.66,0.51
score,precision,Set 1,0.9,1.0,1.0,0.85,0.97,1.0
score,precision,Set 2,0.86,1.0,1.0,0.84,1.0,1.0
score,precision,Set 3,0.95,1.0,1.0,0.9,1.0,1.0


In [37]:
toy_pd.applymap(lambda res: res["train_scores"]).mean().unstack()

Unnamed: 0,ElasticNet,FRI_exc,FRI_imp
Set1,0.991333,0.994444,0.996333
Set2,0.993778,0.992111,0.994
Set3,0.995111,0.992889,0.993556
Set4,0.995965,0.995634,0.996218
Set5,0.997848,0.997202,0.99785
Set6,0.995308,0.998324,0.998329
Set7,0.99315,0.992167,0.994915


In [31]:
toy_pd.applymap(lambda res: res["test_scores"]["mmae"]).mean().unstack()

KeyError: ('test_scores', 'occurred at index (Set1, FRI_exc)')