In [1]:
import dill
import numpy as np
import pandas as pd

from sklearn.metrics import precision_score, roc_auc_score, recall_score, f1_score

import matplotlib
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

path = "../runner/results/"

import pathlib

path = pathlib.Path(path)



#  TOY FS Eval

## noisy data

In [2]:
# Load every result file matching "paper*.dat" from the results directory.
# SECURITY NOTE: dill.load (like pickle.load) can execute arbitrary code
# embedded in the file -- only load files from a trusted source.
results = []
# Sorted so the load order (and therefore which file's entry ends up first
# in each list stored in `toy`) is the same on every run.
for filepath in sorted(path.glob("paper*.dat")):
    print(filepath)
    with open(filepath, "rb") as file:
        results.append(dill.load(file=file))

# Group the loaded content by its (set, model) key: toy[key] is a list with
# one entry per file that contained a non-empty result list for that key.
toy = {}

for loaded in results:
    # `runs` (not `results`) so the list of loaded files is not rebound
    # while it is being iterated and keeps its meaning after this cell.
    for set_and_model, runs in loaded.items():
        if len(runs) > 0:
            # Keep at most the first 30 entries of each result list.
            toy.setdefault(set_and_model, []).append(runs[:30])

..\runner\results\paper.dat


In [3]:
import sys
sys.path.append("../runner/")

import experiment_pipeline
#import paper_output

# Per-set parameters from the experiment pipeline; looked up by set name and
# read via the "strong", "weak" and "irr" keys in get_truth / get_truthAR.
toy_set_params = experiment_pipeline.toy_set_params
    

def get_truth(params):
    """Return the binary ground-truth mask described by ``params``.

    The list holds ``params["strong"] + params["weak"]`` ``True`` entries
    followed by ``params["irr"]`` ``False`` entries.
    """
    n_relevant = params["strong"] + params["weak"]
    n_irrelevant = params["irr"]
    return [True] * n_relevant + [False] * n_irrelevant

def get_truthAR(params):
    """Return the three-class ground-truth labels described by ``params``.

    The list holds ``params["strong"]`` entries of 2, then ``params["weak"]``
    entries of 1, then ``params["irr"]`` entries of 0.
    """
    n_strong, n_weak, n_irrelevant = (
        params[key] for key in ("strong", "weak", "irr")
    )
    return [2] * n_strong + [1] * n_weak + [0] * n_irrelevant



In [4]:
# Model names used to filter result columns (matched against column level 1)
# before the multi-class scores are computed.
AR = ["FRI","SQ"]


In [5]:
toy

{('Set1',
  'FRI'): [[{'train_scores': 0.9866714107809997,
    'features': array([2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0]),
    'runtime': 2.767524480819702,
    'setname': 'Set1',
    'modelname': 'FRI'},
   {'train_scores': 1.0,
    'features': array([2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0]),
    'runtime': 2.6954505443573,
    'setname': 'Set1',
    'modelname': 'FRI'},
   {'train_scores': 0.9866185410334348,
    'features': array([2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0]),
    'runtime': 2.7184722423553467,
    'setname': 'Set1',
    'modelname': 'FRI'},
   {'train_scores': 0.9933366102149338,
    'features': array([2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0]),
    'runtime': 2.7615113258361816,
    'setname': 'Set1',
    'modelname': 'FRI'},
   {'train_scores': 0.9866761751946276,
    'features': array([2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0]),
    'runtime': 2.7264997959136963,
    'setname': 'Set1',
    'modelname': 'FRI'},
   {'train_scores': 1.0,
    'features': array([2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0,

In [17]:
list(toy_set_params.keys())

['Set1', 'Set2', 'Set3', 'Set4', 'Set5', 'Set6', 'Set7', 'Set8']

In [20]:
def get_sim_scores(stability_res):
    """Compute the mean binary F1 score per model and data set.

    Parameters
    ----------
    stability_res : pandas.DataFrame
        Frame whose column labels are (set name, model name) pairs and whose
        cells are result dicts with a ``"features"`` entry. Entries greater
        than zero are treated as selected features and compared with
        ``get_truth(toy_set_params[set name])``.

    Returns
    -------
    pandas.DataFrame
        Mean F1 rounded to two decimals, indexed by model, with the score,
        type and data set on the (sorted) column levels.
    """

    def get_score_of_series(series, scorefnc):
        # The first level of the column label names the data set.
        setname = series.name[0]
        # The ground truth is the same for every result in the column, so it
        # is built once instead of once per result.
        truth_set = get_truth(toy_set_params[setname])
        return [scorefnc(truth_set, result["features"] > 0) for result in series]

    toy_f1 = stability_res.apply(get_score_of_series, axis=0, args=[f1_score])

    toy_f1 = (
        toy_f1.T.stack()
        .reset_index()
        # Keyword form: the positional ``axis`` argument of DataFrame.drop
        # is rejected by pandas >= 2.0.
        .drop(columns="level_2")
        .rename(columns={"level_0": "data", 0: "score", "level_1": "model"})
    )
    toy_f1["type"] = "f1"

    grouped_toy_scores = (
        toy_f1.groupby(["model", "data", "type"]).mean().unstack(level="type")
    )

    renamed_toy_scores = grouped_toy_scores.round(decimals=2).unstack(1)
    return renamed_toy_scores.sort_index(axis=1)

# One label per (set, model) key of `toy`.
index = pd.MultiIndex.from_tuples(toy.keys())
# Only the first entry stored under each key (value[0]) is used; any further
# entries in the list are ignored here.
list_df = pd.DataFrame(
    [pd.Series(value[0]) for value in toy.values()], index=index)

toy_pd = list_df.T  # Transpose so each (set, model) pair is a column; NOTE(review): nothing is dropped or filtered here.

# Transposed score table, also kept under the name `noisy`.
table = get_sim_scores(toy_pd).T
noisy = table
print(table.to_latex())

\begin{tabular}{lllrrrr}
\toprule
      &    & model &  ElasticNet &  FRI &    RF &    SQ \\
{} & type & data &             &      &       &       \\
\midrule
score & f1 & Set1 &        0.95 &  1.0 &  0.90 &  0.98 \\
      &    & Set2 &        0.70 &  1.0 &  0.22 &  0.93 \\
      &    & Set3 &        0.85 &  1.0 &  0.70 &  0.98 \\
      &    & Set4 &        0.84 &  1.0 &  0.71 &  0.94 \\
      &    & Set5 &        0.72 &  1.0 &  0.68 &  1.00 \\
      &    & Set6 &        0.49 &  1.0 &  0.17 &  0.97 \\
      &    & Set7 &        0.58 &  1.0 &  0.16 &  0.97 \\
      &    & Set8 &        0.81 &  1.0 &  0.64 &  0.99 \\
\bottomrule
\end{tabular}



In [25]:
def get_sim_scoresAR(stability_res):
    """Compute the mean macro-averaged F1 score for the models listed in ``AR``.

    Parameters
    ----------
    stability_res : pandas.DataFrame
        Frame whose column labels are (set name, model name) pairs and whose
        cells are result dicts with a ``"features"`` entry. Only columns
        whose model name is in ``AR`` are scored; the ``"features"`` values
        are compared unchanged with ``get_truthAR(toy_set_params[set name])``.

    Returns
    -------
    pandas.DataFrame
        Mean macro F1 rounded to two decimals, indexed by model, with the
        score, type and data set on the (sorted) column levels.
    """
    # Keep only the columns whose model name (column level 1) is in AR.
    toyframe = stability_res.iloc[:, stability_res.columns.get_level_values(1).isin(AR)]

    def get_score_of_series(series, scorefnc):
        # The first level of the column label names the data set.
        setname = series.name[0]
        # The ground truth is the same for every result in the column, so it
        # is built once instead of once per result.
        truth_set = get_truthAR(toy_set_params[setname])
        return [
            scorefnc(truth_set, result["features"], average="macro")
            for result in series
        ]

    toy_f1 = toyframe.apply(get_score_of_series, axis=0, args=[f1_score])

    toy_f1 = (
        toy_f1.T.stack()
        .reset_index()
        # Keyword form: the positional ``axis`` argument of DataFrame.drop
        # is rejected by pandas >= 2.0.
        .drop(columns="level_2")
        .rename(columns={"level_0": "data", 0: "score", "level_1": "model"})
    )
    toy_f1["type"] = "f1"

    grouped_toy_scores = (
        toy_f1.groupby(["model", "data", "type"]).mean().unstack(level="type")
    )

    renamed_toy_scores = grouped_toy_scores.round(decimals=2).unstack(1)
    return renamed_toy_scores.sort_index(axis=1)

# NOTE(review): index / list_df / toy_pd are rebuilt here exactly as in the
# get_sim_scores cell; kept so this cell also runs on its own.
index = pd.MultiIndex.from_tuples(toy.keys())
# Only the first entry stored under each key (value[0]) is used.
list_df = pd.DataFrame(
    [pd.Series(value[0]) for value in toy.values()], index=index)

toy_pd = list_df.T  # Transpose so each (set, model) pair is a column; NOTE(review): nothing is dropped or filtered here.

# NOTE(review): `table` and `noisy` are rebound here, replacing the values
# assigned in the get_sim_scores cell -- confirm that is intended.
table = get_sim_scoresAR(toy_pd).T
noisy = table
print(table.to_latex())

\begin{tabular}{lllrr}
\toprule
      &    & model &  FRI &    SQ \\
{} & type & data &      &       \\
\midrule
score & f1 & Set1 &  1.0 &  0.98 \\
      &    & Set2 &  1.0 &  0.87 \\
      &    & Set3 &  1.0 &  0.88 \\
      &    & Set4 &  1.0 &  0.85 \\
      &    & Set5 &  1.0 &  0.78 \\
      &    & Set6 &  1.0 &  0.76 \\
      &    & Set7 &  1.0 &  0.86 \\
      &    & Set8 &  1.0 &  0.96 \\
\bottomrule
\end{tabular}



In [26]:
# Keep only the columns whose model name (column level 1) is listed in AR.
toyframe = toy_pd.iloc[:, toy_pd.columns.get_level_values(1).isin(AR)]


def get_score_of_series(series, scorefnc):
    """Score every result dict in one (set, model) column.

    ``series.name[0]`` selects the entry of ``toy_set_params`` used to build
    the three-class ground truth; ``scorefnc`` is called with
    ``average="weighted"``. Returns the scores as a list.
    """
    setname = series.name[0]

    def score_single(outcome):
        expected = get_truthAR(toy_set_params[setname])
        selected = outcome["features"]
        return scorefnc(expected, selected, average="weighted")

    return [score_single(outcome) for outcome in series]


# One list of weighted F1 scores per column.
toy_f1 = toyframe.apply(get_score_of_series, axis=0, args=(f1_score,))

In [27]:
toy_f1.mean()

Set1  FRI    1.000000
Set2  FRI    1.000000
Set3  FRI    1.000000
Set4  FRI    1.000000
Set5  FRI    1.000000
Set6  FRI    1.000000
Set7  FRI    0.997559
Set8  FRI    0.997196
Set1  SQ     0.983217
Set2  SQ     0.904621
Set3  SQ     0.876190
Set4  SQ     0.854145
Set5  SQ     0.928571
Set6  SQ     0.930978
Set7  SQ     0.952521
Set8  SQ     0.980155
dtype: float64

In [28]:
# Mean "train_scores" value per (set, model) column, unstacked into a table
# and rounded to two decimals.
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in
# favour of DataFrame.map -- confirm against the pandas version in use.
trainscore = toy_pd.applymap(lambda res: res["train_scores"]).mean().unstack().round(decimals=2)

In [29]:
print(trainscore.to_latex(multicolumn=False, bold_rows=True))

\begin{tabular}{lrrrr}
\toprule
{} &  ElasticNet &   FRI &    RF &    SQ \\
\midrule
\textbf{Set1} &        0.99 &  0.99 &  1.00 &  1.00 \\
\textbf{Set2} &        0.98 &  0.99 &  1.00 &  1.00 \\
\textbf{Set3} &        0.97 &  1.00 &  1.00 &  1.00 \\
\textbf{Set4} &        0.98 &  0.99 &  1.00 &  1.00 \\
\textbf{Set5} &        0.99 &  1.00 &  1.00 &  1.00 \\
\textbf{Set6} &        0.97 &  1.00 &  0.99 &  0.99 \\
\textbf{Set7} &        0.98 &  0.98 &  1.00 &  1.00 \\
\textbf{Set8} &        0.98 &  1.00 &  1.00 &  1.00 \\
\bottomrule
\end{tabular}



In [31]:
# Mean "runtime" value per (set, model) column, unstacked into a table
# (not rounded). NOTE(review): time unit not visible here -- presumably seconds.
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in
# favour of DataFrame.map -- confirm against the pandas version in use.
runtime = toy_pd.applymap(lambda res: res["runtime"]).mean().unstack()

In [33]:
print(runtime.to_latex(multicolumn=False, bold_rows=True))

\begin{tabular}{lrrrr}
\toprule
{} &  ElasticNet &         FRI &          RF &         SQ \\
\midrule
\textbf{Set1} &    0.230361 &    2.735491 &    0.631474 &   1.454025 \\
\textbf{Set2} &    0.227407 &    2.836179 &    0.616364 &   1.222412 \\
\textbf{Set3} &    0.216625 &    2.673935 &    0.534196 &   1.175954 \\
\textbf{Set4} &    0.284710 &    3.708574 &    1.813756 &   3.069602 \\
\textbf{Set5} &    0.297724 &    5.415880 &    3.231341 &   5.625539 \\
\textbf{Set6} &    0.291265 &    3.923472 &    1.728877 &   2.569440 \\
\textbf{Set7} &    0.428895 &    6.077930 &    4.814632 &   3.528761 \\
\textbf{Set8} &    2.230633 &  201.069447 &  238.953420 &  77.988276 \\
\bottomrule
\end{tabular}

