<h1>Extracting results</h1>
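<p>Here we extract the data required to plot the results from the raw experiment outputs, i.e. the per-holdout <code>*.csv.gz</code> files stored under <code>experiments/Edge Prediction</code> in the input directory.</p>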

In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None 
from glob import glob
import json

<h3>Collecting data</h3>
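<p>The following function replaces the feature name with the model's first edge feature where one is set, recodes the True/False negative-sampling columns as string labels (<code>DANS</code>/<code>UNS</code>), drops the rows whose feature-level and model-level sampling settings disagree, and returns the subset of columns used for plotting. The returned table is written to a CSV file in the cells further below.</p>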

In [8]:
def extract_core_results(df):
    """Build the plotting table from a raw edge-prediction results frame.

    The input frame is left untouched: every derived column is added to a
    private copy.

    Processing steps:

    * ``features_names`` is replaced by the first entry of the model's
      ``edge_features`` list whenever that column is set.
    * ``evaluation_negative_sampling_method`` is ``"DANS"`` where
      ``use_scale_free_distribution`` is truthy and ``"UNS"`` otherwise.
    * Rows whose feature-level and model-level ``use_scale_free_distribution``
      settings disagree are dropped; rows without a feature-level setting are
      kept.
    * ``model_negative_examples`` is ``"DANS"``/``"UNS"`` according to the
      feature-level setting, falling back to the model-level setting when the
      feature-level one is missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw results. Must contain ``holdouts_kwargs`` (JSON strings with a
        ``"train_size"`` key), ``features_names``,
        ``use_scale_free_distribution``,
        ``"('model_parameters', 'edge_features')"``,
        ``"('features_parameters', 'use_scale_free_distribution')"``,
        ``"('model_parameters', 'use_scale_free_distribution')"``,
        ``evaluation_mode`` and the metric columns listed at the end of this
        function.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the plotting columns. Rows keep the index
        labels they had in ``df``.
    """
    edge_features_column = "('model_parameters', 'edge_features')"
    features_flag_column = "('features_parameters', 'use_scale_free_distribution')"
    model_flag_column = "('model_parameters', 'use_scale_free_distribution')"

    # Never write derived columns into the caller's frame.
    df = df.copy()

    # Parsed as in the original pipeline; not part of the exported columns.
    df["train_size"] = [
        float(json.loads(holdouts_kwargs)["train_size"])
        for holdouts_kwargs in df.holdouts_kwargs
    ]

    # The edge-feature list is stored as a Python-style repr with single
    # quotes, so the quotes are swapped before handing it to the JSON parser.
    df["features_names"] = [
        json.loads(edge_feature.replace("'", "\""))[0]
        if pd.notna(edge_feature)
        else feature_name
        for feature_name, edge_feature in zip(
            df.features_names,
            df[edge_features_column],
        )
    ]

    df["evaluation_negative_sampling_method"] = [
        "DANS" if use_scale_free_distribution else "UNS"
        for use_scale_free_distribution in df.use_scale_free_distribution
    ]

    # Keep rows without a feature-level setting, or where it matches the
    # model-level one. An index-aligned boolean Series also works on an empty
    # frame, where a plain empty list would be read as a column selection.
    keep = pd.Series(
        [
            bool(pd.isna(features_flag) or features_flag == model_flag)
            for features_flag, model_flag in zip(
                df[features_flag_column],
                df[model_flag_column],
            )
        ],
        index=df.index,
        dtype=bool,
    )
    df = df.loc[keep].copy()

    # NaN is truthy, so a missing feature-level flag must not be tested
    # directly: it would always yield "DANS". Use the model-level flag instead.
    df["model_negative_examples"] = [
        "DANS"
        if (model_flag if pd.isna(features_flag) else features_flag)
        else "UNS"
        for features_flag, model_flag in zip(
            df[features_flag_column],
            df[model_flag_column],
        )
    ]

    columns = [
        "evaluation_mode",
        "features_names",
        "evaluation_negative_sampling_method",
        "model_negative_examples",
        "accuracy",
        "balanced_accuracy",
        "false_discovery_rate",
        "matthews_correlation_coefficient",
        "precision",
        "recall",
        "specificity",
        "f1_score",
        "auroc",
        "auprc",
    ]
    return df[columns].copy()

In [None]:
# Ask for the directory that contains the "experiments" folder read by the
# cells below. Surrounding whitespace from a pasted path is removed, since it
# would otherwise make the glob patterns match nothing.
indirectory = input("Directory containing the 'experiments' folder: ").strip()

<h3>STRING Protein-Protein Association data</h3>

In [4]:
# Collect every per-holdout result file of the HomoSapiens experiments.
# glob returns matches in arbitrary order, so the paths are sorted to make the
# row order (and the index created by reset_index) reproducible across runs.
string_result_paths = sorted(
    glob(f"{indirectory}/experiments/Edge Prediction/HomoSapiens/holdout_*/*.csv.gz")
)
if not string_result_paths:
    # Fail with an explicit message instead of pandas' generic
    # "No objects to concatenate".
    raise FileNotFoundError(
        f"No result files found under "
        f"'{indirectory}/experiments/Edge Prediction/HomoSapiens/holdout_*/'."
    )
df = pd.concat(
    [pd.read_csv(path, index_col=0) for path in string_result_paths]
).reset_index(drop=True)

In [9]:
# Reduce the raw STRING results to the columns used for plotting.
string_results = extract_core_results(df)

In [10]:
# Preview the first rows of the extracted STRING table.
string_results.head()

Unnamed: 0,evaluation_mode,features_names,evaluation_negative_sampling_method,model_negative_examples,accuracy,balanced_accuracy,false_discovery_rate,matthews_correlation_coefficient,precision,recall,specificity,f1_score,auroc,auprc
2,train,Walklets GloVe,DANS,UNS,0.50431,0.504299,0.496403,0.008798,0.503597,0.610293,0.398304,0.551835,0.504209,0.502719
3,test,Walklets GloVe,DANS,UNS,0.502032,0.502023,0.498269,0.00413,0.501731,0.602818,0.401227,0.547649,0.501541,0.499348
4,train,Degree,DANS,DANS,0.499494,0.499448,0.500232,-0.005322,0.499768,0.988554,0.010341,0.663899,0.487517,0.47403
5,test,Degree,DANS,DANS,0.499014,0.498969,0.500476,-0.009792,0.499524,0.987761,0.010178,0.663505,0.477872,0.469604
6,train,Degree,DANS,DANS,0.470948,0.470928,0.520309,-0.064339,0.479691,0.685,0.256856,0.56425,0.490493,0.517127


In [11]:
# Save the STRING table to the current working directory; to_csv also writes
# the row index as the first column by default.
string_results.to_csv("string_results.csv")

<h3>SLI synthetic lethality results</h3>

In [13]:
# Collect every per-holdout result file of the SLI experiments.
# glob gives "(", "|" and ")" no special meaning, so this pattern matches a
# directory literally named "(SLDB | HomoSapiens)", not an alternation.
# glob returns matches in arbitrary order, so the paths are sorted to make the
# row order (and the index created by reset_index) reproducible across runs.
sli_result_paths = sorted(
    glob(f"{indirectory}/experiments/Edge Prediction/(SLDB | HomoSapiens)/holdout_*/*.csv.gz")
)
if not sli_result_paths:
    # Fail with an explicit message instead of pandas' generic
    # "No objects to concatenate".
    raise FileNotFoundError(
        f"No result files found under "
        f"'{indirectory}/experiments/Edge Prediction/(SLDB | HomoSapiens)/holdout_*/'."
    )
df = pd.concat(
    [pd.read_csv(path, index_col=0) for path in sli_result_paths]
).reset_index(drop=True)

In [14]:
# Reduce the raw SLI results to the columns used for plotting.
sli_results = extract_core_results(df)

In [15]:
# Preview the first rows of the extracted SLI table.
sli_results.head()

Unnamed: 0,evaluation_mode,features_names,evaluation_negative_sampling_method,model_negative_examples,accuracy,balanced_accuracy,false_discovery_rate,matthews_correlation_coefficient,precision,recall,specificity,f1_score,auroc,auprc
0,train,First-order LINE,DANS,DANS,0.697383,0.697383,0.279268,0.396993,0.720732,0.644493,0.750273,0.680484,0.760481,0.745744
1,test,First-order LINE,DANS,DANS,0.626023,0.626023,0.328125,0.261524,0.671875,0.492635,0.759411,0.568461,0.676276,0.68752
4,train,DeepWalk SkipGram,UNS,DANS,0.733915,0.733915,0.286991,0.4701,0.713009,0.782988,0.684842,0.746362,0.786881,0.737826
5,test,DeepWalk SkipGram,UNS,DANS,0.575286,0.575286,0.405738,0.15372,0.594262,0.474632,0.675941,0.527753,0.559374,0.592515
8,train,Walklets CBOW,UNS,DANS,0.903217,0.903217,0.04881,0.811031,0.95119,0.850055,0.956379,0.897783,0.975267,0.960448


In [16]:
# Save the SLI table to the current working directory; to_csv also writes
# the row index as the first column by default.
sli_results.to_csv("sli_results.csv")