<h1>Extracting results</h1>
<p>Here we extract the data required to plot the results from the CSV files generated by the edge-prediction experiments (stored under <code>experiments/Edge Prediction/</code>).</p>

In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None 
from glob import glob
import json

<h3>Collecting data</h3>
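<p>The following function recodes some True/False columns as string labels ("DANS"/"UNS"), replaces the feature name with the edge feature name where one is given, drops rows whose feature and model negative-sampling settings disagree, and returns a subset of columns as a DataFrame. That DataFrame is later written to a CSV file that can be used for plotting.</p>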

In [2]:
def _sampling_label(flag):
    """Map a use_scale_free_distribution flag to "DANS" (truthy) or "UNS" (falsy).

    A missing flag (None/NaN) yields None instead of being treated as truthy.
    """
    if pd.isna(flag):
        return None
    return "DANS" if flag else "UNS"


def extract_core_results(df):
    """Reduce a raw results table to the columns used for plotting.

    The input frame is left untouched; all work happens on a copy.

    Processing steps:

    1. ``features_names`` is replaced by the first entry of the list stored in
       ``('model_parameters', 'edge_features')`` whenever that cell is not
       missing; otherwise the existing feature name is kept.
    2. ``evaluation_negative_sampling_method`` is "DANS" where
       ``use_scale_free_distribution`` is truthy and "UNS" otherwise.
    3. Rows are kept only if the features-parameter
       ``use_scale_free_distribution`` is missing or equal to the
       model-parameter one.
    4. ``model_negative_examples`` is "DANS"/"UNS" according to the
       features-parameter flag, falling back to the model-parameter flag when
       the features-parameter flag is missing. If both are missing the label
       is left empty (None).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``features_names``, ``use_scale_free_distribution``,
        ``('model_parameters', 'edge_features')``,
        ``('features_parameters', 'use_scale_free_distribution')``,
        ``('model_parameters', 'use_scale_free_distribution')`` and the metric
        columns listed at the end of this function.

    Returns
    -------
    pandas.DataFrame
        A new frame with the three label columns followed by the metric
        columns. Rows keep the index labels they had in ``df``.

    Raises
    ------
    KeyError
        If one of the required columns is absent.
    """
    edge_features_col = "('model_parameters', 'edge_features')"
    features_flag_col = "('features_parameters', 'use_scale_free_distribution')"
    model_flag_col = "('model_parameters', 'use_scale_free_distribution')"

    # Copy first so the caller's frame is never modified.
    results = df.copy()

    # The edge-feature cell holds a list written with single quotes
    # (e.g. "['Name']"); swap the quotes so it parses as JSON.
    results["features_names"] = [
        json.loads(edge_feature.replace("'", "\""))[0]
        if pd.notna(edge_feature)
        else feature_name
        for feature_name, edge_feature in zip(
            results["features_names"],
            results[edge_features_col],
        )
    ]

    results["evaluation_negative_sampling_method"] = [
        "DANS" if flag else "UNS"
        for flag in results["use_scale_free_distribution"]
    ]

    # Keep rows whose feature and model settings agree, or whose feature
    # setting is missing.
    features_flag = results[features_flag_col]
    model_flag = results[model_flag_col]
    keep = (features_flag.isna() | (features_flag == model_flag)).to_numpy()
    # Explicit copy: the column assignment below must not target a slice.
    results = results.loc[keep].copy()

    # NaN is truthy, so a missing features flag must not be tested directly:
    # use the model flag for those rows instead.
    results["model_negative_examples"] = [
        _sampling_label(model_value if pd.isna(features_value) else features_value)
        for features_value, model_value in zip(
            results[features_flag_col],
            results[model_flag_col],
        )
    ]

    # NOTE(review): the outputs saved in this notebook also show a
    # fowlkes_mallows_index column, which is not selected here - confirm
    # whether it should be included.
    columns = [
        "features_names",
        "evaluation_negative_sampling_method",
        "model_negative_examples",
        "accuracy",
        "fall_out",
        "informedness",
        "miss_rate",
        "balanced_accuracy",
        "threat_score",
        "false_discovery_rate",
        "precision",
        "recall",
        "specificity",
        "f1_score",
        "auroc",
        "auprc",
    ]
    return results[columns].copy()

<h3>STRING Protein-Protein Association data</h3>

In [3]:
# Load every per-holdout results file for the HomoSapiens graph into one frame.
# glob returns paths in arbitrary order, so sort them: this keeps the row order
# (and the row labels that end up in the exported CSV) the same on every run.
df = pd.concat([
    pd.read_csv(
        path,
        index_col=0
    )
    for path in sorted(glob("experiments/Edge Prediction/HomoSapiens/holdout_*/*.csv.gz"))
]).reset_index(drop=True)

In [9]:
# Keep only the label and metric columns needed for plotting.
string_results = extract_core_results(df)

In [10]:
# Preview the first rows of the extracted table.
string_results.head()

Unnamed: 0,features_names,evaluation_negative_sampling_method,model_negative_examples,accuracy,fall_out,informedness,miss_rate,balanced_accuracy,threat_score,false_discovery_rate,fowlkes_mallows_index,precision,recall,specificity,f1_score,auroc,auprc
0,First-order LINE,DANS,DANS,0.697383,0.249727,0.394766,0.355507,0.697383,0.515707,0.279268,0.681547,0.720732,0.644493,0.750273,0.680484,0.760481,0.745744
1,First-order LINE,DANS,DANS,0.626023,0.240589,0.252046,0.507365,0.626023,0.397098,0.328125,0.575317,0.671875,0.492635,0.759411,0.568461,0.676276,0.68752
4,DeepWalk SkipGram,UNS,DANS,0.733915,0.315158,0.46783,0.217012,0.733915,0.595357,0.286991,0.74718,0.713009,0.782988,0.684842,0.746362,0.786881,0.737826
5,DeepWalk SkipGram,UNS,DANS,0.575286,0.324059,0.150573,0.525368,0.575286,0.358467,0.405738,0.531089,0.594262,0.474632,0.675941,0.527753,0.559374,0.592515
8,Walklets CBOW,UNS,DANS,0.903217,0.043621,0.806434,0.149945,0.903217,0.814525,0.04881,0.899201,0.95119,0.850055,0.956379,0.897783,0.975267,0.960448


In [11]:
# Write the table to string_results.csv in the working directory.
# to_csv writes the row index as the first column by default.
string_results.to_csv("string_results.csv")

<h3>SLI synthetic lethality results</h3>

In [12]:
# Load every per-holdout results file for the "(SLDB | HomoSapiens)" graph.
# glob only understands *, ? and [...] wildcards, so the parentheses and the
# "|" are matched literally: the directory must carry exactly that name.
# glob returns paths in arbitrary order, so sort them: this keeps the row order
# (and the row labels that end up in the exported CSV) the same on every run.
df = pd.concat([
    pd.read_csv(
        path,
        index_col=0
    )
    for path in sorted(glob("experiments/Edge Prediction/(SLDB | HomoSapiens)/holdout_*/*.csv.gz"))
]).reset_index(drop=True)

In [13]:
# Keep only the label and metric columns needed for plotting.
sli_results = extract_core_results(df)

In [15]:
# Preview the first rows of the extracted table.
# NOTE(review): the output saved for this cell is identical to the one saved
# for string_results.head() - confirm this cell was re-run after loading the
# SLI files and that the two globs really match different data.
sli_results.head()

Unnamed: 0,features_names,evaluation_negative_sampling_method,model_negative_examples,accuracy,fall_out,informedness,miss_rate,balanced_accuracy,threat_score,false_discovery_rate,fowlkes_mallows_index,precision,recall,specificity,f1_score,auroc,auprc
0,First-order LINE,DANS,DANS,0.697383,0.249727,0.394766,0.355507,0.697383,0.515707,0.279268,0.681547,0.720732,0.644493,0.750273,0.680484,0.760481,0.745744
1,First-order LINE,DANS,DANS,0.626023,0.240589,0.252046,0.507365,0.626023,0.397098,0.328125,0.575317,0.671875,0.492635,0.759411,0.568461,0.676276,0.68752
4,DeepWalk SkipGram,UNS,DANS,0.733915,0.315158,0.46783,0.217012,0.733915,0.595357,0.286991,0.74718,0.713009,0.782988,0.684842,0.746362,0.786881,0.737826
5,DeepWalk SkipGram,UNS,DANS,0.575286,0.324059,0.150573,0.525368,0.575286,0.358467,0.405738,0.531089,0.594262,0.474632,0.675941,0.527753,0.559374,0.592515
8,Walklets CBOW,UNS,DANS,0.903217,0.043621,0.806434,0.149945,0.903217,0.814525,0.04881,0.899201,0.95119,0.850055,0.956379,0.897783,0.975267,0.960448


In [14]:
# Write the table to sli_results.csv in the working directory.
# to_csv writes the row index as the first column by default.
sli_results.to_csv("sli_results.csv")