<h1>Extracting results</h1>
<p>Here we extract data required to plot results from the data generated by ... </p>

In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None 
from glob import glob
import json
from downloaders import BaseDownloader

In [14]:
downloader = BaseDownloader()

downloader.download("https://zenodo.org/record/8156142/files/experiments.tgz")

Downloading to downloads/attach...e=experiments.tgz:   0%|                                                    …

Unnamed: 0,status_code,file_size,downloaded_file_size,url,destination,success,cached,exception,extraction_file_size,extraction_destination,extraction_cached,extraction_success
0,200,110422815,110422815,https://zenodo.org/record/8156142/files/experi...,downloads/attachment; filename=experiments.tgz,True,False,,25,downloads/attachment; filename=experiments,False,True


In [None]:
!ls

<h3>collecting data</h3>
<p>The following function renames some columns, recodes some True/False columns using string values, and outputs a subset of columns into a CSV file that can be used for plotting</p>

In [33]:
def extract_core_results(df):
    df["train_size"] = [
        float(json.loads(holdouts_kwargs)["train_size"])
        for holdouts_kwargs in df.holdouts_kwargs
    ]

    df["features_names"] = [
        json.loads(edge_feature.replace("'", "\""))[0] if pd.notna(edge_feature) else feature_name
        for feature_name, edge_feature in zip(
            df.features_names,
            df["('model_parameters', 'edge_features')"]
        )
    ]
    df["evaluation_negative_sampling_method"] = [
    "DANS"
    if use_scale_free_distribution
    else "UNS"
    for use_scale_free_distribution in df.use_scale_free_distribution
    ]
    df["model_negative_sampling_method"] = [ "DANS" if use_scale_free_distribution
          else "UNS"
          for use_scale_free_distribution in df["('model_parameters', 'use_scale_free_distribution')"]                                  
                                            
        ]
    columns = ["evaluation_mode", "features_names","evaluation_negative_sampling_method",
               "model_negative_sampling_method", "accuracy",  "balanced_accuracy", 
               "false_discovery_rate","matthews_correlation_coefficient",
           "precision", "recall", "specificity", "f1_score", "auroc", "auprc"]
    return df[columns].copy()

In [5]:
# Set to directory in which the 'experiments' directory was placed 
indirectory = input()

KeyboardInterrupt: Interrupted by user

In [8]:
indirectory = "."

<h3>STRING Protein-Protein Association data</h3>

In [9]:
import pandas as pd

df = pd.concat([
    pd.read_csv(
        path,
        index_col=0
    )
    for path in glob(f"{indirectory}/experiments/Edge Prediction/HomoSapiens/holdout_*/*.csv.gz")
]).reset_index(drop=True)

ValueError: No objects to concatenate

In [5]:
string_results = extract_core_results(df)

In [6]:
string_results.head()

Unnamed: 0,evaluation_mode,features_names,evaluation_negative_sampling_method,model_negative_examples,accuracy,balanced_accuracy,false_discovery_rate,matthews_correlation_coefficient,precision,recall,specificity,f1_score,auroc,auprc
2,train,Walklets GloVe,DANS,UNS,0.50431,0.504299,0.496403,0.008798,0.503597,0.610293,0.398304,0.551835,0.504209,0.502719
3,test,Walklets GloVe,DANS,UNS,0.502032,0.502023,0.498269,0.00413,0.501731,0.602818,0.401227,0.547649,0.501541,0.499348
4,train,Degree,DANS,DANS,0.499494,0.499448,0.500232,-0.005322,0.499768,0.988554,0.010341,0.663899,0.487517,0.47403
5,test,Degree,DANS,DANS,0.499014,0.498969,0.500476,-0.009792,0.499524,0.987761,0.010178,0.663505,0.477872,0.469604
6,train,Degree,DANS,DANS,0.470948,0.470928,0.520309,-0.064339,0.479691,0.685,0.256856,0.56425,0.490493,0.517127


In [7]:
string_results.to_csv("string_results.csv")

<h3>SLI synthetic lethality results</h3>

In [8]:
df = pd.concat([
    pd.read_csv(
        path,
        index_col=0
    )
    for path in glob(f"{indirectory}/experiments/Edge Prediction/(SLDB | HomoSapiens)/holdout_*/*.csv.gz")
]).reset_index(drop=True)

In [10]:
sli_results = extract_core_results(df)

In [11]:
sli_results.head()

Unnamed: 0,evaluation_mode,features_names,evaluation_negative_sampling_method,model_negative_examples,accuracy,balanced_accuracy,false_discovery_rate,matthews_correlation_coefficient,precision,recall,specificity,f1_score,auroc,auprc
0,train,First-order LINE,DANS,DANS,0.697383,0.697383,0.279268,0.396993,0.720732,0.644493,0.750273,0.680484,0.760481,0.745744
1,test,First-order LINE,DANS,DANS,0.626023,0.626023,0.328125,0.261524,0.671875,0.492635,0.759411,0.568461,0.676276,0.68752
4,train,DeepWalk SkipGram,UNS,DANS,0.733915,0.733915,0.286991,0.4701,0.713009,0.782988,0.684842,0.746362,0.786881,0.737826
5,test,DeepWalk SkipGram,UNS,DANS,0.575286,0.575286,0.405738,0.15372,0.594262,0.474632,0.675941,0.527753,0.559374,0.592515
8,train,Walklets CBOW,UNS,DANS,0.903217,0.903217,0.04881,0.811031,0.95119,0.850055,0.956379,0.897783,0.975267,0.960448


In [12]:
sli_results.to_csv("sli_results.csv")

<H1>Aggregating Results</H1>
<p>Here, we calculate the mean and standard error of the
    balanced accuracy, false discovery rate, matthews correlation coefficient,
    F1 score, AUROC, and AUPRC.</p>

In [13]:
def get_mean_and_sem(df):
    """
    df should be one of string_results or sli_results
    """
    # We are just interested in the following seven graph/random walk methods
    graph_methods = {'First-order LINE', 'DeepWalk SkipGram', 'Walklets CBOW', 'HOPE',
                 'Second-order LINE','DeepWalk CBOW',    'Walklets SkipGram'}
    df = df[df['features_names'].isin(graph_methods)]
    # Rename columns for conciseness
    df =  df.rename(columns={"evaluation_negative_sampling_method": "evaluation", 
                   "evaluation_mode": "mode","features_names":"methods"}, errors="raise")
    # Define the columns that we want to calculate mean and sem for
    evaluation_d = {'balanced_accuracy':['mean','sem'],
               'false_discovery_rate':['mean','sem'], 
               'matthews_correlation_coefficient':['mean','sem'], 
               'f1_score':['mean','sem'], 
               'auroc':['mean','sem'], 
               'auprc':['mean','sem']}
    # Calculate mean and standard dev
    df2 =  df.groupby(['methods','mode','evaluation']).agg(evaluation_d).reset_index()
    # Make a new column for convenience in planning
    df2["approach"] = df2["evaluation"] + " (" + df2["mode"] + ")"
    return df2

In [14]:
# Rename columns from tuples to simple strings for readability
columns = ["methods",'mode','evaluation', "balanced_acc.mean","balanced_acc.sem","FDR.mean",
                    "FDR.sem", "MCC.mean", "MCC.sem", "F1.mean", "F1.sem", "AUROC.mean",
                     "AUROC.sem", "AUPRC.mean","AUPRC.sem", "approach"]

<h3>SLI</h3>

In [15]:
sli_stats = get_mean_and_sem(sli_results)

In [16]:
sli_stats.head()

Unnamed: 0_level_0,methods,mode,evaluation,balanced_accuracy,balanced_accuracy,false_discovery_rate,false_discovery_rate,matthews_correlation_coefficient,matthews_correlation_coefficient,f1_score,f1_score,auroc,auroc,auprc,auprc,approach
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,sem,mean,sem,mean,sem,mean,sem,mean,sem,mean,sem,Unnamed: 16_level_1
0,DeepWalk CBOW,test,DANS,0.593863,0.007913,0.401498,0.004222,0.200131,0.016298,0.567944,0.02571,0.61176,0.007149,0.578986,0.004397,DANS (test)
1,DeepWalk CBOW,test,UNS,0.739648,0.030825,0.176195,0.02903,0.499108,0.059584,0.672132,0.044239,0.79091,0.037973,0.799302,0.033149,UNS (test)
2,DeepWalk CBOW,train,DANS,0.703653,0.002509,0.316949,0.009867,0.437531,0.009968,0.722168,0.010358,0.785701,0.007852,0.751405,0.007399,DANS (train)
3,DeepWalk CBOW,train,UNS,0.849059,0.023641,0.118643,0.017021,0.703968,0.045977,0.834441,0.027203,0.895227,0.021091,0.891749,0.016947,UNS (train)
4,DeepWalk SkipGram,test,DANS,0.63081,0.009415,0.368825,0.004188,0.283764,0.020836,0.614877,0.02626,0.652207,0.017394,0.626466,0.008111,DANS (test)


In [17]:
# rename for better legibility
sli_stats.columns = columns
sli_stats.head()

Unnamed: 0,methods,mode,evaluation,balanced_acc.mean,balanced_acc.sem,FDR.mean,FDR.sem,MCC.mean,MCC.sem,F1.mean,F1.sem,AUROC.mean,AUROC.sem,AUPRC.mean,AUPRC.sem,approach
0,DeepWalk CBOW,test,DANS,0.593863,0.007913,0.401498,0.004222,0.200131,0.016298,0.567944,0.02571,0.61176,0.007149,0.578986,0.004397,DANS (test)
1,DeepWalk CBOW,test,UNS,0.739648,0.030825,0.176195,0.02903,0.499108,0.059584,0.672132,0.044239,0.79091,0.037973,0.799302,0.033149,UNS (test)
2,DeepWalk CBOW,train,DANS,0.703653,0.002509,0.316949,0.009867,0.437531,0.009968,0.722168,0.010358,0.785701,0.007852,0.751405,0.007399,DANS (train)
3,DeepWalk CBOW,train,UNS,0.849059,0.023641,0.118643,0.017021,0.703968,0.045977,0.834441,0.027203,0.895227,0.021091,0.891749,0.016947,UNS (train)
4,DeepWalk SkipGram,test,DANS,0.63081,0.009415,0.368825,0.004188,0.283764,0.020836,0.614877,0.02626,0.652207,0.017394,0.626466,0.008111,DANS (test)


In [18]:
sli_stats.to_csv("sli_stats.csv")

<h3>STRING</h3>

In [19]:
string_stats = get_mean_and_sem(string_results)
string_stats.head()

Unnamed: 0_level_0,methods,mode,evaluation,balanced_accuracy,balanced_accuracy,false_discovery_rate,false_discovery_rate,matthews_correlation_coefficient,matthews_correlation_coefficient,f1_score,f1_score,auroc,auroc,auprc,auprc,approach
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,sem,mean,sem,mean,sem,mean,sem,mean,sem,mean,sem,Unnamed: 16_level_1
0,DeepWalk CBOW,test,DANS,0.822263,0.000686,0.149725,0.000854,0.647233,0.00134,0.814773,0.000802,0.889122,0.000489,0.875925,0.000754,DANS (test)
1,DeepWalk CBOW,test,UNS,0.854046,0.000923,0.08734,0.001204,0.715841,0.001826,0.842817,0.001038,0.921659,0.00082,0.918986,0.000508,UNS (test)
2,DeepWalk CBOW,train,DANS,0.874667,0.0014,0.135235,0.00096,0.75084,0.002827,0.875555,0.00149,0.933964,0.000881,0.912928,0.000511,DANS (train)
3,DeepWalk CBOW,train,UNS,0.906246,0.001618,0.078987,0.00121,0.813801,0.003191,0.903771,0.00172,0.955888,0.001015,0.945469,0.000488,UNS (train)
4,DeepWalk SkipGram,test,DANS,0.866264,0.000729,0.111139,0.000876,0.735877,0.001262,0.861634,0.001018,0.912387,0.000512,0.869357,0.000908,DANS (test)


In [20]:
# rename for better legibility
string_stats.columns = columns
string_stats.head()

Unnamed: 0,methods,mode,evaluation,balanced_acc.mean,balanced_acc.sem,FDR.mean,FDR.sem,MCC.mean,MCC.sem,F1.mean,F1.sem,AUROC.mean,AUROC.sem,AUPRC.mean,AUPRC.sem,approach
0,DeepWalk CBOW,test,DANS,0.822263,0.000686,0.149725,0.000854,0.647233,0.00134,0.814773,0.000802,0.889122,0.000489,0.875925,0.000754,DANS (test)
1,DeepWalk CBOW,test,UNS,0.854046,0.000923,0.08734,0.001204,0.715841,0.001826,0.842817,0.001038,0.921659,0.00082,0.918986,0.000508,UNS (test)
2,DeepWalk CBOW,train,DANS,0.874667,0.0014,0.135235,0.00096,0.75084,0.002827,0.875555,0.00149,0.933964,0.000881,0.912928,0.000511,DANS (train)
3,DeepWalk CBOW,train,UNS,0.906246,0.001618,0.078987,0.00121,0.813801,0.003191,0.903771,0.00172,0.955888,0.001015,0.945469,0.000488,UNS (train)
4,DeepWalk SkipGram,test,DANS,0.866264,0.000729,0.111139,0.000876,0.735877,0.001262,0.861634,0.001018,0.912387,0.000512,0.869357,0.000908,DANS (test)


In [21]:
string_stats.to_csv("string_stats.csv")

In [3]:
def qc_df(df):
    """
    Check our assunptions about the data
    """
    import math
    modes = set(df['evaluation_mode'].unique())
    if modes != {'train', 'test'}:
        raise ValueError(f"Invalid evaluation modes: {modes}")
    # The actual training size may be different depending on actual dataset but should always be near 0.75
    ALLOWABLE_DELTA = 0.001
    traning_sizes = df['train_size'].unique()
    for ts in traning_sizes:
        if abs(ts - 0.75) > ALLOWABLE_DELTA:
            print(f"Malformed training size: {ts}")
    val_unbalance = df['validation_unbalance_rate'].unique()
    if len(val_unbalance) != 1 or abs(val_unbalance[0] - 1.0) > ALLOWABLE_DELTA:
        raise ValueError(f"Invalid validation_unbalance_rate : {validation_unbalance_rate}")
    scale_free_dist = set(df['use_scale_free_distribution'].unique())  
    if scale_free_dist != set([True, False]):
        raise ValueError(f"Invalid scale_free_dist : {validation_unbalance_rate}")
    feature_scale_free= set(df["('features_parameters', 'use_scale_free_distribution')"].unique())
    if feature_scale_free != set([True, False]):
        raise ValueError(f"Invalid feature_scale_free : {feature_scale_free}")

In [43]:

df = pd.read_csv("sli_only_results_jul23.tsv", sep='\t')

In [45]:
df3 = extract_core_results(df)

In [36]:
df3.columns

Index(['evaluation_mode', 'features_names',
       'evaluation_negative_sampling_method', 'model_negative_sampling_method',
       'accuracy', 'balanced_accuracy', 'false_discovery_rate',
       'matthews_correlation_coefficient', 'precision', 'recall',
       'specificity', 'f1_score', 'auroc', 'auprc'],
      dtype='object')

In [46]:
def get_mean_and_std(df):
    """
    df should be one of string_results or sli_results
    """
    # We are just interested in the following seven graph/random walk methods
    graph_methods = {'First-order LINE', 'DeepWalk SkipGram', 'Walklets CBOW', 'HOPE',
                 'Second-order LINE','DeepWalk CBOW',    'Walklets SkipGram'}
    df = df[df['features_names'].isin(graph_methods)]
    # Rename columns for conciseness
    df =  df.rename(columns={"evaluation_negative_sampling_method": "evaluation", 
                   "evaluation_mode": "mode","features_names":"methods"}, errors="raise")
    # Define the columns that we want to calculate mean and sem for
    evaluation_d = {'balanced_accuracy':['mean','std'],
               'false_discovery_rate':['mean','std'], 
               'matthews_correlation_coefficient':['mean','std'], 
               'f1_score':['mean','std'], 
               'auroc':['mean','std'], 
               'auprc':['mean','std']}
    # Calculate mean and standard dev
    df2 =  df.groupby(['methods','mode','evaluation','model_negative_sampling_method']).agg(evaluation_d).reset_index()
    # Make a new column for convenience in planning
    df2["approach"] = df2["evaluation"] + " (" + df2["mode"] + ")"
    return df2

In [47]:

summary = get_mean_and_std(df3)

In [48]:
summary

Unnamed: 0_level_0,methods,mode,evaluation,model_negative_sampling_method,balanced_accuracy,balanced_accuracy,false_discovery_rate,false_discovery_rate,matthews_correlation_coefficient,matthews_correlation_coefficient,f1_score,f1_score,auroc,auroc,auprc,auprc,approach
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,Unnamed: 17_level_1
0,DeepWalk CBOW,test,DANS,DANS,0.613175,0.014518,0.286633,0.034964,0.256424,0.033425,0.495685,0.029277,0.679624,0.034233,0.674321,0.028182,DANS (test)
1,DeepWalk CBOW,test,DANS,UNS,0.80311,0.01724,0.181817,0.026,0.607548,0.034897,0.798539,0.017842,0.877389,0.016808,0.87767,0.016867,DANS (test)
2,DeepWalk CBOW,test,UNS,DANS,0.619108,0.024639,0.24385,0.039998,0.281914,0.051737,0.477817,0.049237,0.702873,0.040953,0.698838,0.033761,UNS (test)
3,DeepWalk CBOW,test,UNS,UNS,0.866162,0.015686,0.061168,0.021558,0.742785,0.031252,0.85405,0.017742,0.945937,0.016853,0.949158,0.015656,UNS (test)
4,DeepWalk CBOW,train,DANS,DANS,0.806543,0.029446,0.167236,0.032533,0.615213,0.059065,0.798591,0.030886,0.884297,0.030673,0.881733,0.024366,DANS (train)
5,DeepWalk CBOW,train,DANS,UNS,0.884215,0.018312,0.156271,0.019168,0.774106,0.03661,0.890707,0.017157,0.956501,0.012888,0.953667,0.012262,DANS (train)
6,DeepWalk CBOW,train,UNS,DANS,0.822574,0.016656,0.128462,0.023757,0.651115,0.033708,0.810111,0.018179,0.902691,0.02258,0.897518,0.018843,UNS (train)
7,DeepWalk CBOW,train,UNS,UNS,0.944902,0.016756,0.053202,0.016118,0.889848,0.033476,0.944762,0.016859,0.985537,0.006988,0.984878,0.006402,UNS (train)
8,DeepWalk SkipGram,test,DANS,DANS,0.640712,0.034001,0.27011,0.058812,0.305532,0.072914,0.555229,0.048435,0.629767,0.052951,0.691001,0.047734,DANS (test)
9,DeepWalk SkipGram,test,DANS,UNS,0.852782,0.015346,0.131236,0.018634,0.706409,0.030673,0.849557,0.015638,0.889865,0.013751,0.869104,0.033782,DANS (test)
