In [1]:
import os
import pandas as pd
import networkx as nx
from collections import defaultdict, Counter
import random
import numpy as np
import json
from data_loader.data_loaders import DataLoader
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import ttest_rel, ttest_ind

In [28]:
# Metric names used to index each run's results; the comparison cells pass
# metrics[1:], which leaves out the first entry ('loss').
metrics = [
    'loss',
    'accuracy',
    'precision',
    'recall',
    'roc_auc',
    'pr_auc',
    'f1_score',
    'mcc',
]

def get_comparison_bars(runs, metrics, title, log_dir='saved/log/DrugCombDB'):
    """Show a grouped bar chart comparing runs; for exactly two runs also print t-tests.

    Parameters
    ----------
    runs : list of dict
        One entry per run. Each dict needs 'path' (the run's folder name under
        ``log_dir``; ``<log_dir>/<path>/results.csv`` is read) and 'name' (the
        legend label). An optional truthy 'single_run' disables the error bars
        for that run.
    metrics : sequence of str
        Columns of ``results.csv`` to plot; one bar group per metric.
    title : str
        Figure title.
    log_dir : str, optional
        Directory that contains the run folders. Defaults to the location that
        used to be hard-coded, so existing calls behave as before.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``. When ``len(runs) == 2``,
        one ``metric: t=..., p=...`` line is printed per metric.

    Raises
    ------
    KeyError
        If a run dict lacks 'path'/'name' or a results file lacks a metric column.
    """
    metrics = list(metrics)

    # Select the metric columns *before* aggregating: any extra column in
    # results.csv (in particular a non-numeric one, which makes DataFrame.mean()
    # raise on pandas >= 2.0) is then never touched by mean()/std().
    dfs = [
        pd.read_csv(f'{log_dir}/{run["path"]}/results.csv')[metrics]
        for run in runs
    ]

    bars = []
    for run, df in zip(runs, dfs):
        bar_kwargs = dict(name=run['name'], x=metrics, y=df.mean())
        if not run.get('single_run', False):
            # Error bars show the standard deviation over the rows of results.csv.
            bar_kwargs['error_y'] = dict(type='data', array=df.std())
        bars.append(go.Bar(**bar_kwargs))

    fig = go.Figure(data=bars)
    fig.update_layout(barmode='group', title=title)
    fig.show()

    if len(runs) == 2:
        first, second = dfs
        for metric in metrics:
            # Independent two-sample t-test: the rows of the two result files
            # are treated as unpaired samples.
            t, p = ttest_ind(first[metric], second[metric])
            print(f'{metric}: t={t}, p={p}')

Comparison between the model that uses the PPI graph and the graphless model

In [13]:
# Run 1206_182941 ("Uses Graph") against run 1206_183245 ("Graphless").
runs = [
    dict(path='1206_182941', name="Uses Graph"),
    dict(path='1206_183245', name="Graphless"),
]
get_comparison_bars(runs, metrics[1:], "Impact of PPI Graph on Performance")

accuracy: t=0.8807080277616022, p=0.40415696718186045
precision: t=0.39724755141430934, p=0.7015729340128334
recall: t=1.1634840108445386, p=0.27815873885471976
roc_auc: t=0.5502717207841324, p=0.5971578725799047
pr_auc: t=-0.0718818974236095, p=0.944460483519214
f1_score: t=0.835932195614633, p=0.4274530317149271
mcc: t=0.8615882345723568, p=0.41399182380760946


Comparison between runs with and without neighborhood cleanup

In [8]:
# Run 1206_182941 ("cleaned up") against run 1206_185325 ("not cleaned up").
runs = [
    dict(path='1206_182941', name="cleaned up"),
    dict(path='1206_185325', name="not cleaned up"),
]
get_comparison_bars(runs, metrics[1:], "Impact of Neighborhood Cleaning on Performance")

accuracy: t=0.6276590629917481, p=0.5477204654439113
precision: t=0.3814394061808027, p=0.7128084979899947
recall: t=0.19923162489663127, p=0.8470521121505106
roc_auc: t=-0.2347361957203888, p=0.8203100137406858
pr_auc: t=0.09821921452150109, p=0.924174581138018
f1_score: t=0.41974937532191026, p=0.6857142453090821
mcc: t=0.6152182708070042, p=0.555502879812108


In [None]:
# Same graph run as above, compared with a different graphless run (1207_004035).
runs = [
    dict(path='1206_182941', name="Uses Graph"),
    dict(path='1207_004035', name="Graphless"),
]
get_comparison_bars(runs, metrics[1:], "Impact of PPI Graph on Performance (different parameters)")

accuracy: t=0.4618264831608488, p=0.6565046228810371
precision: t=0.1407511802638749, p=0.8915460197302164
recall: t=1.149317983265661, p=0.2836234836936246
roc_auc: t=0.005741670193644198, p=0.9955594309632111
pr_auc: t=-0.4660528429726561, p=0.6536040547328785
f1_score: t=0.6284997279315183, p=0.547196915669921
mcc: t=0.5109767008622084, p=0.6231667340964026


In [35]:
# Seven runs side by side; with more than two runs only the chart is produced
# (no t-tests are printed).
runs = [
    dict(path='1206_182941', name="Uses Graph"),
    dict(path='1206_183245', name="Graphless"),
    dict(path='1215_010223', name="Graphless simple"),
    dict(path='1215_024038', name="Graphless complex"),
    dict(path='1215_030957', name="Graphless complex (dropout: 0.2)"),
    dict(path='1215_044339', name="Graphless complex (dropout: 0.3)"),
    dict(path='1215_042124', name="Graphless complex (dropout: 0.45)"),
]
get_comparison_bars(runs, metrics[1:], "Compare different models")

In [34]:
# Every entry is flagged single_run, so the bars are drawn without error bars.
runs = [
    dict(path='1215_201954', name="Uses Graph with Step LR", single_run=True),
    dict(path='1215_173550', name="Cosine Annealing with Warm Restarts", single_run=True),
    dict(path='1215_171752', name="Cosine Annealing LR", single_run=True),
    dict(path='1215_032918', name="Graphless complex version (dropout: 0.3)", single_run=True),
    dict(path='1215_035944', name="Graphless complex version (dropout: 0.4)", single_run=True),
    dict(path='1215_041735', name="Graphless complex version (dropout: 0.45)", single_run=True),
]
get_comparison_bars(runs, metrics[1:], "Single run comparison")

In [28]:
# Graph run 1206_182941 against the graphless complex run with dropout 0.45.
runs = [
    dict(path='1206_182941', name="Uses Graph"),
    dict(path='1215_042124', name="Graphless complex (dropout: 0.45)"),
]
get_comparison_bars(runs, metrics[1:], "Impact of PPI Graph on Performance")

accuracy: t=-1.620468728354434, p=0.14379050479664318
precision: t=-2.399213422848414, p=0.04322975940313702
recall: t=2.926775725958032, p=0.019092151742319652
roc_auc: t=-4.552901791977158, p=0.001867240038841727
pr_auc: t=-4.570633564110166, p=0.0018242719009925034
f1_score: t=0.4738406956734791, p=0.6482756201059536
mcc: t=-1.3583384393086742, p=0.21141896951100264


In [30]:
# Graph run 1206_182941 against the graphless complex run with dropout 0.2.
runs = [
    dict(path='1206_182941', name="Uses Graph"),
    dict(path='1215_030957', name="Graphless complex version (dropout: 0.2)"),
]
get_comparison_bars(runs, metrics[1:], "Impact of PPI Graph on Performance")

accuracy: t=-0.25989031296089754, p=0.8015118466082678
precision: t=-0.5432554954594914, p=0.6017583900449408
recall: t=0.579404082992289, p=0.5782629893285911
roc_auc: t=-3.4775659790345443, p=0.008349419465892923
pr_auc: t=-4.3256510656962925, p=0.002526745253734908
f1_score: t=0.05697688424916698, p=0.955960993911305
mcc: t=-0.32741814967437793, p=0.7517551311396486


In [32]:

# Graph run 1206_182941 against the graphless complex run with dropout 0.3.
runs = [
    dict(path='1206_182941', name="Uses Graph"),
    dict(path='1215_044339', name="Graphless complex (dropout: 0.3)"),
]
get_comparison_bars(runs, metrics[1:], "Impact of PPI Graph on Performance")

accuracy: t=-2.378572549348777, p=0.044645033241818226
precision: t=-3.099392484188224, p=0.014679259159387866
recall: t=1.044526443176491, p=0.32677402340407274
roc_auc: t=-5.003944781379455, p=0.0010476515028065819
pr_auc: t=-5.351738119477179, p=0.0006844207528693353
f1_score: t=-0.6576223187845042, p=0.5292442497893071
mcc: t=-2.1615941151862734, p=0.0626336274025126


In [29]:
# Three single runs (no error bars), labelled by learning-rate schedule.
runs = [
    dict(path='1215_201954', name="Uses Graph with Step LR", single_run=True),
    dict(path='1215_173550', name="Cosine Annealing with Warm Restarts", single_run=True),
    dict(path='1215_171752', name="Cosine Annealing LR", single_run=True),
]
get_comparison_bars(runs, metrics[1:], "Graph Model")