In [1]:
import os
import pandas as pd
import networkx as nx
from collections import defaultdict, Counter
import random
import numpy as np
import json
from data_loader.data_loaders import DataLoader
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import ttest_rel, ttest_ind

In [2]:
# Column names looked up in each run's results.csv by get_comparison_bars.
# 'loss' is kept first so the cells below can drop it with metrics[1:].
metrics = [
    'loss',
    'accuracy',
    'precision',
    'recall',
    'roc_auc',
    'pr_auc',
    'f1_score',
    'mcc',
]

def get_comparison_bars(runs, metrics, title, log_dir='saved/log/DrugCombDB'):
    """Plot a grouped bar chart comparing metric means across runs.

    For every run, ``{log_dir}/{run['path']}/results.csv`` is read and the mean
    of each requested metric column is drawn as one bar group. Runs that are not
    flagged ``single_run`` also get error bars equal to the per-column sample
    standard deviation. When exactly two runs are given, an independent
    two-sample t-test (``scipy.stats.ttest_ind``) is printed for every metric.

    Args:
        runs: list of dicts with keys ``'path'`` (run directory name),
            ``'name'`` (legend label) and optional ``'single_run'`` (bool;
            when true, error bars are omitted for that run).
        metrics: iterable of column names present in every results.csv.
        title: figure title.
        log_dir: directory that contains the run directories. Defaults to the
            location used by the original notebook.
    """
    metrics = list(metrics)

    dfs = [pd.read_csv(f"{log_dir}/{run['path']}/results.csv") for run in runs]

    data = []
    for run, df in zip(runs, dfs):
        # Select the metric columns *before* aggregating: calling mean()/std()
        # on the whole frame raises on pandas >= 2.0 if results.csv carries any
        # non-numeric column, and needlessly aggregates unused columns.
        selected = df[metrics]
        bar_kwargs = dict(name=run['name'], x=metrics, y=selected.mean())
        if not run.get('single_run', False):
            bar_kwargs['error_y'] = dict(type='data', array=selected.std())
        data.append(go.Bar(**bar_kwargs))

    fig = go.Figure(data=data)
    fig.update_layout(barmode='group', title=title)
    fig.show()

    # A pairwise significance test is only meaningful for exactly two runs.
    if len(runs) == 2:
        for metric in metrics:
            t, p = ttest_ind(dfs[0][metric], dfs[1][metric])
            print(f'{metric}: t={t}, p={p}')

Comparison between using the PPI graph and going graphless

In [13]:
# Exactly two runs are passed, so get_comparison_bars also prints t-tests.
runs = [
    {'path': run_id, 'name': label}
    for run_id, label in [
        ('1206_182941', "Uses Graph"),
        ('1206_183245', "Graphless"),
    ]
]
get_comparison_bars(
    runs,
    metrics[1:],  # every metric except the leading 'loss'
    "Impact of PPI Graph on Performance",
)

accuracy: t=0.8807080277616022, p=0.40415696718186045
precision: t=0.39724755141430934, p=0.7015729340128334
recall: t=1.1634840108445386, p=0.27815873885471976
roc_auc: t=0.5502717207841324, p=0.5971578725799047
pr_auc: t=-0.0718818974236095, p=0.944460483519214
f1_score: t=0.835932195614633, p=0.4274530317149271
mcc: t=0.8615882345723568, p=0.41399182380760946


Comparison between neighborhood cleanup and no cleanup

In [8]:
# Exactly two runs are passed, so get_comparison_bars also prints t-tests.
runs = [
    {'path': run_id, 'name': label}
    for run_id, label in [
        ('1206_182941', "cleaned up"),
        ('1206_185325', "not cleaned up"),
    ]
]
get_comparison_bars(
    runs,
    metrics[1:],  # every metric except the leading 'loss'
    "Impact of Neighborhood Cleaning on Performance",
)

accuracy: t=0.6276590629917481, p=0.5477204654439113
precision: t=0.3814394061808027, p=0.7128084979899947
recall: t=0.19923162489663127, p=0.8470521121505106
roc_auc: t=-0.2347361957203888, p=0.8203100137406858
pr_auc: t=0.09821921452150109, p=0.924174581138018
f1_score: t=0.41974937532191026, p=0.6857142453090821
mcc: t=0.6152182708070042, p=0.555502879812108


In [None]:
# Same graph run as before, compared against a different graphless run.
runs = [
    {'path': run_id, 'name': label}
    for run_id, label in [
        ('1206_182941', "Uses Graph"),
        ('1207_004035', "Graphless"),
    ]
]
get_comparison_bars(
    runs,
    metrics[1:],  # every metric except the leading 'loss'
    "Impact of PPI Graph on Performance (different parameters)",
)

accuracy: t=0.4618264831608488, p=0.6565046228810371
precision: t=0.1407511802638749, p=0.8915460197302164
recall: t=1.149317983265661, p=0.2836234836936246
roc_auc: t=0.005741670193644198, p=0.9955594309632111
pr_auc: t=-0.4660528429726561, p=0.6536040547328785
f1_score: t=0.6284997279315183, p=0.547196915669921
mcc: t=0.5109767008622084, p=0.6231667340964026


In [13]:
# More than two runs: only the grouped bar chart is produced, no t-tests.
runs = [
    {'path': run_id, 'name': label}
    for run_id, label in [
        ('1206_182941', "Uses Graph"),
        ('1206_183245', "Graphless"),
        ('1215_010223', "Graphless simple"),
        ('0106_232004', "Graphless complex (dropout: 0.3, l: 3)"),
        ('0106_233137', "Graphless complex (dropout: 0.3, l: 4)"),
        ('0106_235344', "Graphless complex (dropout: 0.3, l: 6)"),
    ]
]
get_comparison_bars(
    runs,
    metrics[1:],  # every metric except the leading 'loss'
    "Compare different models",
)

In [34]:
# Every entry is flagged single_run, so the bars are drawn without error bars.
runs = [
    {'path': run_id, 'name': label, 'single_run': True}
    for run_id, label in [
        ('1215_201954', "Uses Graph with Step LR"),
        ('1215_173550', "Cosine Annealing with Warm Restarts"),
        ('1215_171752', "Cosine Annealing LR"),
        ('1215_032918', "Graphless complex version (dropout: 0.3)"),
        ('1215_035944', "Graphless complex version (dropout: 0.4)"),
        ('1215_041735', "Graphless complex version (dropout: 0.45)"),
    ]
]
get_comparison_bars(
    runs,
    metrics[1:],  # every metric except the leading 'loss'
    "Single run comparison",
)

In [8]:
# Exactly two runs are passed, so get_comparison_bars also prints t-tests.
runs = [
    {'path': run_id, 'name': label}
    for run_id, label in [
        ('1206_182941', "Uses Graph"),
        ('0106_232004', "Graphless complex (dropout: 0.3, l: 3)"),
    ]
]
get_comparison_bars(
    runs,
    metrics[1:],  # every metric except the leading 'loss'
    "Impact of PPI Graph on Performance",
)

accuracy: t=-0.7722420781826748, p=0.46217243481854287
precision: t=-2.2173633399706, p=0.05741777162627444
recall: t=2.9306129523209083, p=0.018980331648249596
roc_auc: t=-3.7878039314680736, p=0.005328489127517502
pr_auc: t=-4.840040787000261, p=0.0012880626449002148
f1_score: t=1.1801317672718736, p=0.27184647698707914
mcc: t=-0.5269416209436466, p=0.6125287882957842


In [10]:
# Exactly two runs are passed, so get_comparison_bars also prints t-tests.
runs = [
    {'path': run_id, 'name': label}
    for run_id, label in [
        ('1206_182941', "Uses Graph"),
        ('0106_233137', "Graphless complex (dropout: 0.3, l: 4)"),
    ]
]
get_comparison_bars(
    runs,
    metrics[1:],  # every metric except the leading 'loss'
    "Impact of PPI Graph on Performance",
)

accuracy: t=-1.873711081300969, p=0.09784721891077285
precision: t=-3.130520793394774, p=0.014004261228728625
recall: t=4.385749411766259, p=0.0023307708962342732
roc_auc: t=-4.746801254915543, p=0.0014512045141549617
pr_auc: t=-4.0824811393281335, p=0.0035220294616912632
f1_score: t=0.14571737419946673, p=0.8877491928881562
mcc: t=-1.649498857339703, p=0.13765488054964856


In [32]:

# Exactly two runs are passed, so get_comparison_bars also prints t-tests.
runs = [
    {'path': run_id, 'name': label}
    for run_id, label in [
        ('1206_182941', "Uses Graph"),
        ('1215_044339', "Graphless complex (dropout: 0.3)"),
    ]
]
get_comparison_bars(
    runs,
    metrics[1:],  # every metric except the leading 'loss'
    "Impact of PPI Graph on Performance",
)

accuracy: t=-2.378572549348777, p=0.044645033241818226
precision: t=-3.099392484188224, p=0.014679259159387866
recall: t=1.044526443176491, p=0.32677402340407274
roc_auc: t=-5.003944781379455, p=0.0010476515028065819
pr_auc: t=-5.351738119477179, p=0.0006844207528693353
f1_score: t=-0.6576223187845042, p=0.5292442497893071
mcc: t=-2.1615941151862734, p=0.0626336274025126


In [29]:
# Every entry is flagged single_run, so the bars are drawn without error bars.
runs = [
    {'path': run_id, 'name': label, 'single_run': True}
    for run_id, label in [
        ('1215_201954', "Uses Graph with Step LR"),
        ('1215_173550', "Cosine Annealing with Warm Restarts"),
        ('1215_171752', "Cosine Annealing LR"),
    ]
]
get_comparison_bars(
    runs,
    metrics[1:],  # every metric except the leading 'loss'
    "Graph Model",
)

In [11]:
# Every entry is flagged single_run, so the bars are drawn without error bars.
runs = [
    {'path': run_id, 'name': label, 'single_run': True}
    for run_id, label in [
        ('0106_113749', "Uses Graph"),
        ('1223_221739', "Complex, dropout: 0.2, layers: 2"),
        ('1223_223038', "Complex, dropout: 0.3, layers: 3"),
        ('0106_233438', "Complex, d: 0.3, l: 6, cos anneal warm restarts"),
        ('0106_233527', "Complex, d: 0.3, l: 6, cos anneal"),
        ('0106_234924', "Complex, d: 0.3, l: 9, cos anneal"),
    ]
]
get_comparison_bars(
    runs,
    metrics[1:],  # every metric except the leading 'loss'
    "Graph Model",
)