In [8]:
import numpy as np
import networkx as nx
import pandas as pd


In [41]:
dc = pd.read_csv("../data/networks/coip_hc_full.tsv", sep = "\t", header = None)
Gc = nx.from_pandas_edgelist(dc, 0, 1, 2)
dy = pd.read_csv("../data/networks/y2h_hc_full.tsv", sep = "\t", header = None)
Gy = nx.from_pandas_edgelist(dy, 0, 1, 2)
dcs = pd.read_csv("../data/networks/coip_hc_shared.tsv", sep = "\t", header = None)
Gcs = nx.from_pandas_edgelist(dcs, 0, 1, 2)
dys = pd.read_csv("../data/networks/y2h_hc_shared.tsv", sep = "\t", header = None)
Gys = nx.from_pandas_edgelist(dys, 0, 1, 2)

In [25]:
def compute_random_samples_barabasi(n_nodes, n_avg_edges, runs, metrics = {}):
    results = {}
    for i in range(runs):
        print(f"Run: {i+1}")
        Gb = nx.barabasi_albert_graph(n_nodes, n_avg_edges)
        for metric_id, metric in metrics.items():
            if metric_id not in results:
                results[metric_id] = []
            results[metric_id].append(metric(Gb))
    return results

metrics = {"3-clustering" : (lambda G: nx.average_clustering(G)),
          "4-clustering": (lambda G: np.mean(list(nx.square_clustering(G).values())))}

In [44]:
from scipy.stats import t
def compute_p_value_onetail(result, value):
    dof = len(result)
    mean = np.average(result)
    std  = np.sqrt(dof/(dof - 1)) * np.std(result) # unbiased
    t_score = (value - mean) / (std) 
    print(t_score)
    return 1 - t.cdf(t_score, dof)

In [43]:
Gnets = {"COIP" : Gc, "Y2H" : Gy, "S-COIP" : Gcs, "S-Y2H": Gys}
adegs = {k : np.average(list(dict(G.degree).values())) for k, G in Gnets.items()}
adegs

{'COIP': 6.933001304399383,
 'Y2H': 9.654263986963606,
 'S-COIP': 6.893457006768614,
 'S-Y2H': 6.324160565513759}

In [46]:
measures = dict()
for netname, G in Gnets.items():
    print(f"Running experiments on {netname}")
    measures[netname] = compute_random_samples_barabasi(len(G), int(adegs[netname] + 1/2), 20, metrics)

Running experiments on COIP
Run: 1
Run: 2
Run: 3
Run: 4
Run: 5
Run: 6
Run: 7
Run: 8
Run: 9
Run: 10
Run: 11
Run: 12
Run: 13
Run: 14
Run: 15
Run: 16
Run: 17
Run: 18
Run: 19
Run: 20
Running experiments on Y2H
Run: 1
Run: 2
Run: 3
Run: 4
Run: 5
Run: 6
Run: 7
Run: 8
Run: 9
Run: 10
Run: 11
Run: 12
Run: 13
Run: 14
Run: 15
Run: 16
Run: 17
Run: 18
Run: 19
Run: 20
Running experiments on S-COIP
Run: 1
Run: 2
Run: 3
Run: 4
Run: 5
Run: 6
Run: 7
Run: 8
Run: 9
Run: 10
Run: 11
Run: 12
Run: 13
Run: 14
Run: 15
Run: 16
Run: 17
Run: 18
Run: 19
Run: 20
Running experiments on S-Y2H
Run: 1
Run: 2
Run: 3
Run: 4
Run: 5
Run: 6
Run: 7
Run: 8
Run: 9
Run: 10
Run: 11
Run: 12
Run: 13
Run: 14
Run: 15
Run: 16
Run: 17
Run: 18
Run: 19
Run: 20


In [49]:
reported_scores = {"3_clust": {}, "4_clust": {}}
for netname, G in Gnets.items():
    reported_scores["3_clust"][netname] = metrics["3-clustering"](G)
    reported_scores["4_clust"][netname] = metrics["4-clustering"](G)

In [50]:
reported_scores

{'3_clust': {'COIP': 0.1491005172236027,
  'Y2H': 0.0545058052240701,
  'S-COIP': 0.16051943577950162,
  'S-Y2H': 0.046768868100458855},
 '4_clust': {'COIP': 0.04410007120960226,
  'Y2H': 0.030597263163203638,
  'S-COIP': 0.04785061857566836,
  'S-Y2H': 0.025622544396029873}}

In [53]:
for netname, G in Gnets.items():
    reported_scores["3_clust"][f"{netname}-pvalue"] = compute_p_value_onetail(measures[netname]["3-clustering"], reported_scores["3_clust"][netname])
    reported_scores["4_clust"][f"{netname}-pvalue"] = compute_p_value_onetail(measures[netname]["4-clustering"], reported_scores["4_clust"][netname])

340.3431903342301
595.4827890160647
91.25667680805545
240.6870406561348
189.36715313906222
209.7619319744132
31.1767837696944
161.384166834821


In [54]:
reported_scores

{'3_clust': {'COIP': 0.1491005172236027,
  'Y2H': 0.0545058052240701,
  'S-COIP': 0.16051943577950162,
  'S-Y2H': 0.046768868100458855,
  'COIP-pvalue': 0.0,
  'Y2H-pvalue': 0.0,
  'S-COIP-pvalue': 0.0,
  'S-Y2H-pvalue': 0.0},
 '4_clust': {'COIP': 0.04410007120960226,
  'Y2H': 0.030597263163203638,
  'S-COIP': 0.04785061857566836,
  'S-Y2H': 0.025622544396029873,
  'COIP-pvalue': 0.0,
  'Y2H-pvalue': 0.0,
  'S-COIP-pvalue': 0.0,
  'S-Y2H-pvalue': 0.0}}