In [140]:
import os
import anndata as ad
import umap
import scipy as sip
from scipy import sparse
from sklearn import preprocessing
import scanpy as scp
import pickle
import pandas as pd
import numpy as np
import igraph as ig
import seaborn as sb
import snf
import sklearn as skl
from sklearn import cluster
from sklearn.metrics import adjusted_mutual_info_score as ami
from snf import compute, metrics
import matplotlib.pyplot as plt
from collections import Counter
import math
import itertools

In [141]:
# Root of the data directory, resolved relative to the current working directory.
data = os.path.abspath(os.path.join(os.getcwd(), "../data"))

# Input and output branches below the data root.
# NOTE: `input` shadows the Python builtin; the name is kept because other cells read it.
input, output = (os.path.join(data, branch) for branch in ("input", "output"))

# Lookup tables read from the input branch.
complementary = os.path.join(input, "complementary")

# Artefacts shared between the aggregation levels.
shared = os.path.join(output, "shared_info_74")

# One output directory per aggregation level.
level1, level2, level3 = (os.path.join(output, "level" + str(n)) for n in (1, 2, 3))

In [142]:
# Load the pickled graph collection of every level; each level directory holds
# its own "graphs_74.pickle".
# NOTE: pickle.load can execute arbitrary code - only load files produced by this project.
_graph_collections = []
for _level_dir in (level1, level2, level3):
    with open(os.path.join(_level_dir, "graphs_74.pickle"), "rb") as f:
        _graph_collections.append(pickle.load(f))

level1_graphs, level2_graphs, level3_graphs = _graph_collections

In [143]:
# Feature-matrix variant to load. Choose the file stem from
# ["feature_matrices_standard", "feature_matrices", "feature_matrices_quant_N",
#  "feature_matrices_quant_U", "feature_matrices_minmax", "feature_matrices_maxabs"].
_feature_matrix_file = "feature_matrices_standard.pickle"

_feature_matrix_sets = []
for _level_dir in (level1, level2, level3):
    with open(os.path.join(_level_dir, _feature_matrix_file), "rb") as f:
        _feature_matrix_sets.append(pickle.load(f))

level1_fm, level2_fm, level3_fm = _feature_matrix_sets

In [144]:
# Transformation name; knn() copies `layers[tr]` into X unless tr == "wot".
# Choose between ["standard", "wot", "quant_N", "quant_U", "minmax", "maxabs"].
# It should match the feature-matrix suffix loaded above,
# e.g. feature_matrices_standard goes with tr = "standard".
tr = "standard"

In [145]:
def _load_pickle(directory, filename):
    """Unpickle `filename` located in `directory` and return the stored object.

    NOTE: pickle.load can execute arbitrary code - only load files produced by this project.
    """
    with open(os.path.join(directory, filename), "rb") as f:
        return pickle.load(f)


# Patient list shared by all levels.
patients_74 = _load_pickle(shared, "patients_74.pickle")

# Module name <-> id lookups.
module_name_to_id = _load_pickle(complementary, "module_name_to_id.pickle")
id_to_module_name = _load_pickle(complementary, "id_to_module_name.pickle")

# Graph id <-> modality lookups.
id_to_modality = _load_pickle(shared, "id_to_modality.pickle")
modality_to_id = _load_pickle(shared, "modality_to_id.pickle")

# Graph id <-> name lookups.
id_to_name = _load_pickle(shared, "id_to_name.pickle")
name_to_id = _load_pickle(shared, "name_to_id.pickle")

# Noise graphs and their name <-> id lookups.
noise_74 = _load_pickle(level1, "noise_graphs_74.pickle")
noise_name_to_id = _load_pickle(shared, "noise_name_to_id.pickle")
noise_id_to_name = _load_pickle(shared, "noise_id_to_name.pickle")

# Ground-truth objects ("d" / "dg" variants).
gt_d = _load_pickle(shared, "ground_truth_d.pickle")
gt_dg = _load_pickle(shared, "ground_truth_dg.pickle")

In [146]:
# Distinct Disease / DiseaseGroup labels found in the obs table of graph "G0".
_g0_obs = level1_graphs["G0"].obs
diseases = list(_g0_obs.Disease.unique())
disease_groups = list(_g0_obs.DiseaseGroup.unique())

# Store both label lists in the shared output directory.
for _filename, _labels in (("diseases.pickle", diseases), ("disease_groups.pickle", disease_groups)):
    with open(os.path.join(shared, _filename), "wb") as f:
        pickle.dump(_labels, f)

In [147]:
# One dictionary per modality, each mapping graph id -> graph.
citeRNA_g_with_74 = {}
bulkRNA_g_with_74 = {}
luminex_g_with_74 = {}
cytof_g_with_74 = {}
adt_g_with_74 = {}
facs_g_with_74 = {}

# Dispatch table: label stored in g.uns["modality"] -> dictionary that collects the graph.
_graphs_by_modality = {
    "citeRNA": citeRNA_g_with_74,
    "bulkRNA": bulkRNA_g_with_74,
    "cytof": cytof_g_with_74,
    "luminex": luminex_g_with_74,
    "facs": facs_g_with_74,
    "adt": adt_g_with_74,
}

for graph_id, graph in level1_graphs.items():
    modality = graph.uns["modality"]
    # Graphs carrying any other modality label are skipped.
    if modality in _graphs_by_modality:
        _graphs_by_modality[modality][graph_id] = graph

In [148]:
# Sample identifiers of the patients to highlight.
interesting_patients = [
    "S00027-Ja003",
    "S00033-Ja003",
    "S00033-Ja005",
    "S00068-Ja005",
    "S00095-Ja005",
    "S00065-Ja003",
    "S00005-Ja005",
    "S00040-Ja005",
    "S00119-Ja003",
    "S00020-Ja003",
]
# Keep only the part before "-" (the id); the set removes duplicates.
interesting_patients = {p.split("-")[0] for p in interesting_patients}
# Restrict to ids that also occur in patients_74 (resulting order is arbitrary).
interesting_patients = list(set(patients_74).intersection(interesting_patients))

# Patients whose Disease label in graph "G0" is "HV", in obs order.
# A boolean mask on the obs table replaces the former per-patient
# `level1_graphs["G0"][p].obs.Disease[0]`, which created one AnnData view per patient and
# relied on the deprecated positional fallback of Series[int] on a string index.
_g0_obs = level1_graphs["G0"].obs
healthy_patients = list(_g0_obs.index[(_g0_obs["Disease"] == "HV").to_numpy()])

# Colour lookups for plotting.
# NOTE(review): the counts 7/6 and 3/3 are hard-coded; presumably they match the number of
# distinct diseases and disease groups - confirm if the cohort changes.
palette = ig.RainbowPalette(n=7)
disease_color_map = {diseases[i]: palette[i] for i in range(6)}
palette = ig.RainbowPalette(n=3)
disease_group_color_map = {disease_groups[i]: palette[i] for i in range(3)}

The Penalize-Reward score checks the alignment of edge weights with the random-walker objective. In the ideal case, a random walker transitions only within the same disease group. Hence, edge weights between patients with the same disease (group) are rewarded with +1. Weights between patients with different diseases are penalized with -1. Weights between a healthy individual and a patient are penalized with -2.

In [149]:
# Reward / penalty matrices used by random_walker_objective(); rows and columns follow
# the order of patients_74.
#
# Labels are looked up once per patient from the obs table of graph "G0". The former
# `level1_graphs["G0"][i].obs.Disease[0]` built an AnnData view for every (i, j) pair and used
# the deprecated positional fallback of Series[int] on a string index.
# Assumes the obs index (patient ids) is unique, so `.at` returns a scalar.
_g0_obs = level1_graphs["G0"].obs
_patient_disease = [_g0_obs.at[p, "Disease"] for p in patients_74]
_patient_group = [_g0_obs.at[p, "DiseaseGroup"] for p in patients_74]


def _disease_reward(d1, d2):
    """+1 when both patients have the same disease, otherwise -1."""
    return 1 if d1 == d2 else -1


def _disease_group_reward(dg1, dg2):
    """+1 for the same disease group, -2 when exactly one side is "Healthy", otherwise -1."""
    if dg1 == dg2:
        return 1
    if dg1 == "Healthy" or dg2 == "Healthy":
        return -2
    return -1


# The diagonal (a patient paired with itself) is set to 0.
panelize_reward_d = np.array(
    [
        [0 if i == j else _disease_reward(d1, d2) for j, d2 in enumerate(_patient_disease)]
        for i, d1 in enumerate(_patient_disease)
    ],
    dtype="float64",
)
panelize_reward_dg = np.array(
    [
        [0 if i == j else _disease_group_reward(dg1, dg2) for j, dg2 in enumerate(_patient_group)]
        for i, dg1 in enumerate(_patient_group)
    ],
    dtype="float64",
)

In [150]:
# Reference adjacency matrix: entry (row, col) is 1 when two distinct patients of graph "G0"
# share the same DiseaseGroup, and 0 otherwise (including the diagonal).
_groups = list(level1_graphs["G0"].obs.DiseaseGroup)
optimal_graph = np.array(
    [
        [1 if (row != col and group_row == group_col) else 0 for col, group_col in enumerate(_groups)]
        for row, group_row in enumerate(_groups)
    ],
    dtype="float64",
)
print(optimal_graph)

# NOTE(review): hard-coded; presumably the number of patients - confirm.
optimal_rw_score = 74

[[0. 1. 1. ... 0. 0. 0.]
 [1. 0. 1. ... 0. 0. 0.]
 [1. 1. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 1. 1.]
 [0. 0. 0. ... 1. 0. 1.]
 [0. 0. 0. ... 1. 1. 0.]]


In [151]:
# Base weight per modality.
# NOTE(review): the denominators presumably are the number of graphs per modality - confirm.
_weight_per_modality = {
    "citeRNA": 1/209,
    "bulkRNA": 1/12,
    "adt": 1/11,
    "facs": 1,
    "luminex": 1,
    "cytof": 1/12,
}

# Weight per graph id: the base weight of the graph's modality divided by 6.
modality_frequency_weights = {
    graph_id: _weight_per_modality[graph.uns["modality"]] * 1 / 6
    for graph_id, graph in level1_graphs.items()
}

# Uniform alternative: every level-1 graph gets the same weight 1 / (number of graphs).
weighted_avg_weights = {graph_id: 1 / len(level1_graphs) for graph_id in level1_graphs}

## Functions

In [152]:
def make_transition_matrix(fused_network):
    """Divide every row of `fused_network` by its own sum.

    Returns a Python list with one normalised row per input row. A row whose
    sum is zero is not special-cased.
    """
    return [row / sum(row) for row in fused_network]

def random_walker_objective(transition_matrix):
    """Score a transition matrix against the global reward matrices.

    Each score is the sum of the element-wise product of `transition_matrix`
    with `panelize_reward_d` (disease) or `panelize_reward_dg` (disease group).

    Returns the tuple (disease score, disease-group score, overall score);
    the overall score currently equals the disease-group score.
    """
    disease_total = (transition_matrix * panelize_reward_d).sum()
    group_total = (transition_matrix * panelize_reward_dg).sum()
    # An equal mix of both scores (0.5 * disease + 0.5 * group) was tried and is disabled.
    overall = group_total
    return disease_total, group_total, overall

def adjust_weights(psns, scale_weights=None):
    """Scale every network in `psns` by its per-id weight.

    Parameters
    ----------
    psns : dict
        Maps id -> network (anything that supports multiplication by a number).
    scale_weights : dict, optional
        Maps id -> scaling factor. When omitted, the module-level
        `modality_frequency_weights` is used, looked up at call time.

    Returns
    -------
    dict
        Maps id -> `psns[id] * scale_weights[id]`.

    Raises
    ------
    KeyError
        If an id of `psns` has no entry in `scale_weights`.

    The previous implementation ignored `scale_weights` and always read the
    global `modality_frequency_weights`; calls without the second argument
    behave as before.
    """
    if scale_weights is None:
        scale_weights = modality_frequency_weights
    return {psn_id: psn * scale_weights[psn_id] for psn_id, psn in psns.items()}

In [153]:
def knn(g, k, metric): # g is an AnnData Object
    """Build a kNN connectivity matrix for `g`.

    Works on a copy of `g`. Unless the global `tr` equals "wot", the copy's X
    is replaced by `layers[tr]`. scanpy's `pp.neighbors` is then run with
    `n_neighbors=k`, `use_rep='X'` and the given `metric`.

    Returns the resulting `obsp["connectivities"]` as a dense float64 array.
    """
    working_copy = g.copy()
    if tr != "wot":
        working_copy.X = working_copy.layers[tr]
    scp.pp.neighbors(working_copy, n_neighbors=k, use_rep='X', metric=metric)
    connectivities = working_copy.obsp["connectivities"]
    dense = sparse.csr_matrix.todense(connectivities)
    return np.array(dense, dtype="float64")

def aff(g, metric, k, mu ): # g is a list of matrices
    """Forward to `snf.compute.make_affinity` with `K=k` and `normalize=False`.

    Returns whatever `make_affinity` returns (per the original note: a list of matrices).
    """
    affinity_networks = compute.make_affinity(g, metric=metric, K=k, mu=mu, normalize=False)
    return affinity_networks

# Optimization

## Optimizing PSNs

In [154]:
# Disabled cell: the code is wrapped in a string literal so it does not run.
# Remove the surrounding triple quotes to grid-search k and the distance metric of the
# kNN-based weighted-average (WA) fusion.
#
# NOTE(review), when re-enabling:
#   * `metrics = [...]` rebinds the name `metrics` imported from `snf` at the top.
#   * results are stored in dicts keyed by score, so settings with equal scores overwrite each other.
#   * `except BaseException` also swallows KeyboardInterrupt, so the loop cannot be interrupted.

"""setup_knn_pr = {}
setup_knn_s = {}
setup_knn_db = {}

ks = [4,5,6,7,8]
metrics = ["cosine", "euclidean", "sqeuclidean", "correlation", "seuclidean", "minkowski", "chebyshev", "canberra"]
combinations = set(itertools.product(ks, metrics))

for i, combi in enumerate(combinations):
    print(str(i), "/", str(len(combinations)))
    try:
        k = combi[0]
        metric = combi[1]
        graphs = {}
        fused = None
        for id, g in level1_graphs.items():
            weight = modality_frequency_weights[id]
            graphs[id] = knn(g, k, metric)
            
            if fused is None:
                fused = weight * graphs[id]
            else:
                fused = weight * graphs[id] + fused
            
       
        transition = make_transition_matrix(fused)
        pr_score = random_walker_objective(transition)[2]

        ad_obj = ad.AnnData(fused)
        ad_obj.obsp['connectivities'] = fused
        scp.tl.leiden(ad_obj, resolution=1, key_added='leiden', adjacency=fused, directed=False, use_weights=True)
        l = list(ad_obj.obs.leiden)

        s_score = skl.metrics.silhouette_score(fused, l)
        db_score = skl.metrics.davies_bouldin_score(fused, l)
   
    
        print("success")
        setup_knn_pr[pr_score] = {"k":k, "metric": metric}
        setup_knn_s[s_score] = {"k":k, "metric": metric}
        setup_knn_db[db_score] = {"k":k, "metric": metric}

    except np.linalg.LinAlgError:
        print(i, "fail")
        continue
    except BaseException:
        print(i, "fail")
        continue"""

'setup_knn_pr = {}\nsetup_knn_s = {}\nsetup_knn_db = {}\n\nks = [4,5,6,7,8]\nmetrics = ["cosine", "euclidean", "sqeuclidean", "correlation", "seuclidean", "minkowski", "chebyshev", "canberra"]\ncombinations = set(itertools.product(ks, metrics))\n\nfor i, combi in enumerate(combinations):\n    print(str(i), "/", str(len(combinations)))\n    try:\n        k = combi[0]\n        metric = combi[1]\n        graphs = {}\n        fused = None\n        for id, g in level1_graphs.items():\n            weight = modality_frequency_weights[id]\n            graphs[id] = knn(g, k, metric)\n            \n            if fused is None:\n                fused = weight * graphs[id]\n            else:\n                fused = weight * graphs[id] + fused\n            \n       \n        transition = make_transition_matrix(fused)\n        pr_score = random_walker_objective(transition)[2]\n\n        ad_obj = ad.AnnData(fused)\n        ad_obj.obsp[\'connectivities\'] = fused\n        scp.tl.leiden(ad_obj, resol

In [155]:
# Disabled cell: the code is wrapped in a string literal so it does not run.
# Remove the surrounding triple quotes to pick the best WA settings from the grid search above.
# Each dict is sorted by score (ascending): the last entry is taken for the pr and s scores,
# the first entry for the db score.
# NOTE(review): this defines best_knn_pr / best_knn_s / best_knn_db, while later cells read
# `best_knn`; one of the three has to be assigned to `best_knn` by hand.

"""setup_knn_pr = {k: v for k, v in sorted(setup_knn_pr.items(), key=lambda item: item[0])} # sort according to scores
best_knn_pr = setup_knn_pr[list(setup_knn_pr.keys())[-1]]
setup_knn_s = {k: v for k, v in sorted(setup_knn_s.items(), key=lambda item: item[0])} 
best_knn_s = setup_knn_s[list(setup_knn_s.keys())[-1]]
setup_knn_db = {k: v for k, v in sorted(setup_knn_db.items(), key=lambda item: item[0])} 
best_knn_db = setup_knn_db[list(setup_knn_db.keys())[0]]

print(best_knn_pr, best_knn_s, best_knn_db)"""

'setup_knn_pr = {k: v for k, v in sorted(setup_knn_pr.items(), key=lambda item: item[0])} # sort according to scores\nbest_knn_pr = setup_knn_pr[list(setup_knn_pr.keys())[-1]]\nsetup_knn_s = {k: v for k, v in sorted(setup_knn_s.items(), key=lambda item: item[0])} \nbest_knn_s = setup_knn_s[list(setup_knn_s.keys())[-1]]\nsetup_knn_db = {k: v for k, v in sorted(setup_knn_db.items(), key=lambda item: item[0])} \nbest_knn_db = setup_knn_db[list(setup_knn_db.keys())[0]]\n\nprint(best_knn_pr, best_knn_s, best_knn_db)'

In [156]:
# Fixed kNN settings (neighbourhood size and distance metric) used for all kNN-based PSNs.
# Replace this assignment if you want to use values found by the grid search instead.
best_knn = dict(k=7, metric='correlation')

In [157]:
# Disabled cell: the code is wrapped in a string literal so it does not run.
# Remove the surrounding triple quotes to grid-search k, the distance metric and mu for SNF.
#
# NOTE(review), when re-enabling:
#   * `metrics = [...]` rebinds the name `metrics` imported from `snf` at the top.
#   * results are stored in dicts keyed by score, so settings with equal scores overwrite each other.
#   * only ValueError is caught; any other exception aborts the search.

"""setup_kernel_pr = {}
setup_kernel_s = {}
setup_kernel_db = {}

ks = [4,5,6,7,8]
metrics = ["cosine", "euclidean", "sqeuclidean", "correlation", "seuclidean", "minkowski", "mahalanobis", "chebyshev", "canberra"]
mus = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

combinations = set(itertools.product(ks, metrics, mus))

for i, combi in enumerate(combinations):
    print(str(i), "/", str(len(combinations)))
    try:
        k = combi[0]
        metric = combi[1]
        mu = combi[2] 
        
        graphs = aff(list(level1_fm.values()), metric, k, mu)
        fused = compute.snf(graphs, K=k, t = 40)

        transition = make_transition_matrix(fused)
        score_pr = random_walker_objective(transition)[2]

        
        first, second = compute.get_n_clusters(fused)
        l = cluster.spectral_clustering(fused, n_clusters=first, n_init = 40)

        score_s = skl.metrics.silhouette_score(fused, l)
        score_db = skl.metrics.davies_bouldin_score(fused, l)

        print("success")
        setup_kernel_pr[score_pr] = {"k":k, "metric": metric, "mu": mu}
        setup_kernel_s[score_s] = {"k":k, "metric": metric, "mu": mu}
        setup_kernel_db[score_db] = {"k":k, "metric": metric, "mu": mu}
        
    except ValueError:
        print(i, "fail")
        continue"""

'setup_kernel_pr = {}\nsetup_kernel_s = {}\nsetup_kernel_db = {}\n\nks = [4,5,6,7,8]\nmetrics = ["cosine", "euclidean", "sqeuclidean", "correlation", "seuclidean", "minkowski", "mahalanobis", "chebyshev", "canberra"]\nmus = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]\n\ncombinations = set(itertools.product(ks, metrics, mus))\n\nfor i, combi in enumerate(combinations):\n    print(str(i), "/", str(len(combinations)))\n    try:\n        k = combi[0]\n        metric = combi[1]\n        mu = combi[2] \n        \n        graphs = aff(list(level1_fm.values()), metric, k, mu)\n        fused = compute.snf(graphs, K=k, t = 40)\n\n        transition = make_transition_matrix(fused)\n        score_pr = random_walker_objective(transition)[2]\n\n        \n        first, second = compute.get_n_clusters(fused)\n        l = cluster.spectral_clustering(fused, n_clusters=first, n_init = 40)\n\n        score_s = skl.metrics.silhouette_score(fused, l)\n        score_db = skl.metrics.davies_bouldin_score(fused, l)\n

In [158]:
# Disabled cell: the code is wrapped in a string literal so it does not run.
# Remove the surrounding triple quotes to pick the best SNF settings from the grid search above.
# Each dict is sorted by score (ascending): the last entry is taken for the pr and s scores,
# the first entry for the db score.
# NOTE(review): this defines best_kernel_pr / best_kernel_s / best_kernel_db, while later cells
# read `best_kernel`; one of the three has to be assigned to `best_kernel` by hand.

"""setup_kernel_pr = {k: v for k, v in sorted(setup_kernel_pr.items(), key=lambda item: item[0])} # sort according to scores
best_kernel_pr = setup_kernel_pr[list(setup_kernel_pr.keys())[-1]]
setup_kernel_s = {k: v for k, v in sorted(setup_kernel_s.items(), key=lambda item: item[0])} 
best_kernel_s = setup_kernel_s[list(setup_kernel_s.keys())[-1]]
setup_kernel_db = {k: v for k, v in sorted(setup_kernel_db.items(), key=lambda item: item[0])} 
best_kernel_db = setup_kernel_db[list(setup_kernel_db.keys())[0]]
print(best_kernel_pr,best_kernel_s, best_kernel_db )"""

'setup_kernel_pr = {k: v for k, v in sorted(setup_kernel_pr.items(), key=lambda item: item[0])} # sort according to scores\nbest_kernel_pr = setup_kernel_pr[list(setup_kernel_pr.keys())[-1]]\nsetup_kernel_s = {k: v for k, v in sorted(setup_kernel_s.items(), key=lambda item: item[0])} \nbest_kernel_s = setup_kernel_s[list(setup_kernel_s.keys())[-1]]\nsetup_kernel_db = {k: v for k, v in sorted(setup_kernel_db.items(), key=lambda item: item[0])} \nbest_kernel_db = setup_kernel_db[list(setup_kernel_db.keys())[0]]\nprint(best_kernel_pr,best_kernel_s, best_kernel_db )'

In [159]:
# Fixed affinity-kernel settings (k, distance metric, mu) used for all kernel-based PSNs.
# Replace this assignment if you want to use values found by the grid search instead.
best_kernel = dict(k=7, metric='sqeuclidean', mu=0.3)

# Level1

Given the Level 1 pseudobulks, generate the kNN-based PSNs for both the noise and the non-noise graphs.

In [160]:
# kNN-based PSN for every level-1 graph, keyed by graph id.
level1_knn_psns = {
    graph_id: knn(adata, best_knn["k"], best_knn["metric"])
    for graph_id, adata in level1_graphs.items()
}

# Same construction for the noise graphs.
noise_knn_psns = {
    noise_id: knn(adata, best_knn["k"], best_knn["metric"])
    for noise_id, adata in noise_74.items()
}

  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


Given the Level 1 pseudobulks, generate the kernel-based PSNs for both the noise and the non-noise graphs.

In [161]:
# Kernel settings shared by the real and the noise networks.
_kernel_args = dict(metric=best_kernel["metric"], k=best_kernel["k"], mu=best_kernel["mu"])

# Affinity networks of the level-1 feature matrices, keyed like level1_fm.
graphs = aff(list(level1_fm.values()), **_kernel_args)
level1_kernel_psns = {fm_id: graphs[pos] for pos, fm_id in enumerate(level1_fm.keys())}

# Affinity networks of the noise graphs, built from copies of their X matrices
# (aff() forwards to compute.make_affinity with normalize=False).
graphs_noise = [noise_graph.X.copy() for noise_graph in noise_74.values()]
graphs_noise = aff(graphs_noise, **_kernel_args)
noise_kernel_psns = {noise_id: graphs_noise[pos] for pos, noise_id in enumerate(noise_74.keys())}

In [162]:
# Observation-by-observation correlation matrix of every level-1 graph
# (the data frame is transposed so that .corr() correlates the rows of to_df()).
level1_correlations = {
    graph_id: adata.to_df().T.corr().to_numpy()
    for graph_id, adata in level1_graphs.items()
}

# Same for the noise graphs.
noise_correlations = {
    noise_id: adata.to_df().T.corr().to_numpy()
    for noise_id, adata in noise_74.items()
}

In [163]:
# Shuffle the noise networks in place.
# A dedicated, seeded generator makes the shuffled matrices (which are pickled below)
# reproducible on Restart & Run All and leaves NumPy's global random state untouched.
NOISE_SHUFFLE_SEED = 0
_noise_rng = np.random.default_rng(NOISE_SHUFFLE_SEED)

for noise_id in noise_kernel_psns.keys():
    # Like np.random.shuffle, Generator.shuffle permutes along the first axis only:
    # the rows are reordered, the columns stay in place.
    _noise_rng.shuffle(noise_kernel_psns[noise_id])
    _noise_rng.shuffle(noise_knn_psns[noise_id])
    _noise_rng.shuffle(noise_correlations[noise_id])

In [164]:
# Write the level-1 networks (real and noise) to the level-1 output directory.
for _filename, _payload in (
    ("correlations.pickle", level1_correlations),
    ("knn_PSNs.pickle", level1_knn_psns),
    ("kernel_PSNs.pickle", level1_kernel_psns),
    ("noise_correlations.pickle", noise_correlations),
    ("noise_knn_PSNs.pickle", noise_knn_psns),
    ("noise_kernel_PSNs.pickle", noise_kernel_psns),
):
    with open(os.path.join(level1, _filename), "wb") as f:
        pickle.dump(_payload, f)

In [165]:
# Combined lists: noise entries first, followed by the level-1 entries.
all_kernel_psns = [*noise_kernel_psns.values(), *level1_kernel_psns.values()]
all_knn_psns = [*noise_knn_psns.values(), *level1_knn_psns.values()]
all_feature_matrices = [*(noise.X for noise in noise_74.values()), *level1_fm.values()]
all_correlations = [*noise_correlations.values(), *level1_correlations.values()]
# Ids in the same order as the kernel PSN list.
all_names = [*noise_kernel_psns.keys(), *level1_kernel_psns.keys()]

In [166]:
# Bundle the combined noise + level-1 lists and write them to a single pickle.
all_dict = dict(
    knn_psns=all_knn_psns,
    kernel_psns=all_kernel_psns,
    feature_matrices=all_feature_matrices,
    correlations=all_correlations,
    names=all_names,
)

_combined_path = os.path.join(level1, 'noise_and_none_noise_combined.pickle')
with open(_combined_path, 'wb') as f:
    pickle.dump(all_dict, f)

# Level2

In [167]:
# Affinity networks of the level-2 feature matrices, keyed like level2_fm.
graphs = aff(
    list(level2_fm.values()),
    metric=best_kernel["metric"],
    k=best_kernel["k"],
    mu=best_kernel["mu"],
)
level2_kernel_psns = {fm_id: graphs[pos] for pos, fm_id in enumerate(level2_fm.keys())}

In [168]:
# kNN-based PSN for every level-2 graph, keyed by graph id.
level2_knn_psns = {
    graph_id: knn(adata, best_knn["k"], best_knn["metric"])
    for graph_id, adata in level2_graphs.items()
}

  utils.warn_names_duplicates("var")


In [169]:
# Observation-by-observation correlation matrix of every level-2 graph.
level2_correlations = {
    graph_id: adata.to_df().T.corr().to_numpy()
    for graph_id, adata in level2_graphs.items()
}

In [170]:
# Sanity check (displayed as the cell result): all three level-2 collections hold 37 networks.
_level2_sizes = (len(level2_correlations), len(level2_knn_psns), len(level2_kernel_psns))
all(size == 37 for size in _level2_sizes)

True

In [171]:
# Write the level-2 networks to the level-2 output directory.
for _filename, _payload in (
    ("correlations.pickle", level2_correlations),
    ("knn_PSNs.pickle", level2_knn_psns),
    ("kernel_PSNs.pickle", level2_kernel_psns),
):
    with open(os.path.join(level2, _filename), "wb") as f:
        pickle.dump(_payload, f)

# Level3

In [172]:
# Affinity networks of the level-3 feature matrices.
graphs = aff(
    list(level3_fm.values()),
    metric=best_kernel["metric"],
    k=best_kernel["k"],
    mu=best_kernel["mu"],
)
# Key the networks by the ids of level3_fm - the collection they were computed from -
# exactly as done for level 1 and level 2. The previous version enumerated
# level3_graphs.keys(), which mislabels the networks whenever the two dicts differ in
# key order or content.
level3_kernel_psns = {fm_id: graphs[pos] for pos, fm_id in enumerate(level3_fm.keys())}

In [173]:
# kNN-based PSN for every level-3 graph, keyed by graph id.
level3_knn_psns = {
    graph_id: knn(adata, best_knn["k"], best_knn["metric"])
    for graph_id, adata in level3_graphs.items()
}

  utils.warn_names_duplicates("var")


In [174]:
# Observation-by-observation correlation matrix of every level-3 graph.
level3_correlations = {
    graph_id: adata.to_df().T.corr().to_numpy()
    for graph_id, adata in level3_graphs.items()
}

In [175]:
# Sanity check (displayed as the cell result): all four level-3 collections hold 6 entries.
_level3_sizes = (
    len(level3_correlations),
    len(level3_knn_psns),
    len(level3_kernel_psns),
    len(level3_fm),
)
all(size == 6 for size in _level3_sizes)

True

In [176]:
# Write the level-3 networks to the level-3 output directory.
for _filename, _payload in (
    ("correlations.pickle", level3_correlations),
    ("knn_PSNs.pickle", level3_knn_psns),
    ("kernel_PSNs.pickle", level3_kernel_psns),
):
    with open(os.path.join(level3, _filename), "wb") as f:
        pickle.dump(_payload, f)

# Finding the Weights for the Weighted Average fusion

Uncomment the code snippets below to find and save the optimal weights.

In [177]:
def weighted_avg_optimize(num_iter, psns, rng=None):
    """Random search for the weights of a weighted-average fusion of networks.

    In every iteration one weight per network is drawn uniformly from [0, 10)
    and the draws are normalised to sum to 1. The networks are fused as the
    weighted sum, turned into a transition matrix with
    `make_transition_matrix` and scored with the third value returned by
    `random_walker_objective`. The candidate with the highest score wins;
    on ties the latest iteration is kept.

    Only the best candidate is kept in memory. The previous implementation
    stored the fused matrix and weight dict of every iteration, which grows
    linearly with `num_iter`.

    Parameters
    ----------
    num_iter : int
        Number of random weight vectors to try (at least 1).
    psns : dict
        Maps id -> network; all networks must be addable to each other.
    rng : optional
        Object providing `uniform(low=..., high=..., size=...)`, e.g. a
        `numpy.random.Generator`, for reproducible searches. Defaults to the
        global `np.random` state, as before.

    Returns
    -------
    tuple
        (best score, dict id -> weight of the best candidate, best fused network)

    Raises
    ------
    ValueError
        If `num_iter` is smaller than 1 or `psns` is empty.
    """
    if num_iter < 1:
        raise ValueError("num_iter must be at least 1")
    if len(psns) == 0:
        raise ValueError("psns must contain at least one network")

    sampler = np.random if rng is None else rng
    psn_ids = list(psns.keys())

    best_score_avg = None
    best_weights_avg = None
    best_fused_graph_avg = None

    for _ in range(num_iter):
        random_weights = sampler.uniform(low=0, high=10, size=len(psn_ids))
        # Compute the normalisation constant once instead of once per weight.
        total = sum(random_weights)
        weights = {psn_id: w / total for psn_id, w in zip(psn_ids, random_weights)}

        fused_network = 0
        for psn_id in psn_ids:
            fused_network = fused_network + weights[psn_id] * psns[psn_id]

        transition_matrix = make_transition_matrix(fused_network)
        _, _, score = random_walker_objective(transition_matrix)

        # `>=` keeps the latest candidate among equal scores. A NaN incumbent
        # (e.g. from a zero row sum) is always replaced.
        if best_score_avg is None or np.isnan(best_score_avg) or score >= best_score_avg:
            best_score_avg = score
            best_weights_avg = weights
            best_fused_graph_avg = fused_network

    return best_score_avg, best_weights_avg, best_fused_graph_avg

In [178]:
#best_pns_score, best_psn_weights, best_wa_psn_graph = weighted_avg_optimize(100000, level1_knn_psns)

In [179]:
#best_noise_score, best_noise_weights, best_wa_noise_graph = weighted_avg_optimize(100000, noise_knn_psns)

In [180]:
#best_score_all, best_weights_all, best_wa_graph_all = weighted_avg_optimize(150000, {all_names[i]: all_knn_psns[i] for i in range(len(all_names))})

In [181]:
#best_pns_score2, best_psn_weights2, best_wa_psn_graph2 = weighted_avg_optimize(100000, level2_knn_psns)

In [182]:
#best_pns_score3, best_psn_weights3, best_wa_psn_graph3 = weighted_avg_optimize(100000, level3_knn_psns)

In [183]:
"""modality_frequency_weights2 = {"citeRNA": 1/11, "bulkRNA": 1, "adt": 1/11, "facs": 1, "luminex": 1, "cytof": 1/12 }
modality_frequency_weights2  = {id: modality_frequency_weights2[g.uns["modality"]] * 1/6 for id, g in level2_graphs.items()}
weighted_avg_weights2 = {id: 1/len(level2_graphs)  for id, g in level2_graphs.items()}"""

'modality_frequency_weights2 = {"citeRNA": 1/11, "bulkRNA": 1, "adt": 1/11, "facs": 1, "luminex": 1, "cytof": 1/12 }\nmodality_frequency_weights2  = {id: modality_frequency_weights2[g.uns["modality"]] * 1/6 for id, g in level2_graphs.items()}\nweighted_avg_weights2 = {id: 1/len(level2_graphs)  for id, g in level2_graphs.items()}'

In [184]:
"""modality_frequency_weights3 = {id: 1/len(level3_graphs)  for id, g in level3_graphs.items()}
weighted_avg_weights3 = {id: 1/len(level2_graphs)  for id, g in level3_graphs.items()}"""

'modality_frequency_weights3 = {id: 1/len(level3_graphs)  for id, g in level3_graphs.items()}\nweighted_avg_weights3 = {id: 1/len(level2_graphs)  for id, g in level3_graphs.items()}'

In [185]:
"""with open(os.path.join(level1, 'weights_NOISE.pickle'), 'wb') as f:
    pickle.dump(best_noise_weights, f)
with open(os.path.join(level1, 'weights_ALL.pickle'), 'wb') as f:
    pickle.dump(best_weights_all, f)

with open(os.path.join(level1, 'weights_modality_frequency.pickle'), 'wb') as f:
    pickle.dump(modality_frequency_weights, f)
with open(os.path.join(level1, 'weights_uniform.pickle'), 'wb') as f:
    pickle.dump(weighted_avg_weights, f)
with open(os.path.join(level1, 'weights_optimized.pickle'), 'wb') as f:
    pickle.dump(best_psn_weights, f)

with open(os.path.join(level2, 'weights_optimized.pickle'), 'wb') as f:
    pickle.dump(best_psn_weights2, f)
with open(os.path.join(level2, 'weights_modality_frequency.pickle'), 'wb') as f:
    pickle.dump(modality_frequency_weights2, f)
with open(os.path.join(level2, 'weights_uniform.pickle'), 'wb') as f:
    pickle.dump(weighted_avg_weights2, f)


with open(os.path.join(level3, 'weights_optimized.pickle'), 'wb') as f:
    pickle.dump(best_psn_weights3, f)
with open(os.path.join(level3, 'weights_modality_frequency.pickle'), 'wb') as f:
    pickle.dump(modality_frequency_weights3, f)
with open(os.path.join(level3, 'weights_uniform.pickle'), 'wb') as f:
    pickle.dump(weighted_avg_weights3, f)"""

"with open(os.path.join(level1, 'weights_NOISE.pickle'), 'wb') as f:\n    pickle.dump(best_noise_weights, f)\nwith open(os.path.join(level1, 'weights_ALL.pickle'), 'wb') as f:\n    pickle.dump(best_weights_all, f)\n\nwith open(os.path.join(level1, 'weights_modality_frequency.pickle'), 'wb') as f:\n    pickle.dump(modality_frequency_weights, f)\nwith open(os.path.join(level1, 'weights_uniform.pickle'), 'wb') as f:\n    pickle.dump(weighted_avg_weights, f)\nwith open(os.path.join(level1, 'weights_optimized.pickle'), 'wb') as f:\n    pickle.dump(best_psn_weights, f)\n\nwith open(os.path.join(level2, 'weights_optimized.pickle'), 'wb') as f:\n    pickle.dump(best_psn_weights2, f)\nwith open(os.path.join(level2, 'weights_modality_frequency.pickle'), 'wb') as f:\n    pickle.dump(modality_frequency_weights2, f)\nwith open(os.path.join(level2, 'weights_uniform.pickle'), 'wb') as f:\n    pickle.dump(weighted_avg_weights2, f)\n\n\nwith open(os.path.join(level3, 'weights_optimized.pickle'), 'wb') 