In [16]:
import numpy as np
import copy
import pandas as pd
import os
import sys
import multiprocessing
import warnings
warnings.filterwarnings('ignore')

# Number of logical CPUs; passed as the thread count to the prerank GSEA calls below.
n_cores = multiprocessing.cpu_count()

# Extend the import search path: the package folder is appended at the end,
# and the resolved parent of the working directory is inserted at position 1.
package_dir = "../node2vec2rank/"
parent_dir = os.path.realpath(os.path.pardir)
sys.path.append(package_dir)
sys.path.insert(1, parent_dir)

In [17]:
# Experiment labels. They are used below to build the output folder,
# the result file names and the plot titles.
tissue, network_inf_method = "YEAST", "BONOBO"
gsea_parent_folder_name = "BONOBO_yeast_e2e_48121620"
# Free-text suffix for saved result files; empty means no extra note.
save_gsea_results_notes = ""

In [18]:
from node2vec2rank.dataloader import DataLoader

import json

# Read the config file. The context manager closes the handle deterministically
# (the previous `json.load(open(...))` left it open until garbage collection).
with open('../configs/config_bonobo_e2e.json', 'r') as config_file:
    config = json.load(config_file)

# Flatten {section: {param: value}} into a single {param: value} dict.
# If two sections define the same parameter, the later section wins.
config = {param: value
          for section, params in config.items()
          for param, value in params.items()}

# The loader reads the graphs described by the flattened config.
dataloader = DataLoader(config)

There are 3551 row nodes and 3551 column nodes in graph 1
There are 3551 row nodes and 3551 column nodes in graph 2
There are 3551 row nodes and 3551 column nodes in graph 3


In [19]:
# first = dataloader.graphs[0]
# second =  dataloader.graphs[1]
# third =  dataloader.graphs[2]

In [20]:
# first_two = pd.concat([first, second]).groupby(level=0).mean()


In [21]:
# dataloader.graphs[0] = first_two
# dataloader.graphs[1] = third
# del dataloader.graphs[2]


In [22]:
from node2vec2rank.model import N2V2R

# Build the model on the loaded graphs; node labels come from the loader's nodes of interest.
model = N2V2R(graphs=dataloader.graphs, config=config, node_names=dataloader.interest_nodes)
# Fit and rank. The result is later indexed by a comparison key such as '2vs3'.
# Per the logged output, one ranking is produced for each (dimension, distance metric)
# pair and comparison.
rankings = model.fit_transform_rank()

# Degree-difference ranking and the Borda aggregation of the rankings;
# both are indexed by the same comparison keys further down.
# NOTE(review): presumably these rely on state produced by fit_transform_rank(),
# so the call order should be kept as is -- confirm against the model implementation.
DeDi_ranking = model.degree_difference_ranking()
borda_rankings = model.aggregate_transform()

# Signed version of the rankings (used for the two-sided prerank analysis below).
signed_rankings = model.signed_ranks_transform()

../output/08_20_2023_23_10_44

Running n2v2r with dimensions [4, 12, 8, 16, 20] and distance metrics ['euclidean', 'cosine'] ...


	UASE embedding in 0.05 seconds for bin=False and keep_top=100%
		Ranking in 1.43 seconds
n2v2r computed 20 rankings for 2 comparison(s) in 2.58 seconds

Rank aggregation with Borda ...
	Finished aggregation in 1.91 seconds

Signed ranks transformation ...
	Finished signed transformation in 0.73 seconds


In [23]:
# from node2vec2rank.model_norm import compute_embedding_norm_differences
# import matplotlib.pyplot as plt


# first_embeddings = model.node_embeddings[0]
# second_embeddings = model.node_embeddings[1]

# first_embeddings = first_embeddings - first_embeddings.mean(axis=0, keepdims=True)
# second_embeddings = second_embeddings - second_embeddings.mean(axis=0, keepdims=True)


# first_norm, second_norm, difs = compute_embedding_norm_differences(first_embeddings, second_embeddings)

# DeDi = grns[0].sum(axis=0) - grns[1].sum(axis=0)
# first_degrees = grns[0].sum(axis=0)
# second_degrees = grns[1].sum(axis=0)

# plt.scatter(first_norm,first_degrees)
# plt.scatter(second_norm,second_degrees)

# plt.scatter(difs,DeDi)



In [24]:
# Label of the contrast, used only in output file names and plot titles.
target = 'sVSg2'

# Key of the graph comparison whose rankings are analysed in the rest of the notebook.
comparison = '2vs3'
n2v2r_ranking_pd = rankings[comparison]
n2v2r_borda_ranking_pd = borda_rankings[comparison]
n2v2r_DeDi_ranking_pd = signed_rankings[comparison]
# NOTE(review): this is the very same object as n2v2r_DeDi_ranking_pd, so every
# "borda_DeDi" result downstream is computed from the full signed-rankings frame.
# Confirm whether a signed Borda ranking was intended here.
n2v2r_borda_DeDi_ranking_pd = signed_rankings[comparison]
DeDi_ranking_pd = DeDi_ranking[comparison]


# map orf to gene name
yeast_map_fn = '../data/gene_set_libraries/yeast/yeast_orf_to_symbol_mapping.tsv'
yeast_map = pd.read_csv(yeast_map_fn, sep="\t")
# Column-wise zip builds the same mapping as a row-by-row iterrows() loop
# (later duplicates of an ORF still win) without materialising a Series per row.
orf2symbol = dict(zip(yeast_map['orf'], yeast_map['name']))
# ORFs without an entry in the mapping keep their original identifier.
genes_mapped = [orf2symbol.get(x, x) for x in dataloader.interest_nodes]

# Relabel every ranking frame in place. The assignment is positional.
# NOTE(review): this assumes each frame's rows are in the same order as
# dataloader.interest_nodes -- confirm against the model implementation.
for ranking_pd in (n2v2r_ranking_pd,
                   n2v2r_borda_ranking_pd,
                   n2v2r_DeDi_ranking_pd,
                   n2v2r_borda_DeDi_ranking_pd,
                   DeDi_ranking_pd):
    ranking_pd.index = genes_mapped


In [25]:
from node2vec2rank.visualization_utils import dim_reduction, plot_embeddings

algorithm = 'pca'
n_components = 3

node_names = n2v2r_borda_ranking_pd.index.to_list()

# NOTE(review): embeddings 0 and 1 are plotted while the rankings above come from
# comparison '2vs3' -- confirm these indices refer to the intended graphs.
first_embeddings = model.node_embeddings[0]
second_embeddings = model.node_embeddings[1]
# third_embeddings = model.node_embeddings[2]

# Everything that describes the stacked plot (points, colours, names) is derived
# from this single tuple, so adding or removing an embedding cannot leave the
# colour/name arrays with a different length than the stacked points.
embeddings_to_stack = (first_embeddings, second_embeddings)
concat_embeddings = np.concatenate(embeddings_to_stack, axis=0)

# Only the first six embedding columns are passed to the dimensionality reduction.
first_embeddings_red = dim_reduction(
    first_embeddings[:, :6], algorithm=algorithm, n_components=n_components)
second_embeddings_red = dim_reduction(
    second_embeddings[:, :6], algorithm=algorithm, n_components=n_components)
# third_embeddings = dim_reduction(
#     third_embeddings[:, :6], algorithm=algorithm, n_components=n_components)
concat_embeddings_red = dim_reduction(
    concat_embeddings[:, :6], algorithm=algorithm, n_components=n_components)

# Colour each node by the log of its Borda rank (computed once, used for both plots).
log_borda_ranks = np.log(n2v2r_borda_ranking_pd.loc[node_names, 'borda_ranks'])
plot_embeddings(first_embeddings_red, color_type='numeric',
                color=log_borda_ranks, names=node_names)
plot_embeddings(second_embeddings_red, color_type='numeric',
                color=log_borda_ranks, names=node_names)


num_nodes = first_embeddings_red.shape[0]
num_stacked = len(embeddings_to_stack)

# One colour value per stacked embedding (0.0, 1.0, ...), repeated for each of its
# nodes. Previously three colour/name groups were built for only two stacked
# embeddings, so their lengths did not match the plotted points.
color_concat = np.repeat(np.arange(num_stacked, dtype=float), num_nodes)

plot_embeddings(concat_embeddings_red, color=color_concat,
                names=np.tile(node_names, num_stacked))

In [26]:
# import scipy.stats as stats

# list_of_taus = []
# list_of_comparisons = []

# for i in range(len(n2v2r_ranking_pd.columns)):
#     for j in range(i+1,len(n2v2r_ranking_pd.columns)):
#         first_series = n2v2r_ranking_pd.iloc[:,i].copy()
#         second_series = n2v2r_ranking_pd.iloc[:,j].copy()

#         first_series.sort_values(ascending=False,inplace=True)
#         second_series.sort_values(ascending=False,inplace=True)

#         first_index = first_series.index
#         second_index = second_series.index

#         tau, p_value = stats.kendalltau(first_index, second_index)
#         list_of_taus.append(tau)
#         list_of_comparisons.append(n2v2r_ranking_pd.columns[i]+" vs " +n2v2r_ranking_pd.columns[j])


# plt.hist(list_of_taus, bins=100, log=True);

# indices_high_cor=np.where(np.array(list_of_taus)>0.3)[0]
# result = [list_of_comparisons[q] for q in indices_high_cor]
# result

In [27]:
# run enrich GSEA
from node2vec2rank.post_utils import enrichr_gseapy, read_gmt
from itertools import chain
import os

save_results = True


# read the geneset libraries
kegg_pathway_fn = '../data/gene_set_libraries/yeast/KEGG_2018_yeast.gmt'
gobp_pathway_fn = '../data/gene_set_libraries/yeast/GO_Biological_Process_2018_yeast.gmt'

# network_background or pathway_background for enrichment
# network will use the genes in the network only, while pathway will use all the genes in the pathways
# network is "more fair" but will find less things in small networks
background = 'network_background'
organism = 'yeast'

# take the top k percentage of the ranking for enrichment
top_k_percent = 10
enrich_quantile_cutoff = 1 - top_k_percent / 100

if background == 'network_background':
    kegg_background = n2v2r_ranking_pd.index.to_list()
    gobp_background = n2v2r_ranking_pd.index.to_list()
elif background == 'pathway_background':
    kegg_dict = read_gmt(kegg_pathway_fn)
    kegg_background = list(set(chain.from_iterable(kegg_dict.values())))
    gobp_dict = read_gmt(gobp_pathway_fn)
    gobp_background = list(set(chain.from_iterable(gobp_dict.values())))
else:
    # ValueError is a subclass of Exception, so existing handlers keep working.
    raise ValueError("Enrichment background not properly set")


def _run_enrichr(ranking_pd, pathway_fn, background_genes):
    """Run over-representation analysis for one ranking frame and one gene-set library.

    Forwards the quantile cutoff and organism configured in this cell so that
    every enrichment below uses identical settings.
    """
    return enrichr_gseapy(ranking_pd, pathway_fn, background=background_genes,
                          enrich_quantile_cutoff=enrich_quantile_cutoff, organism=organism)


# All n2v2r rankings together (plotted with stability information further down).
n2v2r_consensus_enr_KEGG_pd = _run_enrichr(n2v2r_ranking_pd, kegg_pathway_fn, kegg_background)
n2v2r_consensus_enr_GOBP_pd = _run_enrichr(n2v2r_ranking_pd, gobp_pathway_fn, gobp_background)

# Only the first n2v2r ranking column.
n2v2r_enr_KEGG_pd = _run_enrichr(n2v2r_ranking_pd.iloc[:, [0]], kegg_pathway_fn, kegg_background)
n2v2r_enr_GOBP_pd = _run_enrichr(n2v2r_ranking_pd.iloc[:, [0]], gobp_pathway_fn, gobp_background)

# Borda-aggregated ranking.
n2v2r_borda_enr_KEGG_pd = _run_enrichr(n2v2r_borda_ranking_pd, kegg_pathway_fn, kegg_background)
n2v2r_borda_enr_GOBP_pd = _run_enrichr(n2v2r_borda_ranking_pd, gobp_pathway_fn, gobp_background)

# Second column of the degree-difference frame.
# NOTE(review): presumably the absolute degree difference -- confirm the column order.
absDeDi_enr_KEGG_pd = _run_enrichr(DeDi_ranking_pd.iloc[:, [1]], kegg_pathway_fn, kegg_background)
absDeDi_enr_GOBP_pd = _run_enrichr(DeDi_ranking_pd.iloc[:, [1]], gobp_pathway_fn, gobp_background)

if save_results:
    path = '../results/results_gsea/' + gsea_parent_folder_name
    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(path, exist_ok=True)

    # File-name stem -> result frame. The stems reproduce the file names that
    # were previously spelled out by hand for each frame.
    enrich_outputs = {
        "n2v2r_consensus_enr_KEGG": n2v2r_consensus_enr_KEGG_pd,
        "n2v2r_consensus_enr_GOBP": n2v2r_consensus_enr_GOBP_pd,
        "n2v2r_enr_KEGG": n2v2r_enr_KEGG_pd,
        "n2v2r_enr_GOBP": n2v2r_enr_GOBP_pd,
        "n2v2r_borda_enr_KEGG": n2v2r_borda_enr_KEGG_pd,
        "n2v2r_borda_enr_GOBP": n2v2r_borda_enr_GOBP_pd,
        "absDeDi_enr_KEGG": absDeDi_enr_KEGG_pd,
        "absDeDi_enr_GOBP": absDeDi_enr_GOBP_pd,
    }
    enrich_file_prefix = path + "/" + tissue + "_" + network_inf_method + "_" + target + "_"
    enrich_file_suffix = ("_" + background + "_top" + str(top_k_percent) +
                          "_" + save_gsea_results_notes + ".tsv")
    for enrich_stem, enrich_pd in enrich_outputs.items():
        enrich_pd.to_csv(enrich_file_prefix + enrich_stem + enrich_file_suffix,
                         header=True, index=None, sep='\t')



In [28]:
from node2vec2rank.post_utils import plot_gseapy_enrich 

stability_cutoff = 0.2
padj_cutoff = 0.1

save_directory = '../results/results_gsea/' + gsea_parent_folder_name
# save_directory = None

# One row per plotted method: (title prefix, has_stability, (KEGG frame, GOBP frame)).
# Only the consensus result is plotted with stability information.
ora_panels = [
    ("n2v2r cons. ", True, (n2v2r_consensus_enr_KEGG_pd, n2v2r_consensus_enr_GOBP_pd)),
    ("n2v2r ", False, (n2v2r_enr_KEGG_pd, n2v2r_enr_GOBP_pd)),
    ("n2v2r borda ", False, (n2v2r_borda_enr_KEGG_pd, n2v2r_borda_enr_GOBP_pd)),
    ("absDeDi ", False, (absDeDi_enr_KEGG_pd, absDeDi_enr_GOBP_pd)),
]

# All KEGG figures are drawn first, then all GOBP figures, each in the row order above.
for library_pos, library_name in enumerate(("KEGG", "GOBP")):
    title = (f"{network_inf_method} {tissue} {library_name} ORA {target}"
             f" padj {padj_cutoff} {background} top {top_k_percent} stab {stability_cutoff}")
    for method_prefix, with_stability, library_frames in ora_panels:
        plot_gseapy_enrich(library_frames[library_pos],
                           has_stability=with_stability,
                           padj_cutoff=padj_cutoff,
                           stability_cutoff=stability_cutoff,
                           title=method_prefix + title,
                           output_dir=save_directory)



In [29]:
# run prerank GSEA
from node2vec2rank.post_utils import prerank_gseapy

save_results = True

# read the geneset libraries
kegg_pathway_fn = '../data/gene_set_libraries/yeast/KEGG_2018_yeast.gmt'
gobp_pathway_fn = '../data/gene_set_libraries/yeast/GO_Biological_Process_2018_yeast.gmt'

prerank_weight = 0
prerank_min_path_size = 5
prerank_max_path_size = 1500
prerank_num_perms = 1000


def _run_prerank(ranking_pd, pathway_fn, **extra_kwargs):
    """Run prerank GSEA for one ranking frame and one gene-set library.

    Forwards the prerank settings configured in this cell and the machine's core
    count. `extra_kwargs` carries per-call options such as `one_sided=False`.
    """
    return prerank_gseapy(ranking_pd, pathway_fn,
                          prerank_weight=prerank_weight,
                          prerank_min_path_size=prerank_min_path_size,
                          prerank_max_path_size=prerank_max_path_size,
                          prerank_num_perms=prerank_num_perms,
                          num_threads=n_cores,
                          **extra_kwargs)


# --- rankings analysed with the default one_sided setting ---
# All n2v2r rankings together.
n2v2r_consensus_pre_KEGG_pd = _run_prerank(n2v2r_ranking_pd, kegg_pathway_fn)
n2v2r_consensus_pre_GOBP_pd = _run_prerank(n2v2r_ranking_pd, gobp_pathway_fn)

# Only the first n2v2r ranking column.
n2v2r_pre_KEGG_pd = _run_prerank(n2v2r_ranking_pd.iloc[:, [0]], kegg_pathway_fn)
n2v2r_pre_GOBP_pd = _run_prerank(n2v2r_ranking_pd.iloc[:, [0]], gobp_pathway_fn)

# Borda-aggregated ranking.
n2v2r_borda_pre_KEGG_pd = _run_prerank(n2v2r_borda_ranking_pd, kegg_pathway_fn)
n2v2r_borda_pre_GOBP_pd = _run_prerank(n2v2r_borda_ranking_pd, gobp_pathway_fn)

# Second column of the degree-difference frame.
# NOTE(review): presumably the absolute degree difference -- confirm the column order.
absDeDi_pre_KEGG_pd = _run_prerank(DeDi_ranking_pd.iloc[:, [1]], kegg_pathway_fn)
absDeDi_pre_GOBP_pd = _run_prerank(DeDi_ranking_pd.iloc[:, [1]], gobp_pathway_fn)

# --- rankings analysed with one_sided=False ---
# First column of the degree-difference frame.
DeDi_pre_KEGG_pd = _run_prerank(DeDi_ranking_pd.iloc[:, [0]], kegg_pathway_fn, one_sided=False)
DeDi_pre_GOBP_pd = _run_prerank(DeDi_ranking_pd.iloc[:, [0]], gobp_pathway_fn, one_sided=False)

# First column of the signed n2v2r rankings.
n2v2r_DeDi_pre_KEGG_pd = _run_prerank(n2v2r_DeDi_ranking_pd.iloc[:, [0]], kegg_pathway_fn, one_sided=False)
n2v2r_DeDi_pre_GOBP_pd = _run_prerank(n2v2r_DeDi_ranking_pd.iloc[:, [0]], gobp_pathway_fn, one_sided=False)

# NOTE(review): n2v2r_borda_DeDi_ranking_pd is assigned from the same
# signed_rankings[comparison] entry as n2v2r_DeDi_ranking_pd, so these two runs
# receive the same input as the "consensus DeDi" runs below -- confirm intent.
n2v2r_borda_DeDi_pre_KEGG_pd = _run_prerank(n2v2r_borda_DeDi_ranking_pd, kegg_pathway_fn, one_sided=False)
n2v2r_borda_DeDi_pre_GOBP_pd = _run_prerank(n2v2r_borda_DeDi_ranking_pd, gobp_pathway_fn, one_sided=False)

# All signed n2v2r rankings together.
n2v2r_consensus_DeDi_pre_KEGG_pd = _run_prerank(n2v2r_DeDi_ranking_pd, kegg_pathway_fn, one_sided=False)
n2v2r_consensus_DeDi_pre_GOBP_pd = _run_prerank(n2v2r_DeDi_ranking_pd, gobp_pathway_fn, one_sided=False)

if save_results:
    path = '../results/results_gsea/' + gsea_parent_folder_name
    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(path, exist_ok=True)

    # File-name stem -> result frame. Stems reproduce the previously hand-written
    # file names exactly.
    # NOTE(review): the "n2v2r_n2v2r_*" stems repeat the method name, unlike the
    # ORA outputs ("n2v2r_enr_*"). Kept as is so existing result files and any
    # downstream readers stay valid -- confirm before renaming.
    prerank_outputs = {
        "n2v2r_consensus_prerank_KEGG": n2v2r_consensus_pre_KEGG_pd,
        "n2v2r_consensus_prerank_GOBP": n2v2r_consensus_pre_GOBP_pd,
        "n2v2r_n2v2r_prerank_KEGG": n2v2r_pre_KEGG_pd,
        "n2v2r_n2v2r_prerank_GOBP": n2v2r_pre_GOBP_pd,
        "n2v2r_n2v2r_borda_prerank_KEGG": n2v2r_borda_pre_KEGG_pd,
        "n2v2r_n2v2r_borda_prerank_GOBP": n2v2r_borda_pre_GOBP_pd,
        "absDeDi_prerank_KEGG": absDeDi_pre_KEGG_pd,
        "absDeDi_prerank_GOBP": absDeDi_pre_GOBP_pd,
        "DeDi_prerank_KEGG": DeDi_pre_KEGG_pd,
        "DeDi_prerank_GOBP": DeDi_pre_GOBP_pd,
        "n2v2r_DeDi_prerank_KEGG": n2v2r_DeDi_pre_KEGG_pd,
        "n2v2r_DeDi_prerank_GOBP": n2v2r_DeDi_pre_GOBP_pd,
        "n2v2r_borda_DeDi_prerank_KEGG": n2v2r_borda_DeDi_pre_KEGG_pd,
        "n2v2r_borda_DeDi_prerank_GOBP": n2v2r_borda_DeDi_pre_GOBP_pd,
        "n2v2r_consensus_DeDi_prerank_KEGG": n2v2r_consensus_DeDi_pre_KEGG_pd,
        "n2v2r_consensus_DeDi_prerank_GOBP": n2v2r_consensus_DeDi_pre_GOBP_pd,
    }
    prerank_file_prefix = path + "/" + tissue + "_" + network_inf_method + "_" + target + "_"
    prerank_file_suffix = "_" + save_gsea_results_notes + ".tsv"
    for prerank_stem, prerank_pd in prerank_outputs.items():
        prerank_pd.to_csv(prerank_file_prefix + prerank_stem + prerank_file_suffix,
                          header=True, index=None, sep='\t')

The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of tho

In [30]:
from node2vec2rank.post_utils import plot_gseapy_prerank 

stability_cutoff = 0.2
padj_cutoff = 0.1

save_directory = '../results/results_gsea/' + gsea_parent_folder_name
# save_directory = None

# One row per plotted method:
# (title prefix, has_stability, plotted with one_sided=False?, (KEGG frame, GOBP frame)).
# Rows flagged False for the third field do not pass one_sided at all, so the
# plotting function's default applies to them.
prerank_panels = [
    ("n2v2r consensus ", True, False, (n2v2r_consensus_pre_KEGG_pd, n2v2r_consensus_pre_GOBP_pd)),
    ("n2v2r+DeDi consensus ", True, True, (n2v2r_consensus_DeDi_pre_KEGG_pd, n2v2r_consensus_DeDi_pre_GOBP_pd)),
    ("n2v2r ", False, False, (n2v2r_pre_KEGG_pd, n2v2r_pre_GOBP_pd)),
    ("n2v2r+DeDi ", False, True, (n2v2r_DeDi_pre_KEGG_pd, n2v2r_DeDi_pre_GOBP_pd)),
    ("n2v2r borda ", False, False, (n2v2r_borda_pre_KEGG_pd, n2v2r_borda_pre_GOBP_pd)),
    ("n2v2r_borda+DeDi ", False, True, (n2v2r_borda_DeDi_pre_KEGG_pd, n2v2r_borda_DeDi_pre_GOBP_pd)),
    ("absDeDi ", False, False, (absDeDi_pre_KEGG_pd, absDeDi_pre_GOBP_pd)),
    ("DeDi ", False, True, (DeDi_pre_KEGG_pd, DeDi_pre_GOBP_pd)),
]

# All KEGG figures are drawn first, then all GOBP figures, each in the row order above.
for library_pos, library_name in enumerate(("KEGG", "GOBP")):
    title = (f"{network_inf_method} {tissue} {library_name} prerank {target}"
             f" padj {padj_cutoff} stab {stability_cutoff}")
    for method_prefix, with_stability, two_sided, library_frames in prerank_panels:
        sidedness_kwargs = {"one_sided": False} if two_sided else {}
        plot_gseapy_prerank(library_frames[library_pos],
                            has_stability=with_stability,
                            padj_cutoff=padj_cutoff,
                            stability_cutoff=stability_cutoff,
                            title=method_prefix + title,
                            output_dir=save_directory,
                            **sidedness_kwargs)
