In [1]:
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation/runtime warnings from the
# libraries used below — consider narrowing the filter.
warnings.filterwarnings('ignore')

import multiprocessing
# Number of CPUs reported by the OS; passed as num_threads to the prerank
# GSEA calls later in the notebook.
n_cores = multiprocessing.cpu_count()

import sys
import os
import copy 

# Put the parent of the current working directory on the module search path
# (at position 1) — presumably so the local node2vec2rank package imports
# without being installed; confirm.
sys.path.insert(1, os.path.realpath(os.path.pardir))

import pandas as pd
import numpy as np

In [2]:
## Experiment info: constants used by the cells below for filtering,
## output file names and plot titles.

## True if the network is bipartite but stored as a symmetric (square) matrix;
## when set, the graphs are cut down to non-ENSG rows x ENSG columns.
symmetric_bipartite = True

# Labels concatenated into result file names and plot titles.
tissue = 'LUAD'
target = 'mVSf'
network_inf_method = 'PANDA'
# When True, chrY genes are removed from the rankings before enrichment.
sex_and_gender = True
# Sub-folder of ../results/results_gsea/ that receives tables and plots.
gsea_parent_folder_name = 'PANDA_LUAD_MF_e2e_new_nocen'
# Free-text suffix appended to every saved result file name.
save_gsea_results_notes = ''


In [3]:
from node2vec2rank.dataloader_new import DataLoader

import json

# Read the config file. The context manager closes the handle as soon as the
# JSON is parsed instead of leaving an open file to the garbage collector.
with open('../configs/config_panda_e2e.json', 'r') as config_file:
    config = json.load(config_file)

# Flatten the two-level {section: {param: value}} layout into a single
# {param: value} dict. If two sections define the same parameter name,
# the one from the later section wins.
config = {param: value for section, params in config.items()
          for param, value in params.items()}

# Build the loader from the flattened config; the following cells read the
# networks through dataloader.graphs and dataloader.interest_nodes.
dataloader = DataLoader(config)

There are 28020 row nodes and 28020 column nodes in graph 1 
There are 28020 row nodes and 28020 column nodes in graph 2 


In [4]:
## Turn the symmetric PANDA matrices into rectangular TF x gene matrices.
## TFs are labelled with gene symbols, genes with Ensembl (ENSG) IDs, so the
## "ENSG" substring is what tells the two node types apart.
if symmetric_bipartite:
    for _graph_idx in (0, 1):
        _square = dataloader.graphs[_graph_idx]
        # rows: keep only labels without "ENSG" (the TFs)
        _is_tf_row = ['ENSG' not in _label for _label in _square.index]
        # columns: keep only labels containing "ENSG" (the genes)
        dataloader.graphs[_graph_idx] = _square[_is_tf_row].filter(like="ENSG", axis=1)

    # the nodes to rank are the gene columns of the first graph
    dataloader.interest_nodes = dataloader.graphs[0].columns.to_list()

In [5]:
# Baseline ranking: degree difference (DeDi) of the nodes of interest
# between the graphs.

from node2vec2rank.model import degree_difference_ranking

# The result is used below as a mapping keyed by comparison name (e.g. '1vs2')
# whose values are DataFrames; one of their columns is named 'absDeDi'.
# NOTE(review): the meaning of threshold=0 is defined inside
# degree_difference_ranking — confirm there.
DeDi_ranking = degree_difference_ranking(dataloader.graphs, dataloader.interest_nodes, threshold=0)

In [6]:
from node2vec2rank.model import n2v2r

# Fit the model on the graphs and produce one ranking per configuration.
model = n2v2r(
    graphs=dataloader.graphs,
    config=config,
    node_names=dataloader.interest_nodes,
)
rankings = model.fit_transform_rank()

# Aggregate the individual rankings (Borda).
borda_rankings = model.aggregate_transform()

# Signed rankings are built from the first column of every DeDi frame.
_dedi_first_columns = [dedi_frame.iloc[:, 0] for dedi_frame in DeDi_ranking.values()]
signed_rankings = model.signed_ranks_transform(_dedi_first_columns)


Running n2v2r with dimensions [2, 4, 16, 8] and distance metrics ['euclidean', 'cosine'] ...
	UASE embedding in 162.11 seconds for bin=False and keep_top=100%
		Ranking in 3.4 seconds
	UASE embedding in 245.18 seconds for bin=True and keep_top=100%
		Ranking in 5.93 seconds
	UASE embedding in 202.57 seconds for bin=False and keep_top=75%
		Ranking in 3.4 seconds
	UASE embedding in 178.51 seconds for bin=True and keep_top=75%
		Ranking in 3.35 seconds
n2v2r computed 32 rankings for 1 comparison(s) in 1326.56 seconds

Rank aggregation with Borda ...
	Finished aggregation in 11.28 seconds

Signed ranks transformation ...
	Finished signed transformation in 8.27 seconds


In [7]:
## Pick the rankings of one comparison and relabel them from Ensembl IDs
## to gene symbols.

comparison = '1vs2'
n2v2r_ranking_pd = rankings[comparison]
n2v2r_borda_ranking_pd = borda_rankings[comparison]
n2v2r_DeDi_ranking_pd = signed_rankings[comparison]
n2v2r_borda_DeDi_ranking_pd = model.pairwise_signed_aggregate_ranks[comparison]
DeDi_ranking_pd = DeDi_ranking[comparison]

gencode_fn = '../data/gene_set_libraries/human/gen_v26_mapping.csv'
gencode = pd.read_csv(gencode_fn, index_col=0)

# Ensembl ID with the ".<version>" suffix stripped -> gene symbol.
# Rows are visited in file order, so a later duplicate ID overrides an earlier one.
ens2symbol = {
    versioned_id.split('.')[0]: symbol
    for versioned_id, symbol in zip(gencode['gene_id'], gencode['gene_name'])
}
# IDs missing from the mapping are kept unchanged.
genes_mapped = [ens2symbol.get(x, x) for x in dataloader.interest_nodes]

# NOTE(review): assumes every ranking frame has its rows in the same order as
# dataloader.interest_nodes — confirm against the model code.
for _ranking_frame in (
    n2v2r_ranking_pd,
    n2v2r_borda_ranking_pd,
    n2v2r_DeDi_ranking_pd,
    n2v2r_borda_DeDi_ranking_pd,
    DeDi_ranking_pd,
):
    _ranking_frame.index = genes_mapped


In [8]:
# from node2vec2rank.visualization_utils import dim_reduction, plot_embeddings

# algorithm = 'pca'
# n_components = 2

# first_embeddings = model.node_embeddings[0]
# second_embeddings = model.node_embeddings[1]
# concat_embeddings = np.append(first_embeddings, second_embeddings, axis=0)


# first_embeddings_red = dim_reduction(first_embeddings[:,:6], algorithm=algorithm, n_components=n_components)
# second_embeddings_red = dim_reduction(second_embeddings[:,:6], algorithm=algorithm, n_components=n_components)
# concat_embeddings_red = dim_reduction(concat_embeddings[:,:6], algorithm=algorithm, n_components=n_components)


# plot_embeddings(first_embeddings_red, color_type='numeric', color = n2v2r_borda_ranking_pd.loc[genes_mapped,'borda_ranks'], names=genes_mapped)
# plot_embeddings(second_embeddings_red, color_type='numeric', color = n2v2r_borda_ranking_pd.loc[genes_mapped,'borda_ranks'], names=genes_mapped)

# num_nodes = first_embeddings_red.shape[0]
# color_one = np.zeros(num_nodes)
# color_two = np.ones(num_nodes)
# color_concat = np.append(color_one, color_two, axis=0)

# plot_embeddings(concat_embeddings_red, color=color_concat, names = np.append(genes_mapped,genes_mapped))

In [9]:
# nDCG of known sex-linked gene sets under the n2v2r (Borda) ranking and the
# absolute degree-difference ranking.

from node2vec2rank.post_utils import read_gmt, normalized_discounted_cumulative_gain
import random


kegg_pathway_fn = '../data/gene_set_libraries/human/c2.cp.kegg.v7.5.1.symbols.gmt'
chrX_escapees_fn = '../data/gene_set_libraries/human/chrX_escapees.csv'
gencode_fn = '../data/gene_set_libraries/human/gen_v26_mapping.csv'

gencode = pd.read_csv(gencode_fn, index_col=0)
_on_chrY = gencode['seqid'] == 'chrY'
chrY_genes = set(gencode.loc[_on_chrY, 'gene_name'])
kegg_pathways = read_gmt(kegg_pathway_fn, False)
# the escapee genes are the entries of the first row of the CSV
_escapee_table = pd.read_csv(chrX_escapees_fn, sep=",", header=None)
chrX_escapees = set(_escapee_table.iloc[0, :])
sex_biased_genes = chrY_genes | chrX_escapees

# node lists ordered from the highest to the lowest score
sorted_n2v2r_borda_ranking_pd = n2v2r_borda_ranking_pd.sort_values(by='borda_ranks', ascending=False)
ranked_nodes_n2v2r = sorted_n2v2r_borda_ranking_pd.index.to_list()

sorted_DeDi_ranking_pd = DeDi_ranking_pd.sort_values(by='absDeDi', ascending=False)
ranked_nodes_absDeDi = sorted_DeDi_ranking_pd.index.to_list()


def _relevance_and_ndcg(ranked_nodes, relevant_genes):
    """Return (0/1 relevance vector along ``ranked_nodes``, its nDCG score)."""
    relevance = [int(node in relevant_genes) for node in ranked_nodes]
    score = normalized_discounted_cumulative_gain(relevance, binary_relevance=True)
    return relevance, score


## chrY
relevance_vector_n2v2r_chrY, nDCG_n2v2r_chrY = _relevance_and_ndcg(ranked_nodes_n2v2r, chrY_genes)
print(f"nDCG score for n2v2r and chrY is {round(nDCG_n2v2r_chrY,3)}")

relevance_vector_absDeDi_chrY, nDCG_absDeDi_chrY = _relevance_and_ndcg(ranked_nodes_absDeDi, chrY_genes)
print(f"nDCG score for (absolute) DeDi and chrY is {round(nDCG_absDeDi_chrY,3)}")

## escapees X
relevance_vector_n2v2r_chrX, nDCG_n2v2r_chrX = _relevance_and_ndcg(ranked_nodes_n2v2r, chrX_escapees)
print(f"\nnDCG score for n2v2r and chrX escapees is {round(nDCG_n2v2r_chrX,3)}")

relevance_vector_absDeDi_chrX, nDCG_absDeDi_chrX = _relevance_and_ndcg(ranked_nodes_absDeDi, chrX_escapees)
print(f"nDCG score for (absolute) DeDi and chrX escapees is {round(nDCG_absDeDi_chrX,3)}")

## both escapees X and chr Y
relevance_vector_n2v2r_sex_biased, nDCG_n2v2r_sex_biased = _relevance_and_ndcg(ranked_nodes_n2v2r, sex_biased_genes)
print(f"\nnDCG score for n2v2r and chrY + chrX escapees is {round(nDCG_n2v2r_sex_biased,3)}")

relevance_vector_absDeDi_sex_biased, nDCG_absDeDi_sex_biased = _relevance_and_ndcg(ranked_nodes_absDeDi, sex_biased_genes)
print(f"nDCG score for (absolute) DeDi and chrY + chrX escapees is {round(nDCG_absDeDi_sex_biased,3)}")


nDCG score for n2v2r and chrY is 0.364
nDCG score for (absolute) DeDi and chrY is 0.721

nDCG score for n2v2r and chrX escapees is 0.529
nDCG score for (absolute) DeDi and chrX escapees is 0.542

nDCG score for n2v2r and chrY + chrX escapees is 0.549
nDCG score for (absolute) DeDi and chrY + chrX escapees is 0.647


In [10]:
# Remove chrY genes from every ranking before the enrichment analyses.

if sex_and_gender:
    chrY_gene_names = set(gencode.loc[gencode['seqid'] == 'chrY', 'gene_name'])

    def _drop_chrY(ranking_pd):
        """Return ``ranking_pd`` without the rows whose index label is a chrY gene name."""
        return ranking_pd[~ranking_pd.index.isin(chrY_gene_names)]

    # Fix: the filtered DeDi frame used to be stored only in DeDi_data_pd,
    # a name no later cell reads, so every DeDi / absDeDi enrichment and
    # prerank run still contained the chrY genes while the n2v2r rankings
    # (and the network background derived from them) did not.
    DeDi_ranking_pd = _drop_chrY(DeDi_ranking_pd)
    # old name kept as an alias for anything that still refers to it
    DeDi_data_pd = DeDi_ranking_pd

    n2v2r_ranking_pd = _drop_chrY(n2v2r_ranking_pd)
    n2v2r_borda_ranking_pd = _drop_chrY(n2v2r_borda_ranking_pd)

    n2v2r_DeDi_ranking_pd = _drop_chrY(n2v2r_DeDi_ranking_pd)
    n2v2r_borda_DeDi_ranking_pd = _drop_chrY(n2v2r_borda_DeDi_ranking_pd)

In [15]:
# Run enrichment (over-representation) analysis on the top of each ranking
# and optionally save the result tables.
from node2vec2rank.post_utils import enrichr_gseapy, read_gmt
from itertools import chain
import os

save_results = True

# read the geneset libraries
kegg_pathway_fn = '../data/gene_set_libraries/human/c2.cp.kegg.v7.5.1.symbols.gmt'
gobp_pathway_fn = '../data/gene_set_libraries/human/c5.go.bp.v7.5.1.symbols.gmt'

# network_background or pathway_background for enrichment
# network will use the genes in the network only, while pathway will use all the genes in the pathways
# network is "more fair" but will find fewer things in small networks
background = 'network_background'
organism = 'human'

# take the top k percentage of the ranking for enrichment
top_k_percent = 5
# Quantile cutoff shared by ALL six calls below. Fix: the n2v2r GOBP call used
# top_k_percent/100 (0.05) while the other five used 1-top_k_percent/100
# (0.95), so that run selected a different slice of the ranking than the
# "top5" label in its file name and plot title claims.
enrich_quantile_cutoff = 1 - top_k_percent / 100

if background == 'network_background':
    kegg_background = n2v2r_ranking_pd.index.to_list()
    gobp_background = n2v2r_ranking_pd.index.to_list()
elif background == 'pathway_background':
    kegg_dict = read_gmt(kegg_pathway_fn)
    kegg_background = list(set(chain.from_iterable(kegg_dict.values())))
    gobp_dict = read_gmt(gobp_pathway_fn)
    gobp_background = list(set(chain.from_iterable(gobp_dict.values())))
else:
    raise Exception("Enrichment background not properly set")

n2v2r_enr_KEGG_pd = enrichr_gseapy(n2v2r_ranking_pd, kegg_pathway_fn, background=kegg_background,
                                   enrich_quantile_cutoff=enrich_quantile_cutoff, organism=organism)

n2v2r_enr_GOBP_pd = enrichr_gseapy(n2v2r_ranking_pd, gobp_pathway_fn, background=gobp_background,
                                   enrich_quantile_cutoff=enrich_quantile_cutoff, organism=organism)

borda_enr_KEGG_pd = enrichr_gseapy(n2v2r_borda_ranking_pd, kegg_pathway_fn, background=kegg_background,
                                   enrich_quantile_cutoff=enrich_quantile_cutoff, organism=organism)

borda_enr_GOBP_pd = enrichr_gseapy(n2v2r_borda_ranking_pd, gobp_pathway_fn, background=gobp_background,
                                   enrich_quantile_cutoff=enrich_quantile_cutoff, organism=organism)

# second column of the DeDi frame is passed as the absDeDi score
absDeDi_enr_KEGG_pd = enrichr_gseapy(DeDi_ranking_pd.iloc[:, [1]], kegg_pathway_fn, background=kegg_background,
                                     enrich_quantile_cutoff=enrich_quantile_cutoff, organism=organism)

absDeDi_enr_GOBP_pd = enrichr_gseapy(DeDi_ranking_pd.iloc[:, [1]], gobp_pathway_fn, background=gobp_background,
                                     enrich_quantile_cutoff=enrich_quantile_cutoff, organism=organism)

if save_results:
    path = '../results/results_gsea/' + gsea_parent_folder_name
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(path, exist_ok=True)

    # <path>/<tissue>_<method>_<target>_<tag>_<background>_top<k>_<notes>.tsv
    _enr_file_stem = path + "/" + tissue + "_" + network_inf_method + "_" + target
    _enr_file_tail = background + "_top" + str(top_k_percent) + "_" + save_gsea_results_notes + ".tsv"
    for _enr_tag, _enr_frame in (
        ("n2v2r_consensus_enr_KEGG", n2v2r_enr_KEGG_pd),
        ("n2v2r_consensus_enr_GOBP", n2v2r_enr_GOBP_pd),
        ("n2v2r_borda_enr_KEGG", borda_enr_KEGG_pd),
        ("n2v2r_borda_enr_GOBP", borda_enr_GOBP_pd),
        ("absDeDi_enr_KEGG", absDeDi_enr_KEGG_pd),
        ("absDeDi_enr_GOBP", absDeDi_enr_GOBP_pd),
    ):
        _enr_frame.to_csv(_enr_file_stem + "_" + _enr_tag + "_" + _enr_file_tail,
                          header=True, index=None, sep='\t')


In [17]:
from node2vec2rank.post_utils import plot_gseapy_enrich

# Plot the ORA tables of both gene-set libraries for the three rankings.

stability_cutoff = 0.5
padj_cutoff = 0.1

save_directory = '../results/results_gsea/' + gsea_parent_folder_name
# save_directory = None

# library -> (title prefix, result table, whether the table carries stability)
_ora_tables = {
    "KEGG": (
        ("n2v2r ", n2v2r_enr_KEGG_pd, True),
        ("Borda n2v2r ", borda_enr_KEGG_pd, False),
        ("absDeDi ", absDeDi_enr_KEGG_pd, False),
    ),
    "GOBP": (
        ("n2v2r ", n2v2r_enr_GOBP_pd, True),
        ("Borda n2v2r ", borda_enr_GOBP_pd, False),
        ("absDeDi ", absDeDi_enr_GOBP_pd, False),
    ),
}

for _library, _tables in _ora_tables.items():
    title = (f"{network_inf_method} {tissue} {_library} ORA {target} padj {padj_cutoff} "
             f"{background} top {top_k_percent} stab {stability_cutoff}")

    for _prefix, _table, _has_stability in _tables:
        plot_gseapy_enrich(
            _table,
            has_stability=_has_stability,
            padj_cutoff=padj_cutoff,
            stability_cutoff=stability_cutoff,
            title=_prefix + title,
            output_dir=save_directory,
            trim_first_num_characters=5,
        )



No results found for Borda n2v2r PANDA LUAD KEGG ORA mVSf padj 0.1 network_background top 5 stab 0.5
No results found for absDeDi PANDA LUAD KEGG ORA mVSf padj 0.1 network_background top 5 stab 0.5


In [13]:
# Run prerank GSEA for every ranking against KEGG and GO-BP and optionally
# save the result tables.
from node2vec2rank.post_utils import prerank_gseapy

save_results = True

# read the geneset libraries
kegg_pathway_fn = '../data/gene_set_libraries/human/c2.cp.kegg.v7.5.1.symbols.gmt'
gobp_pathway_fn = '../data/gene_set_libraries/human/c5.go.bp.v7.5.1.symbols.gmt'

prerank_weight = 0
prerank_min_path_size = 5
prerank_max_path_size = 1500
prerank_num_perms = 1000


def _run_prerank(ranking_pd, gmt_fn, **extra_kwargs):
    """Call prerank_gseapy with the prerank settings of this cell.

    ``extra_kwargs`` is forwarded unchanged; it carries ``one_sided=False``
    for the signed rankings and is empty otherwise.
    """
    return prerank_gseapy(
        ranking_pd,
        gmt_fn,
        prerank_weight=prerank_weight,
        prerank_min_path_size=prerank_min_path_size,
        prerank_max_path_size=prerank_max_path_size,
        prerank_num_perms=prerank_num_perms,
        num_threads=n_cores,
        **extra_kwargs,
    )


# unsigned rankings (one_sided left at the function's default)
n2v2r_pre_KEGG_pd = _run_prerank(n2v2r_ranking_pd, kegg_pathway_fn)
n2v2r_pre_GOBP_pd = _run_prerank(n2v2r_ranking_pd, gobp_pathway_fn)

borda_pre_KEGG_pd = _run_prerank(n2v2r_borda_ranking_pd, kegg_pathway_fn)
borda_pre_GOBP_pd = _run_prerank(n2v2r_borda_ranking_pd, gobp_pathway_fn)

# second DeDi column is passed as the absDeDi score
absDeDi_pre_KEGG_pd = _run_prerank(DeDi_ranking_pd.iloc[:, [1]], kegg_pathway_fn)
absDeDi_pre_GOBP_pd = _run_prerank(DeDi_ranking_pd.iloc[:, [1]], gobp_pathway_fn)

# signed rankings: one_sided=False; first DeDi column is passed as the DeDi score
DeDi_pre_KEGG_pd = _run_prerank(DeDi_ranking_pd.iloc[:, [0]], kegg_pathway_fn, one_sided=False)
DeDi_pre_GOBP_pd = _run_prerank(DeDi_ranking_pd.iloc[:, [0]], gobp_pathway_fn, one_sided=False)

n2v2r_borda_DeDi_pre_KEGG_pd = _run_prerank(n2v2r_borda_DeDi_ranking_pd, kegg_pathway_fn, one_sided=False)
n2v2r_borda_DeDi_pre_GOBP_pd = _run_prerank(n2v2r_borda_DeDi_ranking_pd, gobp_pathway_fn, one_sided=False)

n2v2r_DeDi_pre_KEGG_pd = _run_prerank(n2v2r_DeDi_ranking_pd, kegg_pathway_fn, one_sided=False)
n2v2r_DeDi_pre_GOBP_pd = _run_prerank(n2v2r_DeDi_ranking_pd, gobp_pathway_fn, one_sided=False)

if save_results:
    path = '../results/results_gsea/' + gsea_parent_folder_name
    isExist = os.path.exists(path)
    if not isExist:
        os.makedirs(path)

    # <path>/<tissue>_<method>_<target>_<tag>_<notes>.tsv
    _pre_file_stem = path + "/" + tissue + "_" + network_inf_method + "_" + target
    for _pre_tag, _pre_frame in (
        ("n2v2r_consensus_prerank_KEGG", n2v2r_pre_KEGG_pd),
        ("n2v2r_consensus_prerank_GOBP", n2v2r_pre_GOBP_pd),
        ("n2v2r_borda_prerank_KEGG", borda_pre_KEGG_pd),
        ("n2v2r_borda_prerank_GOBP", borda_pre_GOBP_pd),
        ("absDeDi_prerank_KEGG", absDeDi_pre_KEGG_pd),
        ("absDeDi_prerank_GOBP", absDeDi_pre_GOBP_pd),
        ("DeDi_prerank_KEGG", DeDi_pre_KEGG_pd),
        ("DeDi_prerank_GOBP", DeDi_pre_GOBP_pd),
        ("n2v2r_borda_DeDi_prerank_KEGG", n2v2r_borda_DeDi_pre_KEGG_pd),
        ("n2v2r_borda_DeDi_prerank_GOBP", n2v2r_borda_DeDi_pre_GOBP_pd),
        # the signed n2v2r ranking is saved under the "chimera" label
        ("n2v2r_chimera_prerank_KEGG", n2v2r_DeDi_pre_KEGG_pd),
        ("n2v2r_chimera_prerank_GOBP", n2v2r_DeDi_pre_GOBP_pd),
    ):
        _pre_frame.to_csv(_pre_file_stem + "_" + _pre_tag + "_" + save_gsea_results_notes + ".tsv",
                          header=True, index=None, sep='\t')

The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of tho

In [14]:
from node2vec2rank.post_utils import plot_gseapy_prerank

# Plot the prerank tables of both gene-set libraries for all six rankings.

stability_cutoff = 0.5
padj_cutoff = 0.1

save_directory = '../results/results_gsea/' + gsea_parent_folder_name
# save_directory = None

# library -> (title prefix, result table, has_stability, signed ranking?)
# Signed rankings are plotted with one_sided=False; for the others the
# argument is omitted so the function's default applies.
_prerank_tables = {
    "KEGG": (
        ("n2v2r ", n2v2r_pre_KEGG_pd, True, False),
        ("signed n2v2r ", n2v2r_DeDi_pre_KEGG_pd, True, True),
        ("Borda n2v2r ", borda_pre_KEGG_pd, False, False),
        ("signed Borda n2v2r ", n2v2r_borda_DeDi_pre_KEGG_pd, False, True),
        ("absDeDi ", absDeDi_pre_KEGG_pd, False, False),
        ("DeDi ", DeDi_pre_KEGG_pd, False, True),
    ),
    "GOBP": (
        ("n2v2r ", n2v2r_pre_GOBP_pd, True, False),
        ("signed n2v2r ", n2v2r_DeDi_pre_GOBP_pd, True, True),
        ("Borda n2v2r ", borda_pre_GOBP_pd, False, False),
        ("signed Borda n2v2r ", n2v2r_borda_DeDi_pre_GOBP_pd, False, True),
        ("absDeDi ", absDeDi_pre_GOBP_pd, False, False),
        ("DeDi ", DeDi_pre_GOBP_pd, False, True),
    ),
}

for _library, _tables in _prerank_tables.items():
    title = (f"{network_inf_method} {tissue} {_library} prerank {target} "
             f"padj {padj_cutoff} stab {stability_cutoff}")

    for _prefix, _table, _has_stability, _is_signed in _tables:
        _sidedness = {"one_sided": False} if _is_signed else {}
        plot_gseapy_prerank(
            _table,
            has_stability=_has_stability,
            padj_cutoff=padj_cutoff,
            stability_cutoff=stability_cutoff,
            title=_prefix + title,
            output_dir=save_directory,
            trim_first_num_characters=5,
            **_sidedness,
        )


No results found for n2v2r PANDA LUAD KEGG prerank mVSf padj 0.1 stab 0.5


No results found for Borda n2v2r PANDA LUAD KEGG prerank mVSf padj 0.1 stab 0.5


No results found for absDeDi PANDA LUAD KEGG prerank mVSf padj 0.1 stab 0.5


No results found for Borda n2v2r PANDA LUAD GOBP prerank mVSf padj 0.1 stab 0.5


No results found for absDeDi PANDA LUAD GOBP prerank mVSf padj 0.1 stab 0.5
