In [1]:
# --- standard library ---
import copy 
import multiprocessing
import os
import sys
import warnings

# Silence every warning; done before the third-party imports below so that
# import-time warnings are suppressed as well.
warnings.filterwarnings('ignore')

# Number of CPU cores; passed as the thread count to prerank GSEA further down.
n_cores = multiprocessing.cpu_count()

# Put the parent directory at index 1 of sys.path
# (presumably so the local node2vec2rank package can be imported — confirm).
sys.path.insert(1, os.path.realpath(os.path.pardir))

# --- third-party ---
import numpy as np
import pandas as pd

In [2]:
## Load the data

# Paths of the two networks to compare: control first, ASD second
# (grns[0] is treated as the control network further down).
grn_filenames = (
    "../data/networks/locscn/avg_csn_ctl.csv",
    "../data/networks/locscn/avg_csn_asd.csv",
)

# Labels used below to build the result folder and file names.
tissue = 'brain'
target = 'ctrVSasd'
network_inf_method = 'LOCSCN'
gsea_parent_folder_name = 'LOCSCN_ASD_UASE_n2v2r'
save_results_notes = 'bin_75'

# Do the data have an index column and a header row?
# For .csv files the separator is most likely ","; for .tsv files most likely "\t".
index_col = 0
header = 0
sep = ','

# Read every gene regulatory network into a pandas DataFrame, keeping file order.
read_options = dict(index_col=index_col, header=header, sep=sep)
grns = [pd.read_csv(grn_filename, **read_options) for grn_filename in grn_filenames]

In [3]:
## Network transformation and preparation for n2v2r.
## Be mindful of the transformations and their consequences, e.g. n2v2r does not accept negative weights.

from node2vec2rank.utils import network_transform

# Gene labels are taken from the first (control) network only.
row_genes = grns[0].index.to_numpy()
col_genes = grns[0].columns.to_numpy()

# Sorted, de-duplicated union of the row and column genes.
total_genes = np.union1d(row_genes, col_genes)

num_rows = row_genes.size
num_cols = col_genes.size
num_total = total_genes.size

print(f"There are {num_rows} row genes, {num_cols} column genes, and {num_total} unique genes in control graph")

## Parameters for the network transformation

# make the network binary
binarize_network = True

# everything below the threshold will become zero
threshold = 0

# only keeps the top % of edge weights
top_percent_keep = 75

# Apply the same transformation to every network, preserving their order.
transform_options = dict(
    binarize=binarize_network,
    threshold=threshold,
    top_percent_keep=top_percent_keep,
)
grns_transformed = [network_transform(grn.to_numpy(), **transform_options) for grn in grns]

There are 942 row genes, 942 column genes, and 942 unique genes in control graph


In [4]:
## Compute DeDi: per-gene difference of the column sums of the two networks.
## This uses the raw networks in `grns`, not the transformed ones.

control_net_adj = grns[0].copy()
case_net_adj = grns[1].copy()

# Column sums (axis=0 sums over the rows) of each adjacency matrix.
net_one_adj_indegree = control_net_adj.sum(axis=0).to_numpy()
net_two_adj_indegree = case_net_adj.sum(axis=0).to_numpy()
DeDi_genes = control_net_adj.columns.tolist()

# Signed difference (control minus case) and its magnitude.
DeDi = np.subtract(net_one_adj_indegree, net_two_adj_indegree)
absDeDi = np.absolute(DeDi)

# One row per gene, ordered by decreasing absolute difference.
# Column order matters: "DeDi" must stay at position 1 for positional lookups.
DeDi_data_dict = dict(genes=DeDi_genes, DeDi=DeDi, absDeDi=absDeDi)
DeDi_data_pd = pd.DataFrame(DeDi_data_dict, index=DeDi_genes).sort_values(
    by='absDeDi', ascending=False
)

In [11]:
from node2vec2rank.model import n2v2r

import json

from scipy.sparse import csc_matrix


# Read the config file; the context manager closes the file handle once parsed.
with open('../configs/config.json', 'r') as config_file:
    config = json.load(config_file)

# Flatten {section: {param: value}} into a single {param: value} mapping.
# If two sections define the same parameter name, the later section wins.
config = {param: value for section, params in config.items()
          for param, value in params.items()}

# Array of gene names handed to n2v2r as node labels.
# NOTE(review): total_genes is sorted (np.unique), whereas the adjacency matrices
# are built from grn.to_numpy() in file order — confirm the input files are
# already sorted by gene name, otherwise labels and rows may not line up.
node_names = total_genes

# Sparse (CSC) versions of the transformed control and case networks.
graphs = [
    csc_matrix(grns_transformed[0]),
    csc_matrix(grns_transformed[1]),
]

model = n2v2r(graphs=graphs, config=config, node_names=node_names)
rankings = model.fit_transform_rank()

borda_rankings = model.aggregate_transform()

# Sign the rankings with the signed DeDi values (selected by name, not position).
signed_rankings = model.signed_transform(DeDi_data_pd['DeDi'])


Running n2v2r with dimensions [2, 4, 6, 8, 10, 12, 14, 16] and distance metrics ['euclidean', 'cosine', 'cityblock', 'correlation', 'chebyshev'] ...
    n2v2r took 0.95 seconds

Rank aggregation with Borda ...
    Finished aggregation in 0.45 seconds

Signed transformation ...
    Finished signed transformation in 0.35 seconds


In [12]:
# Keep entry 0 of every n2v2r output for the downstream enrichment analyses:
# per-combination ranking, Borda aggregate, signed ranking, signed Borda aggregate.
(n2v2r_ranking_pd,
 n2v2r_borda_ranking_pd,
 n2v2r_DeDi_ranking_pd,
 n2v2r_borda_DeDi_ranking_pd) = (
    rankings[0],
    borda_rankings[0],
    signed_rankings[0],
    model.aggregate_signed_ranks_sequence[0],
)

In [13]:
from node2vec2rank.visualization_utils import dim_reduction, plot_embeddings

algorithm = 'pca'
n_components = 2

# Node embeddings of the two graphs, plus both stacked row-wise.
first_embeddings = model.node_embeddings[0]
second_embeddings = model.node_embeddings[1]
concat_embeddings = np.concatenate((first_embeddings, second_embeddings), axis=0)

# Reduce the first six embedding columns of each matrix to n_components dimensions.
reduction_options = dict(algorithm=algorithm, n_components=n_components)
first_embeddings_red = dim_reduction(first_embeddings[:, :6], **reduction_options)
second_embeddings_red = dim_reduction(second_embeddings[:, :6], **reduction_options)
concat_embeddings_red = dim_reduction(concat_embeddings[:, :6], **reduction_options)

# One plot per graph, coloured by the Borda rank looked up in total_genes order.
for embeddings_red in (first_embeddings_red, second_embeddings_red):
    plot_embeddings(embeddings_red,
                    color_type='numeric',
                    color=n2v2r_borda_ranking_pd.loc[total_genes, 'borda_ranks'],
                    names=node_names)

# Joint plot: colour 0 marks rows of the first graph, colour 1 rows of the second.
num_nodes = first_embeddings_red.shape[0]
color_one = np.zeros(num_nodes)
color_two = np.ones(num_nodes)
color_concat = np.concatenate((color_one, color_two))

plot_embeddings(concat_embeddings_red,
                color=color_concat,
                names=np.concatenate((node_names, node_names)))

In [14]:
# run enrich GSEA
from node2vec2rank.post_utils import enrich_gsea, read_gmt
from itertools import chain
import os

save_results = True

# read the geneset libraries
kegg_pathway_fn = '../data/gene_set_libraries/human/c2.cp.kegg.v7.5.1.symbols.gmt'
gobp_pathway_fn = '../data/gene_set_libraries/human/c5.go.bp.v7.5.1.symbols.gmt'

# network_background or pathway_background for enrichment
# network will use the genes in the network only, while pathway will use all the genes in the pathways
# network is "more fair" but will find less things in small networks
background = 'pathway_background'
organism = 'human'

enrich_padj_cutoff = 0.1
# take the top k percentage of the ranking for enrichment
top_k_percent = 5
# Quantile cutoff shared by ALL enrichment calls below (0.95 for top_k_percent = 5).
# Previously the n2v2r GO-BP call alone passed top_k_percent/100 (0.05), which made
# it inconsistent with the other five analyses.
enrich_quantile_cutoff = 1 - top_k_percent / 100

# Build the gene universe for each library according to the chosen background.
if background == 'network_background':
    kegg_background = n2v2r_ranking_pd.index.to_list()
    gobp_background = n2v2r_ranking_pd.index.to_list()
elif background == 'pathway_background':
    # Union of all genes appearing in any gene set of the library.
    kegg_dict = read_gmt(kegg_pathway_fn)
    kegg_background = list(set(chain.from_iterable(kegg_dict.values())))
    gobp_dict = read_gmt(gobp_pathway_fn)
    gobp_background = list(set(chain.from_iterable(gobp_dict.values())))
else:
    raise ValueError("Enrichment background not properly set")

# Options that are identical for every enrichment run.
enrich_options = dict(
    enrich_padj_cutoff=enrich_padj_cutoff,
    enrich_quantile_cutoff=enrich_quantile_cutoff,
    organism=organism,
)

n2v2r_enr_KEGG_pd = enrich_gsea(n2v2r_ranking_pd, kegg_pathway_fn,
                                background=kegg_background, **enrich_options)

n2v2r_enr_GOBP_pd = enrich_gsea(n2v2r_ranking_pd, gobp_pathway_fn,
                                background=gobp_background, **enrich_options)

borda_enr_KEGG_pd = enrich_gsea(n2v2r_borda_ranking_pd, kegg_pathway_fn,
                                background=kegg_background, **enrich_options)

borda_enr_GOBP_pd = enrich_gsea(n2v2r_borda_ranking_pd, gobp_pathway_fn,
                                background=gobp_background, **enrich_options)

absDeDi_enr_KEGG_pd = enrich_gsea(DeDi_data_pd[['absDeDi']], kegg_pathway_fn,
                                  background=kegg_background, **enrich_options)

absDeDi_enr_GOBP_pd = enrich_gsea(DeDi_data_pd[['absDeDi']], gobp_pathway_fn,
                                  background=gobp_background, **enrich_options)

if save_results:
    path = '../results/results_gsea/' + gsea_parent_folder_name
    # Create the output folder if needed; no error when it already exists.
    os.makedirs(path, exist_ok=True)

    # Every file name is <prefix><tag><suffix>; only the tag differs per result.
    file_prefix = path + "/" + tissue + "_" + network_inf_method + "_" + target
    file_suffix = "_" + background + "_top" + str(top_k_percent) + "_" + save_results_notes + ".tsv"

    enrichment_results = {
        "_n2v2r_consensus_enr_KEGG": n2v2r_enr_KEGG_pd,
        "_n2v2r_consensus_enr_GOBP": n2v2r_enr_GOBP_pd,
        "_n2v2r_borda_enr_KEGG": borda_enr_KEGG_pd,
        "_n2v2r_borda_enr_GOBP": borda_enr_GOBP_pd,
        "_absDeDi_enr_KEGG": absDeDi_enr_KEGG_pd,
        "_absDeDi_enr_GOBP": absDeDi_enr_GOBP_pd,
    }
    for file_tag, result_pd in enrichment_results.items():
        result_pd.to_csv(file_prefix + file_tag + file_suffix,
                         header=True, index=None, sep='\t')


combo: dim-2_distance-cosine with 11 found
combo: dim-2_distance-correlation with 11 found
combo: dim-4_distance-cosine with 11 found
combo: dim-4_distance-correlation with 11 found
combo: dim-6_distance-euclidean with 1 found
combo: dim-6_distance-cosine with 6 found
combo: dim-6_distance-correlation with 6 found
combo: dim-8_distance-euclidean with 2 found
combo: dim-8_distance-cosine with 15 found
combo: dim-8_distance-cityblock with 2 found
combo: dim-8_distance-correlation with 15 found
combo: dim-10_distance-euclidean with 1 found
combo: dim-10_distance-cosine with 1 found
combo: dim-10_distance-cityblock with 1 found
combo: dim-10_distance-correlation with 1 found
combo: dim-12_distance-euclidean with 2 found
combo: dim-12_distance-cosine with 1 found
combo: dim-12_distance-cityblock with 2 found
combo: dim-12_distance-correlation with 1 found
combo: dim-12_distance-chebyshev with 1 found
combo: dim-14_distance-euclidean with 2 found
combo: dim-14_distance-cosine with 19 found
c

In [15]:
## run prerank GSEA
from node2vec2rank.post_utils import prerank_gsea

save_results = True

# read the geneset libraries
kegg_pathway_fn = '../data/gene_set_libraries/human/c2.cp.kegg.v7.5.1.symbols.gmt'
gobp_pathway_fn = '../data/gene_set_libraries/human/c5.go.bp.v7.5.1.symbols.gmt'

prerank_padj_cutoff = 0.25
prerank_weight = 0
prerank_min_path_size = 5
prerank_max_path_size = 1500
prerank_num_perms = 1000

# Keyword arguments shared by every prerank call in this cell.
prerank_options = dict(
    prerank_padj_cutoff=prerank_padj_cutoff,
    prerank_weight=prerank_weight,
    prerank_min_path_size=prerank_min_path_size,
    prerank_max_path_size=prerank_max_path_size,
    prerank_num_perms=prerank_num_perms,
    num_threads=n_cores,
)

# Unsigned rankings: called with the default one_sided setting.
n2v2r_pre_KEGG_pd = prerank_gsea(n2v2r_ranking_pd, kegg_pathway_fn, **prerank_options)
n2v2r_pre_GOBP_pd = prerank_gsea(n2v2r_ranking_pd, gobp_pathway_fn, **prerank_options)

borda_pre_KEGG_pd = prerank_gsea(n2v2r_borda_ranking_pd, kegg_pathway_fn, **prerank_options)
borda_pre_GOBP_pd = prerank_gsea(n2v2r_borda_ranking_pd, gobp_pathway_fn, **prerank_options)

absDeDi_pre_KEGG_pd = prerank_gsea(DeDi_data_pd[['absDeDi']], kegg_pathway_fn, **prerank_options)
absDeDi_pre_GOBP_pd = prerank_gsea(DeDi_data_pd[['absDeDi']], gobp_pathway_fn, **prerank_options)

# Signed rankings: called with one_sided=False.
DeDi_pre_KEGG_pd = prerank_gsea(DeDi_data_pd[['DeDi']], kegg_pathway_fn,
                                one_sided=False, **prerank_options)
DeDi_pre_GOBP_pd = prerank_gsea(DeDi_data_pd[['DeDi']], gobp_pathway_fn,
                                one_sided=False, **prerank_options)

n2v2r_borda_DeDi_pre_KEGG_pd = prerank_gsea(n2v2r_borda_DeDi_ranking_pd, kegg_pathway_fn,
                                            one_sided=False, **prerank_options)
n2v2r_borda_DeDi_pre_GOBP_pd = prerank_gsea(n2v2r_borda_DeDi_ranking_pd, gobp_pathway_fn,
                                            one_sided=False, **prerank_options)

n2v2r_DeDi_pre_KEGG_pd = prerank_gsea(n2v2r_DeDi_ranking_pd, kegg_pathway_fn,
                                      one_sided=False, **prerank_options)
n2v2r_DeDi_pre_GOBP_pd = prerank_gsea(n2v2r_DeDi_ranking_pd, gobp_pathway_fn,
                                      one_sided=False, **prerank_options)

if save_results:
    path = '../results/results_gsea/' + gsea_parent_folder_name
    isExist = os.path.exists(path)
    if not isExist:
        os.makedirs(path)

    # Every file name is <stem><tag><ending>; only the tag differs per result.
    file_stem = path + "/" + tissue + "_" + network_inf_method + "_" + target
    file_ending = "_" + save_results_notes + ".tsv"

    # (result, tag) pairs in the order they are written.
    # The signed n2v2r ranking is stored under the "chimera" tag.
    prerank_outputs = [
        (n2v2r_pre_KEGG_pd, "_n2v2r_consensus_prerank_KEGG"),
        (n2v2r_pre_GOBP_pd, "_n2v2r_consensus_prerank_GOBP"),
        (borda_pre_KEGG_pd, "_n2v2r_borda_prerank_KEGG"),
        (borda_pre_GOBP_pd, "_n2v2r_borda_prerank_GOBP"),
        (absDeDi_pre_KEGG_pd, "_absDeDi_prerank_KEGG"),
        (absDeDi_pre_GOBP_pd, "_absDeDi_prerank_GOBP"),
        (DeDi_pre_KEGG_pd, "_DeDi_prerank_KEGG"),
        (DeDi_pre_GOBP_pd, "_DeDi_prerank_GOBP"),
        (n2v2r_borda_DeDi_pre_KEGG_pd, "_n2v2r_borda_DeDi_prerank_KEGG"),
        (n2v2r_borda_DeDi_pre_GOBP_pd, "_n2v2r_borda_DeDi_prerank_GOBP"),
        (n2v2r_DeDi_pre_KEGG_pd, "_n2v2r_chimera_prerank_KEGG"),
        (n2v2r_DeDi_pre_GOBP_pd, "_n2v2r_chimera_prerank_GOBP"),
    ]
    for result_pd, file_tag in prerank_outputs:
        result_pd.to_csv(file_stem + file_tag + file_ending,
                         header=True, index=None, sep='\t')


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-2_distance-cityblock with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of tho

combo: dim-10_distance-chebyshev with 2 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-12_distance-chebyshev with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-14_distance-chebyshev with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-16_distance-chebyshev with 2 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-4_distance-cosine with 13 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-4_distance-cityblock with 2 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-4_distance-correlation with 13 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of tho

combo: DeDi with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: DeDi with 29 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: signed_agg_ranks with 3 found
combo: dim-2_distance-euclidean with 1 found
combo: dim-2_distance-cosine with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-2_distance-cityblock with 1 found
combo: dim-2_distance-correlation with 1 found
combo: dim-2_distance-chebyshev with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-6_distance-cosine with 1 found
combo: dim-6_distance-correlation with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-8_distance-cosine with 2 found
combo: dim-8_distance-correlation with 2 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-10_distance-euclidean with 1 found
combo: dim-10_distance-cosine with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-10_distance-cityblock with 2 found
combo: dim-10_distance-correlation with 1 found
combo: dim-10_distance-chebyshev with 2 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-12_distance-euclidean with 2 found
combo: dim-12_distance-cosine with 1 found
combo: dim-12_distance-cityblock with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-12_distance-correlation with 1 found
combo: dim-12_distance-chebyshev with 3 found
combo: dim-14_distance-euclidean with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-14_distance-cityblock with 1 found
combo: dim-14_distance-chebyshev with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-16_distance-cosine with 1 found
combo: dim-16_distance-correlation with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-2_distance-euclidean with 5 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-2_distance-cosine with 6 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-2_distance-cityblock with 7 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-2_distance-correlation with 6 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-2_distance-chebyshev with 4 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-4_distance-euclidean with 3 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-4_distance-cosine with 12 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-4_distance-cityblock with 3 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-4_distance-correlation with 12 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-4_distance-chebyshev with 2 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-6_distance-euclidean with 2 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-6_distance-cosine with 6 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-6_distance-cityblock with 2 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-6_distance-correlation with 6 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-6_distance-chebyshev with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-8_distance-euclidean with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-8_distance-cosine with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-8_distance-correlation with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.
The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-10_distance-cosine with 2 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-10_distance-cityblock with 4 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-10_distance-correlation with 2 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-10_distance-chebyshev with 4 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-12_distance-euclidean with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-12_distance-cosine with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-12_distance-cityblock with 2 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-12_distance-correlation with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-12_distance-chebyshev with 3 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-14_distance-euclidean with 2 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-14_distance-cosine with 4 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-14_distance-cityblock with 2 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-14_distance-correlation with 4 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-14_distance-chebyshev with 3 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-16_distance-euclidean with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-16_distance-cosine with 5 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-16_distance-cityblock with 1 found


The order of those genes will be arbitrary, which may produce unexpected results.


combo: dim-16_distance-correlation with 5 found
combo: dim-16_distance-chebyshev with 3 found


In [None]:
from scipy.stats import kendalltau
from sklearn.preprocessing import LabelEncoder

# Number of top-scoring genes of each ranking column that enter the comparison.
top_n = 100
# A pair of columns is recorded when its Kendall tau statistic exceeds this value.
high_cor_threshold = 0.98

num_rankings = n2v2r_ranking_pd.shape[1]

# For every ranking column: the names of its top_n genes (highest score first),
# integer-encoded so that kendalltau receives numeric input.
# Computed once per column instead of once per column pair.
# NOTE(review): each list is encoded independently and the codes follow the
# sorted gene names (assumed from LabelEncoder's behaviour), so tau here compares
# the name order at matching rank positions — confirm this is the intended
# agreement measure.
top_genes_encoded = []
for i in range(num_rankings):
    top_genes = (n2v2r_ranking_pd.iloc[:, i]
                 .sort_values(ascending=False)
                 .index.to_list()[:top_n])
    top_genes_encoded.append(LabelEncoder().fit_transform(top_genes))

# NOTE: despite its name, `pvals` collects the tau statistics, not the p-values.
# The name is kept because the following cell plots this list.
pvals = []
# Index pairs (i, j), i < j, of ranking columns with tau above the threshold.
pairs_high_cor = []

for i in range(num_rankings):
    for j in range(i + 1, num_rankings):
        stat, pval = kendalltau(top_genes_encoded[i], top_genes_encoded[j])
        pvals.append(stat)
        # Threshold the correlation itself: a large p-value would indicate the
        # absence of correlation, the opposite of what this list is meant to hold.
        if stat > high_cor_threshold:
            pairs_high_cor.append((i, j))

In [None]:
import matplotlib.pyplot as plt

# `pvals` holds the Kendall tau statistic of every pair of ranking columns
# (not p-values), so the axes are labelled accordingly.
fig, ax = plt.subplots()
ax.hist(pvals, bins=100)
ax.set(xlabel="Kendall tau", ylabel="Number of ranking pairs",
       title="Pairwise Kendall tau between n2v2r rankings");

In [None]:
# kegg_one = enrich_gsea(pd.DataFrame(n2v2r_ranking_pd.iloc[:,37],index=n2v2r_ranking_pd.index), kegg_pathway_fn, background=kegg_background, enrich_padj_cutoff=enrich_padj_cutoff, enrich_quantile_cutoff=enrich_quantile_cutoff, organism=organism)
# kegg_two = enrich_gsea(pd.DataFrame(n2v2r_ranking_pd.iloc[:,82],index=n2v2r_ranking_pd.index), kegg_pathway_fn, background=kegg_background, enrich_padj_cutoff=enrich_padj_cutoff, enrich_quantile_cutoff=enrich_quantile_cutoff, organism=organism)


In [None]:
# one_col = n2v2r_ranking_pd.iloc[:,37].copy()
# one_col.sort_values(ascending=False, inplace=True)
# one_col_index = one_col.index.to_list()
# one_col_le = le.fit_transform(one_col_index)
# one_col_le


# two_col = n2v2r_ranking_pd.iloc[:,82].copy()
# two_col.sort_values(ascending=False, inplace=True)
# two_col_index = two_col.index.to_list()
# two_col_le = le.fit_transform(two_col_index)

# kendalltau(one_col_le, two_col_le)