In [1]:
import numpy as np
import pandas as pd
from utils import *
import warnings
warnings.filterwarnings('ignore')

def get_ppi(ppi_path='input_data/COMBINED.DEFAULT_NETWORKS.BP_COMBINING.txt',
            mapping_path='input_data/ENST-ENSG-HGNC-GO-kegg-map.csv'):
    """Load the gene interaction graph (GeneMania) keyed by HGNC symbols.

    Parameters
    ----------
    ppi_path : str
        Tab-delimited interaction table. The columns ``Gene_A``, ``Gene_B``
        (Ensembl gene ids) and ``Weight`` are used.
    mapping_path : str
        CSV file with the columns ``ensembl_gene_id`` and ``hgnc_symbol``.

    Returns
    -------
    pandas.DataFrame
        The interaction table with ``Gene_A`` / ``Gene_B`` translated to HGNC
        symbols. Rows whose endpoints cannot be translated, or whose
        ``Weight`` is missing, are dropped. The original row index is kept.
    """
    ppi = pd.read_csv(ppi_path, delimiter='\t')

    # Ensembl gene id -> HGNC symbol lookup table.
    mapping = pd.read_csv(mapping_path)
    ensg_2_hgnc = dict(zip(mapping['ensembl_gene_id'], mapping['hgnc_symbol']))

    # dict.get yields None for ids that are absent from the mapping, so those
    # rows are removed by the dropna below.
    for col in ('Gene_A', 'Gene_B'):
        ppi[col] = ppi[col].map(ensg_2_hgnc.get)

    return ppi.dropna(subset=['Gene_A', 'Gene_B', 'Weight'])
def to_A(row, couples=None):
    """Return the known interactions whose two endpoints are both set in ``row``.

    Parameters
    ----------
    row : pandas.Series
        One sample; a truthy entry at position ``i`` marks gene ``i`` as active.
    couples : collection of (int, int), optional
        Allowed directed index pairs. When omitted, the module-level
        ``couple_set`` is used, so ``DataFrame.apply(to_A, axis=1)`` keeps
        working unchanged.

    Returns
    -------
    list of (int, int)
        Sorted ordered pairs ``(i, j)`` with ``i != j`` where both positions
        are active in ``row`` and the pair is present in ``couples``.
    """
    if couples is None:
        couples = couple_set  # resolved at call time from module globals

    # The outer product of the column vector with itself is non-zero exactly
    # where both endpoints are active.
    flags = row.values.reshape((-1, 1))
    both_on = np.multiply(flags, flags.T)
    src, dst = np.nonzero(both_on)

    # Drop the diagonal (a gene paired with itself).
    off_diag = src != dst
    edges = set(zip(src[off_diag].tolist(), dst[off_diag].tolist()))

    # Sorted so the result does not depend on set iteration order.
    return sorted(edges.intersection(couples))

# create PPI graph
# Genes of interest: one symbol per line, blank lines ignored.
with open('output_data/top_genes.txt') as fh:
    top_genes = [line.strip() for line in fh if line.strip()]

ppi = get_ppi()
ppi_gene = set(ppi['Gene_A']) | set(ppi['Gene_B'])
# Sorted so the gene -> index assignment (and therefore the node order of the
# exported features) is the same on every run; plain set order depends on
# string hashing.
common_gene = sorted(set(top_genes) & ppi_gene)
# Keep only interactions whose two endpoints are both genes of interest.
# .copy() so the column assignments below do not write into a slice view.
ppi = ppi[ppi['Gene_A'].isin(common_gene) & ppi['Gene_B'].isin(common_gene)].copy()

# create a dictionary of genes and corresponding index value
common_gene_idx = {gene: i for i, gene in enumerate(common_gene)}
ppi['Gene_A_idx'] = ppi['Gene_A'].map(common_gene_idx)
ppi['Gene_B_idx'] = ppi['Gene_B'].map(common_gene_idx)

# create couples of vertices and weights
ppi['couple'] = list(zip(ppi['Gene_A_idx'], ppi['Gene_B_idx']))
couple_w = dict(zip(ppi['couple'], ppi['Weight']))
couple_set = set(couple_w)

UP, DOWN = .725, .275 #adjust so that the number of edges is similar to those in PAN & LIONESS graphs
df = pd.read_csv('output_data/gene_condition.csv')
# Percentile rank of every sample within each gene column.
# NOTE(review): to_A works on column positions; these match common_gene_idx
# only while rank(numeric_only=True) keeps every column of df[common_gene].
X = df[common_gene].rank(pct=True, numeric_only=True)

# Per sample: PPI edges among the genes ranked high, and among those ranked low.
Xup = (X >= UP)
Xup['A'] = Xup.apply(to_A, axis=1)
Xdown = (X <= DOWN)
Xdown['A'] = Xdown.apply(to_A, axis=1)
# Element-wise list concatenation: the high and low edge lists of each sample.
A = pd.DataFrame(Xup['A'] + Xdown['A'])

# Collect every gene index that takes part in at least one edge of any sample.
nodes = set()
for couples in A['A']:
    for edge in couples:
        nodes.update(edge)
names = sorted(nodes)
print('len(names):',len(names))
name_idx = {node: i for i, node in enumerate(names)}
NUM_NODE = len(name_idx)

# One symmetric 0/1 adjacency matrix per sample.
G = []
for couples in A['A']:
    m = np.zeros((NUM_NODE,NUM_NODE))
    for a, b in couples:
        l, r = name_idx[a], name_idx[b]
        m[l, r] = 1
        m[r, l] = 1
    G.append(m)

# Each undirected edge contributes two entries to a matrix sum.
ls = [m.sum() for m in G]
print('number of edges:', int(np.median(ls)/2))
# NOTE: min and max are raw matrix sums (twice the edge count), whereas mean
# and median are halved.
print(int(min(ls)),int(max(ls)),int(np.mean(ls)/2),int(np.median(ls)/2))
G = np.array(G)

# get features for this adjacency graph
# (get_closeness_centrality comes from utils; presumably one feature row per
# graph -- confirm there)
feats = get_closeness_centrality(G)
# store the features in 'ppi_graph_feature.csv', one comma-separated row per entry
with open('output_data/ppi_graph_feature.csv', 'w') as f:
    for e in feats:
        f.write(','.join(map(str,e)) + '\n')

print('Done creating PPI graph!')

len(names): 96
number of edges: 204
100 1172 216 204
Done creating PPI graph!
