# Communicated proteins

In [1]:
import os
from json import dumps
import logging
import pandas as pd
import numpy as np
import copy

import matplotlib.pyplot as plt
from matplotlib import cm

from CoRe.cypher_commands import command_set

import networkx as nx
import json

In [2]:
# --- Run configuration ---
# Pathway being analysed; spaces are replaced by underscores to form the tag
# that appears in the data directory name and in every file name below.
selected_TopLevelPathway = 'ImmunoMetabolism'
pathway_nametag = selected_TopLevelPathway.replace(' ', '_')

# Tags that are embedded in the input/output file names used by later cells.
network_type = 'medium-PPI'
state_type = 'maxEnt'

# NOTE(review): machine-specific absolute path -- adjust before running on
# another machine.
data_directory = "/Users/swarnavo/Research/Reactome-Graph-Database/HumanData/"+pathway_nametag
# Later cells open their files by bare name, so they rely on this being the
# current working directory.
os.chdir(data_directory)

# --- Read SARS source nodes ---
# The context manager closes the file even when JSON parsing fails.
with open('SARS_CoV2-'+pathway_nametag+'_interactions.json') as f:
    SARS_nodes = json.load(f)

print(SARS_nodes.keys())



dict_keys(['SARS-CoV2 Nsp2', 'SARS-CoV2 Nsp5', 'SARS-CoV2 Nsp7', 'SARS-CoV2 Nsp8', 'SARS-CoV2 Nsp9', 'SARS-CoV2 Nsp10', 'SARS-CoV2 Nsp11', 'SARS-CoV2 Nsp12', 'SARS-CoV2 Nsp13', 'SARS-CoV2 Nsp14', 'SARS-CoV2 Nsp15', 'SARS-CoV2 Spike', 'SARS-CoV2 ORF3a', 'SARS-CoV2 E', 'SARS-CoV2 M', 'SARS-CoV2 ORF8', 'SARS-CoV2 ORF9b', 'SARS-CoV2 ORF9c', 'SARS-CoV2 N', 'SARS-CoV2 ORF10'])


# Rebuild SARS_nodes from the network-specific CSV file. Each line has the
# form "<key>,<value>,<value>,...". This REPLACES any SARS_nodes mapping that
# was assigned before this cell ran.
SARS_nodes = {}

# The context manager closes the file even when reading fails.
with open('SARS_CoV2-'+pathway_nametag+'_'+network_type+'_interactions.csv','r') as rf:
    all_lines = rf.readlines()

for l in all_lines:
    ns = l.rstrip('\r\n').split(',')

    # Lines that carry only a key (no further comma-separated field) are skipped.
    if len(ns)>1:
        # First field is the key; all remaining fields become its list.
        SARS_nodes[ns[0]] = ns[1:]

print(SARS_nodes.keys())

In [3]:
# Load the communicated-proteins table; the file name is assembled from the
# state, pathway and network tags set in the configuration cell.
affected_genes_csv = '{}-SARS_CoV2_{}_{}_affected_genes0.0.csv'.format(
    state_type, pathway_nametag, network_type
)
communicated_proteins = pd.read_csv(affected_genes_csv)

# Show the column names, then the number of non-missing 'node_ids' entries.
print(communicated_proteins.columns.tolist())
print(communicated_proteins['node_ids'].count())

['node_ids', 'node_index', 'SARS-CoV2 Nsp2', 'SARS-CoV2 Nsp7', 'SARS-CoV2 Nsp8', 'SARS-CoV2 Nsp9', 'SARS-CoV2 Nsp11', 'SARS-CoV2 Nsp12', 'SARS-CoV2 Nsp13', 'SARS-CoV2 Nsp14', 'SARS-CoV2 ORF3a', 'SARS-CoV2 E', 'SARS-CoV2 M', 'SARS-CoV2 ORF8', 'SARS-CoV2 ORF9b', 'SARS-CoV2 ORF9c', 'SARS-CoV2 N', 'SARS-CoV2 ORF10']
382


In [4]:
# Map every gene name found in 'node_ids' to the list of row positions
# (0-based, in iteration order) at which it occurs.
all_ref_gene_names = {}

for row_pos, gen_name in enumerate(communicated_proteins['node_ids']):
    # setdefault creates the list on first sight of a name, then we append.
    all_ref_gene_names.setdefault(gen_name, []).append(row_pos)

In [5]:
# Condense selected protein list:
# for every gene keep its first recorded row position and mark all later
# positions of the same gene for removal.
first_indices = [positions[0] for positions in all_ref_gene_names.values()]

indices_to_drop = [
    repeat
    for positions in all_ref_gene_names.values()
    for repeat in positions[1:]
]

In [6]:
# Remove the repeated rows so that every gene name appears only once.
# errors='ignore' keeps this cell safe to re-run: on a second execution the
# labels in indices_to_drop are already gone, and the default errors='raise'
# would abort with a KeyError instead of leaving the frame unchanged.
communicated_proteins = communicated_proteins.drop(indices_to_drop, errors='ignore')

# NOTE: from here on all_ref_gene_names is a *list* of gene names in the row
# order of communicated_proteins; it no longer is the name -> positions dict
# built in the earlier cell.
all_ref_gene_names = communicated_proteins['node_ids'].to_list()

In [7]:
# Skip the first two columns of the table; every remaining column name is
# treated as a viral protein.
sars_proteins = communicated_proteins.columns[2:].tolist()
print(sars_proteins)

['SARS-CoV2 Nsp2', 'SARS-CoV2 Nsp7', 'SARS-CoV2 Nsp8', 'SARS-CoV2 Nsp9', 'SARS-CoV2 Nsp11', 'SARS-CoV2 Nsp12', 'SARS-CoV2 Nsp13', 'SARS-CoV2 Nsp14', 'SARS-CoV2 ORF3a', 'SARS-CoV2 E', 'SARS-CoV2 M', 'SARS-CoV2 ORF8', 'SARS-CoV2 ORF9b', 'SARS-CoV2 ORF9c', 'SARS-CoV2 N', 'SARS-CoV2 ORF10']


In [8]:
# For every viral protein collect the genes whose value in that protein's
# column is positive, together with those values. The two dicts hold parallel
# lists: SARS_indirect_nodes[s][j] goes with SARS_indirect_nodes_wts[s][j].
SARS_indirect_nodes = {k: [] for k in SARS_nodes}
SARS_indirect_nodes_wts = {k: [] for k in SARS_nodes}

print(SARS_nodes.keys())

for s in sars_proteins:
    d = communicated_proteins[s].to_numpy()

    # The table columns and the SARS_nodes keys are read from different files,
    # so create the entry when this protein has none yet rather than failing
    # with a KeyError.
    genes = SARS_indirect_nodes.setdefault(s, [])
    weights = SARS_indirect_nodes_wts.setdefault(s, [])

    # Set mirror of `genes`: constant-time duplicate check instead of
    # scanning the growing list for every row.
    seen = set(genes)

    for i, wt in enumerate(d):
        if wt > 0.0:
            gene = all_ref_gene_names[i]
            if gene not in seen:
                seen.add(gene)
                genes.append(gene)
                weights.append(wt)

dict_keys(['SARS-CoV2 Nsp2', 'SARS-CoV2 Nsp5', 'SARS-CoV2 Nsp7', 'SARS-CoV2 Nsp8', 'SARS-CoV2 Nsp9', 'SARS-CoV2 Nsp10', 'SARS-CoV2 Nsp11', 'SARS-CoV2 Nsp12', 'SARS-CoV2 Nsp13', 'SARS-CoV2 Nsp14', 'SARS-CoV2 Nsp15', 'SARS-CoV2 Spike', 'SARS-CoV2 ORF3a', 'SARS-CoV2 E', 'SARS-CoV2 M', 'SARS-CoV2 ORF8', 'SARS-CoV2 ORF9b', 'SARS-CoV2 ORF9c', 'SARS-CoV2 N', 'SARS-CoV2 ORF10'])


In [9]:
# Start from a deep copy of the source-node lists so that extending them
# below does not modify SARS_nodes itself.
SARS_affected_refgenes = copy.deepcopy(SARS_nodes)

for s in sars_proteins:
    d = communicated_proteins[s].to_numpy()

    # The table columns and the SARS_nodes keys are read from different files,
    # so create the entry when this protein has none yet rather than failing
    # with a KeyError.
    genes = SARS_affected_refgenes.setdefault(s, [])

    # Set mirror of `genes` (which may already hold the copied source nodes):
    # constant-time duplicate check instead of scanning the list for every row.
    seen = set(genes)

    for i, wt in enumerate(d):
        if wt > 0.0:
            gene = all_ref_gene_names[i]
            if gene not in seen:
                seen.add(gene)
                genes.append(gene)

    # Report the size of the combined list for this viral protein.
    print(s,len(genes))

json_obj = json.dumps(SARS_affected_refgenes)

# Write the JSON text; the context manager closes the file even when the
# write fails.
with open(state_type+'-SARS_CoV2_total_'+pathway_nametag+'_'+network_type+'_interactions.json','w') as f:
    f.write(json_obj)

SARS-CoV2 Nsp2 3
SARS-CoV2 Nsp7 79
SARS-CoV2 Nsp8 7
SARS-CoV2 Nsp9 32
SARS-CoV2 Nsp11 5
SARS-CoV2 Nsp12 118
SARS-CoV2 Nsp13 10
SARS-CoV2 Nsp14 7
SARS-CoV2 ORF3a 2
SARS-CoV2 E 49
SARS-CoV2 M 32
SARS-CoV2 ORF8 144
SARS-CoV2 ORF9b 3
SARS-CoV2 ORF9c 5
SARS-CoV2 N 16
SARS-CoV2 ORF10 4


# Write one line per viral protein: "<protein>,<gene>,<gene>,...".
# NOTE(review): unlike the JSON file written for the same mapping, this file
# name carries no state_type prefix -- confirm that is intended.
# The context manager closes the file even when a write fails.
with open('SARS_CoV2_total_'+pathway_nametag+'_'+network_type+'_interactions.csv','w') as wf:
    for s, genes in SARS_affected_refgenes.items():
        # Join once instead of growing the line by repeated concatenation.
        print(','.join([s] + list(genes)), file=wf)

In [10]:
# Print each viral protein that has at least one collected gene, followed by
# its genes. Every gene is prefixed with ', ', so the text after the protein
# name starts with a comma (e.g. "name , GENE1, GENE2").
for s, genes in SARS_indirect_nodes.items():
    if genes:
        print(s, ''.join(', ' + g for g in genes))

SARS-CoV2 Nsp2 , EIF4EBP1
SARS-CoV2 Nsp7 , LAT, FGA, STOM, MCEMP1, CD55, CTSZ, SLPI, RAB4B, SERPINA1, CTSC, VAPA, HMOX2, CEACAM3, SEC22B, NDUFS4, NDUFA12, NDUFV3, NDUFS6, NDUFB6, NDUFAF6, NDUFAF5, NDUFAF4, NDUFAF3, NDUFS5, NDUFAF7, HILPDA, HSD17B13, LMAN1L, LMAN2L, CNIH1, AREG, TGFA, CNIH3, GRIA1, ANK3, ANK1, ANK2, ARFGAP2, ARFGAP3, KDELR1, KDELR2, KDELR3, TMEM115, GNG2, GNG12, GNG4, GNG11, GNG10, GNG3, GNG13, GNG7, GNGT1, GNGT2, GNG8, PDCL, GNB4, GNB2, GNB3, GNB5, RNF128, GNA14, GNAQ, GNA15, GNA11, EXOC6, EXOC5
SARS-CoV2 Nsp8 , CASP9, MYLIP, TRIB3, BIRC2, SNW1
SARS-CoV2 Nsp9 , FBXO17, CCNE1, CCNE2, WRAP53, FBXW9, LONP2, FBXL3, SKIV2L, FBXL5, GBA, KIF13A, NOP56, FKBP9, FBXW10, FBXW7, FBXW4, GAPDHS, FBXO4, FBXO6, USP11, FBXW2, ARFGEF2, EIF1AX, EIF5, EIF4B, TNFSF13, XPO1, SET, ANP32A
SARS-CoV2 Nsp11 , TBCB, TBCE, TBCC, TBCD
SARS-CoV2 Nsp12 , TICAM1, IKBKG, RIPK3, FADD, CASP8, TRAF3, SARM1, ECSIT, DHX9, PCBP2, CASP10, DOCK2, PPIE, KRT1, CMTM6, CASP4, CASP2, LONRF1, HNRNPF, NPDC1, PLEKHA4,