# Artificial protein expression screening

**Computes the change in network relative entropy from viral PPIs after setting immune system proteins, one at a time, to high abundance**

In [1]:
import os, sys
import numpy as np
import scipy as sp
import pandas as pd
import copy as copy
from tqdm.notebook import tqdm
import math
import scipy.stats as st

from CoRe import reader
from CoRe.ncip import ncip
from CoRe.BA_C import BA

import importlib

import networkx as nx
import matplotlib.pyplot as plt
import json

In [2]:
# Work from the folder that holds the SARS-CoV-2 protein tables.
data_directory = "/Users/swarnavo/Research/Reactome-Graph-Database/HumanData/SARS-Cov2"
os.chdir(data_directory)

# SARS-CoV-2 protein names, taken from the 'Bait' column of the CSV.
bait_table = pd.read_csv('SARS2_proteins_names.csv')
sars_pnames = bait_table['Bait'].to_list()

In [3]:
# Pathway and network variant to analyse. The name tag is the pathway name
# with spaces turned into underscores so it can be used in file names.
selected_pathway = 'Immunometabolism'
network_type = 'medium-PPI'
pathway_nametag = '_'.join(selected_pathway.split(' '))

# Switch to the per-pathway data folder; later cells read and write relative to it.
data_directory = "/Users/swarnavo/Research/Reactome-Graph-Database/HumanData/" + pathway_nametag
os.chdir(data_directory)

# Edge and node tables share the same '<pathway>_<network type>' file prefix.
table_prefix = pathway_nametag + '_' + network_type
edge_data = pd.read_pickle(table_prefix + '-edges.pkl')
node_data = pd.read_pickle(table_prefix + '-nodes.pkl')

In [4]:
# Set to True to rebuild the graph from the edge/node tables instead of
# loading the previously saved .gml file.
remake_graph = False

netObj = ncip()

if remake_graph:
    # Build the graph from the tables and save it via save_network.
    netObj.load_data(edge_data,node_data)
    netObj.make_graph()
    netObj.save_network(pathway_nametag,network_type)
else:
    netObj.load_graph(pathway_nametag+"-"+network_type+".gml")

In [None]:
**All immune system communication network proteins that have PPI with SARS-CoV-2 proteins**

In [5]:
# Read the viral-protein -> network-node interaction table. The context manager
# closes the file even if json.load raises on malformed content.
with open('SARS_CoV2-'+pathway_nametag+'_interactions.json') as f:
    SARS_nodes = json.load(f)

# Flatten the per-key lists into one list of unique node names.
# Order follows set iteration order and is therefore not stable across runs.
all_sars_nodes = list({n for targets in SARS_nodes.values() for n in targets})

print(all_sars_nodes)

['RAB14', 'SLU7', 'NPC2', 'ADAMTS1', 'GGH', 'RAB10', 'CYB5R3', 'EIF4H', 'NUP214', 'PTGES2', 'GOLGA7', 'NEU1', 'TOMM70', 'HDAC2', 'PLEKHA5', 'NLRX1', 'ACADM', 'STOM', 'IMPDH2', 'RALA', 'RBM28', 'ECSIT', 'RNF41', 'HECTD1', 'GNB1', 'GLA', 'DNMT1', 'PRKACA', 'RHOA', 'AP2A2', 'ANO6', 'GNG5', 'SLC27A2', 'TBK1', 'PPIL3', 'CSNK2B', 'USP13', 'CWC27', 'UPF1', 'ELOB', 'LMAN2', 'RIPK1', 'IL17RA', 'TBCA', 'EXOSC5', 'ITGB1', 'RAB18', 'HMOX1', 'PABPC1', 'PVR', 'RAB7A', 'SLC44A2', 'RAB5C', 'ELOC', 'ERP44', 'NDUFAF2', 'EIF4E2', 'DCAF7']


**Construction of the information channel model for the network.**

$\rho$ is the communication error for each edge. If a node receives input from multiple edges, the receiver state is the mean of the individual output states. The default channel type is a binary symmetric channel as defined in [Elements of information theory](https://books.google.com/books?hl=en&lr=&id=VWq5GG6ycxMC&oi=fnd&pg=PR15&ots=bZ6fK1WaYP&sig=g2JGYtx-EFJKhoFBG-THaWLfKY0#v=onepage&q&f=false).

In [6]:
# Which state every node starts in: 'high', 'low', or anything else for maximum entropy.
initial_state_type = 'maxEnt'

# Communication error per edge, parsed from its text form.
errorname = '0.0'
rho = float(errorname)

# Each node state is a probability vector with 2**input_bits entries.
input_bits = 1
code_length = int(2**input_bits)

# Uniform distribution over the code words.
max_entropy_state = np.ones(shape=(code_length,)) / float(code_length)

# All probability mass on the first entry.
high_state = np.zeros(shape=(code_length,))
high_state[0] = 1.0

# All probability mass on the last entry.
low_state = np.zeros(shape=(code_length,))
low_state[-1] = 1.0

# Unknown type names fall back to the maximum entropy state.
initial_state = {'high': high_state, 'low': low_state}.get(initial_state_type, max_entropy_state)

print(high_state,low_state)

# Build the channel matrix for the network and fix the node ordering used
# to address slices of the state vector in later cells.
netObj.construct_C(rho,h=input_bits,neglect_modules=[])
node_list = list(netObj.G_d.nodes)

[1. 0.] [0. 1.]


**Initial and boundary conditions for information propagation**

The state of each node is defined by the binary probability state $\{P(n=1),P(n=0)\}$, where the abundance (or copy number) of the physical entity (n) is coarse-grained into a binary variable: high (1) or low (0). The maximum entropy state for each node is $\{0.5,0.5\}$. We set every node in the network initially at the maximum entropy state.

We assume that direct interaction with SARS-CoV-2 proteins reduces the effective abundance of the associated network nodes, so we set the state of these nodes at $\{0,1\}$. Additionally, we set the state of ATP, ADP, and Pi at the maximum entropy state $\{0.5,0.5\}$.

In [7]:
# Extra nodes to pin as sources (set to the high state) in the next state-building cell.
# Currently empty; the trailing comment keeps a disabled list of "R-ALL-..." identifiers
# (presumably the small-molecule nodes mentioned in the text above -- TODO confirm).
additional_source_nodes = []#["R-ALL-139836","R-ALL-196180","R-ALL-113592","R-ALL-29370","R-ALL-29358","R-ALL-113582","R-ALL-29372"]

In [8]:
# NOTE(review): presumably detaches drug nodes from the network before propagation;
# the behaviour is defined in CoRe.ncip and is not visible here -- confirm.
netObj.disconnect_drug_nodes()

In [9]:
# Reference run: every node starts in `initial_state` and no node is pinned as a source.
initial_network_state = np.zeros(shape=(netObj.C_sparse.shape[0],1))

# Node n occupies rows [code_length*n, code_length*(n+1)) of the state vector.
for n in range(0,len(node_list)):
    initial_network_state[code_length*n:code_length*(n+1),0] = initial_state

# The reference propagation has no source nodes.
network_sources = []

reference_final_state, steps = netObj.get_final_state(initial_network_state,[])
reference_final_entropy = netObj.state_entropy(reference_final_state,[])
print('Reference state relative entropy: ',reference_final_entropy)

Reference state relative entropy:  0.0


In [10]:
# Perturbed run: start every node in `initial_state`, then pin the nodes that
# interact with viral proteins to the low state and record them as sources.
network_state = np.zeros(shape=(netObj.C_sparse.shape[0],1))
network_sources = []

for n in range(0,len(node_list)):
    network_state[code_length*n:code_length*(n+1),0] = initial_state

for k in tqdm(SARS_nodes.keys()):
    for n in SARS_nodes[k]:
        try:
            i = node_list.index(n)
        except ValueError:
            # This interaction partner is not a node of the network; skip it.
            continue

        network_state[netObj.code_length*i:netObj.code_length*(i+1),0] = low_state

        if i not in network_sources:
            network_sources.append(i)

# Extra sources are pinned once, after all viral targets, so they are not
# re-processed for every viral protein and never appear twice in the source list.
# Applying them last keeps the high state if a node is listed in both groups.
for n in additional_source_nodes:
    try:
        i = node_list.index(n)
    except ValueError:
        continue

    network_state[netObj.code_length*i:netObj.code_length*(i+1),0] = high_state

    if i not in network_sources:
        network_sources.append(i)

print(network_sources)

  0%|          | 0/20 [00:00<?, ?it/s]

[2377, 6572, 1756, 2780, 1641, 1672, 6430, 6413, 6184, 3533, 7881, 1941, 7851, 7033, 7228, 7094, 7497, 632, 7463, 5770, 3417, 6946, 714, 1712, 1927, 2745, 7479, 5517, 1651, 2718, 2918, 6584, 2855, 1688, 6376, 1269, 1283, 853, 7255, 7549, 7909, 2497, 2498]


**Relative entropy of the total network and number of steps to stationary state.**

In [11]:
# Propagate the perturbed state with the pinned source nodes held as sources.
# `steps` is the second value returned by get_final_state (per the heading above,
# the number of steps to the stationary state).
final_state, steps = netObj.get_final_state(network_state,network_sources)
# Entropy measure of the final state, computed with the same source list.
final_entropy = netObj.state_entropy(final_state,network_sources)
print(final_entropy)

277.51601080892686


In [12]:
node_class = nx.get_node_attributes(netObj.G_d,"class")
node_n = list(netObj.G_d.nodes())

# Count nodes of class 'EntityWithAccessionedSequence' whose final state has a
# relative entropy (base 2) above 0.01 with respect to the maximum entropy state.
c = 0

for i, nn in enumerate(node_n):
    if node_class[nn]!='EntityWithAccessionedSequence':
        continue

    # Rows of the state vector that belong to node i.
    rows = slice(netObj.code_length*i, netObj.code_length*(i+1))
    relH = st.entropy(final_state[rows,0],max_entropy_state,base=2)

    c += int(relH>0.01)

print(c)

1757


In [13]:
# Show the entropy of the perturbed final state together with the step count
# returned by get_final_state.
print(final_entropy,steps)

277.51601080892686 3634


In [14]:
# Screening candidates: every node of class 'EntityWithAccessionedSequence' that is
# not itself a viral interaction partner, stored as (node, in-degree).
# The in-degree is recorded only; it is not used as a filter.
all_sources = [
    (name, netObj.G_d.in_degree(name))
    for name, attrs in netObj.G_d.nodes(data=True)
    if attrs['class']=='EntityWithAccessionedSequence' and name not in all_sars_nodes
]

print(len(all_sources))

2093


In [15]:
# Screening loop: for each candidate protein, pin it to the high state on top of
# the viral perturbation and record the resulting entropy figures.
entropy_shifts = {}
H_drops = {}
H_gains = {}

# Node -> position in node_list, built once so the loop below does not repeat a
# linear node_list.index() search for every lookup in every iteration.
node_position = {}
for pos, name in enumerate(node_list):
    node_position.setdefault(name, pos)

# Positions of the viral interaction partners that exist in the network.
# They do not depend on the screened protein, so they are resolved once and
# de-duplicated, matching the source list used to compute `final_state`.
sars_positions = []
for k in SARS_nodes.keys():
    for n in SARS_nodes[k]:
        i = node_position.get(n)
        if i is not None and i not in sars_positions:
            sars_positions.append(i)

for s_pair in tqdm(all_sources):
    s = s_pair[0]
    additional_source_nodes = [s]

    # The channel matrix is rebuilt for every candidate before the drug-node
    # disconnection is applied with the candidate passed as second argument.
    netObj.construct_C(rho,h=input_bits)
    netObj.disconnect_nodes('ChemicalDrug',additional_source_nodes)
    netObj.disconnect_nodes('ProteinDrug',additional_source_nodes)

    network_state = np.zeros(shape=(netObj.C_sparse.shape[0],1))

    for n in range(0,len(node_list)):
        network_state[code_length*n:code_length*(n+1),0] = initial_state

    # Viral targets are pinned low.
    network_sources = list(sars_positions)
    for i in sars_positions:
        network_state[netObj.code_length*i:netObj.code_length*(i+1),0] = low_state

    # The screened protein is pinned high exactly once (previously this ran once
    # per viral protein and appended the same index repeatedly).
    for n in additional_source_nodes:
        i = node_position.get(n)
        if i is None:
            continue

        network_state[netObj.code_length*i:netObj.code_length*(i+1),0] = high_state

        if i not in network_sources:
            network_sources.append(i)

    this_state, steps = netObj.get_final_state(network_state,network_sources)
    this_entropy = netObj.state_entropy(this_state,network_sources)
    H_drop, H_gain = netObj.entropy_drop_and_rise(this_state,final_state,reference_final_state,network_sources)

    entropy_shifts[s] = this_entropy
    H_drops[s] = H_drop
    H_gains[s] = H_gain

  0%|          | 0/2093 [00:00<?, ?it/s]

In [16]:
# Results are written to a sub-folder of the pathway data directory.
# makedirs(exist_ok=True) creates it only when missing, so an unrelated chdir
# failure (e.g. permissions) is no longer misread as "directory does not exist".
output_directory = os.path.join(data_directory,'counter_entropic_shift')
os.makedirs(output_directory,exist_ok=True)
os.chdir(output_directory)

In [17]:
node_data = nx.get_node_attributes(netObj.G_d,"name")
node_class = nx.get_node_attributes(netObj.G_d,"class")


def _csv_label(node_id):
    """Return the text written in the first CSV column for a node.

    Nodes of class "Complex" are written by their "name" attribute with commas
    replaced by semicolons so the name cannot break the CSV row; every other
    node is written by its identifier unchanged.
    """
    if node_class[node_id]=="Complex":
        return node_data[node_id].replace(',',';')
    return node_id


# Total entropy per screened protein. The 'Ref' row holds the entropy of the
# viral perturbation alone. `with` closes the file even if a row fails to format.
with open('high_all_protein_shifts.csv','w') as of:
    print('Gene,Relative Entropy,In Degree',file=of)
    print('Ref,'+str(final_entropy)+',0',file=of)

    for s in all_sources:
        print(_csv_label(s[0])+','+str(entropy_shifts[s[0]])+','+str(s[1]),file=of)

# Drop and gain values per screened protein, as returned by entropy_drop_and_rise.
with open('split_all_high_protein_shifts-'+initial_state_type+'.csv','w') as of:
    print('Protein,Drop,Gain',file=of)

    for s in all_sources:
        print(_csv_label(s[0])+','+str(H_drops[s[0]])+','+str(H_gains[s[0]]),file=of)