# Scanning drugs to counter signaling from viral PPIs

Computes the change in network relative entropy from viral PPIs after addin

In [1]:
import os, sys
import numpy as np
import scipy as sp
import pandas as pd
import copy as copy
from tqdm.notebook import tqdm
import math
import scipy.stats as st

from CoRe import reader
from CoRe.ncip import ncip
from CoRe.BA_C import BA

import importlib

import networkx as nx
import matplotlib.pyplot as plt
import json

In [2]:
data_directory = "/Users/swarnavo/Research/Reactome-Graph-Database/HumanData/SARS-Cov2"
os.chdir(data_directory)

sars_pnames = pd.read_csv('SARS2_proteins_names.csv')['Bait'].to_list()

In [3]:
selected_pathway = 'Immune System'
pathway_nametag = selected_pathway.replace(' ','_')

network_type = 'medium-PPI'

data_directory = "/Users/swarnavo/Research/Reactome-Graph-Database/HumanData/" + pathway_nametag
os.chdir(data_directory)

edge_data = pd.read_pickle(pathway_nametag+'_'+network_type+'-edges.pkl')
node_data = pd.read_pickle(pathway_nametag+'_'+network_type+'-nodes.pkl')

In [4]:
remake_graph = False

if remake_graph==False:
    netObj = ncip()
    netObj.load_graph(pathway_nametag+"-"+network_type+".gml")
else:
    netObj = ncip()
    netObj.load_data(edge_data,node_data)
    netObj.make_graph()
    netObj.save_network(pathway_nametag,network_type)

100%|██████████████████████████████████| 19217/19217 [00:01<00:00, 14167.72it/s]


Communication network Immune_System-medium-PPI-2.gml created.


In [5]:
drugs = []
drug_names = []

all_nodes = netObj.G_d.nodes(data=True)

for n in netObj.G_d.nodes(data=True):
    if 'Drug' in n[1]['class'] and 'lat' not in n[1]['name'] and n[0] not in drug_names:
        print(n[0],n[1]['name'],n[1]['class'])
        drugs.append((n[0],n[1]['class'],n[1]['name']))
        drug_names.append(n[0])
        
print(len(drugs))

R-ALL-9679737 natalizumab [extracellular region] ProteinDrug
R-ALL-9681784 anakinra [extracellular region] ProteinDrug
R-ALL-9681764 isunakinra [extracellular region] ProteinDrug
R-ALL-9678829 baricitinib [cytosol] ChemicalDrug
R-ALL-9678779 tofacitinib [cytosol] ChemicalDrug
R-ALL-9678901 ibrutinib [cytosol] ChemicalDrug
R-ALL-9678960 acalabrutinib [cytosol] ChemicalDrug
R-ALL-9678786 ruxolitinib [cytosol] ChemicalDrug
R-ALL-9715049 sarilumab [extracellular region] ProteinDrug
R-ALL-9715070 satralizumab [extracellular region] ProteinDrug
R-ALL-9681301 tocilizumab [extracellular region] ProteinDrug
R-ALL-9679801 amlexanox [cytosol] ChemicalDrug
R-ALL-9724689 omalizumab [extracellular region] ProteinDrug
R-ALL-9678991 tacrolimus [cytosol] ChemicalDrug
R-ALL-9679471 camrelizumab [extracellular region] ProteinDrug
R-ALL-9679434 cemiplimab [extracellular region] ProteinDrug
R-ALL-9679411 nivolumab [extracellular region] ProteinDrug
R-ALL-9678628 HCQ [cytosol] ChemicalDrug
R-ALL-9717004 del

In [6]:
f = open('SARS_CoV2-'+pathway_nametag+'_interactions.json')
SARS_nodes = json.load(f)
f.close()

all_sars_nodes = []

for s in SARS_nodes.keys():
    all_sars_nodes += SARS_nodes[s]
    
all_sars_nodes = list(set(all_sars_nodes))

print(all_sars_nodes)

['HMOX1', 'RAB10', 'AP2A2', 'GGH', 'IL17RA', 'TOMM70', 'HECTD1', 'ELOC', 'NEU1', 'STOM', 'ELOB', 'EIF4E2', 'RHOA', 'CSNK2B', 'TBK1', 'GLA', 'CYB5R3', 'NLRX1', 'RAB14', 'RIPK1', 'GOLGA7', 'ITGB1', 'RNF41', 'PVR', 'IMPDH2', 'SLC27A2', 'PTGES2', 'ANO6', 'RAB7A', 'RAB18', 'RAB5C', 'SLC44A2', 'ERP44', 'ECSIT', 'RALA', 'NPC2']


**Construction of the information channel model for the network.**

$\rho$ is the communication error for each edge. If a node receives input from multiple edges, the receiver state is the mean of the individual output states. The default channel type is a binary symmetric channel as defined in [Elements of information theory](https://books.google.com/books?hl=en&lr=&id=VWq5GG6ycxMC&oi=fnd&pg=PR15&ots=bZ6fK1WaYP&sig=g2JGYtx-EFJKhoFBG-THaWLfKY0#v=onepage&q&f=false).

In [7]:
initial_state_type = 'maxEnt'

errorname = '0.0'
rho = float(errorname)

input_bits = 1
code_length = int(2**input_bits)

#errorname = str(rho)#.replace('.','_')

max_entropy_state = (1.0/float(code_length))*np.ones(shape=(code_length,))

#if initial_maxEnt==True:
#    initial_state = max_entropy_state
#else:
#    initial_state = np.zeros(shape=(code_length,))
#    initial_state[0] = 1.0

low_state = np.zeros(shape=(code_length,))
low_state[-1] = 1.0

high_state = np.zeros(shape=(code_length,))
high_state[0] = 1.0

if initial_state_type=='high':
    initial_state = high_state
elif initial_state_type=='low':
    initial_state = low_state
else:
    initial_state = max_entropy_state

print(high_state,low_state)

netObj.construct_C(rho,h=input_bits,neglect_modules=[])
node_list = list(netObj.G_d.nodes)

[1. 0.] [0. 1.]


**Initial and boundary conditions for information propagation**

The state of each node is define by the binary probability state $\{P(n=1),P(n=0)\}$, where the abundance (or copy number) the physical entity (n) is coarse-grained into a binary variable high (1) or low (0). The maximum entropy state for each node is $\{0.5,0.5\}$. We set every node in the network initially at the maximum entropy state. 

We assume that direct interaction with SARS-CoV-2 proteins reduces the effective abundance of the associated network nodes, so we set the state of these nodes at $\{0,1\}$. Additionally, we set the state of ATP, ADP, and Pi at the maximum entropy state $\{0.5,0.5\}$.

In [8]:
additional_source_nodes = []#["R-ALL-139836","R-ALL-196180","R-ALL-113592","R-ALL-29370","R-ALL-29358","R-ALL-113582","R-ALL-29372"]

In [9]:
netObj.disconnect_drug_nodes()

In [11]:
initial_network_state = np.zeros(shape=(netObj.C_sparse.shape[0],1))
network_sources = {}

for n in range(0,len(node_list)):
    initial_network_state[code_length*n:code_length*(n+1),0] = initial_state
    
network_sources = []

reference_final_state, steps = netObj.get_final_state(initial_network_state,[])
reference_final_entropy = netObj.state_entropy(reference_final_state,[])
print('Reference state relative entropy: ',reference_final_entropy)

Reference state relative entropy:  0.0


In [12]:
network_state = np.zeros(shape=(netObj.C_sparse.shape[0],1))
network_sources = []

for n in range(0,len(node_list)):
    network_state[code_length*n:code_length*(n+1),0] = initial_state

for k in tqdm(SARS_nodes.keys()):
    for n in SARS_nodes[k]:
        try:
            i = node_list.index(n)

            network_state[netObj.code_length*i:netObj.code_length*(i+1),0] = low_state

            if i not in network_sources:
                network_sources.append(i)
        except ValueError:
            pass
        
    for n in additional_source_nodes:
        try:
            i = node_list.index(n)

            network_state[netObj.code_length*i:netObj.code_length*(i+1),0] = initial_state

            network_sources.append(i)
        except ValueError:
            pass
        
    #print(np.linalg.norm(network_state[k],1),len(network_sources[k]))
    
print(network_sources)

  0%|          | 0/17 [00:00<?, ?it/s]

[2075, 2672, 1996, 3076, 2031, 1832, 1825, 1646, 1723, 1973, 1843, 2236, 1713, 632, 714, 1978, 1865, 2222, 1810, 3041, 2059, 1680, 1656, 3013, 3214, 1979, 1795, 3151, 1785, 1768, 1269, 1283, 853, 1859, 2792, 2793]


**Relative entropy of the total network and number of steps to stationary state.**

In [13]:
final_state, steps = netObj.get_final_state(network_state,network_sources)
final_entropy = netObj.state_entropy(final_state,network_sources,reference_final_state)
print(final_entropy)

31.310427528256373


In [14]:
entropy_shifts = {}
H_drops = {}
H_gains = {}

for s_pair in tqdm(drugs):
    s = s_pair[0]
    additional_source_nodes = [s]
    
    netObj.load_graph(pathway_nametag+"-"+network_type+"-2.gml")
    netObj.disconnect_drug_nodes(skip=s)
    
    netObj.construct_C(rho,h=input_bits)
    #netObj.disconnect_nodes('ChemicalDrug',additional_source_nodes)
    #netObj.disconnect_nodes('ProteinDrug',additional_source_nodes)
    
    network_state = np.zeros(shape=(netObj.C_sparse.shape[0],1))
    network_sources = []
    
    for n in range(0,len(node_list)):
        network_state[code_length*n:code_length*(n+1),0] = initial_state

    for k in SARS_nodes.keys():
        for n in SARS_nodes[k]:
            try:
                i = node_list.index(n)

                network_state[netObj.code_length*i:netObj.code_length*(i+1),0] = low_state

                network_sources.append(i)
            except ValueError:
                pass

        for n in additional_source_nodes:
            try:
                i = node_list.index(n)

                network_state[netObj.code_length*i:netObj.code_length*(i+1),0] = high_state

                network_sources.append(i)
            except ValueError:
                pass

        #print(np.linalg.norm(network_state[k],1),len(network_sources[k]))
        
    entropy_shifts[s] = 0.0

    this_state, steps = netObj.get_final_state(network_state,network_sources)
    this_entropy = netObj.state_entropy(this_state,network_sources,reference_final_state)
    H_drop, H_gain = netObj.entropy_drop_and_rise(this_state,final_state,reference_final_state,network_sources)
                
    entropy_shifts[s] = this_entropy
    
    H_drops[s] = H_drop
    H_gains[s] = H_gain

  0%|          | 0/19 [00:00<?, ?it/s]

In [15]:
try:
    os.chdir(data_directory+'/counter_entropic_shift')
except OSError:
    os.mkdir(data_directory+'/counter_entropic_shift')
    os.chdir(data_directory+'/counter_entropic_shift')

In [16]:
node_data = nx.get_node_attributes(netObj.G_d,"name")
node_class = nx.get_node_attributes(netObj.G_d,"class")

of = open('2-high_all_drug_shifts-'+initial_state_type+'.csv','w')

print('Drug,Relative Entropy,Drug Type',file=of)

print('Ref,'+str(final_entropy)+',0',file=of)

for s in drugs:
    if node_class[s[0]]=="Complex":
        this_name = node_data[s[0]]
        this_name = this_name.replace(',',';')
    else:
        this_name = s[2].split(' [')[0]
        
    print(this_name+','+str(entropy_shifts[s[0]])+','+str(s[1]),file=of)
    
of.close()

of = open('2-split_all_drug_shifts-'+initial_state_type+'.csv','w')

print('Drug,Drop,Gain,Drug Type',file=of)

for s in drugs:
    if node_class[s[0]]=="Complex":
        this_name = node_data[s[0]]
        this_name = this_name.replace(',',';')
    else:
        this_name = s[2].split(' [')[0]
        
    print(this_name+','+str(H_drops[s[0]])+','+str(H_gains[s[0]])+','+str(s[1]),file=of)
    
of.close()

In [None]:
print(all_sources)