In [45]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [46]:
# Libraries
import pandas as pd
import os

from pyvis.network import Network

from src.data import prep_data as prep
from src.models import model_schemata as schema
from src.models import build_model as build

In [47]:
os.chdir('/Users/magdalena/OneDrive - Queen Mary, University of London/bezzlab/research/projects/phospho_pi/')
# os.chdir('/home/mhuebner/Desktop/bezzlab/research/projects/phospho_pi/')

In [48]:
es_interaction = pd.read_csv('data/processed/ebdt_data/es_interaction.csv')
e_function = pd.read_csv('data/processed/ebdt_data/e_function.csv')
p_fc = pd.read_csv('data/processed/ebdt_data/p_fc_HL60.csv')
e_ksea = pd.read_csv('data/processed/ebdt_data/e_ksea_HL60.csv')

In [49]:
# Filtering
# remove activity data where tc is smaller or equal to 1
e_ksea = e_ksea[e_ksea['tc'] > 1]
# remove interactions where the kpa is not in the activity data
es_interaction = es_interaction[es_interaction['enzyme'].isin(e_ksea['enzyme'])]
es_interaction = es_interaction[es_interaction['substrate'].isin(p_fc['phosphosite'])]
# select fold change data for psts which are in interaction data
p_fc = p_fc[p_fc['phosphosite'].isin(es_interaction['substrate'])]
# select enzyme function data for enzymes which are in interaction data
e_function = e_function[e_function['enzyme'].isin(es_interaction['enzyme'])]

In [17]:
# Extract edges from the data
targets = [(row['enzyme'], row['substrate']) for _, row in es_interaction.iterrows()]
phosphatases = set(e_function['enzyme'][e_function['function'] == 'phosphatase'].tolist())
print(targets[:10])
print(phosphatases)

[('SRC', 'PXN(Y88)'), ('SRC', 'PXN(Y118)'), ('SRC', 'ANXA2P2(Y24)'), ('SRC', 'PTTG1IP(Y174)'), ('SRC', 'BCLAF1(Y284)'), ('SRC', 'HIPK2(Y361)'), ('ABL1', 'PXN(Y118)'), ('ABL1', 'ABI1(Y213)'), ('ABL1', 'RBM39(Y95)'), ('PTK2', 'PXN(Y118)')]
{'DUSP1', 'PTPRE', 'PPP3CA', 'CDC25A', 'PPP1CA', 'PPP1CB', 'PHLPP2', 'CTDSPL', 'PPP3CB', 'CDC14B', 'PTPRG', 'PTPRJ', 'PPM1A', 'PHLPP1', 'PTPRR', 'DUSP4', 'PPP2CB', 'CDC25B', 'CDC14A', 'CDC25C', 'DUSP3', 'PPP2CA', 'PPP1CC', 'PPP3CC', 'PPM1B'}


In [43]:
subnet = prep.generate_subnetwork(e_ns=es_interaction['enzyme'].unique().tolist(), p_ns=es_interaction['substrate'].unique().tolist(), 
                                  edges=targets, num_p_nodes=9, max_e_neighbors=500, phosphatases = phosphatases, e_total=8)

Nodes in the subnetwork: ['PXN(Y118)', 'BCLAF1(Y284)', 'HIPK2(Y361)', 'PXN(Y88)', 'ABI1(Y213)', 'SRC', 'PTPRG', 'ABL1', 'PTK2', 'PTK6', 'FYN', 'HIPK2', 'PTPRR']
Edges in the subnetwork: [('SRC', 'PXN(Y118)'), ('SRC', 'BCLAF1(Y284)'), ('SRC', 'HIPK2(Y361)'), ('SRC', 'PXN(Y88)'), ('PTPRG', 'PXN(Y118)'), ('ABL1', 'PXN(Y118)'), ('ABL1', 'ABI1(Y213)'), ('PTK2', 'PXN(Y118)'), ('PTK6', 'PXN(Y118)'), ('FYN', 'PXN(Y118)'), ('HIPK2', 'HIPK2(Y361)'), ('PTPRR', 'PXN(Y88)')]


In [44]:
# make subnet into targets data frame
targets_sub = pd.DataFrame(subnet.edges, columns=['enzyme', 'phosphosite'])
targets_sub

Unnamed: 0,enzyme,phosphosite
0,SRC,PXN(Y118)
1,SRC,BCLAF1(Y284)
2,SRC,HIPK2(Y361)
3,SRC,PXN(Y88)
4,PTPRG,PXN(Y118)
5,ABL1,PXN(Y118)
6,ABL1,ABI1(Y213)
7,PTK2,PXN(Y118)
8,PTK6,PXN(Y118)
9,FYN,PXN(Y118)


In [22]:
# compare interaction and targets_sub by comparing list of tuples
targets_sub_list = [tuple(x) for x in targets_sub.to_numpy()]
interaction_list = [tuple(x) for x in es_interaction[['enzyme', 'substrate']].to_numpy()]
set(targets_sub_list) - set(interaction_list)

KeyError: "['substrate'] not in index"

In [51]:
es_interaction = pd.read_csv('data/processed/ebdt_data/sub_network_p/es_interaction.csv')

In [52]:
targets = [(row['enzyme'], row['phosphosite']) for _, row in es_interaction.iterrows()]

Unnamed: 0,enzyme,phosphosite
0,SRC,PXN(Y88)
1,SRC,PXN(Y118)
2,SRC,ANXA2P2(Y24)
3,SRC,PTTG1IP(Y174)
4,SRC,BCLAF1(Y284)
5,SRC,HIPK2(Y361)
6,ABL1,PXN(Y118)
7,ABL1,ABI1(Y213)
8,ABL1,RBM39(Y95)
9,PTK2,PXN(Y118)


In [54]:
import networkx as nx
G = nx.DiGraph()
G.add_nodes_from(es_interaction['enzyme'].unique().tolist(), bipartite=0)
G.add_nodes_from(es_interaction['phosphosite'].unique().tolist(), bipartite=1)
G.add_edges_from(targets)

In [70]:
# Create a pyvis network
net = Network()

# Add nodes and edges from the bipartite graph
for node, attributes in G.nodes(data=True):
    if attributes["bipartite"] == 0 and node in phosphatases:
        net.add_node(node, color="#E0F4DA", shape="square", borderWidth=3)
    elif attributes["bipartite"] == 0:
        net.add_node(node, color="#E0F4DA", shape="square", borderWidth=1.5)
    else:
        net.add_node(node, color="#D8F0F6", borderWidth=1.5)

for edge in G.edges():
    net.add_edge(edge[0], edge[1])

# Show the network
net.show('data/processed/ebdt_data/sub_network_p/bipartite_network2.html')

In [14]:
# Filtering
# select fold change data for psts which are in interaction data
p_fc_sub = p_fc[p_fc['phosphosite'].isin(targets_sub['phosphosite'])].reset_index(drop=True)
# select activity data for enzymes which are in interaction data
e_ksea_sub = e_ksea[e_ksea['enzyme'].isin(targets_sub['enzyme'])].reset_index(drop=True)
# define all pst which are in fold change data or interaction data and store in data frame
phosphosites = list(set(p_fc_sub['phosphosite'].tolist() + targets_sub['phosphosite'].tolist()))
# define all enzymes which are in activity, interaction or enz_class data and store in data frame
enzymes = list(set(e_ksea_sub['enzyme'].tolist() + targets_sub['enzyme'].tolist()))
e_function_sub = e_function[e_function['enzyme'].isin(targets_sub['enzyme'])].reset_index(drop=True)

Building skeleton

In [15]:
# Mapping data to Problog predicates
predicates = {}
predicates['enzyme'] = schema.EnzymePredicate(enzyme_list=enzymes)
predicates['phosphosite'] = schema.PhosphositePredicate(phosphosite_list=phosphosites)
predicates['es_interaction'] = schema.ESInteractionPredicate(dataframe=targets_sub, enzyme_col='enzyme', phosphosite_col='phosphosite')
predicates['e_function'] = schema.EFunctionPredicate(dataframe=e_function_sub, enzyme_col='enzyme', function_col='function')

# Adding entites, relationships, and fixed attributes to template model
model_skeleton = 'models/ebdt_data/sub_network/p_model/p_model_skeleton.pl'

for predicate in predicates:
    fact_generator = build.ProblogStatementGenerator(predicates[predicate])
    problog_facts = fact_generator.generate_facts(build.FactTemplate) # generate Problog facts
    build.insert_statements(model=model_skeleton, statements=problog_facts, location='%% {}'.format(predicate)) # insert into Problog file

In [16]:
# save es_interaction, e_function, e_ksea, p_fc, e_activity and p_occupancy to csv
targets_sub.to_csv('data/processed/ebdt_data/sub_network_p/es_interaction.csv', index=False)
e_function_sub.to_csv('data/processed/ebdt_data/sub_network_p/e_function.csv', index=False)
e_ksea_sub.to_csv('data/processed/ebdt_data/sub_network_p/e_ksea.csv', index=False)
p_fc_sub.to_csv('data/processed/ebdt_data/sub_network_p/p_fc.csv', index=False)

In [8]:
es_interaction = pd.read_csv('data/processed/ebdt_data/sub_network_p/es_interaction.csv')

In [12]:
es_interaction.phosphosite.unique().shape

(9,)