In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Libraries
import pandas as pd
import os

from pyvis.network import Network

from src.data import prep_data as prep
from src.models import model_schemata as schema
from src.models import build_model as build

In [3]:
# Project root: every relative path used below ('data/...', 'models/...') is
# resolved against this directory.
# Set the PHOSPHO_PI_ROOT environment variable to run on another machine, e.g.
#   '/home/mhuebner/Desktop/bezzlab/research/projects/phospho_pi/'
# When the variable is unset, the original macOS location is used unchanged.
PROJECT_ROOT = os.environ.get(
    'PHOSPHO_PI_ROOT',
    '/Users/magdalena/OneDrive - Queen Mary, University of London/bezzlab/research/projects/phospho_pi/',
)
os.chdir(PROJECT_ROOT)

In [8]:
# Load the processed EBDT tables (paths are relative to the working directory).
# The full es_interaction.csv in this folder is intentionally not read here.
dk_interaction, p_regulates, p_function, e_function, p_fc, e_ksea = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/processed/ebdt_data/dk_interaction.csv',
        'data/processed/ebdt_data/p_regulates.csv',
        'data/processed/ebdt_data/p_function.csv',
        'data/processed/ebdt_data/e_function.csv',
        'data/processed/ebdt_data/p_fc_scaled_HL60.csv',
        'data/processed/ebdt_data/e_ksea_penalised_HL60.csv',
    )
)

In [5]:
# Load the es_interaction table stored under sub_network_p/; its 'enzyme'
# column is what the filtering cell below uses to restrict the other tables.
es_pmodel = pd.read_csv('data/processed/ebdt_data/sub_network_p/es_interaction.csv')

In [6]:
# Filtering
# Keep only rows whose enzyme/protein occurs in es_pmodel; for the drug table
# additionally require that the drug appears in the 'sample' column of p_fc.
model_enzymes = es_pmodel['enzyme']

p_regulates = p_regulates.loc[p_regulates['protein'].isin(model_enzymes)].reset_index(drop=True)

# both conditions combined in one mask (same rows, same order as filtering twice)
dk_keep = dk_interaction['enzyme'].isin(model_enzymes) & dk_interaction['drug'].isin(p_fc['sample'])
dk_interaction = dk_interaction.loc[dk_keep].reset_index(drop=True)

Plotting

In [100]:
# Labelling: sets of enzyme names by the value in e_function['function'],
# used later for membership tests when styling the network nodes.
enzyme_role = e_function['function']
phosphatases = set(e_function.loc[enzyme_role == 'phosphatase', 'enzyme'].tolist())
kinases = set(e_function.loc[enzyme_role == 'kinase', 'enzyme'].tolist())

In [101]:
# Edge lists for the graph, one tuple per table row:
#   regulates -> (phosphosite, protein) from p_regulates
#   inhibits  -> (drug, enzyme) from dk_interaction
# Built column-wise with zip rather than DataFrame.iterrows(): iterrows creates
# a Series object for every row, which is needlessly slow when only two columns
# are read. Row order and tuple contents are the same.
regulates = list(zip(p_regulates['phosphosite'].tolist(), p_regulates['protein'].tolist()))
inhibits = list(zip(dk_interaction['drug'].tolist(), dk_interaction['enzyme'].tolist()))

In [102]:
import networkx as nx

# Directed graph holding every entity as a node. The 'bipartite' attribute tags
# the node group (0 = drug column, 1 = enzyme/protein columns, 2 = phosphosite
# column) and is read back when the nodes are styled for plotting.
G = nx.DiGraph()

node_groups = [
    (dk_interaction['drug'], 0),
    (dk_interaction['enzyme'], 1),
    (p_regulates['phosphosite'], 2),
    (p_regulates['protein'], 1),
]
for column, partition in node_groups:
    G.add_nodes_from(column.unique().tolist(), bipartite=partition)

# (phosphosite, protein) edges first, then (drug, enzyme) edges
for edge_list in (regulates, inhibits):
    G.add_edges_from(edge_list)

In [103]:
# Create a pyvis network
net = Network()

# Copy every graph node into the pyvis network, styled by its 'bipartite' group
for node, attributes in G.nodes(data=True):
    partition = attributes["bipartite"]
    if partition == 0:
        # group 0: orange triangles
        style = dict(color="#F6BF93", shape="triangle", borderWidth=1.5)
    elif partition != 1:
        # every remaining group: light blue, pyvis default shape
        style = dict(color="#D8F0F6", borderWidth=1.5)
    elif node in phosphatases:
        # group 1, listed as phosphatase: green square with a thicker border
        style = dict(color="#E0F4DA", shape="square", borderWidth=3)
    elif node in kinases:
        # group 1, listed as kinase: green square with the regular border
        style = dict(color="#E0F4DA", shape="square", borderWidth=1.5)
    else:
        # group 1 without a phosphatase/kinase label: grey square
        style = dict(color="#F2F2F2", shape="square", borderWidth=1.5)
    net.add_node(node, **style)

# Copy the edges over unchanged
for source, target in G.edges():
    net.add_edge(source, target)

# Show the network
net.show('data/processed/ebdt_data/sub_network_e/bipartite_network.html')

Filtering

In [110]:
# Filtering
# fold-change rows for phosphosites that occur in p_regulates
p_fc_sub = p_fc[p_fc['phosphosite'].isin(p_regulates['phosphosite'])].reset_index(drop=True)
# activity rows for enzymes that occur in dk_interaction or as a protein in p_regulates
e_ksea_sub = e_ksea[e_ksea['enzyme'].isin(dk_interaction['enzyme']) | e_ksea['enzyme'].isin(p_regulates['protein'])].reset_index(drop=True)
# phosphosite function rows for phosphosites that occur in p_regulates
p_function_sub = p_function[p_function['phosphosite'].isin(p_regulates['phosphosite'])].reset_index(drop=True)

# Entity lists (plain Python lists of unique names).
# dict.fromkeys de-duplicates while keeping first-seen order. The previous
# list(set(...)) returned the same elements, but in an order that changes from
# one kernel session to the next (string hashes are randomised per process), so
# anything generated from these lists was not reproducible run to run.

# all phosphosites found in the fold-change subset or in p_regulates
phosphosites = list(dict.fromkeys(p_fc_sub['phosphosite'].tolist() + p_regulates['phosphosite'].tolist()))
# all enzymes found in the activity subset, in p_regulates or in dk_interaction
enzymes = list(dict.fromkeys(e_ksea_sub['enzyme'].tolist() + p_regulates['protein'].tolist() + dk_interaction['enzyme'].tolist()))
# all drugs found in dk_interaction
drugs = list(dict.fromkeys(dk_interaction['drug'].tolist()))

Building skeleton

In [111]:
# Mapping data to Problog predicates.
# Keys double as the location labels used when inserting into the model file;
# the dict's insertion order is the order in which the predicates are processed.
predicates = {
    'enzyme': schema.EnzymePredicate(enzyme_list=enzymes),
    'phosphosite': schema.PhosphositePredicate(phosphosite_list=phosphosites),
    'drug': schema.DrugPredicate(drug_list=drugs),
    'p_regulates': schema.PRegulatesPredicate(dataframe=p_regulates, phosphosite_col='phosphosite', protein_col='protein'),
    'dk_interaction': schema.DKInteractionPredicate(dataframe=dk_interaction, drug_col='drug', enzyme_col='enzyme'),
    'p_function': schema.PFunctionPredicate(dataframe=p_function_sub, phosphosite_col='phosphosite', function_col='function'),
}

# Adding entities, relationships, and fixed attributes to template model
model_skeleton = 'models/ebdt_data/sub_network/e_model/e_model_skeleton.pl'

# NOTE(review): insert_statements writes into model_skeleton; re-running this
# cell presumably inserts the same facts again - confirm before re-executing.
for predicate, predicate_spec in predicates.items():
    # generate Problog facts for this predicate
    problog_facts = build.ProblogStatementGenerator(predicate_spec).generate_facts(build.FactTemplate)
    # insert into Problog file at the location labelled '%% <predicate name>'
    build.insert_statements(
        model=model_skeleton,
        statements=problog_facts,
        location='%% {}'.format(predicate),
    )

In [112]:
# Save the filtered tables (p_regulates, dk_interaction, p_function_sub,
# e_ksea_sub, p_fc_sub) as CSV files under sub_network_e/, without the index.
sub_network_exports = (
    (p_regulates, 'data/processed/ebdt_data/sub_network_e/p_regulates.csv'),
    (dk_interaction, 'data/processed/ebdt_data/sub_network_e/dk_interaction.csv'),
    (p_function_sub, 'data/processed/ebdt_data/sub_network_e/p_function.csv'),
    (e_ksea_sub, 'data/processed/ebdt_data/sub_network_e/e_ksea.csv'),
    (p_fc_sub, 'data/processed/ebdt_data/sub_network_e/p_fc.csv'),
)
for frame, csv_path in sub_network_exports:
    frame.to_csv(csv_path, index=False)