#### Prerequisites:
* Define schema (make figure) 
* Define structure (make figure)
* Define schema class in python (src/models/model_schemata.py)
* Convert schema + structure into model template (models/templates/p_model.pl)

#### Procedure:
* Load data
* Adjust model script for lfi accordingly
* Create evidence file (if I don't need different evidence files, move this step to the build-model notebook)
* LFI

Experiment: multiple instances with 1 pst and X enzymes each (one sample per instance, multiple enzymes per instance). Test how many enzymes one instance can contain and how computing time depends on N p::f for 60 samples.

In [1]:
%load_ext autoreload
%autoreload 2

In [1]:
# Importing libraries
import pandas as pd
import numpy as np
import os
import random

from src.models import model_schemata as schema
from src.models import build_model as build
from src.visualization import visualize as viz

In [2]:
# Switch to the project root so that the relative paths used in the cells below resolve.
# The root can be overridden per machine through the PHOSPHO_PI_ROOT environment
# variable; when it is unset, the original hard-coded location is used.
# Alternative root kept from the original cell (it was commented out there):
# '/Users/magdalena/OneDrive - Queen Mary, University of London/bezzlab/research/projects/phospho_pi/'
project_root = os.environ.get(
    'PHOSPHO_PI_ROOT',
    '/home/mhuebner/Desktop/bezzlab/research/projects/phospho_pi/',
)
os.chdir(project_root)

#### Training/Testing

In [4]:
# Read each processed sub-network CSV into a dict keyed by dataset name
csv_paths = {
    'e_ksea': 'data/processed/ebdt_data/sub_network_p/e_ksea_penalised.csv',
    'p_fc': 'data/processed/ebdt_data/sub_network_p/p_fc_scaled.csv',
    'es_interaction': 'data/processed/ebdt_data/sub_network_p/es_interaction.csv',
}
data = {name: pd.read_csv(path) for name, path in csv_paths.items()}
# Show the phosphosite fold-change table as the cell output
data['p_fc']

Unnamed: 0,phosphosite,sample,value,prob,p_dec,p_inc
0,ABI1(Y213),AZD5438,inc,0.795799,0.001000,0.795799
1,ANXA2P2(Y24),AZD5438,dec,0.729442,0.729442,0.001000
2,ATF1(S198),AZD5438,dec,0.999546,0.999546,0.001000
3,BCLAF1(Y284),AZD5438,inc,0.886673,0.001000,0.886673
4,HIPK2(Y361),AZD5438,dec,0.460094,0.460094,0.001000
...,...,...,...,...,...,...
544,HIPK2(Y361),MK2206,dec,0.171695,0.171695,0.001000
545,PTTG1IP(Y174),MK2206,dec,0.569925,0.569925,0.001000
546,PXN(Y118),MK2206,dec,0.408879,0.408879,0.001000
547,PXN(Y88),MK2206,inc,0.155731,0.001000,0.155731


Splitting data into training/testing

In [9]:
# Collect every sample name that occurs in either dataset, in sorted order
samples = sorted(set(data['e_ksea']['sample']) | set(data['p_fc']['sample']))
# Draw the training samples without replacement using a fixed seed;
# the multiplier 1 keeps every sample
random.seed(612)
n_train = int(len(samples)*1)
train = random.sample(samples, n_train)
print(train)
# Keep only the rows that belong to the training samples
training_data = {}
for name in ('e_ksea', 'p_fc'):
    table = data[name]
    in_train = table['sample'].isin(train)
    training_data[name] = table[in_train].reset_index(drop=True)

['TAK715', 'TGX221', 'CHIR99021', 'CAL101', 'Ripasudil', 'KN93', 'LY2584702', 'LY2090314', 'Ku0063794', 'SP600125', 'Ribociclib', 'U73122', 'AZD5438', 'GDC0994', 'Edelfosine', 'PF3758309', 'Amuvatinib', 'AT13148', 'AZD3759', 'Go6976', 'JNJ', 'NU7441', 'GSK2334470', 'Ipatasertib', 'AZD5363', 'LY2835219', 'Torin', 'Bosutinib', 'H89', 'DNAPK', 'GDC0941', 'FRAX486', 'CX4945', 'HS173', 'Linsitinib', 'Dabrafenib', 'AZD1480', 'BX912', 'AZ20', 'TBCA', 'Selumetinib', 'GF109203X', 'AZD6482', 'AC220', 'PH797804', 'AZD8055', 'Trametinib', 'JNK']


In [10]:
# Wrap each training table in its ProbLog predicate object.
# add_data receives the table followed by three column names.
e_ksea_predicate = schema.EKseaPredicate()
e_ksea_predicate.add_data(training_data['e_ksea'], 'enzyme', 'sample', 'value')
p_fc_predicate = schema.PFoldChangePredicate()
p_fc_predicate.add_data(training_data['p_fc'], 'phosphosite', 'sample', 'value')
# Expose both predicates under the same keys as the data tables
predicates = {
    'e_ksea': e_ksea_predicate,
    'p_fc': p_fc_predicate,
}

In [11]:
# Build the evidence statements for every (sample, phosphosite) instance:
# evidence_dict[sample][phosphosite] is the list of e_ksea evidence for the
# enzymes linked to that phosphosite, followed by the p_fc evidence of the site.

# Iteration order of a set of strings is not stable across interpreter sessions,
# so both name lists are sorted to make the evidence ordering reproducible.
samples = sorted(set(training_data['e_ksea']['sample']).union(set(training_data['p_fc']['sample'])))
phosphosites = sorted(set(training_data['p_fc']['phosphosite']))

# The enzymes linked to a phosphosite do not depend on the sample: look them up once
interactions = data['es_interaction']
enzymes_by_site = {
    site: interactions['enzyme'][interactions['phosphosite'] == site].tolist()
    for site in phosphosites
}

# One statement generator per predicate, created once instead of per instance.
# NOTE(review): assumes generate_facts keeps no state between calls — the original
# cell already reused one generator across several enzymes; confirm in build_model.
e_ksea_generator = build.ProblogStatementGenerator(predicates['e_ksea'])
p_fc_generator = build.ProblogStatementGenerator(predicates['p_fc'])

evidence_dict = {}
for s in samples:
    evidence_dict[s] = {}
    for p in phosphosites:
        evidence_e = []
        for e in enzymes_by_site[p]:
            evidence_e.extend(e_ksea_generator.generate_facts(build.EvidenceTemplate, select=[s, e]))
        evidence_p = p_fc_generator.generate_facts(build.EvidenceTemplate, select=[s, p])
        evidence_dict[s][p] = evidence_e + evidence_p

In [8]:
# Dump evidence_dict to a text file: one statement per line, with a line of
# hyphens closing each (sample, phosphosite) instance
with open('models/ebdt_data/sub_network/p_model/evidence_exp11.pl', 'w') as evidence_file:
    for per_site in evidence_dict.values():
        for statements in per_site.values():
            evidence_file.writelines(statement + '\n' for statement in statements)
            evidence_file.write('--------------------' + '\n')

In [12]:
# Dump evidence_dict to the training evidence file: one statement per line, with
# a line of hyphens closing each (sample, phosphosite) instance
with open('models/ebdt_data/sub_network/p_model/evidence_exp11_training.pl', 'w') as evidence_file:
    for per_site in evidence_dict.values():
        for statements in per_site.values():
            evidence_file.writelines(statement + '\n' for statement in statements)
            evidence_file.write('--------------------' + '\n')

LFI

In [14]:
DIR = 'models/ebdt_data/sub_network/p_model/'
max_iter = 200
# Learning from interpretations: run `problog lfi` on the LFI model template and
# the evidence file, passing max_iter through the -n option.
# The learned-model and log file names carry the iteration count, so they are
# derived from max_iter instead of repeating the literal 200; changing max_iter
# no longer produces mislabelled output files.
model_out = f'{DIR}p_model_exp11_{max_iter}i.pl'
log_file = f'{DIR}log_exp11-{max_iter}i.txt'
cmd = f'problog lfi {DIR}p_model_lfi_exp11.pl {DIR}evidence_exp11.pl -O {model_out} --logger {log_file} -k ddnnf -v -n {max_iter}'
# The status returned by os.system is the last expression and is shown as cell output
os.system(cmd)

-456.4436725949491 [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.21000868, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.73370178, 0.25173266, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.28648809, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.18446394, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.7679167, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.22389557, 0.0, 1.0, 0.

0

In [13]:
DIR = 'models/ebdt_data/sub_network/p_model/'
# Ground the LFI model template with `problog ground`, passing the target file
# through -o and requesting the `pl` format
lfi_model = f'{DIR}p_model_lfi_exp11.pl'
grounded_model = f'{DIR}p_model_exp11_ground.pl'
cmd = f'problog ground {lfi_model} -o {grounded_model} --format pl'
# NOTE(review): the recorded output of this cell shows a non-zero status (256) —
# confirm that the grounded program is written as expected
os.system(cmd)

(True, 't(0.33,"SRC","Amuvatinib")::e_activity("SRC","Amuvatinib",dec); t(0.33,"SRC","Amuvatinib")::e_activity("SRC","Amuvatinib",inc).\n0.6012503962::e_ksea("SRC","Amuvatinib",dec) :- e_activity("SRC","Amuvatinib",dec).\n0.001::e_ksea("SRC","Amuvatinib",dec) :- e_activity("SRC","Amuvatinib",inc).\n0.1993748019::e_ksea("SRC","Amuvatinib",dec).\nt(0.33,"ABL1","Amuvatinib")::e_activity("ABL1","Amuvatinib",dec); t(0.33,"ABL1","Amuvatinib")::e_activity("ABL1","Amuvatinib",inc).\n0.7055614293::e_ksea("ABL1","Amuvatinib",dec) :- e_activity("ABL1","Amuvatinib",dec).\n0.001::e_ksea("ABL1","Amuvatinib",dec) :- e_activity("ABL1","Amuvatinib",inc).\n0.1472192854::e_ksea("ABL1","Amuvatinib",dec).\nt(0.33,"PTK2","Amuvatinib")::e_activity("PTK2","Amuvatinib",dec); t(0.33,"PTK2","Amuvatinib")::e_activity("PTK2","Amuvatinib",inc).\n0.594296762::e_ksea("PTK2","Amuvatinib",dec) :- e_activity("PTK2","Amuvatinib",dec).\n0.001::e_ksea("PTK2","Amuvatinib",dec) :- e_activity("PTK2","Amuvatinib",inc).\n0.2028

256