#### Prerequesites:
* Define schema (make figure) 
* Define structure (make figure)
* Define schema class in python (src/models/model_schemata.py)
* Convert schema + structure into model template (models/templates/p_model.pl)

#### Procedure:
* Load data
* Adjust model script for lfi accordingly
* Create evidence file (if I don't need different evidence files, move to builmodel notebook)
* LFI

1 single instance, 1 pst, 3 enzymes, all samples in one instance, test how many samples can be in one instance, how computing time depends on N p::f

In [11]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
# Importing libraries
import pandas as pd
import numpy as np
import os
import random

from src.models import model_schemata as schema
from src.models import build_model as build
from src.visualization import visualize as viz

In [13]:
# os.chdir('/Users/magdalena/OneDrive - Queen Mary, University of London/bezzlab/research/projects/phospho_pi/')
os.chdir('/home/mhuebner/Desktop/bezzlab/research/projects/phospho_pi/')

#### Training/Testing

In [14]:
# reading csv from files into dict
data = {}
data['e_activity'] = pd.read_csv('data/processed/synthetic_data/toy_network_mini/p_model/e_activity.csv')
data['p_occupancy'] = pd.read_csv('data/processed/synthetic_data/toy_network_mini/p_model/p_occupancy.csv')
data['e_activity']

Unnamed: 0,enzyme,sample,value,prob
0,e1,s1,inc,0.774553
1,e1,s10,dec,0.617971
2,e1,s100,inc,0.796040
3,e1,s11,base,0.650127
4,e1,s12,dec,0.748834
...,...,...,...,...
295,e3,s95,inc,0.644811
296,e3,s96,inc,0.550137
297,e3,s97,inc,0.812202
298,e3,s98,dec,0.783041


In [15]:
data['p_occupancy']['prob'] = np.where(data['p_occupancy']['value'] == 'base', 1 - data['p_occupancy']['prob'], data['p_occupancy']['prob'])
data['p_occupancy']['value'] = np.where(data['p_occupancy']['value'] == 'base', np.random.choice(['inc', 'dec'], size=len(data['p_occupancy']['value']), p=[0.5, 0.5]), data['p_occupancy']['value'])
data['e_activity']['prob'] = np.where(data['e_activity']['value'] == 'base', 1 - data['e_activity']['prob'], data['e_activity']['prob'])
data['e_activity']['value'] = np.where(data['e_activity']['value'] == 'base', np.random.choice(['inc', 'dec'], size=len(data['e_activity']['value']), p=[0.5, 0.5]), data['e_activity']['value'])

Splitting data into training/testing

In [31]:
# getting sample names (union of all samples in all datasets)
samples = list(set(data['e_activity']['sample']).union(set(data['p_occupancy']['sample'])))
samples.sort()
# sample x% of samples randomly without replacement with seed
random.seed(613)
train = random.sample(samples, int(len(samples)*0.1))
# train = ['s40', 's23', 's22', 's99', 's7', 's61', 's62', 's63']
# filter data
training_data = {}
training_data['e_activity'] = data['e_activity'][data['e_activity']['sample'].isin(train)].reset_index(drop=True)
training_data['p_occupancy'] = data['p_occupancy'][data['p_occupancy']['sample'].isin(train)].reset_index(drop=True)
train

['s40', 's23', 's22', 's99', 's7', 's61', 's62', 's63', 's10', 's2']

In [32]:
# Mapping data to Problog predicates
predicates = {}
predicates['e_ksea'] = schema.EKseaPredicate()
predicates['e_ksea'].add_data(training_data['e_activity'], 'enzyme', 'sample', 'value')
predicates['p_fc'] = schema.PFoldChangePredicate()
predicates['p_fc'].add_data(training_data['p_occupancy'], 'phosphosite', 'sample', 'value')

In [33]:
evidence = []
for predicate in predicates:
    evid_generator = build.ProblogStatementGenerator(predicates[predicate])
    evidence = evidence + evid_generator.generate_facts(build.EvidenceTemplate)
evidence[:10]

['evidence(e_ksea(e1, s10, dec)).',
 'evidence(e_ksea(e1, s2, inc)).',
 'evidence(e_ksea(e1, s22, dec)).',
 'evidence(e_ksea(e1, s23, inc)).',
 'evidence(e_ksea(e1, s40, dec)).',
 'evidence(e_ksea(e1, s61, inc)).',
 'evidence(e_ksea(e1, s62, dec)).',
 'evidence(e_ksea(e1, s63, inc)).',
 'evidence(e_ksea(e1, s7, dec)).',
 'evidence(e_ksea(e1, s99, inc)).']

In [34]:
# write evidence to text file
with open('models/synthetic_data/toy_network_mini/p_model/evidence_o1b.pl', 'w') as f:
    for item in evidence:
        f.write("%s\n" % item)

LFI

In [35]:
DIR = 'models/synthetic_data/toy_network_mini/p_model/'
max_iter = 3
# learning from interpretation
cmd = f'problog lfi {DIR}p_model_lfi_o1b.pl {DIR}evidence_o1b.pl -O {DIR}p_model_o1b.pl -k ddnnf -v -n {max_iter}'
os.system(cmd)

[INFO] Output level: INFO
[INFO] Number of examples: 1
[INFO] Compile time: 0.546465
[INFO] Weights to learn: [t(_,E,S)::e_ksea(E,S,dec), t(_,E,S)::e_ksea(E,S,inc), t(_,P,S)::p_fc(P,S,dec), t(_,P,S)::p_fc(P,S,inc), t(0.33,E,S)::e_activity(E,S,dec), t(0.33,E,S)::e_activity(E,S,inc), t(0.7,E,P)::occ_dec(P,S), t(0.7,E,P)::occ_dec(P,S), t(0.7,E,P)::occ_inc(P,S), t(0.7,E,P)::occ_inc(P,S), t(1.0)::p_occupancy(P,S,dec), t(0.0)::p_occupancy(P,S,inc), t(0.0)::p_occupancy(P,S,dec), t(1.0)::p_occupancy(P,S,inc), t(0.5)::p_occupancy(P,S,dec), t(0.5)::p_occupancy(P,S,inc), t(0.0)::p_occupancy(P,S,dec), t(0.0)::p_occupancy(P,S,inc)]
[INFO] Bodies: [lfi_body(0,t(E,S),E,S,dec), lfi_body(1,t(E,S),E,S,inc), lfi_body(2,t(P,S),P,S,dec), lfi_body(3,t(P,S),P,S,inc), lfi_body(4,t(E,S),E,S,dec), lfi_body(5,t(E,S),E,S,inc), lfi_body(6,t(E,P),P,S), lfi_body(7,t(E,P),P,S), lfi_body(8,t(E,P),P,S), lfi_body(9,t(E,P),P,S), lfi_body(10,t,P,S,dec), lfi_body(11,t,P,S,inc), lfi_body(12,t,P,S,dec), lfi_body(13,t,P,S,inc

Traceback (most recent call last):
  File "/home/mhuebner/anaconda3/envs/phospho_pi/lib/python3.11/site-packages/problog/learning/lfi.py", line 1544, in main
    results = run_lfi(program, examples, knowledge=knowledge, **options)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mhuebner/anaconda3/envs/phospho_pi/lib/python3.11/site-packages/problog/learning/lfi.py", line 1371, in run_lfi
    score = lfi.run()
            ^^^^^^^^^
  File "/home/mhuebner/anaconda3/envs/phospho_pi/lib/python3.11/site-packages/problog/learning/lfi.py", line 984, in run
    score = self.step()
            ^^^^^^^^^^^
  File "/home/mhuebner/anaconda3/envs/phospho_pi/lib/python3.11/site-packages/problog/learning/lfi.py", line 951, in step
    results = self._evaluate_examples()
              ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mhuebner/anaconda3/envs/phospho_pi/lib/python3.11/site-packages/problog/learning/lfi.py", line 823, in _evaluate_examples
    result = evalua

256