In [3]:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from multiprocessing import Process, Queue
import os
import numpy as np
import seaborn as sns
import tensorflow as tf
import pandas as pd
from scipy.misc import imsave
import scipy.stats
import random
from sklearn.metrics import accuracy_score

from problog import get_evaluatable
from problog.program import PrologString, SimpleProgram
from problog.core import ProbLog
from problog.logic import Constant,Var,Term,AnnotatedDisjunction
from problog.learning import lfi

%matplotlib notebook

In [18]:
# ProbLog program handed to lfi.run_lfi further down.  Probabilities written as
# t(...) are the learnable parameters; some carry a starting value
# (t(0.5), t(0.33)) and the rest are written t(_).
# Layout of the program text:
#   1. facts / annotated disjunctions for gender, age_*, smoke, alcohol;
#   2. facts / annotated disjunctions for bmi_*, ldl, blood_pressure_*;
#   3. one rule per (blood_pressure_* | ldl | bmi_*) head and per body atom
#      taken from gender, age_1..age_4, smoke, alcohol;
#   4. rules for heart_d and glucose with blood_pressure_*, ldl, bmi_* bodies.
# NOTE(review): bmi_*, ldl and blood_pressure_* are defined twice, once as
# facts/disjunctions (2) and again as rule heads (3).  The model printed at the
# end of this notebook lists bmi weights 0.64 / 0.33 / 0.33, i.e. a sum above
# 1, which is presumably what the recorded "Sum of annotated disjunction
# weigths exceeds acceptable value" error refers to -- TODO confirm.
model = """
t(0.5)::gender.
t(_)::age_1;t(_)::age_2;t(_)::age_3;t(_)::age_4.
t(0.5)::smoke.
t(0.5)::alcohol.

t(0.33)::bmi_1;t(0.33)::bmi_2;t(0.33)::bmi_3.
t(0.5)::ldl.
t(0.33)::blood_pressure_1;t(0.33)::blood_pressure_2;t(0.33)::blood_pressure_3.

t(_)::blood_pressure_1:-gender.
t(_)::blood_pressure_1:-age_1.
t(_)::blood_pressure_1:-age_2.
t(_)::blood_pressure_1:-age_3.
t(_)::blood_pressure_1:-age_4.
t(_)::blood_pressure_1:-smoke.
t(_)::blood_pressure_1:-alcohol.

t(_)::blood_pressure_2:-gender.
t(_)::blood_pressure_2:-age_1.
t(_)::blood_pressure_2:-age_2.
t(_)::blood_pressure_2:-age_3.
t(_)::blood_pressure_2:-age_4.
t(_)::blood_pressure_2:-smoke.
t(_)::blood_pressure_2:-alcohol.

t(_)::blood_pressure_3:-gender.
t(_)::blood_pressure_3:-age_1.
t(_)::blood_pressure_3:-age_2.
t(_)::blood_pressure_3:-age_3.
t(_)::blood_pressure_3:-age_4.
t(_)::blood_pressure_3:-smoke.
t(_)::blood_pressure_3:-alcohol.

t(_)::ldl:-gender.
t(_)::ldl:-age_1.
t(_)::ldl:-age_2.
t(_)::ldl:-age_3.
t(_)::ldl:-age_4.
t(_)::ldl:-smoke.
t(_)::ldl:-alcohol.

t(_)::bmi_1:-gender.
t(_)::bmi_1:-age_1.
t(_)::bmi_1:-age_2.
t(_)::bmi_1:-age_3.
t(_)::bmi_1:-age_4.
t(_)::bmi_1:-smoke.
t(_)::bmi_1:-alcohol.

t(_)::bmi_2:-gender.
t(_)::bmi_2:-age_1.
t(_)::bmi_2:-age_2.
t(_)::bmi_2:-age_3.
t(_)::bmi_2:-age_4.
t(_)::bmi_2:-smoke.
t(_)::bmi_2:-alcohol.

t(_)::bmi_3:-gender.
t(_)::bmi_3:-age_1.
t(_)::bmi_3:-age_2.
t(_)::bmi_3:-age_3.
t(_)::bmi_3:-age_4.
t(_)::bmi_3:-smoke.
t(_)::bmi_3:-alcohol.

t(_)::heart_d:-blood_pressure_1.
t(_)::heart_d:-blood_pressure_2.
t(_)::heart_d:-blood_pressure_3.
t(_)::heart_d:-ldl.
t(_)::heart_d:-bmi_1.
t(_)::heart_d:-bmi_2.
t(_)::heart_d:-bmi_3.

t(_)::glucose:-blood_pressure_1.
t(_)::glucose:-blood_pressure_2.
t(_)::glucose:-blood_pressure_3.
t(_)::glucose:-ldl.
t(_)::glucose:-bmi_1.
t(_)::glucose:-bmi_2.
t(_)::glucose:-bmi_3.
"""

In [19]:
# Python-side handles (problog Term objects) for every atom named in the
# ProbLog program text; evidence tuples are built from these.

# Atoms that occur as a single proposition in the program.
gender, smoke, alcohol, ldl = (
    Term(atom_name) for atom_name in ('gender', 'smoke', 'alcohol', 'ldl')
)

# Atoms that come in numbered families (age_1..age_4, bmi_1..bmi_3, ...).
age_1, age_2, age_3, age_4 = (Term('age_%d' % k) for k in range(1, 5))
bmi_1, bmi_2, bmi_3 = (Term('bmi_%d' % k) for k in range(1, 4))
blood_pressure_1, blood_pressure_2, blood_pressure_3 = (
    Term('blood_pressure_%d' % k) for k in range(1, 4)
)

# Atoms that only appear as rule heads in the program.
heart_d, glucose = Term('heart_d'), Term('glucose')

In [20]:
# Shape of the evidence expected by LFI: one list per example, each entry a
# (Term, bool) pair, e.g.
#   [
#       [(gender, True), (age_2, True), (blood_pressure_1, True)],
#       [(gender, False), (age_2, False), (blood_pressure_2, False)],
#   ]

# NOTE: read_pickle unpickles the file, so "data.pkl" must come from a trusted
# source.  The path is relative to the notebook's working directory.
df = pd.read_pickle("data.pkl")

def convert_row(row):
    """Turn one data row into a list of ``(Term, bool)`` evidence pairs.

    ``row`` is indexed by column name (this notebook passes ``df.iloc[i]``).
    Each column value is compared against a fixed set of codes; every code
    that matches contributes one pair, in the order of the table below.  A
    value that matches none of its column's codes contributes nothing.

    For the numbered families (age_*, blood_pressure_*, bmi_*) only the
    matching member is emitted as True; the other members are not emitted.
    """
    # (column, coded value, atom, truth value), in emission order.
    # NOTE(review): LBDLDL maps 0.0 -> True while ALQ120Q and LBXGLU map
    # 1.0 -> True; confirm this polarity against the data encoding.
    encoding = (
        ("RIAGENDR", 2.0, gender, False),
        ("RIAGENDR", 1.0, gender, True),
        ("RIDAGEYR", 0.0, age_1, True),
        ("RIDAGEYR", 1.0, age_2, True),
        ("RIDAGEYR", 2.0, age_3, True),
        ("RIDAGEYR", 3.0, age_4, True),
        ("ALQ120Q", 1.0, alcohol, True),
        ("ALQ120Q", 0.0, alcohol, False),
        ("SMQ020", 1.0, smoke, True),
        ("SMQ020", 2.0, smoke, False),
        ("BPXDI1", 0.0, blood_pressure_1, True),
        ("BPXDI1", 1.0, blood_pressure_2, True),
        ("BPXDI1", 2.0, blood_pressure_3, True),
        ("LBDLDL", 0.0, ldl, True),
        ("LBDLDL", 1.0, ldl, False),
        ("BMXBMI", 0.0, bmi_1, True),
        ("BMXBMI", 1.0, bmi_2, True),
        ("BMXBMI", 2.0, bmi_3, True),
        ("HEARTDISEASE", True, heart_d, True),
        ("HEARTDISEASE", False, heart_d, False),
        ("LBXGLU", 1.0, glucose, True),
        ("LBXGLU", 0.0, glucose, False),
    )

    return [
        (atom, truth)
        for column, code, atom, truth in encoding
        if row[column] == code
    ]

# Evidence derived from the data: rows at positions 0 and 3.
# (Alternative kept for reference: positions 3 and 4, i.e. range(3, 5).)
evidence = [convert_row(df.iloc[pos]) for pos in (0, 3)]

# The data-derived evidence above is immediately replaced by this small
# hand-written pair of examples, which is what the LFI call below receives.
evidence = [
    [(gender, True), (age_1, True), (bmi_1, True)],
    [(gender, False), (age_2, True)],
]

evidence

[[(gender, True), (age_1, True), (bmi_1, True)],
 [(gender, False), (age_2, True)]]

In [21]:
# Fit the t(...) parameters of `model` to `evidence`.
# NOTE(review): the recorded run of this cell ended in InvalidValue (see the
# output below), in which case none of these five names is (re)bound.
(score, weights, atoms, iteration, lfi_problem) = lfi.run_lfi(
    PrologString(model),
    evidence,
)

InvalidValue: Sum of annotated disjunction weigths exceeds acceptable value.

In [15]:
# Show the program text returned by the LFI problem object.
# NOTE(review): this cell's execution count (15) precedes the cells above
# (18-21), so the recorded output presumably comes from an earlier run.
print(lfi_problem.get_model())

0.5::gender.
1.0::age_1; 0.0::age_2; 0.0::age_3; 0.0::age_4.
0.509281411796829::smoke.
0.677980958826099::alcohol.
0.641515897796475::bmi_1; 0.33::bmi_2; 0.33::bmi_3.
0.5::ldl.
0.33::blood_pressure_1; 0.33::blood_pressure_2; 0.33::blood_pressure_3.
0.680885277614264::blood_pressure_1 :- gender.
0.775895420594122::blood_pressure_1 :- age_1.
0.906958843875488::blood_pressure_1 :- age_2.
0.822857387290444::blood_pressure_1 :- age_3.
0.998349660088138::blood_pressure_1 :- age_4.
0.449218387165604::blood_pressure_1 :- smoke.
0.604688657441335::blood_pressure_1 :- alcohol.
0.901180357938612::blood_pressure_2 :- gender.
0.9435456970753::blood_pressure_2 :- age_1.
0.913735616994494::blood_pressure_2 :- age_2.
0.523637087631031::blood_pressure_2 :- age_3.
0.664206146953203::blood_pressure_2 :- age_4.
0.399826657019851::blood_pressure_2 :- smoke.
0.411155834681506::blood_pressure_2 :- alcohol.
0.475987239249909::blood_pressure_3 :- gender.
0.599440723508668::blood_pressure_3 :- age_1.
0.34544259