In [1]:
%reload_ext autoreload
%autoreload 4

In [2]:
import numpy as np
import networkx as nx
from graph_enumerator import *
from graph_local_classes import *
from subgraph_functions import *
from node_semantics import Node_Name_Rule, Edge_Semantics_Rule

In [3]:
generator_dictionary ={ "nodes" : ["A_int","A_obs","A_★","B_obs","B_★","C_obs","C_★","D_obs","D_★"],
    "query_edge_set" : [
        ("A_★",'B_★'),
        ("A_★",'C_★'),
        ("A_★",'D_★'),
        ("B_★",'C_★'),
        ("B_★",'D_★'),
        ("C_★",'B_★'),
        ("C_★",'D_★'),
        ("D_★",'B_★'),
        ("D_★",'C_★')
    ],
    "filters": {
        "explicit_child_parentage"  : [[
            ("A_int",[]),
            ("A_★",["A_int"]),
            ('A_obs',['A_int','A_★']),
            ('B_obs',['B_★']),
            ('C_obs',["C_★"]),
            ('D_obs',["D_★"])
        ]],
        "explicit_parent_offspring" : [[
            ('A_int',['A_obs','A_★']),
            ("A_obs",[]),
            ("B_obs",[]),
            ("C_obs",[]),
            ("D_obs",[])
        ]],
        "extract_remove_self_loops": []
    },
    "conditions": {
        "create_path_complete_condition" : [[("A_int","B_★"),("A_int","C_★"),("A_int","D_★")]],
    }
}



node_semantics={
        # ".*_int" → intervener
        "intervener": {
            "node_type":"intervener",
            "where":"suffix",
            "infix":"_",
            "code":"int"},
        # ".*_obs" → observed
        "observed": {
            "node_type":"observed",
            "where":"suffix",
            "infix":"_",
            "code":"obs"},
        # ".*_★" → hidden
        "hidden": {
            "node_type":"hidden",
            "where":"suffix",
            "infix":"_",
            "code":"★"}
}

edge_semantics={
    "hidden_sample":{
        "source_types":["hidden"],
        "target_types":["hidden"],
        "edge_type": "hidden_sample"
    },
    "observed":{
        "source_types":["hidden"],
        "target_types":["observed"],
        "edge_type": "observed"
    },
    "intervention":{
        "source_types":["intervener"],
        "target_types":None,
        "edge_type": "intervention"
    }
}


# todo(maybe): rework filters and conditions to take node types and edge types as arguments

In [4]:
working_graph_iter = generate_graphs(**generator_dictionary)
working_graphs = list(working_graph_iter)

In [5]:
working_graph_index = 0
test_me=working_graphs[working_graph_index].copy()

Node_Name_Rule.graph_semantics_apply(test_me,node_semantics)
Edge_Semantics_Rule.graph_semantics_apply(test_me,edge_semantics)

gs_in, gp_in = sub_graph_sample(test_me,edge_types=["hidden_sample"], param_init=sf_big)
gs_out, gp_out = sub_graph_sample(test_me,edge_types=['observed'], param_init=sf_big)

In [8]:
inner_simul = InnerGraphSimulation(gs_in,gp_in)
vals = inner_simul.sample(1)
test_obs_dict = gp_out.to_dict()

In [7]:
test_obs_dict

{'lambda0': [0.0075378490479738698],
 'mu': array([ 0.25069507,  0.0183537 ,  0.44992743,  2.19083192]),
 'n': 4,
 'names': [('A_★', 'A_obs'),
  ('B_★', 'B_obs'),
  ('C_★', 'C_obs'),
  ('D_★', 'D_obs')],
 'p': 0.8,
 'psi': array([ 0.00368338,  0.0003471 ,  0.00452083,  0.0061073 ]),
 'psi_shape': 1.0,
 'r': array([ 0.01469267,  0.01891168,  0.0100479 ,  0.00278766]),
 'r_shape': 1.0,
 'scale_free_bounds': (0.0001, 10000)}

In [15]:
def cond_to_data(cond):
    a,b,c,d = cond
    data_sequences = [
        [a,b,c,d], 
        [a,b,c,-np.inf], 
        [a,b,-np.inf,d],
        [a,b,-np.inf,-np.inf],
        [a,-np.inf,-np.inf,-np.inf]
    ]
    return data_sequences

## this will be a function that takes in a number M of samples, a graphstructure gs and a graph parameters gp

    gs_in, gp_in = sub_graph_sample(graph,edge_types=["hidden_sample"], param_init=sf_big)
    gs_out, gp_out = sub_graph_sample(graph,edge_types=['observed'], param_init=sf_big)
    inner_simul = InnerGraphSimulation(gs_in,gp_in)
    vals = inner_simul.sample(M)
    obs_dict = gp_out.to_dict()

def cross_entropy_loglik(data_sets,probability,k,obs_data,aux_data,parameters):
    return sum([probability[i]*k*multi_edge_loglik(d_set,aux_data,parameters) for i,d_set in enumerate(data_sets)])

def multi_edge_loglik(obs_data,aux_data,parameters):
    # special casing for my problem, this needs to be made more general
    non_int_node_idx = slice(1,4)
    obs_data = obs_data[non_int_node_idx]
    aux_data = aux_data[non_int_node_idx]
    parameters = parameters[non_int_node_idx]
    # end special casing
    
    return sum([one_edge_loglik(aux_data[i],obs_data[i],parameters['psi'][i],parameters['r'][i]) for i in len(aux_data)]    

def one_edge_loglik(cause_time, effect_time, psi, r, T=4.0):
    if np.isinf(cause_time):
        if np.isinf(effect_time):
            return 0
        elif not np.isinf(effect_time):
            return -np.inf
    if not np.isinf(cause_time):
        if np.isinf(effect_time):
            return psi/r*np.exp(-r*(T-cause_time))
        elif effect_time < cause_time: 
            return -np.inf
        else:
            return np.log(psi) - (r*(effect_time-cause_time)) + psi/r*np.exp(-r*(effect_time-cause_time))

In [None]:

data_p=[0.512,0.128,0.128,0.032,.2]

sf_big = {'scale_free_bounds': (10**-4,10**4)}
sf_small = {'scale_free_bounds': (10**-1,10**1)}

In [13]:
cond1 = [0,0,0,0]
cond2 = [0,1,3,2]
cond3 = [0,3,2,1]
cond4 = [0,1,2,2]

In [16]:
cond = cond_to_data(cond2)
for i,cond_i in enumerate(cond):
    print("\n\n"+str(i+1)+"th"+"condition")
    print(cond_i)
    for samp in vals:
        local_vals =[]
        for i in range(1,4):
            here = one_edge_loglik(samp[i],cond_i[i],test_obs_dict['psi'][i],test_obs_dict['r'][i])
            print(here)
            local_vals.append(here)
        print("{} is sum of lls".format(sum(local_vals)))



1thcondition
[0, 1, 3, 2]
-7.9663462243
-4.99215383466
-2.92489417058
-15.883394229532822 is sum of lls


2thcondition
[0, 1, 3, -inf]
-7.9663462243
-4.99215383466
2.16674319623
-10.791756862724293 is sum of lls


3thcondition
[0, 1, -inf, 2]
-7.9663462243
0.432348055921
-2.92489417058
-10.458892338953524 is sum of lls


4thcondition
[0, 1, -inf, -inf]
-7.9663462243
0.432348055921
2.16674319623
-5.367254972144995 is sum of lls


5thcondition
[0, -inf, -inf, -inf]
0.0170241299556
0.432348055921
2.16674319623
2.61611538210854 is sum of lls


SyntaxError: invalid syntax (<ipython-input-11-ceaf637dc082>, line 5)

In [115]:
cond[0][slice(1,4)]

[1, 3, 2]

Data from lagnado & Sloman

value_sequences = ['ABCD', 'ABC', 'ABD', 'AB','A']
data_vals = np.random.choice(value_sequences, size = 100, 
cond1timings= {"A":0,"B":0,"C":0,"D":0}
cond2timings= {"A":0,"B":1,"C":3,"D":2}
cond3timings= {"A":0,"B":3,"C":2,"D":1}
cond4timings= {"A":0,"B":1,"C":2,"D":2}


results = {("a","b"):[1.00, .96, .58,.92], ('a','c'):[.38, .17, .88, .29], ('a','d'):[.33, .13, .54, .33],
           ("b","c"):[.75, .79, .21, .79], ('b','d'):[.75, .96, .38, .88],
           ("c","b"):[.63, .38, .79, .50], ('c','d'):[.29, .21, .33, .29],
           ("d","b"):[.50, .46, .50, .46], ("d","c"):[.25, .83 ,.71 ,.21]}


pdf of k arrivals in interval (0,T) is $\frac{(Λ_{0,T})^k \exp(Λ_{0,T})}{k!}$

so pdf of 0 arrivals between 0 and T is $\exp(-Λ_{0,T})$, pdf of the first arrival occuring at $\tau$ is the product of the itstantaneous rate at $\tau$ and the probability that no event occurred before $\tau$ (from 0 to $\tau$) is $λ(T)\exp(-Λ_{0,T})$

lets say a cause at $t_0$ initiates our rate function so that it's 0 before $t_0$ and $λ(t;t_0)$

if our $λ(t; t_0) = 𝜓\exp(-r(t-t_0))$ then $Λ_{0,T} = \frac{𝜓}{r}(1 - \exp(-r(T-t_0)))$

so the log likelihoods of the pdf for no events by T is $\frac{𝜓}{r}(1 - \exp(-r(T-t_0)))$ and for 1 event exactly at $\tau$ is $$\log(λ(\tau;t_0))+ \log(\exp(-Λ_{0,T})) = \log(𝜓) - (r(\tau-t_0)) + \frac{𝜓}{r}(1 - \exp(-r(T-t_0)))$$
