In [1]:
from rtgemlib import RTGEM
from rtgemlib import sample_from_tgem, LogLikelihood, scoreBic, mle_lambdas, LocaleLogLikelihood, get_count_duration_df, get_node_LogLikelihood, set_pcv_lambda_t, backward_neighbors_gen,\
compute_logLikelihood, set_nodes_timeseries, set_nodes_parents_counts, duration, get_parents_count_vector, forward_neighbors_gen

from tqdm.autonotebook import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx



In [2]:
def empty_nodes(nodes):
    """Build an edge-free model specification for the given node names.

    Parameters
    ----------
    nodes : iterable of hashable
        Node (event) labels.

    Returns
    -------
    dict
        Maps each label to its own ``{'timescales': {}, 'lambdas': {(): 1}}``
        spec: no parent timescales and a single rate of 1 for the empty
        parent-count vector ``()``.

    Notes
    -----
    A fresh spec (and fresh inner dicts) is created per node. The previous
    ``[spec] * len(nodes)`` form stored the *same* dict object under every
    key, so editing one node's timescales or lambdas changed all nodes.
    """
    return {node: {'timescales': {}, 'lambdas': {(): 1}} for node in nodes}

In [3]:
# Hand-written two-node specification. For each node:
#   'timescales' maps another node's label to a list of [start, end] pairs;
#   'lambdas' maps a tuple of 0/1 flags to a rate value.
# NOTE(review): presumably one flag per timescale of the parent — confirm in rtgemlib.
model = {
    'B': {
        'timescales': {'A': [[1, 2], [5, 6]]},
        'lambdas': {
            (0, 0): 10,
            (0, 1): 1.6,
            (1, 0): 3,
            (1, 1): 1,
        },
    },
    'A': {
        'timescales': {'B': [[0, 1], [10, 15]]},
        'lambdas': {
            (0, 0): 1,
            (0, 1): 4,
            (1, 0): 5,
            (1, 1): 9,
        },
    },
}



In [4]:
# Wrap the dict specification in an RTGEM object; this is the ground-truth model used for sampling below.
rtgem_model = RTGEM(model)

In [5]:
# Upper time bound passed as `t_max` to the sampling, likelihood and parameter-learning calls below.
t_max = 10000

## Sampling

In [6]:
# Draw data from the reference model between t_min=0 and t_max.
# NOTE(review): presumably returns a DataFrame of events with a 'time' column (a later cell
# filters on sampled_data['time']) — confirm. No random seed is set, so results vary per run.
sampled_data = sample_from_tgem(rtgem_model, t_min=0, t_max=t_max)





In [12]:
# NOTE(review): the return value is discarded, so this is presumably called for its effect on
# rtgem_model (pcv = parent count vector?) — confirm in rtgemlib.
set_pcv_lambda_t(model=rtgem_model, data=sampled_data, t_max=t_max)

In [13]:
# Spot-check for node 'B' at one hard-coded time value (~1.41); the recorded output is a
# (tuple of 0/1 flags, float) pair.
# NOTE(review): the meaning of the float (next change time?) is not visible here — confirm.
get_parents_count_vector(
                rtgem_model.get_parents_count('B'), 1.412222463741663, t_max)

((1, 0), 3.8034030950520545)

In [14]:
# Per-(event, pcv) table; the displayed result has columns event, pcv, lambda_t, duration, count.
count_duration_df = get_count_duration_df(model=rtgem_model, data=sampled_data, t_max=t_max)

In [15]:
count_duration_df

Unnamed: 0,event,pcv,lambda_t,duration,count
0,B,"(0, 0)",10.0,1.372583,21
1,B,"(0, 1)",1.6,31.890831,40
2,B,"(1, 0)",3.0,35.890831,100
3,B,"(1, 1)",1.0,9930.845755,10267
4,A,"(0, 0)",1.0,19.252723,19
5,A,"(0, 1)",4.0,3427.319305,13783
6,A,"(1, 0)",5.0,54.426162,283
7,A,"(1, 1)",9.0,6499.00181,58725


## Likelihood

In [16]:
# Log-likelihood computed from the count/duration table alone.
compute_logLikelihood(count_duration_df)

66176.84862483843

In [17]:
# Same quantity computed directly from model + data; the recorded output matches the
# table-based value from compute_logLikelihood(count_duration_df).
LogLikelihood(model=rtgem_model, observed_data=sampled_data, t_max=t_max)

66176.84862483843

## Parameters learning (lambdas)

In [18]:
# Estimate the rates from the sampled data; in the recorded output lambda_t equals count / duration
# per row and lies close to the rates specified in `model`.
mle_lambdas(data=sampled_data, model=rtgem_model, t_max=t_max)

Unnamed: 0,event,pcv,lambda_t,duration,count
0,B,"(0, 0)",15.29962,1.372583,21
1,B,"(0, 1)",1.254279,31.890831,40
2,B,"(1, 0)",2.786227,35.890831,100
3,B,"(1, 1)",1.03385,9930.845755,10267
4,A,"(0, 0)",0.986873,19.252723,19
5,A,"(0, 1)",4.02151,3427.319305,13783
6,A,"(1, 0)",5.199705,54.426162,283
7,A,"(1, 1)",9.036003,6499.00181,58725


## Structure learning

### Reference model

In [19]:
# Start the reference structure from two nodes with no edges.
# NOTE(review): this rebinds `rtgem_model`, replacing the model built from the dict specification above.
rtgem_model = RTGEM(empty_nodes(['A', 'B']), default_end_timescale=1)

In [20]:
# Add three edges (including the self-loop on A) to the empty model, one operator call per edge.
# NOTE(review): whether a tuple is (parent, child) or (child, parent) is not visible here — confirm.
rtgem_model.add_edge_operator(('A', 'A'))
rtgem_model.add_edge_operator(('A', 'B'))
rtgem_model.add_edge_operator(('B', 'A'))

In [21]:
# Apply the split operator to the [0, 1] timescale of edge ('A', 'A').
rtgem_model.split_operator(edge=('A', 'A'), timescale=[0,1])

In [22]:
# Apply the extend operator to edge ('A', 'B').
rtgem_model.extend_operator(edge=('A', 'B'))

In [23]:
# Sample from the reference structure over a hard-coded horizon of 10000; the next cell keeps
# only events with time < 1000 for learning. No random seed is set, so results vary per run.
sampled_data = sample_from_tgem(rtgem_model, t_min=0, t_max=10000)





In [24]:
# Learning window: rebinds the global t_max (previously 10000) and keeps only events strictly before it.
t_max = 1000
data = sampled_data[sampled_data['time'] < t_max]

In [25]:
# BIC score of the reference (data-generating) structure on the learning window; serves as a
# baseline for the scores reached by the searches below.
scoreBic(model=rtgem_model, observed_data=data, t_max=t_max)

5021.226026603065

### Forward Search

In [26]:
import itertools
import random
import copy

In [27]:
def empty_nodes(nodes):
    """Build an edge-free model specification for the given node names.

    Parameters
    ----------
    nodes : iterable of hashable
        Node (event) labels.

    Returns
    -------
    dict
        Maps each label to its own ``{'timescales': {}, 'lambdas': {(): 1}}``
        spec: no parent timescales and a single rate of 1 for the empty
        parent-count vector ``()``.

    Notes
    -----
    A fresh spec (and fresh inner dicts) is created per node. The previous
    ``[spec] * len(nodes)`` form stored the *same* dict object under every
    key, so editing one node's timescales or lambdas changed all nodes.
    """
    return {node: {'timescales': {}, 'lambdas': {(): 1}} for node in nodes}

In [40]:
# Search starts from an edge-free two-node model.
# NOTE(review): `model` was a plain dict earlier in the notebook; from here on it is an RTGEM object.
model = RTGEM(empty_nodes(['A', 'B']),  default_end_timescale=1)

#### Initialisation 

In [41]:
# Initial state for the forward search: attach the data to the model and score it.
# NOTE(review): the first two helpers return the model; set_pcv_lambda_t's result is discarded,
# so it presumably updates `model` in place — confirm in rtgemlib.
model = set_nodes_timeseries(model, data)
model = set_nodes_parents_counts(model, model.dpd_graph.nodes, t_max)
set_pcv_lambda_t(model, data, t_max)

# NOTE(review): positional order here is (data, model, t_max), unlike the (model, data, t_max)
# order used for set_pcv_lambda_t just above — confirm against the rtgemlib signature.
lambdas_count_duration_df = get_count_duration_df(data, model, t_max)

LogL = compute_logLikelihood(lambdas_count_duration_df)
log_td = np.log(t_max)

# Complexity penalty: model size times log(t_max).
size_log_td = model.size() * log_td

# Penalised score that the search loop tries to increase.
score = LogL - size_log_td
local_maximum = False
nodes = list(model.dpd_graph.nodes)
# Every ordered pair of nodes, self-loops included, is a candidate edge.
possible_edges = list(itertools.product(nodes, repeat = 2))

# Shuffled in place; no seed is set in this cell, so the candidate order differs between runs.
random.shuffle(possible_edges)

In [42]:
# Greedy forward search (best improvement): each iteration scans every neighbour produced by
# forward_neighbors_gen, remembers the highest-scoring one, and applies it only if it beats
# the current score. The loop ends when no neighbour improves the score.
# NOTE(review): there is no iteration cap; the recorded run below was stopped with KeyboardInterrupt.
it = 0
# One row per accepted move: [it, T_A, T_B, edges, op_name, args, score].
forward_logs = []
while not local_maximum:
    # Assume this is the last iteration until an improving neighbour is applied.
    local_maximum = True
    # Best neighbour seen in this iteration (all stay None / -inf if the generator yields nothing).
    max_score_ngbr = -np.inf
    max_op = None
    max_args = None
    max_changed_node_cnt_drt_df = None
    max_size_log_td_ngbr = None
    max_LogL_ngbr = None
    print('iteration number: {}: scoreBIC = {}'.format(it, score))
    for ngbr_info in forward_neighbors_gen(model, data, t_max, lambdas_count_duration_df,LogL, size_log_td, log_td,\
                                           possible_edges):

        # Each neighbour carries the operator, its arguments, the neighbour's log-likelihood and
        # penalty, and a count/duration table (only its first row's 'event' is read below).
        op, args, LogL_ngbr, size_log_td_ngbr, changed_node_cnt_drt_df = ngbr_info
        score_ngbr = LogL_ngbr - size_log_td_ngbr

        if score_ngbr > max_score_ngbr:
            max_score_ngbr = score_ngbr
            max_op = op
            max_args = args
            max_changed_node_cnt_drt_df = changed_node_cnt_drt_df
            max_size_log_td_ngbr = size_log_td_ngbr
            max_LogL_ngbr = LogL_ngbr
    print('max ngbr {}, args={}, max_scoreBIC = {}'.format(max_op, max_args, max_score_ngbr))
 
    if max_score_ngbr > score:
        # Apply the winning operator (the recorded output shows these are bound RTGEM methods).
        max_op(*max_args)
        LogL = max_LogL_ngbr
        size_log_td = max_size_log_td_ngbr
        # Swap the changed node's rows in the running table for the neighbour's rows.
        changed_node = max_changed_node_cnt_drt_df.iloc[0]['event']
        lambdas_count_duration_df = lambdas_count_duration_df[lambdas_count_duration_df['event'] != changed_node]
        lambdas_count_duration_df = pd.concat([lambdas_count_duration_df, max_changed_node_cnt_drt_df])

        local_maximum = False
        score = max_score_ngbr
        # Default label; overridden below when the operator is add-edge or split.
        op_name = 'étendreIntervalle'

        # removes added edge from possible edges
        if max_op == model.add_edge_operator:
            possible_edges.remove(max_args[0])
            op_name = 'ajouterArc'
        if max_op == model.split_operator:
            op_name = 'diviserIntervalle'

        # Timescales are deep-copied so later in-place changes do not alter logged rows.
        forward_logs.append([it, copy.deepcopy(model.get_node_parents_timescales('A')),\
                             copy.deepcopy(model.get_node_parents_timescales('B')),\
                             list(model.dpd_graph.edges()),\
                             op_name,\
                             max_args,\
                             score])
    it += 1

iteration number: 0: scoreBIC = -2013.8155105579642
max ngbr <bound method RTGEM.add_edge_operator of <rtgemlib.rtgem.RTGEM object at 0x00000160EFEF09E8>>, args=[('B', 'B')], max_scoreBIC = 2147.972841126711
iteration number: 1: scoreBIC = 2147.972841126711
max ngbr <bound method RTGEM.add_edge_operator of <rtgemlib.rtgem.RTGEM object at 0x00000160EFEF09E8>>, args=[('A', 'B')], max_scoreBIC = 5334.614576565169
iteration number: 2: scoreBIC = 5334.614576565169
max ngbr <bound method RTGEM.add_edge_operator of <rtgemlib.rtgem.RTGEM object at 0x00000160EFEF09E8>>, args=[('B', 'A')], max_scoreBIC = 7251.8095058075405
iteration number: 3: scoreBIC = 7251.8095058075405
max ngbr <bound method RTGEM.extend_operator of <rtgemlib.rtgem.RTGEM object at 0x00000160EFEF09E8>>, args=[('B', 'A')], max_scoreBIC = 9163.146398433702
iteration number: 4: scoreBIC = 9163.146398433702
max ngbr <bound method RTGEM.extend_operator of <rtgemlib.rtgem.RTGEM object at 0x00000160EFEF09E8>>, args=[('B', 'A')], max

KeyboardInterrupt: 

In [43]:
# Re-attach the data and recompute parent counts for the structure reached by the forward search.
# NOTE(review): presumably needed because the loop above was interrupted mid-iteration — confirm.
model = set_nodes_timeseries(model, data)
model = set_nodes_parents_counts(model, model.dpd_graph.nodes, t_max)

In [None]:
# Rate estimates for the learned structure.
# NOTE(review): arguments are positional here as (model, data, t_max), whereas the earlier call
# used keywords (data=..., model=..., t_max=...) — confirm the positional order matches the signature.
mle_lambdas(model, data, t_max)

Unnamed: 0,event,pcv,lambda_t,duration,count
0,A,"(0, 0, 0, 0, 0, 0, 0, 0)",9.999023,0.10001,1
1,A,"(0, 0, 0, 0, 0, 0, 0, 1)",0.000000,0.00000,0
2,A,"(0, 0, 0, 0, 0, 0, 1, 0)",0.000000,0.00000,0
3,A,"(0, 0, 0, 0, 0, 0, 1, 1)",0.000000,0.00000,0
4,A,"(0, 0, 0, 0, 0, 1, 0, 0)",0.000000,0.00000,0
5,A,"(0, 0, 0, 0, 0, 1, 0, 1)",0.000000,0.00000,0
6,A,"(0, 0, 0, 0, 0, 1, 1, 0)",0.000000,0.00000,0
7,A,"(0, 0, 0, 0, 0, 1, 1, 1)",0.000000,0.00000,0
8,A,"(0, 0, 0, 0, 1, 0, 0, 0)",0.000000,0.00000,0
9,A,"(0, 0, 0, 0, 1, 0, 0, 1)",0.000000,0.00000,0


In [None]:
# BIC score of the learned structure, to compare with the reference model's score above.
scoreBic(model, data, t_max)

In [None]:
# Snapshot the forward-search result: the backward search below applies operators to `model`
# itself, so a deep copy is kept for later comparison.
forward_model = copy.deepcopy(model)

## Backward search (starting from the forward-search model)

In [34]:
# Initial state for the backward search, computed on the current `model`.
# NOTE(review): these cells carry execution counts 34-36, lower than the forward-search cells
# above (40-43), so the recorded outputs come from an earlier kernel state — re-run top to bottom.
model = set_nodes_timeseries(model, data)
model = set_nodes_parents_counts(model, model.dpd_graph.nodes, t_max)
set_pcv_lambda_t(model, data, t_max)

# NOTE(review): positional order here is (data, model, t_max), unlike set_pcv_lambda_t above — confirm.
lambdas_count_duration_df = get_count_duration_df(data, model, t_max)

LogL = compute_logLikelihood(lambdas_count_duration_df)
log_td = np.log(t_max)

# Complexity penalty: model size times log(t_max).
size_log_td = model.size() * log_td

# Penalised score that the backward loop tries to increase.
score = LogL - size_log_td
local_maximum = False

In [35]:
score

-1897.0277331403577

In [36]:
# Greedy backward search (first improvement): each iteration walks the neighbours produced by
# backward_neighbors_gen and applies the FIRST one whose score beats the current score, then
# restarts. The loop ends when a full pass finds no improving neighbour.
it = 0
# One row per accepted move: [it, T_A, T_B, edges, op_name, args, score].
backward_logs = []
local_maximum = False

while not local_maximum:
    # Assume this is the last iteration until an improving neighbour is applied.
    local_maximum = True
    max_score_ngbr = -np.inf
    max_op = None
    max_args = None
    max_changed_node_cnt_drt_df = None
    max_size_log_td_ngbr = None
    max_LogL_ngbr = None
    print('iteration number: {}: scoreBIC = {}'.format(it, score))
    for ngbr_info in backward_neighbors_gen(model, data, t_max, lambdas_count_duration_df,LogL, size_log_td, log_td,):

        op, args, LogL_ngbr, size_log_td_ngbr, changed_node_cnt_drt_df = ngbr_info
        score_ngbr = LogL_ngbr - size_log_td_ngbr

        # The best-neighbour guard `if score_ngbr > max_score_ngbr:` is disabled, so despite
        # their names the max_* variables simply hold the CURRENT neighbour.
        max_score_ngbr = score_ngbr
        max_op = op
        max_args = args
        max_changed_node_cnt_drt_df = changed_node_cnt_drt_df
        max_size_log_td_ngbr = size_log_td_ngbr
        max_LogL_ngbr = LogL_ngbr

        if max_score_ngbr > score:
            print('max ngbr {}, args={}, max_scoreBIC = {}'.format(max_op, max_args, max_score_ngbr))

            # Applied while the neighbour generator is still suspended; the `break` below
            # abandons the generator right after, so it never resumes on the changed model.
            max_op(*max_args)
            LogL = max_LogL_ngbr
            size_log_td = max_size_log_td_ngbr
            # Swap the changed node's rows in the running table for the neighbour's rows.
            changed_node = max_changed_node_cnt_drt_df.iloc[0]['event']
            lambdas_count_duration_df = lambdas_count_duration_df[lambdas_count_duration_df['event'] != changed_node]
            lambdas_count_duration_df = pd.concat([lambdas_count_duration_df, max_changed_node_cnt_drt_df])

            local_maximum = False
            score = max_score_ngbr
            # Default label; overridden below for the inverse-extend and inverse-split operators.
            op_name = 'supprimerArc'

            if max_op == model.inverse_extend_operator:
                op_name = 'reduireIntervalle'
            if max_op == model.inverse_split_operator:
                op_name = 'FusionnerIntervalle'

            # Timescales are deep-copied so later in-place changes do not alter logged rows.
            backward_logs.append([it, copy.deepcopy(model.get_node_parents_timescales('A')),\
                                 copy.deepcopy(model.get_node_parents_timescales('B')),\
                                 list(model.dpd_graph.edges()),\
                                 op_name,\
                                 max_args,\
                                 score])
            break
    it += 1

iteration number: 0: scoreBIC = -1897.0277331403577
max ngbr <bound method RTGEM.inverse_add_edge_operator of <rtgemlib.rtgem.RTGEM object at 0x00000160F0256898>>, args=[('A', 'B')], max_scoreBIC = -160.12474173196279
iteration number: 1: scoreBIC = -160.12474173196279
max ngbr <bound method RTGEM.inverse_extend_operator of <rtgemlib.rtgem.RTGEM object at 0x00000160F0256898>>, args=[('A', 'A')], max_scoreBIC = 1608.1398232544834
iteration number: 2: scoreBIC = 1608.1398232544834
max ngbr <bound method RTGEM.inverse_extend_operator of <rtgemlib.rtgem.RTGEM object at 0x00000160F0256898>>, args=[('A', 'A')], max_scoreBIC = 2492.0021361774925
iteration number: 3: scoreBIC = 2492.0021361774925
max ngbr <bound method RTGEM.inverse_extend_operator of <rtgemlib.rtgem.RTGEM object at 0x00000160F0256898>>, args=[('A', 'A')], max_scoreBIC = 2933.927177722613
iteration number: 4: scoreBIC = 2933.927177722613
max ngbr <bound method RTGEM.inverse_extend_operator of <rtgemlib.rtgem.RTGEM object at 0x