In [1]:
from rtgemlib import RTGEM
from rtgemlib import sample_from_tgem, LogLikelihood, scoreBic, mle_lambdas, LocaleLogLikelihood, get_count_duration_df, get_node_LogLikelihood, set_pcv_lambda_t, backward_neighbors_gen,\
compute_logLikelihood, set_nodes_timeseries, set_nodes_parents_counts, duration, get_parents_count_vector, forward_neighbors_gen

from tqdm.autonotebook import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx



In [2]:
def empty_nodes(nodes):
    """Build an edge-free model specification for the given node names.

    Parameters
    ----------
    nodes : iterable of hashable
        Node names.

    Returns
    -------
    dict
        Maps each node name to its own ``{'timescales': {}, 'lambdas': {(): 1}}``
        entry (no parent timescales, a single lambda of 1 keyed by the empty
        tuple).

    Notes
    -----
    Every node gets a *fresh* dict. Building the values with ``[{...}] * n``
    would make all nodes share one dict object, so editing one node's
    timescales or lambdas would silently edit every other node as well.
    """
    return {node: {'timescales': {}, 'lambdas': {(): 1}} for node in nodes}

In [3]:
# Hand-written two-node specification. Each node lists, per parent, the
# timescale intervals it depends on ('timescales') and one rate per binary
# 2-tuple key ('lambdas').
# NOTE(review): the tuple keys presumably correspond to the `pcv` values shown
# in the sampled data further down -- confirm against rtgemlib.
model = {
    'B': {
        'timescales': {'A': [[1, 2], [5, 6]]},
        'lambdas': {
            (0, 0): 10,
            (0, 1): 1.6,
            (1, 0): 3,
            (1, 1): 1,
        },
    },
    'A': {
        'timescales': {'B': [[0, 1], [10, 15]]},
        'lambdas': {
            (0, 0): 1,
            (0, 1): 4,
            (1, 0): 5,
            (1, 1): 9,
        },
    },
}



In [4]:
# Wrap the dict specification in an RTGEM object; this object (not the raw
# dict) is what the sampling and scoring helpers below receive as `model`.
rtgem_model = RTGEM(model)

In [5]:
# Upper time bound handed to the sampling and likelihood helpers in this section.
t_max = 10000

## Sampling

In [6]:
# Draw a synthetic event sequence from the model between t_min and t_max
# (per the argument names). Later cells treat the result as a DataFrame with
# `time` and `event` columns.
sampled_data = sample_from_tgem(rtgem_model, t_min=0, t_max=t_max)

A Jupyter Widget

A Jupyter Widget



A Jupyter Widget



In [7]:
# The return value is ignored, so this call is used for its side effects only.
# NOTE(review): the frame displayed in the next cell has `pcv` and `lambda_t`
# columns -- presumably added in place by this call; confirm in rtgemlib.
set_pcv_lambda_t(model=rtgem_model, data=sampled_data, t_max=t_max)

In [8]:
# Preview only the first rows: the sampled frame holds tens of thousands of
# events, and rendering all of it bloats the notebook without adding information.
sampled_data.head(10)

Unnamed: 0,time,event,pcv,lambda_t
0,0.009960,B,"(0, 0)",10.0
1,0.076022,B,"(0, 0)",10.0
10401,0.087976,A,"(1, 0)",5.0
2,0.208396,B,"(0, 0)",10.0
3,0.301071,B,"(0, 0)",10.0
10402,0.913461,A,"(1, 0)",5.0
10403,0.920394,A,"(1, 0)",5.0
4,0.936484,B,"(0, 0)",10.0
5,0.974647,B,"(0, 0)",10.0
6,1.373896,B,"(1, 0)",3.0


In [9]:
# Aggregate the sampled events into one row per (event, pcv) pair with
# `lambda_t`, `duration` and `count` columns (see the output of the next cell).
count_duration_df = get_count_duration_df(model=rtgem_model, data=sampled_data, t_max=t_max)

In [10]:
# Display the aggregated table.
# NOTE(review): in the recorded output the (1, 1) rows have duration 0.0 yet
# the largest counts -- worth double-checking how durations are attributed.
count_duration_df

Unnamed: 0,event,pcv,lambda_t,duration,count
0,B,"(0, 0)",10.0,8.247418,17
1,B,"(0, 1)",1.6,1313.262663,105
2,B,"(1, 0)",3.0,8678.489919,237
3,B,"(1, 1)",1.0,0.0,10042
4,A,"(0, 0)",1.0,63.578406,16
5,A,"(0, 1)",4.0,9857.769119,13878
6,A,"(1, 0)",5.0,78.652475,218
7,A,"(1, 1)",9.0,0.0,46451


## Likelihood

In [11]:
# Log-likelihood computed directly from the aggregated count/duration table.
compute_logLikelihood(count_duration_df)

53894.913436444753

In [12]:
# Same quantity computed from the model and the raw events; the recorded
# output is identical to the value obtained from the aggregated table above.
LogLikelihood(model=rtgem_model, observed_data=sampled_data, t_max=t_max)

53894.913436444753

## Parameter learning (lambdas)

In [None]:
# Estimate the lambdas from the sampled data.
# NOTE(review): this cell has no execution count and no recorded output in the
# saved notebook, and its return value is discarded. A later cell calls
# mle_lambdas positionally as (model, data, t_max).
mle_lambdas(data=sampled_data, model=rtgem_model, t_max=t_max)

## Structure learning

### Reference model

In [30]:
# Rebinds `rtgem_model` (until now the hand-specified model) to a fresh
# two-node model with no edges. `default_end_timescale=1` is forwarded to
# RTGEM; its exact meaning is defined by rtgemlib.
rtgem_model = RTGEM(empty_nodes(['A', 'B']), default_end_timescale=1)

In [31]:
# Add three edges: a self-loop on A, plus one edge in each direction between
# A and B.
# NOTE(review): which tuple element is the parent is rtgemlib's convention -- confirm.
rtgem_model.add_edge_operator(('A', 'A'))
rtgem_model.add_edge_operator(('A', 'B'))
rtgem_model.add_edge_operator(('B', 'A'))

In [32]:
# Apply the split operator to timescale [0, 1] of the ('A', 'A') self-loop.
# NOTE(review): [0, 1] is presumably the timescale created by
# default_end_timescale=1 when the edge was added -- confirm.
rtgem_model.split_operator(edge=('A', 'A'), timescale=[0,1])

In [33]:
# Apply the extend operator to the ('A', 'B') edge.
rtgem_model.extend_operator(edge=('A', 'B'))

In [34]:
# Sample a new event sequence from the modified reference model. This rebinds
# `sampled_data`. Note that t_max is hard-coded to 10000 here rather than
# taken from the `t_max` variable.
sampled_data = sample_from_tgem(rtgem_model, t_min=0, t_max=10000)

A Jupyter Widget

A Jupyter Widget



A Jupyter Widget



In [35]:
# Rebinds `t_max` (10000 earlier in the notebook) to a shorter horizon and
# keeps only the events that occur strictly before it.
t_max = 1000
# .copy() makes `data` an independent frame: later cells pass it to helpers
# that are called for their side effects, and writing into a boolean-mask
# slice of `sampled_data` would raise pandas' SettingWithCopyWarning.
data = sampled_data[sampled_data['time'] < t_max].copy()

In [36]:
# BIC score of the reference model (the one the data was sampled from) on the
# truncated data set.
scoreBic(model=rtgem_model, observed_data=data, t_max=t_max)

2208.0511128124062

### Forward Search

In [37]:
import itertools
import random
import copy

In [38]:
def empty_nodes(nodes):
    """Build an edge-free model specification for the given node names.

    This name is also defined in an earlier cell; this later definition is the
    one in effect from here on.

    Parameters
    ----------
    nodes : iterable of hashable
        Node names.

    Returns
    -------
    dict
        Maps each node name to its own ``{'timescales': {}, 'lambdas': {(): 1}}``
        entry (no parent timescales, a single lambda of 1 keyed by the empty
        tuple).

    Notes
    -----
    Every node gets a *fresh* dict. Building the values with ``[{...}] * n``
    would make all nodes share one dict object, so editing one node's
    timescales or lambdas would silently edit every other node as well.
    """
    return {node: {'timescales': {}, 'lambdas': {(): 1}} for node in nodes}

In [39]:
# Start the structure search from an edge-free two-node model.
# NOTE: `model` was bound to a plain dict specification earlier in the
# notebook; from this cell on it is an RTGEM instance.
model = RTGEM(empty_nodes(['A', 'B']),  default_end_timescale=1)

#### Initialisation 

In [40]:
# --- Forward-search starting state -------------------------------------------
# Attach the observed events to the model, then annotate the data; both
# set_nodes_* helpers return the model, set_pcv_lambda_t is used for its side
# effects only.
model = set_nodes_timeseries(model, data)
model = set_nodes_parents_counts(model, model.dpd_graph.nodes, t_max)
set_pcv_lambda_t(model=model, data=data, t_max=t_max)

# Per-(event, pcv) sufficient statistics of the current model.
lambdas_count_duration_df = get_count_duration_df(data=data, model=model, t_max=t_max)

# Score = log-likelihood minus the complexity penalty size * log(t_max).
log_td = np.log(t_max)
LogL = compute_logLikelihood(lambdas_count_duration_df)
size_log_td = model.size() * log_td
score = LogL - size_log_td

# Search bookkeeping: loop flag plus every ordered node pair (self-loops
# included) as a candidate edge, visited in random order.
local_maximum = False
nodes = list(model.dpd_graph.nodes)
possible_edges = list(itertools.product(nodes, nodes))
random.shuffle(possible_edges)

In [41]:
# Greedy forward search (hill climbing on the BIC-style score).
# Each pass scans every neighbour yielded by forward_neighbors_gen, remembers
# the best-scoring one, and applies it to `model` only if it beats the current
# score. The loop ends on the first pass in which no neighbour improves.
it = 0
forward_logs = []
while not local_maximum:
    # Assume a local maximum until a neighbour proves otherwise.
    local_maximum = True
    # Best neighbour seen during this pass (none yet).
    max_score_ngbr = -np.inf
    max_op = None
    max_args = None
    max_changed_node_cnt_drt_df = None
    max_size_log_td_ngbr = None
    max_LogL_ngbr = None
    print('iteration number: {}: scoreBIC = {}'.format(it, score))
    for ngbr_info in forward_neighbors_gen(model, data, t_max, lambdas_count_duration_df,LogL, size_log_td, log_td,\
                                           possible_edges):

        # Each neighbour is a 5-tuple: the operator (called below as
        # op(*args)), its arguments, the neighbour's log-likelihood and
        # penalty term, and a count/duration table whose first row names the
        # changed node in its 'event' column.
        op, args, LogL_ngbr, size_log_td_ngbr, changed_node_cnt_drt_df = ngbr_info
        score_ngbr = LogL_ngbr - size_log_td_ngbr

        if score_ngbr > max_score_ngbr:
            max_score_ngbr = score_ngbr
            max_op = op
            max_args = args
            max_changed_node_cnt_drt_df = changed_node_cnt_drt_df
            max_size_log_td_ngbr = size_log_td_ngbr
            max_LogL_ngbr = LogL_ngbr
    print('max ngbr {}, args={}, max_scoreBIC = {}'.format(max_op, max_args, max_score_ngbr))

    if max_score_ngbr > score:
        # Accept the best neighbour: apply the operator to the model and adopt
        # its likelihood and penalty terms.
        max_op(*max_args)
        LogL = max_LogL_ngbr
        size_log_td = max_size_log_td_ngbr
        # Replace the changed node's rows in the statistics table with the
        # rows supplied by the neighbour; other nodes' rows are kept.
        changed_node = max_changed_node_cnt_drt_df.iloc[0]['event']
        lambdas_count_duration_df = lambdas_count_duration_df[lambdas_count_duration_df['event'] != changed_node]
        lambdas_count_duration_df = pd.concat([lambdas_count_duration_df, max_changed_node_cnt_drt_df])

        local_maximum = False
        score = max_score_ngbr
        # Operator label for the log (French identifiers kept as-is):
        # 'étendreIntervalle' = extend interval (the default),
        # 'ajouterArc' = add edge, 'diviserIntervalle' = split interval.
        op_name = 'étendreIntervalle'

        # removes added edge from possible edges
        if max_op == model.add_edge_operator:
            possible_edges.remove(max_args[0])
            op_name = 'ajouterArc'
        if max_op == model.split_operator:
            op_name = 'diviserIntervalle'

        # Log record layout:
        # [it, T_A, T_B, edges, operator name, operator args, scoreBic]
        # The timescales are deep-copied so later operators cannot alter
        # entries that were already logged.
        forward_logs.append([it, copy.deepcopy(model.get_node_parents_timescales('A')),\
                             copy.deepcopy(model.get_node_parents_timescales('B')),\
                             list(model.dpd_graph.edges()),\
                             op_name,\
                             max_args,\
                             score])
    it += 1

iteration number: 0: scoreBIC = -2013.8155105579642
max ngbr <bound method RTGEM.add_edge_operator of <rtgemlib.rtgem.RTGEM object at 0x000001B426A1F358>>, args=[('B', 'B')], max_scoreBIC = -1327.438370615615
iteration number: 1: scoreBIC = -1327.438370615615
max ngbr <bound method RTGEM.extend_operator of <rtgemlib.rtgem.RTGEM object at 0x000001B426A1F358>>, args=[('B', 'B')], max_scoreBIC = -1474.077205678034


In [42]:
# Re-run the same two set-up calls used before the forward search, this time
# on the model the search produced.
model = set_nodes_timeseries(model, data)
model = set_nodes_parents_counts(model, model.dpd_graph.nodes, t_max)

In [43]:
# Fit the lambdas of the learned structure. In the recorded output lambda_t
# equals count / duration (e.g. 4506 / 1000 = 4.506).
# NOTE(review): the B / (1,) row has duration 0.0 and therefore lambda_t = inf.
# This looks like a degenerate estimate -- check it before trusting the scores
# computed from this model below.
mle_lambdas(model, data, t_max)

Unnamed: 0,event,pcv,lambda_t,duration,count
0,A,(),4.506,1000.0,4506
1,B,"(0,)",0.09,1000.0,90
2,B,"(1,)",inf,0.0,3719


In [44]:
# BIC score of the learned model. The recorded value (1949.94) differs from the
# last score printed by the forward search (-1327.44) -- presumably because
# mle_lambdas updated the model's lambdas in between; TODO confirm.
scoreBic(model, data, t_max)

1949.9383774816824

## Backward Search (starting from the Forward Search result)

In [45]:
# --- Backward-search starting state ------------------------------------------
# Starts from the current `model` (the forward-search result when the notebook
# is run top to bottom). Both set_nodes_* helpers return the model;
# set_pcv_lambda_t is used for its side effects only.
model = set_nodes_timeseries(model, data)
model = set_nodes_parents_counts(model, model.dpd_graph.nodes, t_max)
set_pcv_lambda_t(model=model, data=data, t_max=t_max)

# Per-(event, pcv) sufficient statistics of the current model.
lambdas_count_duration_df = get_count_duration_df(data=data, model=model, t_max=t_max)

# Score = log-likelihood minus the complexity penalty size * log(t_max).
log_td = np.log(t_max)
LogL = compute_logLikelihood(lambdas_count_duration_df)
size_log_td = model.size() * log_td
score = LogL - size_log_td

local_maximum = False

In [46]:
# Starting score for the backward search; the recorded value equals the
# scoreBic output shown above for the same model.
score

1949.9383774816824

In [47]:
# Backward search, first-improvement variant.
# Unlike the forward search, the "keep the best neighbour" test is disabled:
# the first neighbour from backward_neighbors_gen that beats the current score
# is applied immediately and the scan restarts. The `max_*` names therefore
# just hold the neighbour currently being examined, not a maximum.
it = 0
backward_logs = []
local_maximum = False

while not local_maximum:
    # Assume a local maximum until a neighbour proves otherwise.
    local_maximum = True
    max_score_ngbr = -np.inf
    max_op = None
    max_args = None
    max_changed_node_cnt_drt_df = None
    max_size_log_td_ngbr = None
    max_LogL_ngbr = None
    print('iteration number: {}: scoreBIC = {}'.format(it, score))
    for ngbr_info in backward_neighbors_gen(model, data, t_max, lambdas_count_duration_df,LogL, size_log_td, log_td,):

        # Each neighbour is a 5-tuple: the operator (called below as
        # op(*args)), its arguments, the neighbour's log-likelihood and
        # penalty term, and a count/duration table whose first row names the
        # changed node in its 'event' column.
        op, args, LogL_ngbr, size_log_td_ngbr, changed_node_cnt_drt_df = ngbr_info
        score_ngbr = LogL_ngbr - size_log_td_ngbr

        # No best-of-pass comparison here (it was deliberately commented out):
        # every neighbour overwrites the candidate unconditionally.
        max_score_ngbr = score_ngbr
        max_op = op
        max_args = args
        max_changed_node_cnt_drt_df = changed_node_cnt_drt_df
        max_size_log_td_ngbr = size_log_td_ngbr
        max_LogL_ngbr = LogL_ngbr

        if max_score_ngbr > score:
            print('max ngbr {}, args={}, max_scoreBIC = {}'.format(max_op, max_args, max_score_ngbr))

            # Accept this neighbour: apply the operator to the model and adopt
            # its likelihood and penalty terms.
            max_op(*max_args)
            LogL = max_LogL_ngbr
            size_log_td = max_size_log_td_ngbr
            # Replace the changed node's rows in the statistics table with the
            # rows supplied by the neighbour; other nodes' rows are kept.
            changed_node = max_changed_node_cnt_drt_df.iloc[0]['event']
            lambdas_count_duration_df = lambdas_count_duration_df[lambdas_count_duration_df['event'] != changed_node]
            lambdas_count_duration_df = pd.concat([lambdas_count_duration_df, max_changed_node_cnt_drt_df])

            local_maximum = False
            score = max_score_ngbr
            # Operator label for the log (French identifiers kept as-is):
            # 'supprimerArc' = remove edge (the default),
            # 'reduireIntervalle' = shrink interval,
            # 'FusionnerIntervalle' = merge intervals.
            op_name = 'supprimerArc'

            if max_op == model.inverse_extend_operator:
                op_name = 'reduireIntervalle'
            if max_op == model.inverse_split_operator:
                op_name = 'FusionnerIntervalle'

            # Log record layout:
            # [it, T_A, T_B, edges, operator name, operator args, scoreBic]
            backward_logs.append([it, copy.deepcopy(model.get_node_parents_timescales('A')),\
                                 copy.deepcopy(model.get_node_parents_timescales('B')),\
                                 list(model.dpd_graph.edges()),\
                                 op_name,\
                                 max_args,\
                                 score])
            # The model has just been changed, so stop consuming this
            # generator; the outer loop starts a new scan on the updated model.
            break
    it += 1

iteration number: 0: scoreBIC = 1949.9383774816824
max ngbr <bound method RTGEM.inverse_add_edge_operator of <rtgemlib.rtgem.RTGEM object at 0x000001B426A1F358>>, args=[('B', 'B')], max_scoreBIC = 3548.5909502133763
iteration number: 1: scoreBIC = 3548.5909502133763
