In [1]:
from rtgemlib import RTGEM
from rtgemlib import sample_from_tgem, LogLikelihood, scoreBic, mle_lambdas, LocaleLogLikelihood, get_count_duration_df, get_node_LogLikelihood, set_pcv_lambda_t, backward_neighbors_gen,\
compute_logLikelihood, set_nodes_timeseries, set_nodes_parents_counts, duration, get_parents_count_vector, forward_neighbors_gen, initModelFromData

from tqdm.autonotebook import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx



In [2]:
def empty_nodes(nodes):
    """Build a model specification in which no node has any parent.

    Parameters
    ----------
    nodes : iterable of hashable
        Node labels.

    Returns
    -------
    dict
        Maps each label to ``{'timescales': {}, 'lambdas': {(): 1}}``.
        Every node receives its *own* dict objects, so mutating one node's
        specification never leaks into another node's.
    """
    # A comprehension evaluates the literal once per node. The previous
    # ``[spec] * len(nodes)`` form repeated a single shared dict instead.
    return {node: {'timescales': {}, 'lambdas': {(): 1}} for node in nodes}

In [3]:
# Hand-written two-node specification: 'B' has parent 'A' and 'A' has parent
# 'B', each with two timescales. `lambdas` is keyed by a pair of 0/1 flags,
# presumably one flag per timescale (parent-count vector) -- TODO confirm
# against rtgemlib.
model = {
    'B': {
        'timescales': {'A': [[1, 2], [5, 6]]},
        'lambdas': {
            (0, 0): 10,
            (0, 1): 1.6,
            (1, 0): 3,
            (1, 1): 1,
        },
    },
    'A': {
        'timescales': {'B': [[0, 1], [10, 15]]},
        'lambdas': {
            (0, 0): 1,
            (0, 1): 4,
            (1, 0): 5,
            (1, 1): 9,
        },
    },
}



In [4]:
rtgem_model = RTGEM(model)

In [None]:
t_max = 10000

## Sampling

In [None]:
sampled_data = sample_from_tgem(rtgem_model, t_min=0, t_max=t_max)

A Jupyter Widget

A Jupyter Widget

In [None]:
set_pcv_lambda_t(model=rtgem_model, data=sampled_data, t_max=t_max)

In [None]:
count_duration_df = get_count_duration_df(model=rtgem_model, data=sampled_data, t_max=t_max)

In [None]:
count_duration_df

## Likelihood

In [None]:
compute_logLikelihood(count_duration_df)

In [None]:
LogLikelihood(model=rtgem_model, observed_data=sampled_data, t_max=t_max)

## Parameters learning (lambdas)

In [None]:
mle_lambdas(data=sampled_data, model=rtgem_model, t_max=t_max)

## Structure learning

### Modèle de référence

In [3]:
rtgem_model = RTGEM(empty_nodes(['A', 'B']), default_end_timescale=1)

In [4]:
rtgem_model.add_edge_operator(('A', 'A'))
rtgem_model.add_edge_operator(('A', 'B'))
rtgem_model.add_edge_operator(('B', 'A'))

In [5]:
rtgem_model.split_operator(edge=('A', 'A'), timescale=[0,1])

In [6]:
rtgem_model.extend_operator(edge=('A', 'B'))

In [7]:
sampled_data = sample_from_tgem(rtgem_model, t_min=0, t_max=10000)

A Jupyter Widget

A Jupyter Widget



A Jupyter Widget



In [8]:
t_max = 100
data = sampled_data[sampled_data['time'] < t_max]

In [9]:
initModelFromData(model=rtgem_model, observed_data=data, t_max=t_max)

<rtgemlib.rtgem.RTGEM at 0x2187ae6a9e8>

In [10]:
count_duration_df = get_count_duration_df(model=rtgem_model, data=data, t_max=t_max)

In [11]:
count_duration_df

Unnamed: 0,event,pcv,lambda_t,duration,count
0,A,"(0, 0, 0)",1.754941,0.660267,0
1,A,"(0, 0, 1)",4.972062,0.243973,0
2,A,"(0, 1, 0)",8.435874,0.0,0
3,A,"(0, 1, 1)",4.723642,4.288426,0
4,A,"(1, 0, 0)",1.411243,0.231936,1
5,A,"(1, 0, 1)",8.03489,4.556489,39
6,A,"(1, 1, 0)",3.428519,0.0,0
7,A,"(1, 1, 1)",7.17395,90.018909,660
8,B,"(0, 0)",1.383047,0.660267,0
9,B,"(0, 1)",1.102118,0.243973,1


In [12]:
compute_logLikelihood(count_duration_df) - rtgem_model.size() * np.log(t_max)

1420.1849614716812

### Forward Search

In [13]:
import itertools
import random
import copy

In [14]:
# NOTE: this repeats the `empty_nodes` helper already defined in an earlier
# cell of this notebook; one of the two definitions can be dropped.
def empty_nodes(nodes):
    """Build a model specification in which no node has any parent.

    Parameters
    ----------
    nodes : iterable of hashable
        Node labels.

    Returns
    -------
    dict
        Maps each label to ``{'timescales': {}, 'lambdas': {(): 1}}``.
        Every node receives its *own* dict objects, so mutating one node's
        specification never leaks into another node's.
    """
    # A comprehension evaluates the literal once per node. The previous
    # ``[spec] * len(nodes)`` form repeated a single shared dict instead.
    return {node: {'timescales': {}, 'lambdas': {(): 1}} for node in nodes}

In [15]:
model = RTGEM(empty_nodes(['A', 'B']),  default_end_timescale=1)

#### Initialisation 

In [16]:
initModelFromData(model, data, t_max)

<rtgemlib.rtgem.RTGEM at 0x2187bfd3e48>

In [17]:
count_duration_df = get_count_duration_df(model=model, data=data, t_max=t_max)

In [18]:
lambdas_count_duration_df = mle_lambdas(model=model, count_and_duration=count_duration_df)

In [19]:
lambdas_count_duration_df

Unnamed: 0,event,pcv,lambda_t,duration,count
0,A,(),7.0,100,700
1,B,(),7.67,100,767


In [20]:
model.dpd_graph.nodes.data('lambdas')

NodeDataView({'A': {(): 7.0}, 'B': {(): 7.67}}, data='lambdas')

In [21]:
# Starting point of the forward search: score of the current (edgeless) model,
# computed as log-likelihood minus model size times log(t_max).
log_td = np.log(t_max)
LogL = compute_logLikelihood(lambdas_count_duration_df)
size_log_td = model.size() * log_td
score = LogL - size_log_td

# Candidate edges are all ordered node pairs (self-loops included), visited
# in random order.
nodes = list(model.dpd_graph.nodes)
possible_edges = [(source, target) for source in nodes for target in nodes]
random.shuffle(possible_edges)

local_maximum = False

In [22]:
score

1448.5486079625623

In [23]:
scoreBic(model=model, observed_data=data, t_max=t_max)

1448.5486079625623

In [24]:
# Greedy forward search (best-improvement hill climbing).
# Each pass scans every neighbour produced by `forward_neighbors_gen`, keeps the
# one with the highest score (LogL - size*log(t_max)), and applies it only if it
# beats the current `score`. The loop stops at the first pass with no improvement.
# Every accepted move is appended to `forward_logs`.
it = 0
forward_logs = []
while not local_maximum:
    # Assume this pass is the last one; reset to False below if a move is accepted.
    local_maximum = True
    # Best neighbour seen so far in this pass (none yet).
    max_score_ngbr = -np.inf
    max_op = None
    max_args = None
    max_changed_node_cnt_drt_df = None
    max_size_log_td_ngbr = None
    max_LogL_ngbr = None
    print('iteration number: {}: scoreBIC = {}'.format(it, score))
    for ngbr_info in forward_neighbors_gen(model, data, t_max, lambdas_count_duration_df,LogL, size_log_td, log_td,\
                                           possible_edges):

        # Each neighbour is a 5-tuple: the operator to call, its argument list,
        # the neighbour's log-likelihood and size penalty, and a count/duration
        # table (presumably restricted to the single node the move changes --
        # TODO confirm in rtgemlib).
        op, args, LogL_ngbr, size_log_td_ngbr, changed_node_cnt_drt_df = ngbr_info
        score_ngbr = LogL_ngbr - size_log_td_ngbr

        if score_ngbr > max_score_ngbr:
            max_score_ngbr = score_ngbr
            max_op = op
            max_args = args
            max_changed_node_cnt_drt_df = changed_node_cnt_drt_df
            max_size_log_td_ngbr = size_log_td_ngbr
            max_LogL_ngbr = LogL_ngbr
    print('max ngbr {}, args={}, max_scoreBIC = {}'.format(max_op, max_args, max_score_ngbr))

    if max_score_ngbr > score:
        # Apply the winning operator to `model` and adopt the neighbour's statistics.
        max_op(*max_args)
        LogL = max_LogL_ngbr
        size_log_td = max_size_log_td_ngbr
        # Swap the rows of the changed node for the neighbour's rows; the node is
        # identified from the first row of the neighbour's table.
        changed_node = max_changed_node_cnt_drt_df.iloc[0]['event']
        lambdas_count_duration_df = lambdas_count_duration_df[lambdas_count_duration_df['event'] != changed_node]
        lambdas_count_duration_df = pd.concat([lambdas_count_duration_df, max_changed_node_cnt_drt_df])

        local_maximum = False
        score = max_score_ngbr
        # Log label (French): default is "extend interval", overridden below
        # for "add edge" and "split interval".
        op_name = 'étendreIntervalle'

        # removes added edge from possible edges
        if max_op == model.add_edge_operator:
            possible_edges.remove(max_args[0])
            op_name = 'ajouterArc'
        if max_op == model.split_operator:
            op_name = 'diviserIntervalle'

        # Log row layout: [it, T_A, T_B, edges, max_ngbr, scoreBic].
        # The timescales are deep-copied so later moves do not alter past log rows.
        forward_logs.append([it, copy.deepcopy(model.get_node_parents_timescales('A')),\
                             copy.deepcopy(model.get_node_parents_timescales('B')),\
                             list(model.dpd_graph.edges()),\
                             op_name,\
                             max_args,\
                             score])
    it += 1

iteration number: 0: scoreBIC = 1448.5486079625623
max ngbr <bound method RTGEM.add_edge_operator of <rtgemlib.rtgem.RTGEM object at 0x000002187BFD3E48>>, args=[('B', 'B')], max_scoreBIC = 1450.8173481692425
iteration number: 1: scoreBIC = 1450.8173481692425
max ngbr <bound method RTGEM.split_operator of <rtgemlib.rtgem.RTGEM object at 0x000002187BFD3E48>>, args=[('B', 'B'), [0, 1]], max_scoreBIC = 1465.4539263483437
iteration number: 2: scoreBIC = 1465.4539263483437
max ngbr <bound method RTGEM.split_operator of <rtgemlib.rtgem.RTGEM object at 0x000002187BFD3E48>>, args=[('B', 'B'), [0, 0.5]], max_scoreBIC = 1554.0114355762498
iteration number: 3: scoreBIC = 1554.0114355762498
max ngbr <bound method RTGEM.split_operator of <rtgemlib.rtgem.RTGEM object at 0x000002187BFD3E48>>, args=[('B', 'B'), [0, 0.25]], max_scoreBIC = 1757.2604577383458
iteration number: 4: scoreBIC = 1757.2604577383458
max ngbr <bound method RTGEM.split_operator of <rtgemlib.rtgem.RTGEM object at 0x000002187BFD3E48

In [25]:
initModelFromData(model=model, observed_data=data, t_max=t_max)

<rtgemlib.rtgem.RTGEM at 0x2187bfd3e48>

In [26]:
count_duration_df = get_count_duration_df(model=model, data=data, t_max=t_max)

In [27]:
mle_lambdas(model, count_duration_df)

Unnamed: 0,event,pcv,lambda_t,duration,count
0,A,"(0, 0, 0, 0, 0, 0, 0)",0.000000,0.904240,0
1,A,"(0, 0, 0, 0, 0, 0, 1)",0.000000,4.288426,0
2,A,"(0, 0, 0, 0, 0, 1, 0)",0.000000,0.442450,0
3,A,"(0, 0, 0, 0, 0, 1, 1)",0.000000,13.692258,0
4,A,"(0, 0, 0, 0, 1, 0, 0)",0.000000,0.426986,0
5,A,"(0, 0, 0, 0, 1, 0, 1)",0.000000,3.749556,0
6,A,"(0, 0, 0, 0, 1, 1, 0)",0.000000,0.822805,0
7,A,"(0, 0, 0, 0, 1, 1, 1)",0.000000,18.301596,0
8,A,"(0, 0, 0, 1, 0, 0, 0)",0.000000,0.250383,0
9,A,"(0, 0, 0, 1, 0, 0, 1)",0.000000,1.637119,0


In [28]:
scoreBic(model, data, t_max)

3558.4739531609343

In [30]:
# keep forward result in memory
forward_model = copy.deepcopy(model)

## Backward Search (starting from the Forward Search result)

In [31]:
# Starting point of the backward search: refresh the model's statistics from
# `data`, fit the lambdas for the current structure, and score it.
initModelFromData(model=model, observed_data=data, t_max=t_max)
count_duration_df = get_count_duration_df(model=model, data=data, t_max=t_max)

# Bind the freshly fitted table to `lambdas_count_duration_df`, the name read by
# the log-likelihood below and by the backward loop. Previously the fresh result
# was stored in `count_duration_df` and the score silently relied on whatever
# `lambdas_count_duration_df` the forward-search cell had left in the kernel.
lambdas_count_duration_df = mle_lambdas(model, count_duration_df)
LogL = compute_logLikelihood(lambdas_count_duration_df)
log_td = np.log(t_max)

# Penalty term: model size times log(t_max).
size_log_td = model.size() * log_td

score = LogL - size_log_td
local_maximum = False

In [32]:
score

3558.4739531609343

In [33]:
# Greedy backward search (first-improvement hill climbing).
# Each pass walks the neighbours produced by `backward_neighbors_gen` and applies
# the FIRST one whose score (LogL - size*log(t_max)) beats the current `score`,
# then restarts the scan. The loop stops when a full pass finds no improving
# neighbour. Every accepted move is appended to `backward_logs`.
it = 0
backward_logs = []
local_maximum = False

while not local_maximum:
    # Assume this pass is the last one; reset to False below if a move is accepted.
    local_maximum = True
    max_score_ngbr = -np.inf
    max_op = None
    max_args = None
    max_changed_node_cnt_drt_df = None
    max_size_log_td_ngbr = None
    max_LogL_ngbr = None
    print('iteration number: {}: scoreBIC = {}'.format(it, score))
    for ngbr_info in backward_neighbors_gen(model, data, t_max, lambdas_count_duration_df,LogL, size_log_td, log_td,):

        # Each neighbour is a 5-tuple: the operator to call, its argument list,
        # the neighbour's log-likelihood and size penalty, and a count/duration
        # table (presumably restricted to the single node the move changes --
        # TODO confirm in rtgemlib).
        op, args, LogL_ngbr, size_log_td_ngbr, changed_node_cnt_drt_df = ngbr_info
        score_ngbr = LogL_ngbr - size_log_td_ngbr

        # NOTE: the best-of-neighbourhood guard (`if score_ngbr > max_score_ngbr`)
        # is disabled here, so despite their names the `max_*` variables just
        # hold the CURRENT neighbour, not the best one seen so far.
        max_score_ngbr = score_ngbr
        max_op = op
        max_args = args
        max_changed_node_cnt_drt_df = changed_node_cnt_drt_df
        max_size_log_td_ngbr = size_log_td_ngbr
        max_LogL_ngbr = LogL_ngbr

        if max_score_ngbr > score:
            print('max ngbr {}, args={}, max_scoreBIC = {}'.format(max_op, max_args, max_score_ngbr))

            # Apply the operator to `model` and adopt the neighbour's statistics.
            max_op(*max_args)
            LogL = max_LogL_ngbr
            size_log_td = max_size_log_td_ngbr
            # Swap the rows of the changed node for the neighbour's rows; the node
            # is identified from the first row of the neighbour's table.
            changed_node = max_changed_node_cnt_drt_df.iloc[0]['event']
            lambdas_count_duration_df = lambdas_count_duration_df[lambdas_count_duration_df['event'] != changed_node]
            lambdas_count_duration_df = pd.concat([lambdas_count_duration_df, max_changed_node_cnt_drt_df])

            local_maximum = False
            score = max_score_ngbr
            # Log label (French): default is "remove edge", overridden below
            # for "shrink interval" and "merge intervals".
            op_name = 'supprimerArc'

            if max_op == model.inverse_extend_operator:
                op_name = 'reduireIntervalle'
            if max_op == model.inverse_split_operator:
                op_name = 'FusionnerIntervalle'

            # Log row layout: [it, T_A, T_B, edges, max_ngbr, scoreBic].
            # The timescales are deep-copied so later moves do not alter past log rows.
            backward_logs.append([it, copy.deepcopy(model.get_node_parents_timescales('A')),\
                                 copy.deepcopy(model.get_node_parents_timescales('B')),\
                                 list(model.dpd_graph.edges()),\
                                 op_name,\
                                 max_args,\
                                 score])
            # The model changed, so abandon this scan and start a new pass.
            break
    it += 1

iteration number: 0: scoreBIC = 3558.4739531609343
max ngbr <bound method RTGEM.inverse_extend_operator of <rtgemlib.rtgem.RTGEM object at 0x000002187BFD3E48>>, args=[('A', 'A')], max_scoreBIC = 3848.920694451631
iteration number: 1: scoreBIC = 3848.920694451631
max ngbr <bound method RTGEM.inverse_extend_operator of <rtgemlib.rtgem.RTGEM object at 0x000002187BFD3E48>>, args=[('A', 'A')], max_scoreBIC = 3989.1780797650194
iteration number: 2: scoreBIC = 3989.1780797650194
max ngbr <bound method RTGEM.inverse_extend_operator of <rtgemlib.rtgem.RTGEM object at 0x000002187BFD3E48>>, args=[('A', 'A')], max_scoreBIC = 4060.9253456399074
iteration number: 3: scoreBIC = 4060.9253456399074
max ngbr <bound method RTGEM.inverse_extend_operator of <rtgemlib.rtgem.RTGEM object at 0x000002187BFD3E48>>, args=[('A', 'A')], max_scoreBIC = 4096.930745298885
iteration number: 4: scoreBIC = 4096.930745298885
max ngbr <bound method RTGEM.inverse_extend_operator of <rtgemlib.rtgem.RTGEM object at 0x0000021

In [35]:
model.dpd_graph.edges

OutEdgeView([('A', 'A'), ('B', 'B')])

In [39]:
initModelFromData(model=model, observed_data=data, t_max=t_max)

<rtgemlib.rtgem.RTGEM at 0x2187bfd3e48>

In [40]:
count_duration_df = get_count_duration_df(model=model, data=data, t_max=t_max)

In [41]:
count_duration_df

Unnamed: 0,event,pcv,lambda_t,duration,count
0,A,"(0,)",2.962653,89.531374,0
1,A,"(1,)",1.157624,10.468626,700
2,B,"(0,)",2.434865,88.665337,0
3,B,"(1,)",6.12334,11.334663,767


In [42]:
mle_lambdas(model, count_duration_df)

Unnamed: 0,event,pcv,lambda_t,duration,count
0,A,"(0,)",0.0,89.531374,0
1,A,"(1,)",66.866463,10.468626,700
2,B,"(0,)",0.0,88.665337,0
3,B,"(1,)",67.668534,11.334663,767
