In [None]:
from rtgemlib import RTGEM
from rtgemlib import sample_from_tgem, LogLikelihood, scoreBic, mle_lambdas, LocaleLogLikelihood, get_count_duration_df, get_node_LogLikelihood, set_pcv_lambda_t, backward_neighbors_gen,\
compute_logLikelihood, set_nodes_timeseries, set_nodes_parents_counts, duration, get_parents_count_vector, forward_neighbors_gen, initModelFromData

from tqdm.autonotebook import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

In [None]:
def empty_nodes(nodes):
    """Build an edge-free model specification for the given node names.

    Parameters
    ----------
    nodes : iterable of hashable
        Node identifiers.

    Returns
    -------
    dict
        Maps every node to ``{'timescales': {}, 'lambdas': {(): 1}}``.
        Each node gets its own freshly built dictionaries; the earlier
        ``[spec] * len(nodes)`` form made all nodes share a single spec, so
        mutating one node's entry silently changed every other node.
    """
    return {node: {'timescales': {}, 'lambdas': {(): 1}} for node in nodes}

In [None]:
# Hand-written two-node specification handed to RTGEM in the next cell.
# Each node carries a 'timescales' dict (parent name -> list of [start, end]
# pairs) and a 'lambdas' dict keyed by tuples of 0/1 flags.
# NOTE(review): presumably one flag per parent timescale — confirm in rtgemlib.
model = {
    'B': {
        'timescales': {'A': [[1, 2], [5, 6]]},
        'lambdas': {
            (0, 0): 10,
            (0, 1): 1.6,
            (1, 0): 3,
            (1, 1): 1,
        },
    },
    'A': {
        'timescales': {'B': [[0, 1], [10, 15]]},
        'lambdas': {
            (0, 0): 1,
            (0, 1): 4,
            (1, 0): 5,
            (1, 1): 9,
        },
    },
}



In [None]:
# Wrap the hand-written specification in an RTGEM object.
rtgem_model = RTGEM(model)

In [None]:
# Time horizon passed as t_max to the sampling, counting and likelihood calls below.
t_max = 10000

## Sampling

In [None]:
# Draw a synthetic event dataset from the model between t_min and t_max.
sampled_data = sample_from_tgem(rtgem_model, t_min=0, t_max=t_max)

In [None]:
# NOTE(review): the return value is discarded, so this call presumably
# annotates its arguments in place — confirm in rtgemlib.
set_pcv_lambda_t(model=rtgem_model, data=sampled_data, t_max=t_max)

In [None]:
# Count/duration table derived from the model and the sampled events.
# NOTE(review): exact schema is defined by rtgemlib; later cells read an 'event' column from tables of this kind.
count_duration_df = get_count_duration_df(model=rtgem_model, data=sampled_data, t_max=t_max)

In [None]:
# Display the table.
count_duration_df

## Likelihood

In [None]:
# Log-likelihood computed from the count/duration table built above.
compute_logLikelihood(count_duration_df)

In [None]:
# Log-likelihood computed directly from the model and the raw events.
# NOTE(review): presumably expected to agree with the value of the previous cell — confirm.
LogLikelihood(model=rtgem_model, observed_data=sampled_data, t_max=t_max)

## Parameter learning (lambdas)

In [None]:
# Estimate the lambdas from the sampled events and display the result.
# NOTE(review): judging by the name these are maximum-likelihood estimates — confirm against rtgemlib.
mle_lambdas(data=sampled_data, model=rtgem_model, t_max=t_max)

## Structure learning

### Reference model

In [None]:
# Rebind rtgem_model to a fresh edge-free two-node model (replaces the hand-written one above).
rtgem_model = RTGEM(empty_nodes(['A', 'B']), default_end_timescale=1)

In [None]:
# Add three edges, including the self-loop ('A', 'A').
rtgem_model.add_edge_operator(('A', 'A'))
rtgem_model.add_edge_operator(('A', 'B'))
rtgem_model.add_edge_operator(('B', 'A'))

In [None]:
# Apply the split operator to the [0, 1] timescale of edge ('A', 'A').
rtgem_model.split_operator(edge=('A', 'A'), timescale=[0,1])

In [None]:
# Apply the extend operator to edge ('A', 'B').
rtgem_model.extend_operator(edge=('A', 'B'))

In [None]:
# Sample events from the reference model.
# NOTE(review): the horizon 10000 is hard-coded here instead of being read from t_max.
sampled_data = sample_from_tgem(rtgem_model, t_min=0, t_max=10000)

In [None]:
# Rebind t_max to a shorter horizon and keep only the events strictly before it;
# every later cell works on this truncated `data`.
t_max = 100
data = sampled_data[sampled_data['time'] < t_max]

In [None]:
# Initialise the reference model from the truncated data.
# NOTE(review): return value is discarded, so the model is presumably updated in place — confirm.
initModelFromData(model=rtgem_model, observed_data=data, t_max=t_max)

In [None]:
# Count/duration table of the reference model on the truncated data.
count_duration_df = get_count_duration_df(model=rtgem_model, data=data, t_max=t_max)

In [None]:
# Display the table.
count_duration_df

In [None]:
# Score of the reference model: log-likelihood minus size * log(t_max), the
# same formula used for `score` in the search cells. It is bound to a name
# because the plotting cells read `score_bic_reference`, which was otherwise
# never defined; the bare expression below still displays the value.
score_bic_reference = compute_logLikelihood(count_duration_df) - rtgem_model.size() * np.log(t_max)
score_bic_reference

### Forward Search

In [None]:
import itertools
import random
import copy

In [None]:
def empty_nodes(nodes):
    """Build an edge-free model specification for the given node names.

    Parameters
    ----------
    nodes : iterable of hashable
        Node identifiers.

    Returns
    -------
    dict
        Maps every node to ``{'timescales': {}, 'lambdas': {(): 1}}``.
        A new pair of dictionaries is created per node; multiplying a
        one-element list (``[spec] * len(nodes)``) would hand the same spec
        object to every node, so editing one node would edit them all.
    """
    return {node: {'timescales': {}, 'lambdas': {(): 1}} for node in nodes}

In [None]:
# Starting point of the search: an edge-free two-node model.
# NOTE: from here on `model` is an RTGEM instance, no longer the plain dict defined at the top of the notebook.
model = RTGEM(empty_nodes(['A', 'B']),  default_end_timescale=1)

#### Initialisation 

In [None]:
# Initialise the empty model from the truncated data.
initModelFromData(model, data, t_max)

In [None]:
# Count/duration table of the empty model.
count_duration_df = get_count_duration_df(model=model, data=data, t_max=t_max)

In [None]:
# Table returned by mle_lambdas; it is the one passed to compute_logLikelihood
# and to the neighbour generator in the search cells below.
lambdas_count_duration_df = mle_lambdas(model=model, count_and_duration=count_duration_df)

In [None]:
# Display the table.
lambdas_count_duration_df

In [None]:
# Inspect the 'lambdas' attribute stored on each node of the dependency graph.
model.dpd_graph.nodes.data('lambdas')

In [None]:
# Score of the starting model: log-likelihood minus size * log(t_max).
log_td = np.log(t_max)
LogL = compute_logLikelihood(lambdas_count_duration_df)
size_log_td = model.size() * log_td
score = LogL - size_log_td

# State consumed by the forward search loop.
local_maximum = False
nodes = list(model.dpd_graph.nodes)

# Every ordered pair of nodes, self-pairs included, visited in random order.
possible_edges = list(itertools.product(nodes, repeat=2))
random.shuffle(possible_edges)

In [None]:
# Score computed by hand in the previous cell.
score

In [None]:
# Same model scored through the library helper.
# NOTE(review): presumably meant as a cross-check of the manual value above — confirm.
scoreBic(model=model, observed_data=data, t_max=t_max)

In [None]:
# Greedy forward search: on every pass, score all neighbours yielded by
# forward_neighbors_gen, apply the best one if it beats the current score,
# and stop at the first pass where no neighbour improves it.
it = 0
# One row per accepted move: [it, timescales of A, timescales of B, edges, op_name, args, score].
forward_logs = []
while not local_maximum:
    # Assume this pass is the last one; reset to False below when a move is accepted.
    local_maximum = True
    # Best neighbour seen during this pass.
    max_score_ngbr = -np.inf
    max_op = None
    max_args = None
    max_changed_node_cnt_drt_df = None
    max_size_log_td_ngbr = None
    max_LogL_ngbr = None
    print('iteration number: {}: scoreBIC = {}'.format(it, score))
    for ngbr_info in forward_neighbors_gen(model, data, t_max, lambdas_count_duration_df,LogL, size_log_td, log_td,\
                                           possible_edges):

        # Each neighbour is a 5-tuple: the operator to call, its arguments, the
        # neighbour's log-likelihood and penalty term, and a table whose first
        # row's 'event' names the node the move changes.
        op, args, LogL_ngbr, size_log_td_ngbr, changed_node_cnt_drt_df = ngbr_info
        score_ngbr = LogL_ngbr - size_log_td_ngbr

        if score_ngbr > max_score_ngbr:
            max_score_ngbr = score_ngbr
            max_op = op
            max_args = args
            max_changed_node_cnt_drt_df = changed_node_cnt_drt_df
            max_size_log_td_ngbr = size_log_td_ngbr
            max_LogL_ngbr = LogL_ngbr
    print('max ngbr {}, args={}, max_scoreBIC = {}'.format(max_op, max_args, max_score_ngbr))

    if max_score_ngbr > score:
        # Apply the winning operator to the model, then swap the changed node's
        # rows in the running table for the ones computed for the neighbour.
        max_op(*max_args)
        LogL = max_LogL_ngbr
        size_log_td = max_size_log_td_ngbr
        changed_node = max_changed_node_cnt_drt_df.iloc[0]['event']
        lambdas_count_duration_df = lambdas_count_duration_df[lambdas_count_duration_df['event'] != changed_node]
        lambdas_count_duration_df = pd.concat([lambdas_count_duration_df, max_changed_node_cnt_drt_df])

        local_maximum = False
        score = max_score_ngbr
        # Label used when the operator is neither add_edge nor split.
        op_name = 'étendreIntervalle'

        # An added edge is removed from the candidates so it is not proposed again.
        if max_op == model.add_edge_operator:
            possible_edges.remove(max_args[0])
            op_name = 'ajouterArc'
        if max_op == model.split_operator:
            op_name = 'diviserIntervalle'

        # Timescales are deep-copied so later moves do not alter logged rows.
        forward_logs.append([it, copy.deepcopy(model.get_node_parents_timescales('A')),\
                             copy.deepcopy(model.get_node_parents_timescales('B')),\
                             list(model.dpd_graph.edges()),\
                             op_name,\
                             max_args,\
                             score])
    it += 1

In [None]:
# Re-run the initialisation on the model found by the forward search.
initModelFromData(model=model, observed_data=data, t_max=t_max)

In [None]:
# Count/duration table of the forward-search result.
count_duration_df = get_count_duration_df(model=model, data=data, t_max=t_max)

In [None]:
# Display the table returned by mle_lambdas (the result is not stored here).
mle_lambdas(model, count_duration_df)

In [None]:
# Score of the forward-search result, computed through the library helper.
scoreBic(model, data, t_max)

In [None]:
# Keep an independent copy of the forward result; the backward search below mutates `model`.
forward_model = copy.deepcopy(model)

## Backward Search (starting from the Forward Search result)

In [None]:
# Rebuild the scoring state for the forward-search result before pruning it.
initModelFromData(model=model, observed_data=data, t_max=t_max)
count_duration_df = get_count_duration_df(model=model, data=data, t_max=t_max)

# The refreshed table must be bound to `lambdas_count_duration_df`: that is the
# name read by compute_logLikelihood on the next line and by the backward loop.
# Binding it to `count_duration_df` (as before) left both of them working on
# the table patched together incrementally during the forward search.
lambdas_count_duration_df = mle_lambdas(model, count_duration_df)
LogL = compute_logLikelihood(lambdas_count_duration_df)
log_td = np.log(t_max)

size_log_td = model.size() * log_td

# Current score: log-likelihood minus size * log(t_max).
score = LogL - size_log_td
local_maximum = False

In [None]:
# Score from which the backward search starts.
score

In [None]:
# Backward search with a first-improvement strategy: on every pass, neighbours
# yielded by backward_neighbors_gen are examined one by one, and the first one
# that beats the current score is applied immediately (inner loop then breaks).
# The search stops at the first pass where no neighbour improves the score.
it = 0
# One row per accepted move: [it, timescales of A, timescales of B, edges, op_name, args, score].
backward_logs = []
local_maximum = False

while not local_maximum:
    # Assume this pass is the last one; reset to False below when a move is accepted.
    local_maximum = True
    max_score_ngbr = -np.inf
    max_op = None
    max_args = None
    max_changed_node_cnt_drt_df = None
    max_size_log_td_ngbr = None
    max_LogL_ngbr = None
    print('iteration number: {}: scoreBIC = {}'.format(it, score))
    for ngbr_info in backward_neighbors_gen(model, data, t_max, lambdas_count_duration_df,LogL, size_log_td, log_td,):

        op, args, LogL_ngbr, size_log_td_ngbr, changed_node_cnt_drt_df = ngbr_info
        score_ngbr = LogL_ngbr - size_log_td_ngbr

        # The best-of-pass comparison (`if score_ngbr > max_score_ngbr:`) is
        # disabled, so despite their names the max_* variables simply hold the
        # neighbour currently being examined.
        max_score_ngbr = score_ngbr
        max_op = op
        max_args = args
        max_changed_node_cnt_drt_df = changed_node_cnt_drt_df
        max_size_log_td_ngbr = size_log_td_ngbr
        max_LogL_ngbr = LogL_ngbr

        if max_score_ngbr > score:
            print('max ngbr {}, args={}, max_scoreBIC = {}'.format(max_op, max_args, max_score_ngbr))

            # Apply the operator to the model, then swap the changed node's rows
            # in the running table for the ones computed for the neighbour.
            max_op(*max_args)
            LogL = max_LogL_ngbr
            size_log_td = max_size_log_td_ngbr
            changed_node = max_changed_node_cnt_drt_df.iloc[0]['event']
            lambdas_count_duration_df = lambdas_count_duration_df[lambdas_count_duration_df['event'] != changed_node]
            lambdas_count_duration_df = pd.concat([lambdas_count_duration_df, max_changed_node_cnt_drt_df])

            local_maximum = False
            score = max_score_ngbr
            # Label used when the operator is neither inverse_extend nor inverse_split.
            op_name = 'supprimerArc'

            if max_op == model.inverse_extend_operator:
                op_name = 'reduireIntervalle'
            if max_op == model.inverse_split_operator:
                op_name = 'FusionnerIntervalle'

            # Timescales are deep-copied so later moves do not alter logged rows.
            backward_logs.append([it, copy.deepcopy(model.get_node_parents_timescales('A')),\
                                 copy.deepcopy(model.get_node_parents_timescales('B')),\
                                 list(model.dpd_graph.edges()),\
                                 op_name,\
                                 max_args,\
                                 score])
            # The model just changed: abandon this generator and start a new pass.
            break
    it += 1

In [None]:
# Edges remaining in the dependency graph after the backward search.
model.dpd_graph.edges

In [None]:
# Score trajectories of both searches. `bic_scores_forward` and
# `bic_scores_backward` were never defined in this notebook (NameError on a
# fresh kernel); the score of every accepted move is the last field of each
# row appended to forward_logs / backward_logs, so they are rebuilt from there.
bic_scores_forward = [entry[-1] for entry in forward_logs]
bic_scores_backward = [entry[-1] for entry in backward_logs]

# Number of accepted moves in each search.
nb_it_forward = len(bic_scores_forward)
nb_it_backward = len(bic_scores_backward)

In [None]:
# Number of points in the forward score trajectory.
nb_it_forward

In [None]:
# Number of points in the backward score trajectory.
nb_it_backward

In [None]:
# Length the x-axis will have once the next cell builds it. Computed from the
# iteration counts because `x` does not exist yet at this point of a
# top-to-bottom run, so `len(x)` raised NameError.
len(range(nb_it_forward + nb_it_backward - 1))

In [None]:
# Abscissa: consecutive integers 0 .. nb_it_forward + nb_it_backward - 2.
x = list(range(nb_it_forward + nb_it_backward - 1))

# Ordinates: both names refer to the existing score lists (no copy is made).
y_for, y_back = bic_scores_forward, bic_scores_backward

In [None]:
# Ordinate of the reference line: the reference score repeated once per iteration.
y_ref = [score_bic_reference] * (nb_it_forward + nb_it_backward)

In [None]:
# Plot the forward (red) and backward (blue) score trajectories against the
# reference score (green).
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# At this point x has nb_it_forward + nb_it_backward - 1 points, y_back has
# nb_it_backward and y_ref has one more point than x.
# - The backward curve starts on the abscissa where the forward one ends, so
#   its slice begins at nb_it_forward - 1 and has exactly len(y_back) points
#   (the former `[(nb_it_backward+1):]` slice had nb_it_forward - 2).
# - y_ref is truncated to len(x) so the reference line can be drawn at all.
ax.plot(np.array(x)[:nb_it_forward], y_for, 'r',
        np.array(x)[(nb_it_forward-1):], y_back, "b",
        x, y_ref[:len(x)], "g")

In [None]:
# Sanity checks on the lengths passed to ax.plot: each abscissa slice must be
# as long as the ordinate list it is paired with.
len(y_for)

In [None]:
# Abscissa slice paired with y_for.
len(np.array(x)[:(nb_it_forward)])

In [None]:
len(y_back)

In [None]:
# Abscissa slice paired with y_back.
len(np.array(x)[(nb_it_forward-1):])

In [None]:
# Prepend the last forward score so the backward curve starts where the forward
# one ends. A new list is built instead of calling insert() in place: y_back
# was an alias of bic_scores_backward, so the in-place version also modified
# that list and added one more point every time the cell was re-run.
y_back = [y_for[-1]] + list(bic_scores_backward)

In [None]:
# Last score of the forward trajectory.
y_for[len(y_for)-1]

In [None]:
# Display the backward trajectory.
y_back

In [None]:
# Abscissa covering every forward and backward iteration.
x = list(range(nb_it_forward + nb_it_backward))

# Forward trajectory in red, backward trajectory in blue (starting on the
# abscissa where the forward one ends), reference score in green.
fig, ax = plt.subplots()
ax.plot(
    np.array(x)[:nb_it_forward], y_for, 'r',
    np.array(x)[(nb_it_forward-1):], y_back, "b",
    x, y_ref, "g",
)

In [None]:
# Re-run the initialisation on the model left by the backward search.
initModelFromData(model=model, observed_data=data, t_max=t_max)

In [None]:
# Count/duration table of the final model.
count_duration_df = get_count_duration_df(model=model, data=data, t_max=t_max)

In [None]:
# Display the table.
count_duration_df

In [None]:
# Display the table returned by mle_lambdas for the final model.
mle_lambdas(model, count_duration_df)