In [1]:
import matplotlib.pyplot as plt
import dill
import numpy as np
from tqdm.notebook import tqdm
from hmmlearn import hmm, vhmm

from synthetic_data_generation_functions import *
from synthetic_data_analysis_functions import *
from hmm_functions import *


plt.style.use('/home/david/Documents/code/phd/paper.mplstyle')

print(plt.get_backend())
%matplotlib qt5
print(plt.get_backend())

module://matplotlib_inline.backend_inline
qt5agg


QSocketNotifier: Can only be used with threads started with QThread


# Simulations generations

## Parameters

In [2]:
p_cw_reward = 0.8
p_ccw_reward = 0
p_cw_init = 0.5
steps_number = 100
noise_amplitude = 0.1
drift_matrix = np.array([[ 0.05 , -0.05],
                         [ 0    , 0   ]])
drift_init = 0.0

args_dict = {'p_cw_reward': p_cw_reward, 'p_ccw_reward': p_ccw_reward, 'p_cw_init': p_cw_init, 'steps_number': steps_number, 'noise_amplitude': noise_amplitude, 'drift_matrix': drift_matrix, 'drift_init': drift_init}

number_of_simulations_perset = 40

n_simulations_list = [number_of_simulations_perset]*32

start_index = 0

simulations_folder_path = '/home/david/Documents/code/DDM_v3_synthetic_data_identical_drifts'

## Generation

In [3]:

generate_simulations = False # PARAMS

for i, n_simulations in enumerate(n_simulations_list):
    
    if not(generate_simulations):

        break

    simulations_batch = run_simulations_batch_switch(args_dict, n_simulations)

    with open(f'{simulations_folder_path}/n_{n_simulations}/simulations_batch_{n_simulations}_test_{start_index + i+1}.pkl', 'wb') as file:
        dill.dump(simulations_batch, file)

# HMM fit

## Parameters

In [4]:
n_to_test = np.arange(2,16)


## Model fit

In [5]:
fit_model = False # PARAM

if fit_model:

    for index, n_simulations in enumerate(tqdm(n_simulations_list)):

        ####################
        ### Loading Data ###
        ####################

        with open(f'{simulations_folder_path}/n_{n_simulations}/simulations_batch_{n_simulations}_test_{start_index + index+1}.pkl', 'rb') as file:
            synthetic_data = dill.load(file)


        ########################
        ### Reformating Data ###
        ########################

        slice_size = int(n_simulations/2)
        
        # To do if simulations are sorted by parameters
        synthetic_data = synthetic_data[:int(slice_size/2)] + synthetic_data[int(slice_size):int(3*slice_size/2)] + synthetic_data[int(slice_size/2):int(slice_size)] + synthetic_data[int(3*slice_size/2):]

        training_data = [synthetic_data[i]['choices'] for i in np.arange(0,slice_size)]
        validation_data = [synthetic_data[i]['choices'] for i in np.arange(slice_size,2*slice_size)]


        training_emissions = np.array([]).astype(int)
        validation_emissions = np.array([]).astype(int)

        for x,y in zip(training_data,validation_data):

            training_emissions = np.concatenate((training_emissions, x))
            validation_emissions = np.concatenate((validation_emissions, y))

        training_emissions = training_emissions.reshape(-1,1)
        training_emissions_lengths = [len(x) for x in training_data]

        validation_emissions = validation_emissions.reshape(-1,1)
        validation_emissions_lengths = [len(y) for y in validation_data]

        ###################
        ### Infer model ###
        ###################

        best_model, best_score = infer_best_model_score(training_emissions, validation_emissions, 
                                                training_emissions_lengths, validation_emissions_lengths, 
                                                n_to_test, leave_loading_bar=False, verbose=False)
        
        ##################
        ### Save model ###
        ##################
        
        with open(f'{simulations_folder_path}/n_{n_simulations}/best_model_score_{n_simulations}_test_{start_index + index+1}.pkl', 'wb') as file:
            dill.dump(best_model, file)



# Drift Estimation

## Generating "theoretical" average probability sequences

In [6]:
# generate_theoretical_sequences = False # PARAM

# direction = 'cw'

# if generate_theoretical_sequences:

#     args = synthetic_data[0]['parameters'] + [5000]
#     args[0][f'']
#     drift_range = np.linspace(0.01,0.2,200)

#     # test_average_probability_sequences = generate_test_average_probability_sequences_identical_drifts(drift_range, args)
#     test_average_probability_sequences = generate_test_average_probability_sequences_identical_drifts(drift_range, args)

#     with open(f'{simulations_folder_path}/test_average_probability_{direction}_sequences.pkl', 'wb') as file:
#         dill.dump([drift_range, test_average_probability_sequences], file)

# else:
 
#     with open(f'{simulations_folder_path}/test_average_probability_{direction}_sequences.pkl', 'rb') as file:
#         drift_range, test_average_probability_sequences = dill.load(file)


In [7]:
with open(f'{simulations_folder_path}/n_{n_simulations}/simulations_batch_{n_simulations}_test_{start_index + 0+1}.pkl', 'rb') as file:
            synthetic_data = dill.load(file)

In [8]:
generate_theoretical_sequences = False # PARAM

if generate_theoretical_sequences:

    args = [synthetic_data[0]['parameters']] + [5000]
    drift_range = np.linspace(0.01,0.2,200)

    # test_average_probability_sequences = generate_test_average_probability_sequences_identical_drifts(drift_range, args)
    test_msd_sequence_list = generate_test_msd_sequences_identical_drifts_switch(drift_range, args)

    with open(f'{simulations_folder_path}/test_msd_sequences.pkl', 'wb') as file:
        dill.dump([drift_range, test_msd_sequence_list], file)

else:
 
    with open(f'{simulations_folder_path}/test_msd_sequences.pkl', 'rb') as file:
        drift_range, test_msd_sequence_list = dill.load(file)


## Minimizing mean square error

In [9]:
# save_result = True # PARAM
# average_proba_sequences_hmm = []
# msd_sequences_hmm = []

# for index, _ in enumerate(tqdm(n_simulations_list)):
# # for index, _ in enumerate(n_simulations_list):

#     ##############################
#     ### Loading Data and Model ###
#     ##############################

#     with open(f'{simulations_folder_path}/n_{n_simulations}/simulations_batch_{n_simulations}_test_{start_index + index+1}.pkl', 'rb') as file:
#         synthetic_data = dill.load(file)

#     with open(f'{simulations_folder_path}/n_{n_simulations}/best_model_score_{n_simulations}_test_{start_index + index+1}.pkl', 'rb') as file:
#         model = dill.load(file)

#     ########################
#     ### Reformating Data ###
#     ########################

#     test_data = [synth_data['choices'] for synth_data in synthetic_data]

#     initial_state_list = []
#     sequences_number = len(test_data)

#     for i in range(sequences_number):
        
#         choices_sequence = test_data[i]
        
#         states_sequence = model.predict(np.int16(choices_sequence.reshape(-1,1)))
#         initial_state_list.append(states_sequence[0])

#     initial_state_list_distri = []

#     for s in range(len(model.transmat_)):

#         initial_state_list_distri.append(initial_state_list.count(s))

#     transmat = model.transmat_
#     emission_vect = model.emissionprob_[:,1]
#     mat = transmat
#     sorted_indexes = np.argsort(emission_vect)
#     vector = np.ones([len(transmat),1])/len(transmat)

#     ##

#     new_transmat = order_matrix(mat, sorted_indexes)

#     ##

#     new_emissionmat = []
#     new_initial_state_list_distri = []

#     for i in sorted_indexes:
#         new_emissionmat.append(model.emissionprob_[i,:])
#         new_initial_state_list_distri.append(initial_state_list_distri[i])

#     new_emissionmat = np.array(new_emissionmat)
#     new_initial_state_list_distri = np.array(new_initial_state_list_distri)/np.sum(new_initial_state_list_distri)


#     ####################
#     ### Computations ###
#     ####################

#     average_proba_sequence_hmm = []
#     msd_sequence_hmm = []

#     steps = np.arange(len(test_data[0]))
#     new_mat_i = new_transmat

#     for i in steps:

#         new_mat_i = np.matmul(new_mat_i,new_transmat)
#         res = np.matmul(new_initial_state_list_distri,new_mat_i)*new_emissionmat[:,1]
        
#         # msd = np.sum(np.matmul(new_initial_state_list_distri,new_mat_i) * (res - new_initial_state_list_distri*new_emissionmat[:,1]))**2
#         msd = np.sum(np.matmul(new_initial_state_list_distri,new_mat_i) * (res - new_initial_state_list_distri*new_emissionmat[:,1])**2)
#         # msd = np.sum((res - new_initial_state_list_distri*new_emissionmat[:,1])**2)
        
#         average_proba_sequence_hmm.append(np.sum(res))
#         msd_sequence_hmm.append(msd)

#     average_proba_sequences_hmm.append(average_proba_sequence_hmm)
#     msd_sequences_hmm.append(msd_sequence_hmm)

#     if not(save_result):

#         continue

#     mse_list = []

#     for test_msd_sequence in tqdm(test_msd_sequence_list, leave=False):
#     # for test_average_probability_sequence in test_average_probability_sequences:

#         mse_list.append(compute_mean_square_error_v2(average_proba_sequence_hmm, test_msd_sequence))

#     min_mse = np.min(mse_list)
#     recovered_drift = drift_range[np.where(mse_list==min_mse)[0]]

#     ##############################
#     ### Saving Recovered drift ###
#     ##############################
    
#     with open(f'{simulations_folder_path}/n_{n_simulations}/recovered_drift_{n_simulations}_{start_index + index+1}.pkl', 'wb') as file:
#         dill.dump([test_msd_sequence,recovered_drift], file)


In [10]:
save_result = True # PARAM
average_proba_sequences_hmm = []
msd_sequences_hmm = []
nbr_of_states = []
ordered_action_matrixes = []

for index, _ in enumerate(tqdm(n_simulations_list)):
# for index, _ in enumerate(n_simulations_list):

    ##############################
    ### Loading Data and Model ###
    ##############################

    with open(f'{simulations_folder_path}/n_{n_simulations}/simulations_batch_{n_simulations}_test_{start_index + index+1}.pkl', 'rb') as file:
        synthetic_data = dill.load(file)

    with open(f'{simulations_folder_path}/n_{n_simulations}/best_model_score_{n_simulations}_test_{start_index + index+1}.pkl', 'rb') as file:
        model = dill.load(file)

    nbr_of_states.append(len(model.transmat_))

    ########################
    ### Reformating Data ###
    ########################

    test_data = [synth_data['choices'] for synth_data in synthetic_data]

    initial_state_list = []
    sequences_number = len(test_data)

    for i in range(sequences_number):
        
        choices_sequence = test_data[i]
        
        states_sequence = model.predict(np.int16(choices_sequence.reshape(-1,1)))
        initial_state_list.append(states_sequence[0])

    initial_state_list_distri = []

    for s in range(len(model.transmat_)):

        initial_state_list_distri.append(initial_state_list.count(s))

    transmat = model.transmat_
    emission_vect = model.emissionprob_[:,1]
    mat = transmat
    sorted_indexes = np.argsort(emission_vect)
    vector = np.ones([len(transmat),1])/len(transmat)

    ##

    new_transmat = order_matrix(mat, sorted_indexes)

    ##

    new_emissionmat = []
    new_initial_state_list_distri = []

    for i in sorted_indexes:
        new_emissionmat.append(model.emissionprob_[i,:])
        new_initial_state_list_distri.append(initial_state_list_distri[i])

    new_emissionmat = np.array(new_emissionmat)
    ordered_action_matrixes.append(new_emissionmat)
    new_initial_state_list_distri = np.array(new_initial_state_list_distri)/np.sum(new_initial_state_list_distri)

    ####################
    ### Computations ###
    ####################

    ## Probability sequences inference

    reconstructed_proba_sequences = infer_probability_sequence(model, test_data)

    ## MSD computation

    average_proba_sequence_hmm = []
    msd_sequence_hmm = []

    steps = np.arange(len(test_data[0]))
    new_mat_i = new_transmat

    res = (np.array(reconstructed_proba_sequences) - p_cw_init)**2 # replace np.array(test_data) with hmm-infered probability
    
    msd_sequence_hmm = np.mean(res, axis=0)

    average_proba_sequences_hmm.append(average_proba_sequence_hmm)
    msd_sequences_hmm.append(msd_sequence_hmm)

    if not(save_result):

        continue

    mse_list = []

    for test_msd_sequence in tqdm(test_msd_sequence_list, leave=False):
    # for test_average_probability_sequence in test_average_probability_sequences:

        mse_list.append(compute_mean_square_error_v2(msd_sequence_hmm, test_msd_sequence))

    min_mse = np.min(mse_list)
    recovered_drift = drift_range[np.where(mse_list==min_mse)[0]]

    ##############################
    ### Saving Recovered drift ###
    ##############################
    
    with open(f'{simulations_folder_path}/n_{n_simulations}/recovered_drift_{n_simulations}_{start_index + index+1}.pkl', 'wb') as file:
        dill.dump([test_msd_sequence,recovered_drift], file)


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

In [11]:
recovered_drift

array([0.06251256])

# Mean square error analysis

## Recovered drift loading

In [12]:
recovered_drift_list = []

for index, _ in enumerate(n_simulations_list):

    with open(f'{simulations_folder_path}/n_{n_simulations}/recovered_drift_{n_simulations}_{start_index + index+1}.pkl', 'rb') as file:
        _, recovered_drift = dill.load(file)
    
    recovered_drift_list.append(recovered_drift[0])

# Plots

In [13]:
# fig=plt.figure(figsize=(1, 4), dpi=300, constrained_layout=False, facecolor='w')
# gs = fig.add_gridspec(1, 1)
# row = gs[:].subgridspec(1, 1, hspace=0.5)

# ax1 = plt.subplot(row[0,0])

# ax1.scatter(np.ones(len(n_simulations_list)), recovered_drift_list, marker='s', alpha=0.01, s=3, linewidth=0)

# ax1.axhline(0.05, linewidth=0.7, color='k', linestyle='--', label='Drift to recover')

# ax1.set_title(f'{len(n_simulations_list)} sets of {number_of_simulations_perset} simulations of length {steps_number}')

# ax1.set_xticks([])
# ax1.set_ylabel('Recovered drift')

In [14]:
fig=plt.figure(figsize=(4, 4), dpi=300, constrained_layout=False, facecolor='w')
gs = fig.add_gridspec(1, 1)
row = gs[:].subgridspec(1, 1, hspace=0.5)

ax1 = plt.subplot(row[0,0])

histo = np.histogram(recovered_drift_list, bins=np.linspace(0.01,0.2,51), density=True)
bin_width = histo[1][1] - histo[1][0]
ax1.stairs(histo[0]/np.sum(histo[0])/bin_width, histo[1], alpha=0.5, fill=True, label = 'Recovered drift probability density')

x = np.linspace(-0.2,0.2)
sigma = 0.1
ax1.plot(x, np.exp(-x**2/(2*sigma**2)) * 1/np.sqrt(2*sigma**2*np.pi), label = "Noise probability density")

ax1.axvline(0.05, linewidth=0.7, color='k', linestyle='--', label='Drift to recover')

# ax1.set_xticks([])
ax1.set_title(f'{len(n_simulations_list)} sets of {number_of_simulations_perset} simulations of length {steps_number}')

ax1.set_ylim([0,None])
ax1.set_xlabel('Recovered drift')
ax1.set_ylabel('Probability density')

ax1.legend()

<matplotlib.legend.Legend at 0x7eb8b9e3c710>

In [15]:
fig=plt.figure(figsize=(1, 4), dpi=300, constrained_layout=False, facecolor='w')
gs = fig.add_gridspec(1, 1)
row = gs[:].subgridspec(1, 1, hspace=0.5)

ax1 = plt.subplot(row[0,0])

histo = np.histogram(recovered_drift_list, bins=np.linspace(0.01,0.2,51), density=False)
ax1.stairs(histo[0], histo[1], alpha=0.5, fill=True)
ax1.axvline(0.05, linewidth=0.7, color='k', linestyle='--', label='Drift to recover')

# ax1.set_xticks([])
ax1.set_title(f'{len(n_simulations_list)} sets of {number_of_simulations_perset} simulations of length {steps_number}')

ax1.set_ylim([0,None])
ax1.set_xlabel('Recovered drift')
ax1.set_ylabel('Number of simulations sets')

Text(0, 0.5, 'Number of simulations sets')

In [None]:
msd_sequence_hmm_theory = []

steps = np.arange(len(test_data[0]))
new_mat_i = new_transmat

for i in steps:

    new_mat_i = np.matmul(new_mat_i,new_transmat)

    res = np.matmul(new_initial_state_list_distri,new_mat_i)*new_emissionmat[:,1]
        
    # msd = np.sum(np.matmul(new_initial_state_list_distri,new_mat_i) * (res - new_initial_state_list_distri*new_emissionmat[:,1]))**2
    msd = np.sum(np.matmul(new_initial_state_list_distri,new_mat_i) * (res - new_initial_state_list_distri*new_emissionmat[:,1])**2)
    # msd = np.sum((res - new_initial_state_list_distri*new_emissionmat[:,1])**2)
    
    msd_sequence_hmm_theory.append(msd)


In [17]:
fig=plt.figure(figsize=(3, 3), dpi=300, constrained_layout=False, facecolor='w')
gs = fig.add_gridspec(1, 1)
row = gs[:].subgridspec(1, 1, hspace=0.5)


ax1 = plt.subplot(row[0,0])

index_simu_to_plot = 0

# ax1.plot(average_proba_sequences_hmm[index_simu_to_plot], label='Average probability infered by HMM', color='black')
ax1.plot(msd_sequences_hmm[index_simu_to_plot], label='Mean Square Displacement infered by HMM', color='black')
ax1.plot(msd_sequence_hmm_theory, color='grey')

drift_range_to_plot = [(i,drift_range[i]) for i in range(0, len(drift_range), 42)]

for i, d in drift_range_to_plot:

    ax1.plot(test_msd_sequence_list[i], label=f'Mean Square Displacement of 5000 simu. with drift={np.round(d,4)}', alpha=0.5, linestyle='--')

ax1.axhline(0.25, linewidth=0.7, color='k', linestyle='--', label='Drift to recover')

ax1.set_xticks([])
ax1.set_title(f'Set of {number_of_simulations_perset} simulations of length {steps_number}\n True drifts: {drift_matrix}, Recovered drift: {np.round(recovered_drift_list[index_simu_to_plot],4)}', fontsize=5)

ax1.set_xlabel('Step')
ax1.set_ylabel('Mean Square Displacement of P_cw')

ax1.legend()

<matplotlib.legend.Legend at 0x7eb8b9e827b0>

In [18]:
ordered_action_matrixes

[array([[0.96205624, 0.03794376],
        [0.95267655, 0.04732345],
        [0.84375368, 0.15624632],
        [0.78302553, 0.21697447],
        [0.71874029, 0.28125971],
        [0.50708273, 0.49291727],
        [0.44691332, 0.55308668],
        [0.39569511, 0.60430489],
        [0.32321061, 0.67678939],
        [0.05723792, 0.94276208]]),
 array([[0.9963194 , 0.0036806 ],
        [0.97279387, 0.02720613],
        [0.94769533, 0.05230467],
        [0.68691981, 0.31308019],
        [0.56270843, 0.43729157],
        [0.39883639, 0.60116361],
        [0.32607917, 0.67392083],
        [0.03047486, 0.96952514]]),
 array([[0.99498577, 0.00501423],
        [0.98195664, 0.01804336],
        [0.95904037, 0.04095963],
        [0.71779438, 0.28220562],
        [0.68065264, 0.31934736],
        [0.42148142, 0.57851858],
        [0.25868241, 0.74131759],
        [0.03226617, 0.96773383]]),
 array([[0.98425641, 0.01574359],
        [0.92446869, 0.07553131],
        [0.76332589, 0.23667411],
        

In [19]:
fig=plt.figure(figsize=(3, 3), dpi=300, constrained_layout=False, facecolor='w')
gs = fig.add_gridspec(1, 1)
row = gs[:].subgridspec(2, 1, hspace=0.5)

ax1 = plt.subplot(row[0,0])
ax2 = plt.subplot(row[1,0])

for i, _ in enumerate(n_simulations_list):

    ax1.plot(ordered_action_matrixes[i][:,1], label=f'', alpha=0.3, linestyle='--')
    # ax1.plot(np.linspace(0,1,len(ordered_action_matrixes[i][:,1])), ordered_action_matrixes[i][:,1], label=f'', alpha=0.3, linestyle='--')
    ax2.plot(np.diff(ordered_action_matrixes[i][:,1]), label=f'', alpha=0.3, linestyle='--')
    
ax1.set_ylabel('P_cw')
ax1.set_ylabel('P_cw difference')

ax2.set_xlabel('State')

# ax1.legend()


Text(0.5, 0, 'State')

In [20]:
fig=plt.figure(figsize=(3, 3), dpi=300, constrained_layout=False, facecolor='w')
gs = fig.add_gridspec(1, 1)
row = gs[:].subgridspec(1, 1, hspace=0.5)

ax1 = plt.subplot(row[0,0])

for i, _ in enumerate(n_simulations_list):

    # ax1.scatter(i, np.var(np.diff(ordered_action_matrixes[i][:,1])), label=f'', alpha=0.5)
    ax1.scatter(i, np.mean(np.diff(ordered_action_matrixes[i][:,1])), label=f'', alpha=0.5)
    
    # ax1.plot(np.linspace(0,1,len(ordered_action_matrixes[i][:,1])), ordered_action_matrixes[i][:,1], label=f'', alpha=0.3, linestyle='--')
    
# ax1.set_ylabel('P_cw states difference variance')
ax1.set_xlabel('State')
ax1.set_ylabel('P_cw states average difference')


# ax1.legend()



Text(0, 0.5, 'P_cw states average difference')

In [21]:
def reformat_hmm(synthetic_data, model):

    nbr_of_states = len(model.transmat_)

    ########################
    ### Reformating Data ###
    ########################

    test_data = [synth_data['choices'] for synth_data in synthetic_data]

    initial_state_list = []
    sequences_number = len(test_data)

    for i in range(sequences_number):
        
        choices_sequence = test_data[i]
        
        states_sequence = model.predict(np.int16(choices_sequence.reshape(-1,1)))
        initial_state_list.append(states_sequence[0])

    initial_state_list_distri = []

    for s in range(len(model.transmat_)):

        initial_state_list_distri.append(initial_state_list.count(s))

    transmat = model.transmat_
    emission_vect = model.emissionprob_[:,1]
    mat = transmat
    sorted_indexes = np.argsort(emission_vect)

    ##

    new_transmat = order_matrix(mat, sorted_indexes)

    ##

    new_emissionmat = []
    new_initial_state_list_distri = []

    for i in sorted_indexes:
        new_emissionmat.append(model.emissionprob_[i,:])
        new_initial_state_list_distri.append(initial_state_list_distri[i])

    new_emissionmat = np.array(new_emissionmat)
    new_initial_state_distri = np.array(new_initial_state_list_distri)/np.sum(new_initial_state_list_distri)

    return {'emissionmat':new_emissionmat, 'initial_state_distri':new_initial_state_distri, 'transmat':new_transmat}

In [34]:

for index, _ in enumerate(n_simulations_list):

    with open(f'{simulations_folder_path}/n_{n_simulations}/simulations_batch_{n_simulations}_test_{start_index + index+1}.pkl', 'rb') as file:
        synthetic_data = dill.load(file)

    with open(f'{simulations_folder_path}/n_{n_simulations}/best_model_score_{n_simulations}_test_{start_index + index+1}.pkl', 'rb') as file:
        model = dill.load(file)

    test_data = [synth_data['choices'] for synth_data in synthetic_data]

    reformated_hmm = reformat_hmm(synthetic_data, model)

    reformated_emissionmat = reformated_hmm['emissionmat']
    reformated_initial_state_distri = reformated_hmm['initial_state_distri']
    reformated_transmat = reformated_hmm['transmat']

    steps = np.arange(len(test_data[0]))
    mat_i = reformated_transmat

    
    for i in steps:

        mat_i_previous = mat_i
        mat_i = np.matmul(mat_i,reformated_transmat)
        average_drift_hmm_vect = (np.matmul(reformated_initial_state_distri, mat_i) - np.matmul(reformated_initial_state_distri,mat_i_previous)) * reformated_emissionmat[:,1]

        total_rewards = 0

        for sim in synthetic_data:
        
            total_rewards += sim['rewards'][i]

        # print(average_reward)
    
    average_reward = total_rewards/len(n_simulations_list)

    recovered_drift = np.sum(average_drift_hmm_vect)/average_reward

    print(recovered_drift)




-0.00016992383310156776
0.00019222171873879682
-0.0007771067211972783
0.0004044025118817759
-0.00015271649341638688
-0.0010146798181011254
-0.0005092719624783823
-0.0011741817490026338
0.0002234864575096925
0.0008787181315303305
0.0003268021158465253
-0.0002056365144445793
0.00029976893465435137
-9.05146359289255e-05
-0.000219437807508145
7.144957245984092e-05
0.0005742114071417636
0.0006333785799580349
0.0004133590098947982
-0.0003611466327717345
-0.00045131866596782313
-0.0006814032181251408
0.00024128019078914267
0.00010521813668845097
0.00023333973649005454
0.000982385281183309
0.00037326780292475803
-3.2917570666323485e-05
0.00012793471038722185
0.00032130184513818005
-0.00033307454933063885
0.0005674808857722948


In [None]:
synthetic_data[sim]['re

TypeError: only integer scalar arrays can be converted to a scalar index