In [1]:
import glam
import pandas as pd
import numpy as np
import os.path
import arviz as az

import matplotlib.pyplot as plt

  data = yaml.load(f.read()) or {}
  defaults = yaml.load(f)


In [2]:
import pymc3 as pm

In [3]:
np.random.seed(23) # from random.org

# Hierarchical GLAM estimation and out of sample prediction
## eLife reanalysis

## Load data

In [4]:
# Load data
sufix = '_hierarchical_Less_Bin_Gamma-11_NUTS_33_eLife'
data = pd.read_csv('data/PF2019_data/GlamDataPF2019_Less_Bin_33.csv')

# Subset only necessary columns
data = data[['subject', 'trial', 'choice', 'rt',
         'item_value_0', 'item_value_1',
         'gaze_0', 'gaze_1']]
data.head()

Unnamed: 0,subject,trial,choice,rt,item_value_0,item_value_1,gaze_0,gaze_1
0,1,0,0,4261.735,6,7,0.603448,0.396552
1,1,1,1,3559.258,0,0,0.490772,0.509228
2,1,2,1,3754.464,0,0,0.490893,0.509107
3,1,3,0,2431.751,2,0,0.639125,0.360875
4,1,4,0,2199.342,0,0,0.702232,0.297768


## Split data in training and test sets

In [5]:
train_data = pd.DataFrame()
test_data = pd.DataFrame()

for subject in data.subject.unique():
    subject_data = data[data['subject'] == subject].copy().reset_index(drop=True)
    n_trials = len(subject_data)
    
    subject_train = subject_data.iloc[np.arange(0, n_trials, 2)].copy()
    subject_test = subject_data.iloc[np.arange(1, n_trials, 2)].copy()

    test_data = pd.concat([test_data, subject_test])
    train_data = pd.concat([train_data, subject_train])

#test_data.to_csv(str('data/PF2019_data/GlamDataPF2019_preprocessed_test'+sufix+'.csv'))
#train_data.to_csv(str('data/PF2019_data/GlamDataPF2019_preprocessed_train'+sufix+'.csv'))

print('Split data into training ({} trials) and test ({} trials) sets...'.format(len(train_data), len(test_data)))

Split data into training (1920 trials) and test (1920 trials) sets...


In [6]:
train_data.subject.unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33])

In [7]:
train_data.subject.unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33])

In [8]:
# we renumber subject data for proper sequence
train_data2 = train_data.replace(train_data.subject.unique(), list(range(len(train_data.subject.unique()))))

In [9]:
train_data2.subject.unique()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31])

## Hierarchical GLAM estimation

### 1. full GLAM

In [10]:
# Fitting full GLAM
print('Fitting full GLAM hierarchically...')

glam_full = glam.GLAM(train_data2)

if not os.path.exists(str('results/estimates/glam_PF2019_full_hierarchical_cv'+sufix+'.npy')):
    glam_full.make_model('hierarchical', gamma_bounds=(-1, 1), t0_val=0)
    glam_full.fit(method='NUTS', tune=1000)
else:
    print('  Found old parameter estimates in "results/estimates". Skipping estimation...')
    glam_full.estimates = np.load(str('results/estimates/glam_PF2019_full_hierarchical_cv'+sufix+'.npy'))   

Fitting full GLAM hierarchically...
Generating hierarchical model for 32 subjects...


  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...


Fitting 1 model(s) using NUTS...
  Fitting model 1 of 1...


  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [tau, tau_sd, tau_mu, SNR, SNR_sd, SNR_mu, gamma, gamma_sd, gamma_mu, v, v_sd, v_mu]


  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 7164 seconds.
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
There were 3 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.7154512169286372, but should be close to 0.8. Try to increase the number of tuning steps.
There was 1 divergence after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.6860421887311469, but should be close to 0.8. Try to increase the number of tuning steps.
The chain reached the maximum tree depth. Increase max_treedepth, increase target_accept or reparameterize.
The rhat statistic is larger than 1.4 for some parameters. T

/!\ Automatically setting parameter precision...


In [11]:
# Save parameter estimates
np.save(str('results/estimates/glam_PF2019_nobias_hierarchical_cv'+sufix+'.npy'), glam_full.estimates)
pd.DataFrame(glam_full.estimates)

Unnamed: 0,b,p_error,v_mu,v_sd,v,gamma_mu,gamma_sd,gamma,SNR_mu,SNR_sd,SNR,s,tau_mu,tau_sd,tau,t0
0,1.0,0.05,4.4e-05,2.3e-05,4.7e-05,-0.64,0.1,-0.83,191.63,67.8,172.99,0.00731,0.76,0.5,0.65,0.0
1,1.0,0.05,4.4e-05,2.3e-05,1.9e-05,-0.64,0.1,-0.95,191.63,67.8,281.24,0.005324,0.76,0.5,1.06,0.0
2,1.0,0.05,4.4e-05,2.3e-05,4.5e-05,-0.64,0.1,-0.64,191.63,67.8,202.53,0.008786,0.76,0.5,0.1,0.0
3,1.0,0.05,4.4e-05,2.3e-05,2.7e-05,-0.64,0.1,-0.58,191.63,67.8,364.38,0.009589,0.76,0.5,1.32,0.0
4,1.0,0.05,4.4e-05,2.3e-05,2.7e-05,-0.64,0.1,-0.56,191.63,67.8,227.65,0.006119,0.76,0.5,0.84,0.0
5,1.0,0.05,4.4e-05,2.3e-05,6.6e-05,-0.64,0.1,-0.62,191.63,67.8,111.33,0.00775,0.76,0.5,0.53,0.0
6,1.0,0.05,4.4e-05,2.3e-05,3.5e-05,-0.64,0.1,-0.57,191.63,67.8,271.66,0.008658,0.76,0.5,1.44,0.0
7,1.0,0.05,4.4e-05,2.3e-05,4.7e-05,-0.64,0.1,-0.59,191.63,67.8,119.14,0.010842,0.76,0.5,0.21,0.0
8,1.0,0.05,4.4e-05,2.3e-05,0.000112,-0.64,0.1,-0.53,191.63,67.8,45.7,0.005171,0.76,0.5,0.0,0.0
9,1.0,0.05,4.4e-05,2.3e-05,5.9e-05,-0.64,0.1,-0.52,191.63,67.8,176.17,0.009949,0.76,0.5,0.12,0.0


# estimate convergence 

## 1. Rhat parameter

In [12]:
model_trace = glam_full.trace
rhats_params = az.rhat(model_trace, method="folded")

rhats_params_df = pd.DataFrame()
rhats_params_df['gamma'] = rhats_params.gamma.values
rhats_params_df['v'] = rhats_params.v.values
rhats_params_df['tau'] = rhats_params.tau.values
rhats_params_df['s'] = rhats_params.s.values

rhats_params_df  # if |rhat - 1 | < 0.05 (rhat: gelman-rubin statistic) the sampler converged 

  rval = inputs[0].__getitem__(inputs[1:])


Unnamed: 0,gamma,v,tau,s
0,1.738165,1.186639,1.258079,1.053812
1,2.173723,1.073386,1.322334,1.025258
2,1.334411,1.695364,1.255432,1.560893
3,1.327429,1.123609,1.120194,1.100191
4,1.286346,1.29552,1.08218,1.091589
5,1.164065,1.206953,1.111541,1.118246
6,1.384741,1.422557,1.158603,1.070011
7,1.299173,1.137552,1.296579,1.144321
8,1.24439,1.130278,1.091183,1.031746
9,1.708875,1.490136,1.732245,1.055452


## 2. effective sample size

In [13]:
ess_model = az.ess(model_trace, relative=False)

ess_params_df = pd.DataFrame()
ess_params_df['gamma'] = ess_model.gamma.values
ess_params_df['v'] = ess_model.v.values
ess_params_df['tau'] = ess_model.tau.values
ess_params_df['s'] = ess_model.s.values

ess_params_df

Unnamed: 0,gamma,v,tau,s
0,4.979555,8.318538,7.785854,15.629311
1,6.987034,17.156684,21.82091,26.114229
2,7.595718,9.242555,6.27283,10.74711
3,24.788681,7.992378,11.012336,26.315296
4,8.153438,8.908525,19.774377,28.270776
5,15.522435,7.47195,11.51879,11.500141
6,16.437503,17.248116,19.285776,34.679353
7,21.340958,7.52714,8.175543,9.07981
8,19.207144,8.944016,8.718288,9.337314
9,10.489565,6.430078,5.843272,30.575584


## 3. Percentage of divergence

In [14]:
# display the total number and percentage of divergent
divergent = model_trace['diverging']
print('Number of Divergent %d' % divergent.nonzero()[0].size)
divperc = divergent.nonzero()[0].size / len(model_trace) * 100
print('Percentage of Divergent %.1f' % divperc)

Number of Divergent 4
Percentage of Divergent 0.2


In [15]:
rhats_params_df.to_csv(str('results/convergence/GlamDataPF2019_hierarch_rhatsParams'+sufix+'.csv'))
ess_params_df.to_csv(str('results/convergence/GlamDataPF2019_hierarch_essParams'+sufix+'.csv'))

 # Waic scores (Less)

In [25]:
pm.waic(model_trace)

  rval = inputs[0].__getitem__(inputs[1:])
See http://arxiv.org/abs/1507.04544 for details
  "For one or more samples the posterior variance of the log predictive "


Computed from 8000 by 1 log-likelihood matrix

          Estimate       SE
elpd_waic -17833.35     0.00
p_waic      181.74        -


The scale is now log by default. Use 'scale' argument or 'stats.ic_scale' rcParam if
you rely on a specific value.
A higher log-score (or a lower deviance) indicates a model with better predictive
accuracy.

In [22]:
model_waic = pm.waic(model_trace,scale = 'negative_log')
print ('Model WAIC',model_waic.waic)

Model WAIC 17833.34625021151


See http://arxiv.org/abs/1507.04544 for details
  "For one or more samples the posterior variance of the log predictive "


In [23]:
pm.loo(model_trace,scale = 'negative_log')

  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "


Computed from 8000 by 1 log-likelihood matrix

          Estimate       SE
-elpd_loo 17711.52     0.00
p_loo        59.91        -


The scale is now log by default. Use 'scale' argument or 'stats.ic_scale' rcParam if
you rely on a specific value.
A higher log-score (or a lower deviance) indicates a model with better predictive
accuracy.

In [24]:
np.save(str('results/waic/glam_PF2019_full'+ sufix +'.npy'), model_waic)

In [16]:
# Compute WAICs
print('Computing WAIC scores for full model...')
if not os.path.exists(str('results/waic/glam_PF2019_full'+ sufix +'.npy')):
    # Note: DIC computation does not work for ADVI fitted models
    # But we are using WAIC
    glam_full.compute_waic()
else:
    print('  Found old DIC scores in "results/waic". Skipping WAIC computation...')
    glam_full.waic = np.load(str('results/waic/glam_PF2019_full'+ sufix +'.npy'))

# Compute WAICs
np.save(str('results/waic/glam_PF2019_full'+ sufix +'.npy'), glam_full.waic)

Computing WAIC scores for full model...


TypeError: waic() got an unexpected keyword argument 'trace'

In [None]:
glam_full.waic

In [None]:
# Compute LOO

glam_full.loo = pm.loo(trace=glam_full.trace, model=glam_full.model)
glam_full.loo
np.save(str('results/loo/glam_PF2019_full'+ sufix +'.npy'), glam_full.loo)

In [None]:
glam_full.loo

In [None]:
# Predictions
print('Predicting test set data using full GLAM...')
glam_full.exchange_data(test_data)

if not os.path.exists(str('results/predictions/glam_PF2019_full_hierarchical_cv'+sufix+'.csv')):
    glam_full.predict(n_repeats=50)
    glam_full.prediction.to_csv(str('results/predictions/glam_PF2019_full_hierarchical_cv'+sufix+'.csv'), index=False)
else:
    print('  Found old hierarchical full GLAM predictions in "results/predictions". Skipping prediction...')
    glam_full.prediction = pd.read_csv(str('results/predictions/glam_PF2019_full_hierarchical_cv'+sufix+'.csv'))

glam_full.prediction.head()

### 1. no-bias GLAM

In [None]:
# Fitting no-bias GLAM
print('Fitting no-bias GLAM hierarchically...')

glam_nobias = glam.GLAM(train_data)

if not os.path.exists(str('results/estimates/glam_PF2019_nobias_hierarchical_cv'+sufix+'.npy')):
    glam_nobias.make_model('hierarchical', gamma_val=1.0, t0_val=0)
    glam_nobias.fit(method='NUTS', tune=1000)
else:
    print('  Found old parameter estimates in "results/estimates". Skipping estimation...')
    glam_nobias.estimates = np.load(str('results/estimates/glam_PF2019_nobias_hierarchical_cv'+sufix+'.npy'))
 

In [None]:
   
# Save parameter estimates
np.save(str('results/estimates/glam_PF2019_nobias_hierarchical_cv'+sufix+'.npy'), glam_nobias.estimates)
pd.DataFrame(glam_nobias.estimates)

In [None]:
# In case it is already fitted
params_part_like = pd.DataFrame.from_dict(glam_nobias.estimates.item(0))
params_part_like

In [None]:
# Compute LOO

glam_nobias.loo = pm.loo(trace=glam_nobias.trace, model=glam_nobias.model)
glam_nobias.loo

np.save(str('results/loo/glam_PF2019_nobias'+ sufix +'.npy'), glam_nobias.loo
)

In [None]:
# Predictions
print('Predicting test set data using no-bias GLAM...')
glam_nobias.exchange_data(test_data)

if not os.path.exists(str('results/predictions/glam_PF2019_nobias_hierarchical_cv'+sufix+'.csv')):
    glam_nobias.predict(n_repeats=50)
    glam_nobias.prediction.to_csv(str('results/predictions/glam_PF2019_nobias_hierarchical_cv'+sufix+'.csv'), index=False)
else:
    print('  Found old hierarchical no-bias GLAM predictions in "results/predictions". Skipping prediction...')
    glam_nobias.prediction = pd.read_csv(str('results/predictions/glam_PF2019_nobias_hierarchical_cv'+sufix+'.csv'))

glam_nobias.prediction.head()

## 2. Plot fit

In [None]:
print('Close Figure to continue...')
glam.plot_fit(test_data, [glam_full.prediction]);
#glam.plot_fit(test_data, [glam_full.prediction,glam_nobias.prediction]);

plt.show()

## Parameters for full hierarchical model

In [None]:
params_participant = glam_full.estimates
params_participant

In [None]:
params_participant = pd.DataFrame.from_dict(glam_full.estimates.item(0))

In [None]:
params_participant

In [None]:
print ("Mean gamma " +  str(params_participant['gamma'].mean()))

In [None]:
hist = params_participant[['SNR','gamma','tau','v']].hist(figsize = [20,3] , layout=[1,4],bins = 20)

## [END] 