In [1]:
import glam
import pandas as pd
import numpy as np
import os.path
import arviz as az

import matplotlib.pyplot as plt

  data = yaml.load(f.read()) or {}
  defaults = yaml.load(f)


In [2]:
import pymc3 as pm

In [3]:
np.random.seed(23) # from random.org

# 3.1. Hierarchical GLAM estimation and out of sample prediction

## Load data

In [4]:
# Load data
sufix = '_hierarchical_More_NoBin_Gamma-11_NUTS_33_eLife2'
data = pd.read_csv('data/PF2019_data/GlamDataPF2019_More_NoBin_33.csv')
#data = pd.read_csv('data/PF2019_data/GlamDataFF2018_Like_NoBin_TEST.csv')

# Subset only necessary columns
data = data[['subject', 'trial', 'choice', 'rt',
         'item_value_0', 'item_value_1',
         'gaze_0', 'gaze_1']]
data.head()

Unnamed: 0,subject,trial,choice,rt,item_value_0,item_value_1,gaze_0,gaze_1
0,1,0,1,1734.284,110,131,0.66909,0.33091
1,1,1,0,6555.37,47,50,0.75963,0.24037
2,1,2,0,3174.566,50,44,0.549371,0.450629
3,1,3,1,2877.579,57,50,0.608409,0.391591
4,1,4,1,1806.31,42,50,0.522849,0.477151


In [5]:
#data = data.loc[data["subject"] < 3 ]

In [6]:
# scale down the measures
data['item_value_0'] = data['item_value_0']/10
data['item_value_1'] = data['item_value_1']/10

In [7]:
data = data[ (data['subject'] != 1) & (data['subject'] != 13) & (data['subject'] != 16) & (data['subject'] != 20)]

## Split data in training and test sets

In [8]:
train_data = pd.DataFrame()
test_data = pd.DataFrame()

for subject in data.subject.unique():
    subject_data = data[data['subject'] == subject].copy().reset_index(drop=True)
    n_trials = len(subject_data)
    
    subject_train = subject_data.iloc[np.arange(0, n_trials, 2)].copy()
    subject_test = subject_data.iloc[np.arange(1, n_trials, 2)].copy()

    test_data = pd.concat([test_data, subject_test])
    train_data = pd.concat([train_data, subject_train])

#test_data.to_csv(str('data/PF2019_data/GlamDataPF2019_preprocessed_test'+sufix+'.csv'))
#train_data.to_csv(str('data/PF2019_data/GlamDataPF2019_preprocessed_train'+sufix+'.csv'))

print('Split data into training ({} trials) and test ({} trials) sets...'.format(len(train_data), len(test_data)))

Split data into training (1680 trials) and test (1680 trials) sets...


In [9]:
train_data
#test_data

Unnamed: 0,subject,trial,choice,rt,item_value_0,item_value_1,gaze_0,gaze_1
0,2,0,1,6228.547,11.0,13.1,0.575607,0.424393
2,2,2,0,4766.744,5.0,4.4,0.794866,0.205134
4,2,4,1,5673.265,4.2,5.0,0.830204,0.169796
6,2,6,0,7750.691,7.8,8.0,0.718690,0.281310
8,2,8,1,7758.235,5.0,4.8,0.419489,0.580511
10,2,10,1,10806.455,4.6,5.0,0.599278,0.400722
12,2,12,0,22118.048,5.0,5.6,0.159116,0.840884
14,2,14,1,24336.379,11.0,12.4,0.797748,0.202252
16,2,16,1,7216.421,6.4,8.0,0.488293,0.511707
18,2,18,1,20652.520,10.1,11.0,0.873862,0.126138


In [10]:
# we renumber subject data for proper sequence
train_data2 = train_data.replace(train_data.subject.unique(), list(range(len(train_data.subject.unique()))))

## Hierarchical GLAM estimation

### 1. full GLAM

In [11]:
# Fitting full GLAM
print('Fitting full GLAM hierarchically...')

glam_full = glam.GLAM(train_data2)

if not os.path.exists(str('results/estimates/glam_PF2019_full_hierarchical_cv'+sufix+'.npy')):
    glam_full.make_model('hierarchical', gamma_bounds=(-1, 1), t0_val=0)
    glam_full.fit(method='NUTS', tune=1000)
else:
    print('  Found old parameter estimates in "results/estimates". Skipping estimation...')
    glam_full.estimates = np.load(str('results/estimates/glam_PF2019_full_hierarchical_cv'+sufix+'.npy'))   

Fitting full GLAM hierarchically...
Generating hierarchical model for 28 subjects...


  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...


Fitting 1 model(s) using NUTS...
  Fitting model 1 of 1...


  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [tau, tau_sd, tau_mu, SNR, SNR_sd, SNR_mu, gamma, gamma_sd, gamma_mu, v, v_sd, v_mu]


  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 1157 seconds.
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
There were 3 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.6636409691977572, but should be close to 0.8. Try to increase the number of tuning steps.
There were 5 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.46660554724996517, but should be close to 0.8. Try to increase the number of tuning steps.
There were 3 divergences after tuning. Increase `target_accept` or reparameterize.
The rhat statistic is larger than 1.05 for some parameters. This indicates slight 

/!\ Automatically setting parameter precision...


In [12]:
# Save parameter estimates
np.save(str('results/estimates/glam_PF2019_nobias_hierarchical_cv'+sufix+'.npy'), glam_full.estimates)
pd.DataFrame(glam_full.estimates)

Unnamed: 0,b,p_error,v_mu,v_sd,v,gamma_mu,gamma_sd,gamma,SNR_mu,SNR_sd,SNR,s,tau_mu,tau_sd,tau,t0
0,1.0,0.05,4.2e-05,1.7e-05,1.7e-05,-0.11,0.46,0.16,211.31,106.1,271.24,0.004723,0.6,0.4,0.03,0.0
1,1.0,0.05,4.2e-05,1.7e-05,3.4e-05,-0.11,0.46,-0.51,211.31,106.1,301.57,0.008951,0.6,0.4,1.02,0.0
2,1.0,0.05,4.2e-05,1.7e-05,3.1e-05,-0.11,0.46,0.21,211.31,106.1,342.61,0.010646,0.6,0.4,0.01,0.0
3,1.0,0.05,4.2e-05,1.7e-05,3.5e-05,-0.11,0.46,0.3,211.31,106.1,206.63,0.007283,0.6,0.4,0.32,0.0
4,1.0,0.05,4.2e-05,1.7e-05,5e-05,-0.11,0.46,-0.48,211.31,106.1,154.42,0.0077,0.6,0.4,0.15,0.0
5,1.0,0.05,4.2e-05,1.7e-05,3.7e-05,-0.11,0.46,0.29,211.31,106.1,245.12,0.009987,0.6,0.4,0.03,0.0
6,1.0,0.05,4.2e-05,1.7e-05,5.1e-05,-0.11,0.46,-0.59,211.31,106.1,155.85,0.007983,0.6,0.4,0.55,0.0
7,1.0,0.05,4.2e-05,1.7e-05,8.8e-05,-0.11,0.46,-0.26,211.31,106.1,49.72,0.005133,0.6,0.4,0.05,0.0
8,1.0,0.05,4.2e-05,1.7e-05,5e-05,-0.11,0.46,-0.49,211.31,106.1,133.28,0.007579,0.6,0.4,1.09,0.0
9,1.0,0.05,4.2e-05,1.7e-05,2.4e-05,-0.11,0.46,0.37,211.31,106.1,255.26,0.007056,0.6,0.4,0.69,0.0


# estimate convergence 

## 1. Rhat parameter

In [13]:
model_trace = glam_full.trace
rhats_params = az.rhat(model_trace, method="folded")

rhats_params_df = pd.DataFrame()
rhats_params_df['gamma'] = rhats_params.gamma.values
rhats_params_df['v'] = rhats_params.v.values
rhats_params_df['tau'] = rhats_params.tau.values
rhats_params_df['s'] = rhats_params.s.values

rhats_params_df  # if |rhat - 1 | < 0.05 (rhat: gelman-rubin statistic) the sampler converged 

  rval = inputs[0].__getitem__(inputs[1:])


Unnamed: 0,gamma,v,tau,s
0,1.041482,1.010669,1.056703,1.006579
1,1.037471,1.092425,1.070733,1.056711
2,1.079211,1.045275,1.047258,1.011098
3,1.062372,1.026971,1.043778,1.046346
4,1.02653,1.034618,1.027784,1.011045
5,1.062316,1.017087,1.01989,1.004798
6,1.012911,1.059059,1.039201,1.030079
7,1.017544,1.105576,1.006274,1.10906
8,1.008626,1.081852,1.077887,1.011102
9,1.038091,1.041181,1.036396,1.01147


## 2. effective sample size

In [14]:
ess_model = az.ess(model_trace, relative=False)

ess_params_df = pd.DataFrame()
ess_params_df['gamma'] = ess_model.gamma.values
ess_params_df['v'] = ess_model.v.values
ess_params_df['tau'] = ess_model.tau.values
ess_params_df['s'] = ess_model.s.values

ess_params_df

Unnamed: 0,gamma,v,tau,s
0,129.826063,122.410773,105.415105,102.606384
1,137.256908,19.86211,113.756351,29.455708
2,41.427506,36.203434,37.236704,112.826427
3,28.659321,80.567103,119.210769,35.90366
4,186.264402,180.816222,96.442001,359.066219
5,139.980383,191.981317,95.731413,125.499401
6,73.549038,40.872295,40.344715,133.267579
7,84.298308,22.861119,66.187745,25.470931
8,97.321311,31.512538,143.324969,71.548984
9,37.949042,66.126126,69.92391,56.753961


## 3. Percentage of divergence

In [15]:
# display the total number and percentage of divergent
divergent = model_trace['diverging']
print('Number of Divergent %d' % divergent.nonzero()[0].size)
divperc = divergent.nonzero()[0].size / len(model_trace) * 100
print('Percentage of Divergent %.1f' % divperc)

Number of Divergent 11
Percentage of Divergent 0.5


In [16]:
rhats_params_df.to_csv(str('results/convergence/GlamDataPF2019_hierarch_rhatsParams'+sufix+'.csv'))
ess_params_df.to_csv(str('results/convergence/GlamDataPF2019_hierarch_essParams'+sufix+'.csv'))

# Waic Scores

In [17]:
pm.waic(model_trace,scale = 'negative_log')

  rval = inputs[0].__getitem__(inputs[1:])
See http://arxiv.org/abs/1507.04544 for details
  "For one or more samples the posterior variance of the log predictive "


Computed from 8000 by 1 log-likelihood matrix

           Estimate       SE
-elpd_waic 15481.91     0.00
p_waic        67.26        -


The scale is now log by default. Use 'scale' argument or 'stats.ic_scale' rcParam if
you rely on a specific value.
A higher log-score (or a lower deviance) indicates a model with better predictive
accuracy.

In [18]:
model_waic = pm.waic(model_trace,scale = 'negative_log')
print ('Model WAIC',model_waic.waic)

Model WAIC 15481.905746223238


See http://arxiv.org/abs/1507.04544 for details
  "For one or more samples the posterior variance of the log predictive "


In [19]:
pm.loo(model_trace,scale = 'negative_log')

  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "


Computed from 8000 by 1 log-likelihood matrix

          Estimate       SE
-elpd_loo 15460.29     0.00
p_loo        45.65        -


The scale is now log by default. Use 'scale' argument or 'stats.ic_scale' rcParam if
you rely on a specific value.
A higher log-score (or a lower deviance) indicates a model with better predictive
accuracy.

In [20]:
np.save(str('results/waic/glam_PF2019_full'+ sufix +'.npy'), model_waic)

In [None]:
len(model_trace.s[0])

In [None]:
            self.waic = np.array([pm.waic(trace=trace, model=model)
                                 for (trace, model) in zip(self.trace, self.model)])

In [None]:
glam_full.compute_waic()

In [None]:
# Compute WAICs
print('Computing WAIC scores for full model...')
if not os.path.exists(str('results/waic/glam_PF2019_full'+ sufix +'.npy')):
    # Note: DIC computation does not work for ADVI fitted models
    # But we are using WAIC
    glam_full.compute_waic()
else:
    print('  Found old DIC scores in "results/waic". Skipping WAIC computation...')
    glam_full.waic = np.load(str('results/waic/glam_PF2019_full'+ sufix +'.npy'))

# Compute WAICs
np.save(str('results/waic/glam_PF2019_full'+ sufix +'.npy'), glam_full.waic)

In [None]:
glam_full.waic

In [None]:
# Compute LOO

glam_full.loo = pm.loo(trace=glam_full.trace, model=glam_full.model)
glam_full.loo
np.save(str('results/loo/glam_PF2019_full'+ sufix +'.npy'), glam_full.loo)

In [None]:
glam_full.loo

In [None]:
# Predictions
print('Predicting test set data using full GLAM...')
glam_full.exchange_data(test_data)

if not os.path.exists(str('results/predictions/glam_PF2019_full_hierarchical_cv'+sufix+'.csv')):
    glam_full.predict(n_repeats=50)
    glam_full.prediction.to_csv(str('results/predictions/glam_PF2019_full_hierarchical_cv'+sufix+'.csv'), index=False)
else:
    print('  Found old hierarchical full GLAM predictions in "results/predictions". Skipping prediction...')
    glam_full.prediction = pd.read_csv(str('results/predictions/glam_PF2019_full_hierarchical_cv'+sufix+'.csv'))

glam_full.prediction.head()

### 1. no-bias GLAM

In [None]:
# Fitting no-bias GLAM
print('Fitting no-bias GLAM hierarchically...')

glam_nobias = glam.GLAM(train_data)

if not os.path.exists(str('results/estimates/glam_PF2019_nobias_hierarchical_cv'+sufix+'.npy')):
    glam_nobias.make_model('hierarchical', gamma_val=1.0, t0_val=0)
    glam_nobias.fit(method='NUTS', tune=1000)
else:
    print('  Found old parameter estimates in "results/estimates". Skipping estimation...')
    glam_nobias.estimates = np.load(str('results/estimates/glam_PF2019_nobias_hierarchical_cv'+sufix+'.npy'))
 

In [None]:
   
# Save parameter estimates
np.save(str('results/estimates/glam_PF2019_nobias_hierarchical_cv'+sufix+'.npy'), glam_nobias.estimates)
pd.DataFrame(glam_nobias.estimates)

In [None]:
# In case it is already fitted
params_part_like = pd.DataFrame.from_dict(glam_nobias.estimates.item(0))
params_part_like

In [None]:
# Compute LOO

glam_nobias.loo = pm.loo(trace=glam_nobias.trace, model=glam_nobias.model)
glam_nobias.loo

np.save(str('results/loo/glam_PF2019_nobias'+ sufix +'.npy'), glam_nobias.loo
)

In [None]:
# Predictions
print('Predicting test set data using no-bias GLAM...')
glam_nobias.exchange_data(test_data)

if not os.path.exists(str('results/predictions/glam_PF2019_nobias_hierarchical_cv'+sufix+'.csv')):
    glam_nobias.predict(n_repeats=50)
    glam_nobias.prediction.to_csv(str('results/predictions/glam_PF2019_nobias_hierarchical_cv'+sufix+'.csv'), index=False)
else:
    print('  Found old hierarchical no-bias GLAM predictions in "results/predictions". Skipping prediction...')
    glam_nobias.prediction = pd.read_csv(str('results/predictions/glam_PF2019_nobias_hierarchical_cv'+sufix+'.csv'))

glam_nobias.prediction.head()

## 2. Plot fit

In [None]:
print('Close Figure to continue...')
glam.plot_fit(test_data, [glam_full.prediction]);
#glam.plot_fit(test_data, [glam_full.prediction,glam_nobias.prediction]);

plt.show()

## Parameters for full hierarchical model

In [None]:
params_participant = glam_full.estimates
params_participant

In [None]:
params_participant = pd.DataFrame.from_dict(glam_full.estimates.item(0))

In [None]:
params_participant

In [None]:
print ("Mean gamma " +  str(params_participant['gamma'].mean()))

In [None]:
hist = params_participant[['SNR','gamma','tau','v']].hist(figsize = [20,3] , layout=[1,4],bins = 20)

## [END] 