In [1]:
#import
import os
from functools import partial
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import pyro
import pyro.distributions as dist
import warnings
from pandas.errors import SettingWithCopyWarning
from pyro.infer import MCMC, NUTS

# for CI testing
warnings.simplefilter(action='ignore', category=SettingWithCopyWarning)
smoke_test = ('CI' in os.environ)
assert pyro.__version__.startswith('1.8.4')
pyro.set_rng_seed(1)


# Set matplotlib settings
%matplotlib inline
plt.style.use('default')

In [32]:
# Extract morphing level and shock (binary morphing level)

# Equal for all subjects
df = pd.read_csv('../data/newLookAtMe/newLookAtMe20.csv')
# Take an explicit copy so the column assignments below modify an independent
# frame instead of a slice of `df` (assigning into a slice is what raises
# SettingWithCopyWarning).
data = df[['morphing level', 'shock']].copy()
data['shock'] = data['shock'].astype(int)
# Binarise the morphing level: 1 where it equals 6, 0 otherwise.
data['morphing level'] = (data['morphing level'] == 6).astype(int)

Encode each `[morphing level, shock]` pair as a single category, creating a tensor where:
* `[0 0] = 0`
* `[1 0] = 1`
* `[1 1] = 2`

In [33]:
# Collapse each [morphing level, shock] row into a single category code.
# Rows equal to [0, 0] or [1, 0] get their own code; every other row
# (including [1, 1]) falls through to code 2.
pair_to_code = {(0, 0): 0, (1, 0): 1}
data_model = data.to_numpy()
data_final = torch.tensor(
    [pair_to_code.get(tuple(row.tolist()), 2) for row in data_model]
)
data_final

tensor([0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 2, 0, 2, 2, 0,
        1, 2, 0, 0, 0, 0, 2, 1, 0, 2, 2, 0, 1, 0, 2, 0, 1, 0, 2, 0, 2, 0, 2, 0,
        0, 2, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 1, 0, 1, 0, 0, 2, 0, 2, 0, 0, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 1, 0, 0, 2,
        0, 0, 1, 0, 2, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0,
        0, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, 0, 1, 0, 0, 0, 1, 2, 0, 0, 0, 0, 2, 0,
        0, 2, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 1])

$p(data\, final \mid \theta) = \prod_{k=1}^{3}\theta_k^{N_k}$

where $N_k$ is the number of observations in category $k$ and $\theta$ is a 3-dimensional probability vector modeled with a Dirichlet distribution.

In [34]:
# model definition

# Dirichlet concentration of all ones: a uniform prior over the 3 categories
prior_ = torch.ones(3)

def model(data):
    """Dirichlet-Categorical model of the encoded observations.

    Samples the site 'theta' from ``Dirichlet(prior_)`` and, inside a plate
    named 'data' of size ``len(data)``, conditions the site 'obs'
    (``Categorical(theta)``) on ``data``.

    Args:
        data: observed category codes; presumably a 1-D integer tensor with
            values in {0, 1, 2} -- confirm against the caller.
    """
    n_obs = len(data)
    category_probs = pyro.sample('theta', dist.Dirichlet(prior_))
    with pyro.plate('data', n_obs):
        pyro.sample('obs', dist.Categorical(category_probs), obs=data)

In [35]:
# MCMC sampling
# A smoke-test run uses a minimal chain; otherwise draw 1000 samples after
# 200 warmup steps.
if smoke_test:
    num_samples, warmup_steps = 10, 10
else:
    num_samples, warmup_steps = 1000, 200
nuts_kernel = NUTS(model)
mcmc = MCMC(nuts_kernel, num_samples=num_samples, warmup_steps=warmup_steps)
mcmc.run(data_final)
# Convert the samples of every site to NumPy arrays on the CPU.
hmc_samples = {
    site: draws.detach().cpu().numpy()
    for site, draws in mcmc.get_samples().items()
}

Sample: 100%|██████████| 1200/1200 [00:09, 120.03it/s, step size=1.06e+00, acc. prob=0.881]


In [39]:
# Mean of the 'theta' samples along the first axis
np.mean(hmc_samples['theta'], axis=0)

array([0.64456666, 0.17899393, 0.17643872], dtype=float32)

The cell above gives the probabilities of observing a pair [0 0], a pair [1 0], or a pair [1 1], but not $P(\text{shock} \mid \text{morph level})$.