In [1]:
import logging
import os.path

import numpy as np
import pymc as pm
import arviz as az
import aesara.tensor as at
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from scipy import stats
import pandas as pd
import warnings
import extract_correct_csv

In [2]:

# Silence noisy library warnings so the cell outputs stay readable.
# NOTE(review): ignoring RuntimeWarning globally can also hide numerical problems
# (overflow, invalid values) raised while the model is being fitted.
warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=RuntimeWarning)

scaler = StandardScaler()  # NOTE(review): created here but never applied in the visible cells

valid_subject = extract_correct_csv.extract_only_valid_subject()
valid_k_list = list(range(1, 10))

k = 5  # latent-space dimensionality (copied into K further down)
i = 2  # subject index used to build the input file names
num_trials_to_remove = 48  # leading trials dropped from every signal and from the labels

string_subject = extract_correct_csv.read_correct_subject_csv(i)
csv_ = 'data/LookAtMe_0' + string_subject + '.csv'
global_data = pd.read_csv(csv_, sep='\t')
# Binary target: 1 when the rating is above 2, otherwise 0.
y = (global_data['rating'] > 2).astype(int).to_numpy()
e_labels = y[:, np.newaxis]  # column vector, one row per trial
e_labels = e_labels[num_trials_to_remove:]


TRIAL = 160  # trial count before trimming (TRIAL - num_trials_to_remove is used below)


def _load_trimmed_features(signal_name):
    """Read data/features/<signal_name>/<i>.csv and drop the first num_trials_to_remove rows."""
    features = pd.read_csv('data/features/' + signal_name + '/' + str(i) + '.csv')
    return features[num_trials_to_remove:]


hr = _load_trimmed_features('hr')

eda = _load_trimmed_features('eda')

pupil = _load_trimmed_features('pupil')


In [3]:
# Number of trials left once the first num_trials_to_remove have been discarded.
TRIAL_DEF = TRIAL - num_trials_to_remove
TRIAL_DEF

112

In [4]:
#TRAIN_PERC = 0.75
# Fraction of the trimmed trials held out for testing.
TEST_PERC = 0.25            #1-TRAIN_PERC
# Split point: the first N_train rows form the training set.
# int() truncates, so a fractional product is rounded down.
N_train = int(len(pupil) * (1-TEST_PERC))
N_train

84

In [5]:
# Training portion: the first N_train rows of every signal and of the labels.
pupil_train = pupil[:N_train]
hr_train = hr[:N_train]
eda_train = eda[:N_train]
e_labels_train = e_labels[:N_train]

In [6]:
# Row count (N_*) and column count (D_*) of each training block.
N_pupil, D_pupil = pupil_train.shape[:2]

N_hr, D_hr = hr_train.shape[:2]

N_eda, D_eda = eda_train.shape[:2]

N_e, D_e = e_labels_train.shape[:2]

# Latent dimensionality used by the model coordinates.
K = k

In [7]:
# One "<rows> <columns>" line per training block: pupil, hr, eda, labels.
for _shape_pair in ((N_pupil, D_pupil), (N_hr, D_hr), (N_eda, D_eda), (N_e, D_e)):
    print(*_shape_pair)

84 5
84 60
84 60
84 1


In [8]:
# Latent-variable model: a shared latent matrix c generates both the HR features
# (Gaussian likelihood) and the binary pain-expectation label (Bernoulli likelihood).
# Only the HR branch is active; the pupil/EDA branches are disabled by being wrapped in
# string literals, which are evaluated and discarded.
with pm.Model() as sPPCA:
    # Named coordinates. 'physio_n' (samples) is mutable so that it can be resized
    # when a different data set is swapped in via set_data.
    sPPCA.add_coord('physio_n', np.arange(N_hr), mutable=True)
    sPPCA.add_coord('physio_d', np.arange(D_hr), mutable=False)
    sPPCA.add_coord('e_label_d', np.arange(D_e), mutable=True)
    sPPCA.add_coord('K', np.arange(K), mutable=True)
    # observed data (stored features x samples, hence the transpose)
    hr_data = pm.MutableData("hr_data", hr_train.T, dims=['physio_d', 'physio_n'])
    '''pupil_data = pm.MutableData("pupil_data", pupil_train.T)
    eda_data = pm.MutableData("eda_data", eda_train.T)'''

    #e_data = pm.MutableData("e_data", e_labels_train.T)

    # weight matrices (features x latent dims)
    Whr = pm.Normal('Whr', mu=0, sigma=2.0 * 1, dims=['physio_d', 'K'])
    '''Wpupil = pm.Normal('Wpupil', mu=at.zeros([D_pupil, K]), sigma=2.0 * at.ones([D_pupil, K]),
                       shape=[D_pupil, K])

    Weda = pm.Normal('Weda', mu=at.zeros([D_eda, K]), sigma=2.0 * at.ones([D_eda, K]), shape=[D_eda, K])'''

    # weight matrix for pain expectation.
    # check mu,sigma,shape
    We = pm.Normal('W_e', mu=0, sigma=2.0 * 1, dims=['e_label_d', 'K'])

    # latent space: one K-dimensional vector per sample
    c = pm.Normal('c', mu=0, sigma=1, dims=['physio_n', 'K'])

    # HR data modelled as a Gaussian whose mean is itself a latent Normal centred on Whr . c^T
    mu_hr = pm.Normal('mu_hr', Whr.dot(c.T), 1, dims=['physio_d', 'physio_n'])  # hyperprior 1
    sigma_hr = pm.Exponential('sigma_hr', 1)  # hyperprior 2
    x_hr = pm.Normal('x_hr', mu=mu_hr, sigma=sigma_hr, observed=hr_data, dims=['physio_d', 'physio_n'])

    # pupil dilation data modelled as a Gaussian (disabled, see the string literal below)
    '''mu_pupil = pm.Normal('mu_pupil', Wpupil.dot(c.T), at.ones([D_pupil, N_pupil]))  # hyperprior 1
    sigma_pupil = pm.Exponential('sigma_pupil', at.ones([D_pupil, N_pupil]))  # hyperprior 2
    x_pupil = pm.Normal('x_pupil', mu=mu_pupil, sigma=sigma_pupil, shape=[D_pupil, N_pupil],
                        observed=pupil_data)

    # eda
    mu_eda = pm.Normal('mu_eda', Weda.dot(c.T), at.ones([D_eda, N_eda]))  # hyperprior 1
    sigma_eda = pm.Exponential('sigma_eda', at.ones([D_eda, N_eda]))  # hyperprior 2
    x_eda = pm.Normal('x_eda', mu=mu_eda, sigma=sigma_eda, shape=[D_eda, N_eda], observed=eda_data)'''

    # pain expectation: the quantity we want to infer given c
    # two options: binary or multiclass (1-4)
    # p = probability of success?
    # NOTE(review): the observed labels are passed as a plain array, not as MutableData,
    # so they stay fixed to the training labels when 'physio_n' is resized - confirm this is intended.
    x_e = pm.Bernoulli('x_e', p=pm.math.sigmoid(We.dot(c.T)), dims=['e_label_d', 'physio_n'], observed=e_labels_train.T)

    # x_hr = pm.Bernoulli('x_hr', p=pm.math.sigmoid(Whr.dot(c.T)), shape=[D_hr, N_hr], observed=hr_data)
    # x_eda = pm.Bernoulli('x_eda', p=pm.math.sigmoid(Weda.dot(c.T)), shape=[D_eda, N_eda], observed=eda_data)

In [9]:
# Build the Graphviz representation of the model and render it;
# the recorded output shows the file written as 'unpooled_model.pdf'.
gv = pm.model_to_graphviz(sPPCA)
gv.view('unpooled_model')

'unpooled_model.pdf'

In [10]:
# Fit the model with pm.fit (default method) for up to 1000 iterations, with a
# parameter-convergence callback, then draw 500 samples from the fitted approximation.
# NOTE(review): the recorded average loss is ~1.7e10, which suggests the fit is far from
# converged and/or the inputs are unscaled (the StandardScaler created earlier is never applied) - confirm.
# NOTE(review): no random seed is passed here, so results can differ between runs.
with sPPCA:
    approx = pm.fit(1000, callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-4)])
    trace = approx.sample(500)

Finished [100%]: Average Loss = 1.6886e+10


In [11]:
# Hold-out portion: every row after the first N_train, re-indexed from 0.
# reset_index(drop=True) discards the old index directly; the previous
# reset_index().drop(columns=['index']) would have removed a real data column
# if a feature file ever contained one named 'index'.
pupil_test = pupil[N_train:].reset_index(drop=True)
hr_test = hr[N_train:].reset_index(drop=True)
eda_test = eda[N_train:].reset_index(drop=True)
e_test = e_labels[N_train:]
len(hr_test)

28

In [12]:
# Posterior predictive draws of the label variable x_e with the training data still
# loaded in the model, using the samples drawn from the fitted approximation.
with sPPCA:
    posterior_pred = pm.sample_posterior_predictive(
        trace,var_names=["x_e"], random_seed=123)

Sampling: [x_e]


In [13]:
# Shape check; the trailing axes follow x_e's dims (e_label_d, physio_n).
posterior_pred.posterior_predictive['x_e'].shape

(1, 500, 1, 84)

In [14]:
# az.plot_trace(trace);
# Swap the held-out HR features into the model (resizing 'physio_n') and sample x_e for them.
# NOTE(review): the recorded output ("Sampling: [c, x_e]") shows that the latent c is
# re-sampled too, presumably because its 'physio_n' dimension was resized. If so, the x_e
# predictions come from fresh draws of c rather than from hr_test - confirm.
with sPPCA:
    # update values of predictors:
    sPPCA.set_data("hr_data", hr_test.T, coords={'physio_n': range(hr_test.shape[0])})
    '''sPPCA.set_data("pupil_data", pupil_test, coords={'obs_id': range(len(pupil_test))})
    sPPCA.set_data("eda_data", eda_test, coords={'obs_id': range(len(eda_test))})'''
    # use the updated values and predict outcomes and probabilities:

    posterior_predictive = pm.sample_posterior_predictive(
        trace,var_names=["x_e"], random_seed=123, predictions=True)

Sampling: [c, x_e]


In [15]:
# Collapse the predictive draws into one predicted label per test trial.
e_pred = posterior_predictive.predictions['x_e']
# Most frequent sampled value over the first axis of e_pred[0] (the draws of the first
# chain, per the recorded shape), reshaped into a column like e_test.
e_pred_mode = np.squeeze(stats.mode(e_pred[0], keepdims=False)[0])[:, np.newaxis]
e_pred.shape

(1, 500, 1, 28)

In [16]:
# Ground-truth binary labels of the held-out trials (column vector).
e_test

array([[1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1]])

In [17]:
# e_test and e_pred_mode both come from the held-out split, so this is a test-set score;
# it was previously reported under a "Train" label.
test_accuracy_exp = accuracy_score(e_test, e_pred_mode)
train_accuracy_exp = test_accuracy_exp  # old, misleading name kept for any code that still reads it
print('\n\tTest Accuracy Pain Expectation: ' + str(test_accuracy_exp))
print(' ')


	Train Accuracy Pain Expectation: 0.5357142857142857
 


In [None]:
# Average the predicted EDA over the 'draw' dimension, take the first entry of the leading
# axis (presumably the chain - confirm) and drop singleton axes before converting to numpy.
# keepdims is now a real boolean: the string 'false' is non-empty and therefore truthy,
# i.e. it requested the opposite of what it says.
# NOTE(review): eda_pred is not defined in any visible cell (the EDA likelihood is disabled
# in the model), and this cell rebinds eda_pred, so it cannot simply be run twice.
eda_pred = np.squeeze(eda_pred.mean('draw', keepdims=False)[0]).to_numpy()

In [None]:
# Per-trial Pearson correlation between recorded and predicted EDA.
edapred_ = eda_pred.T

eda_ = eda.to_numpy()

# The trial count comes from the data instead of a hard-coded 112, and the loop variable
# has its own name so it no longer overwrites the subject index i.
# NOTE(review): assumes edapred_ has at least as many rows as eda_ - confirm.
corrlist = []
for trial_idx in range(len(eda_)):
    res = np.corrcoef(eda_[trial_idx], edapred_[trial_idx])[0][1]
    corrlist.append(res)
    print('trial '+str(trial_idx)+ ' corr: '+str(res.round(3)))

In [None]:
def ccc(x, y):
    """Concordance Correlation Coefficient of two equally shaped arrays.

    Evaluates ``2 * s_xy / (var(x) + var(y) + (mean(x) - mean(y)) ** 2)``, where
    ``s_xy`` is the sum of the products of the centred values divided by
    ``x.shape[0]`` and the variances are ``np.var`` with its default settings.
    """
    mean_x = x.mean()
    mean_y = y.mean()
    covariance = np.sum((x - mean_x) * (y - mean_y)) / x.shape[0]
    mean_gap_sq = (mean_x - mean_y) ** 2
    denominator = np.var(x) + np.var(y) + mean_gap_sq
    return 2 * covariance / denominator

In [None]:
# Spot check: concordance between predicted and recorded EDA for the first row.
ccc(edapred_[0],eda_[0])

In [None]:
# Inspect the first row of the recorded EDA array.
eda_[0]

In [None]:
# Shape of the first row of the transposed predictions (should match eda_[0]).
edapred_[0].shape

In [None]:
# First row of recorded and predicted EDA, for stepping through ccc() by hand.
# The prediction is taken from the transposed edapred_ so that both vectors are indexed
# the same way as in the correlation loop and in the ccc(edapred_[0], eda_[0]) check.
# NOTE(review): this rebinds the global y that holds the binary labels.
x=eda_[0]
y=edapred_[0]

In [None]:
# The two vectors must have the same shape for the element-wise product in ccc().
print(x.shape)
print(y.shape)

In [None]:
# Centred recorded values (intermediate term of ccc()).
x- x.mean()

In [None]:
# Centred second vector (intermediate term of ccc()).
y-y.mean()

In [None]:
# Element-wise product of the centred vectors; its sum divided by the length is s_xy in ccc().
(x - x.mean()) * (y - y.mean())


In [None]:
# Covariance term of ccc(). np.sum() was called without an argument, which raises
# TypeError; it needs the product of the centred vectors.
sxy = np.sum((x - x.mean()) * (y - y.mean())) / x.shape[0]

In [None]:
# Concordance for the first row. The prediction is read from the transposed edapred_,
# the same array the Pearson computations index row by row.
conc = ccc(eda_[0], edapred_[0])

In [None]:
# Per-trial Pearson and concordance correlation between recorded and predicted EDA.
# Both metrics now read the prediction from edapred_ (previously the concordance used the
# untransposed eda_pred), the trial count comes from the data instead of a hard-coded 112,
# and the loop variable no longer overwrites the subject index i.
pearson_list = []
concord_list = []
for trial_idx in range(len(eda_)):
    observed_trial = eda_[trial_idx]
    predicted_trial = edapred_[trial_idx]
    pear = np.corrcoef(observed_trial, predicted_trial)[0][1]
    conc = ccc(observed_trial, predicted_trial)
    pearson_list.append(pear)
    concord_list.append(conc)

In [None]:
# Averages of the two per-trial metric lists, rounded to 4 decimals.
mean_pear = round(np.mean(pearson_list), 4)
mean_corc = round(np.mean(concord_list), 4)

In [None]:
# Mean per-trial Pearson correlation, rounded to 4 decimals.
mean_subj = round(np.mean(corrlist),4)

# Append the result to ./logfile. The logging statements were indented as if they were
# inside a function, which made the cell unparsable; they now run at top level.
logging.basicConfig(level=logging.INFO, filename="logfile", filemode="a+",
                    format="%(asctime)-15s %(levelname)-8s %(message)s")
# __file__ is not defined inside a notebook kernel, so fall back to a placeholder name.
script_name = os.path.basename(globals().get("__file__", "notebook"))
# NOTE(review): the original referenced an undefined subj_; string_subject (the identifier
# used to build the subject's CSV path) is assumed to be the intended value - confirm.
logging.info("Mean corr coeff eda-hr using subj: " + str(string_subject) + " " + str(round(mean_subj, 2)) +
             " script: " + script_name + " latent space dims: " + str(K))
