In [14]:
import logging
import os.path

In [15]:
import numpy as np
import pymc as pm
import arviz as az
import aesara.tensor as at
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from scipy import stats
import pandas as pd
import warnings
import extract_correct_csv

In [16]:
warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=RuntimeWarning)

In [17]:
# Shared scikit-learn StandardScaler instance (unfitted at this point).
# NOTE(review): no later line in this section calls `scaler`; confirm whether
# the feature tables are meant to be standardised before they reach the model.
scaler = StandardScaler()

In [18]:
# Subject identifiers returned by the project helper
# (presumably only subjects with valid recordings; see extract_correct_csv).
prova_3_subj = extract_correct_csv.extract_only_valid_subject()

# Candidate latent-space sizes K to iterate over: 1 through 9 inclusive.
valid_k_list = [*range(1, 10)]

In [19]:
# Start with two independent, empty accumulators: one for pooled labels and
# one for pooled subject ids.
global_e_labels, global_subject = [], []

In [20]:
# Number of leading rows dropped from every per-subject table (labels, HR, EDA,
# pupil features and subject ids) before the train/test split is made.
num_trials_to_remove: int = 48

In [21]:
# Share of each subject's retained rows held out for testing; the leading
# (1 - TEST_PERC) share of the rows is used for training.
TEST_PERC: float = 0.25            # i.e. 1 - TRAIN_PERC

In [22]:
# Append INFO-level (and above) records to the pooled-model log file.
# The parent directory is created first: opening the log file raises
# FileNotFoundError when "log/complete_pooled" does not exist yet.
os.makedirs("log/complete_pooled", exist_ok=True)
logging.basicConfig(level=logging.INFO, filename="log/complete_pooled/pooled_label_newsplit", filemode="a+",
                    format="%(asctime)-15s %(levelname)-8s %(message)s")

In [23]:
 # (160 - discarded leading rows) multiplied by the number of valid subjects.
 # NOTE(review): 160 is presumably the number of trials recorded per subject,
 # making TRIAL the expected pooled row count - confirm and name the constant.
 # NOTE(review): TRIAL is not referenced by any later line in this section.
 # NOTE(review): this cell begins with a stray leading space. IPython dedents
 # cell input so it runs in the notebook, but the line would raise
 # IndentationError if the notebook were exported to a plain .py script.
 TRIAL = (160 - num_trials_to_remove)* len(prova_3_subj)

In [32]:
# Pooled ("complete pooling") supervised-PPCA pipeline.
#
# Stage 1 reads, for every subject in `prova_3_subj`, the rating labels and the
# HR / EDA / pupil feature tables, drops the first `num_trials_to_remove` rows,
# splits each table chronologically into train/test using TEST_PERC, and stacks
# the per-subject pieces into pooled tables.
# Stage 2 fits, for each latent size k in `valid_k_list`, a PyMC model with ADVI
# and stores the approximate posterior on disk.

# --- Stage 1: pooled train/test tables -------------------------------------
# None of this depends on k, so it is done once instead of once per k.
global_subject = []
e_labels_train_parts, e_labels_test_parts = [], []
pupil_train_parts, pupil_test_parts = [], []
hr_train_parts, hr_test_parts = [], []
eda_train_parts, eda_test_parts = [], []

for i in prova_3_subj:
    string_subject = extract_correct_csv.read_correct_subject_csv(i)
    csv_ = 'data/LookAtMe_0' + string_subject + '.csv'
    global_data = pd.read_csv(csv_, sep='\t')

    # Binary target as a single column: 1 where rating > 2, otherwise 0.
    y = (global_data['rating'].to_numpy() > 2).astype(int)
    e_labels = y[:, np.newaxis][num_trials_to_remove:]

    # Feature tables; rows are dropped by position, matching the labels.
    hr = pd.read_csv('data/features/hr/' + str(i) + '.csv').iloc[num_trials_to_remove:]
    eda = pd.read_csv('data/features/eda/' + str(i) + '.csv').iloc[num_trials_to_remove:]
    pupil = pd.read_csv('data/features/pupil/' + str(i) + '.csv').iloc[num_trials_to_remove:]

    # Chronological split: the first N_train rows train, the remainder tests.
    # The split point is derived from the pupil table and applied to all tables.
    N_train = int(len(pupil) * (1 - TEST_PERC))

    pupil_train, pupil_test = pupil.iloc[:N_train], pupil.iloc[N_train:]
    hr_train, hr_test = hr.iloc[:N_train], hr.iloc[N_train:]
    eda_train, eda_test = eda.iloc[:N_train], eda.iloc[N_train:]
    e_labels_train, e_labels_test = e_labels[:N_train], e_labels[N_train:]

    # Subject id of every training row, kept as a list of one-element lists.
    subject = np.array(global_data['subject'].tolist())[:, np.newaxis][num_trials_to_remove:]
    subject_train = subject[:N_train]
    global_subject.extend(subject_train.tolist())

    e_labels_train_parts.append(e_labels_train)
    e_labels_test_parts.append(e_labels_test)
    pupil_train_parts.append(pupil_train)
    pupil_test_parts.append(pupil_test)
    hr_train_parts.append(hr_train)
    hr_test_parts.append(hr_test)
    eda_train_parts.append(eda_train)
    eda_test_parts.append(eda_test)

if not e_labels_train_parts:
    raise ValueError("prova_3_subj is empty: there is no subject data to pool")

# One concatenation per table (row order and original row indices are kept),
# instead of re-copying the growing frame for every subject.
global_e_labels_train = np.concatenate(e_labels_train_parts)
global_e_labels_test = np.concatenate(e_labels_test_parts)

global_pupil_train = pd.concat(pupil_train_parts)
global_pupil_test = pd.concat(pupil_test_parts)

global_hr_train = pd.concat(hr_train_parts)
global_hr_test = pd.concat(hr_test_parts)

global_eda_train = pd.concat(eda_train_parts)
global_eda_test = pd.concat(eda_test_parts)

N_pupil, D_pupil = global_pupil_train.shape
N_hr, D_hr = global_hr_train.shape
N_eda, D_eda = global_eda_train.shape
N_e, D_e = global_e_labels_train.shape

# The model below ties all modalities to one 'physio_n' axis and ties HR and
# EDA to one 'physio_d' axis, so mismatching tables cannot be modelled.
assert N_pupil == N_hr == N_eda == N_e, "train tables must have the same number of rows"
assert D_hr == D_eda, "HR and EDA must have the same number of feature columns"

# --- Stage 2: one ADVI fit per latent dimensionality ------------------------
for k in valid_k_list:
    K = k

    with pm.Model() as sPPCA:
        sPPCA.add_coord('physio_n', np.arange(N_hr), mutable=True)
        sPPCA.add_coord('physio_d', np.arange(D_hr), mutable=False)
        sPPCA.add_coord('e_label_d', np.arange(D_e), mutable=True)
        sPPCA.add_coord('K', np.arange(K), mutable=True)
        sPPCA.add_coord('pupil_d', np.arange(D_pupil), mutable=True)

        # Observed data, stored transposed as (features, rows).
        hr_data = pm.MutableData("hr_data", global_hr_train.T, dims=['physio_d', 'physio_n'])
        pupil_data = pm.MutableData("pupil_data", global_pupil_train.T, dims=['pupil_d', 'physio_n'])
        eda_data = pm.MutableData("eda_data", global_eda_train.T, dims=['physio_d', 'physio_n'])

        # Weight (loading) matrices, one per modality.
        Whr = pm.Normal('Whr', mu=0, sigma=2.0 * 1, dims=['physio_d', 'K'])
        Wpupil = pm.Normal('Wpupil', mu=0, sigma=2.0 * 1, dims=['pupil_d', 'K'])
        Weda = pm.Normal('Weda', mu=0, sigma=2.0 * 1, dims=['physio_d', 'K'])

        # Weight matrix for pain expectation.
        # TODO (from the original author): check mu, sigma, shape.
        We = pm.Normal('W_e', mu=0, sigma=2.0 * 1, dims=['e_label_d', 'K'])

        # Latent space: one K-dimensional vector per pooled training row.
        c = pm.Normal('c', mu=0, sigma=1, dims=['physio_n', 'K'])

        # HR features modelled as Gaussian around Whr . c^T.
        mu_hr = pm.Normal('mu_hr', Whr.dot(c.T), 1, dims=['physio_d', 'physio_n'])  # hyperprior 1
        sigma_hr = pm.Exponential('sigma_hr', 1)  # hyperprior 2
        x_hr = pm.Normal('x_hr', mu=mu_hr, sigma=sigma_hr, observed=hr_data, dims=['physio_d', 'physio_n'])

        # Pupil-dilation features modelled as Gaussian around Wpupil . c^T.
        mu_pupil = pm.Normal('mu_pupil', Wpupil.dot(c.T), 1, dims=['pupil_d', 'physio_n'])  # hyperprior 1
        sigma_pupil = pm.Exponential('sigma_pupil', 1)  # hyperprior 2
        x_pupil = pm.Normal('x_pupil', mu=mu_pupil, sigma=sigma_pupil, dims=['pupil_d', 'physio_n'],
                            observed=pupil_data)

        # EDA features modelled as Gaussian around Weda . c^T.
        mu_eda = pm.Normal('mu_eda', Weda.dot(c.T), 1, dims=['physio_d', 'physio_n'])  # hyperprior 1
        sigma_eda = pm.Exponential('sigma_eda', 1)  # hyperprior 2
        x_eda = pm.Normal('x_eda', mu=mu_eda, sigma=sigma_eda, dims=['physio_d', 'physio_n'], observed=eda_data)

        # Pain expectation: the quantity to infer given c.
        # Two options were considered: binary or multiclass (1-4); this is the
        # binary one, with success probability sigmoid(We . c^T).
        x_e = pm.Bernoulli('x_e', p=pm.math.sigmoid(We.dot(c.T)), dims=['e_label_d', 'physio_n'],
                           observed=global_e_labels_train.T)

    # Create the output directory before the (long) fit so that the result
    # cannot be lost to a missing-directory error when it is saved.
    name = 'complete_pooled/ADVI_100000_k' + str(k) + '_allsubjects_'
    os.makedirs(os.path.dirname(name), exist_ok=True)

    with sPPCA:
        approx = pm.fit(100000, callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-4)])
        trace = approx.sample(500)
    # The stored posterior can be reloaded with
    # xarray.open_dataset(name + 'posterior.h5', engine='scipy').
    trace.posterior.to_netcdf(name + 'posterior.h5', engine='scipy')

    with sPPCA:
        posterior_pred = pm.sample_posterior_predictive(
            trace, var_names=["x_e"], random_seed=123)

    # NOTE(review): this `break` looks like a debugging leftover. It stops the
    # loop after the first k, so the test-set evaluation below never executes.
    # It is kept on purpose; see the note inside the evaluation before removing it.
    break

    with sPPCA:
        # Swap the observed predictors for the held-out rows.
        # NOTE(review): the latent `c` in `trace` was sampled for the training
        # rows only; confirm that resizing 'physio_n' to the test length yields
        # valid predictions for `x_e` before enabling this code.
        sPPCA.set_data("hr_data", global_hr_test.T, coords={'physio_n': range(global_hr_test.shape[0])})
        sPPCA.set_data("pupil_data", global_pupil_test.T, coords={'physio_n': range(global_pupil_test.shape[0])})
        sPPCA.set_data("eda_data", global_eda_test.T, coords={'physio_n': range(global_eda_test.shape[0])})
        # Use the updated values to predict outcomes and probabilities.
        posterior_predictive = pm.sample_posterior_predictive(
            trace, var_names=["x_e"], random_seed=123, predictions=True)

    # Majority vote over the draws of the first chain gives one label per row.
    e_pred = posterior_predictive.predictions['x_e']
    e_pred_mode = np.squeeze(stats.mode(e_pred[0], keepdims=False)[0])[:, np.newaxis]

    test_accuracy_exp = accuracy_score(global_e_labels_test, e_pred_mode)
    # `__file__` is not defined inside a notebook kernel; fall back to a
    # placeholder instead of raising NameError while logging the result.
    script_name = os.path.basename(globals().get('__file__', 'notebook'))
    logging.info("ADVI_100000 Test Acc Pain Expect: " + str(test_accuracy_exp) + " script: " +
                 script_name + ", ft extr HR e EDA: wav" +
                 ', ft extr PUP: mean, lat space dims: ' + str(K))

1428
5
1428
60
1428
60
1428
1


qt5ct: using qt5ct plugin
Invalid Context= "Apps" line for icon theme:  "/usr/share/icons/Mint-Y/apps/16/"
Invalid Context= "Apps" line for icon theme:  "/usr/share/icons/Mint-Y/apps/16@2x/"
Invalid Context= "Mimetypes" line for icon theme:  "/usr/share/icons/Mint-Y/mimetypes/16/"
Invalid Context= "Mimetypes" line for icon theme:  "/usr/share/icons/Mint-Y/mimetypes/16@2x/"
Invalid Context= "Apps" line for icon theme:  "/usr/share/icons/Mint-Y/apps/22/"
Invalid Context= "Apps" line for icon theme:  "/usr/share/icons/Mint-Y/apps/22@2x/"
Invalid Context= "Apps" line for icon theme:  "/usr/share/icons/Mint-Y/apps/24/"
Invalid Context= "Apps" line for icon theme:  "/usr/share/icons/Mint-Y/apps/24@2x/"
Invalid Context= "Mimetypes" line for icon theme:  "/usr/share/icons/Mint-Y/mimetypes/24/"
Invalid Context= "Mimetypes" line for icon theme:  "/usr/share/icons/Mint-Y/mimetypes/24@2x/"
Invalid Context= "Apps" line for icon theme:  "/usr/share/icons/Mint-Y/apps/32/"
Invalid Context= "Apps" line

Interrupted at 1,124 [11%]: Average Loss = 2.1463e+11

KeyboardInterrupt

