In [1]:
import pymc as pm
import aesara.tensor as at
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy.random import default_rng
from scipy import stats
from sklearn.metrics import accuracy_score


# Fixed seed for the notebook, plus a NumPy Generator constructed from that seed.
RANDOM_SEED = 31415
rng = default_rng(RANDOM_SEED)



In [2]:
# Subject to load and number of leading trials to discard from every table.
sub = 2
num_trials_to_remove = 48

# eda data
eda = pd.read_csv(f'data/features_4_2/eda/{sub}.csv')
eda = eda[num_trials_to_remove:]

# hr data
hr = pd.read_csv(f'data/features_4_2/hr/{sub}.csv')
hr = hr[num_trials_to_remove:]

# pupil data
pupil = pd.read_csv(f'data/features_4_2/pupil/{sub}.csv')
pupil = pupil[num_trials_to_remove:]

# pain expectation data: binary label, 1 when the rating is above 2, else 0
df_ = pd.read_csv(f'data/LookAtMe_00{sub}.csv', sep='\t')
df_ = df_[num_trials_to_remove:]
label = (df_['rating'] > 2).astype(int).to_numpy()
E = pd.DataFrame(label[:, np.newaxis])

# num trials (rows of the feature tables; columns are features)
N = len(eda)

# Every table is shuffled independently below with the same seed; that only keeps
# trials aligned across modalities when all tables have the same number of rows.
assert len(hr) == N and len(pupil) == N and len(E) == N, (
    "eda, hr, pupil and label tables must contain the same number of trials"
)

TRAIN_PERC = 0.70
VAL_PERC = 0.1
TEST_PERC = 0.2  # 1 - (TRAIN_PERC + VAL_PERC); the test split takes the remaining rows
N_train = int(N * TRAIN_PERC)
N_val = int(N * VAL_PERC)

# RANDOM SPLIT: same random_state for every table so rows stay aligned
pupil = pupil.sample(frac=1, random_state=0)
pupil = pupil.reset_index(drop=True).to_numpy()

hr = hr.sample(frac=1, random_state=0)
hr = hr.reset_index(drop=True).to_numpy()

eda = eda.sample(frac=1, random_state=0)
eda = eda.reset_index(drop=True).to_numpy()

e_labels = E.sample(frac=1, random_state=0)
e_labels = e_labels.reset_index(drop=True).to_numpy()

# All splits are transposed to (features, trials), matching the model's
# ("observed_*", "rows") dims.
hr_train = hr[:N_train].T
eda_train = eda[:N_train].T
pupil_train = pupil[:N_train].T
e_labels_train = e_labels[:N_train].T


hr_val = hr[N_train:N_train + N_val].T
eda_val = eda[N_train:N_train + N_val].T
pupil_val = pupil[N_train:N_train + N_val].T
e_labels_val = e_labels[N_train:N_train + N_val].T

hr_test = hr[N_train + N_val:].T
eda_test = eda[N_train + N_val:].T
pupil_test = pupil[N_train + N_val:].T
e_test = e_labels[N_train + N_val:].T


# dimensions of each signal (axis 0 after the transpose)
d_eda = eda_train.shape[0]
d_hr = hr_train.shape[0]
d_pupil = pupil_train.shape[0]
d_e = e_labels_train.shape[0]

FileNotFoundError: [Errno 2] No such file or directory: 'data/features_4_2/eda/2.csv'

In [None]:
# Display the heart-rate signal dimension computed in the data-preparation cell.
d_hr

In [None]:
# eda_val is stored transposed, so axis 1 holds the validation rows; display their count.
eda_val.shape[1]

In [None]:
def expand_packed_block_triangular(d, k, packed, diag=None, mtype="aesara"):
    """Unpack a flat vector into the lower-triangular part of a (d, k) matrix.

    Like expand_packed_triangular, but for a rectangular block with d >= k.

    Parameters
    ----------
    d, k : int
        Number of rows and columns of the result; ``d >= k`` is asserted.
    packed : vector-like
        Values written to the lower-triangular positions. When ``diag`` is
        None they fill the triangle including the main diagonal; otherwise
        they fill only the strictly-lower positions.
    diag : vector-like, optional
        Values written to the first ``k`` main-diagonal positions.
    mtype : {"aesara", "numpy"}
        Backend used to build the matrix.

    Returns
    -------
    A (d, k) matrix (symbolic for "aesara", ndarray for "numpy") that is zero
    above the main diagonal.
    """
    assert mtype in {"aesara", "numpy"}
    assert d >= k

    symbolic = mtype == "aesara"

    def _assign(matrix, index, values):
        # numpy arrays are updated in place; aesara needs a new graph node
        if not symbolic:
            matrix[index] = values
            return matrix
        return at.set_subtensor(matrix[index], values)

    if symbolic:
        result = at.zeros((d, k), dtype=float)
    else:
        result = np.zeros((d, k), dtype=float)

    if diag is not None:
        # packed covers only the strictly-lower part; the diagonal comes from diag
        strict_lower = np.tril_indices(d, k=-1, m=k)
        result = _assign(result, strict_lower, packed)
        main_diagonal = (np.arange(k), np.arange(k))
        return _assign(result, main_diagonal, diag)

    # packed covers the whole lower triangle, diagonal included
    lower = np.tril_indices(d, m=k)
    return _assign(result, lower, packed)

In [None]:
def makeW(d, k, dim_names,name):
    """Create a (d, k) loading matrix W, registered in the current PyMC model.

    W is built as ``L @ diag(cumsum(z))`` where ``L`` is a (d, k) block with a
    unit main diagonal and positive strictly-lower entries, and ``z`` is
    positive, so the column scales ``cumsum(z)`` are positive and increasing.

    Must be called inside a model context that already defines the
    "latent_columns" coordinate.

    Parameters
    ----------
    d : int
        Number of rows of W (observed dimension).
    k : int
        Number of columns of W (latent dimension).
    dim_names : tuple of str
        Model dims attached to the resulting deterministic W.
    name : str
        Name of the deterministic W; also the suffix of the helper variables
        ``"W_z_" + name`` and ``"W_b_" + name``.

    Returns
    -------
    The ``pm.Deterministic`` variable holding W.
    """
    # Number of strictly-lower-triangular entries of a (d, k) block with d >= k.
    # k * (k - 1) is always even, so integer division is exact.
    n_od = k * d - k * (k - 1) // 2 - k
    # trick: the cumulative sum of z will be positive increasing
    z = pm.HalfNormal("W_z_" + name, 1.0, dims="latent_columns")
    # No named dim here: the packed length depends on d, so one shared coordinate
    # could not fit several W matrices of different sizes, and none is registered
    # on the model. The explicit shape is sufficient.
    b = pm.HalfNormal("W_b_" + name, 1.0, shape=(n_od,))
    L = expand_packed_block_triangular(d, k, b, at.ones(k))
    W = pm.Deterministic(name, at.dot(L, at.diag(at.extra_ops.cumsum(z))), dims=dim_names)
    return W


In [None]:
# Number of latent columns; used for the "latent_columns" coordinate and the W matrices.
k = 3


In [None]:
# Joint latent-factor model: one shared latent matrix C drives three Gaussian
# signals (eda, hr, pupil) and a Bernoulli label through per-signal loading matrices.
with pm.Model() as PPCA_identified:
    # Model coordinates: latent columns, data rows, and one axis per observed signal.
    PPCA_identified.add_coord("latent_columns", np.arange(k), mutable=True)
    PPCA_identified.add_coord("rows", np.arange(N_train), mutable=True)
    PPCA_identified.add_coord("observed_eda", np.arange(d_eda), mutable=False)
    PPCA_identified.add_coord("observed_hr", np.arange(d_hr), mutable=False)
    PPCA_identified.add_coord("observed_pupil", np.arange(d_pupil), mutable=False)
    PPCA_identified.add_coord("observed_label", np.arange(d_e), mutable=False)


    # Observed signals as mutable data containers, declared as (signal dimension, rows).
    # NOTE(review): each *_train array must be laid out as (signal dimension, rows)
    # to agree with these dims — confirm against the data-preparation cell.
    hr_data = pm.MutableData("hr_data", hr_train, dims=("observed_hr", "rows"))
    eda_data = pm.MutableData("eda_data", eda_train, dims=("observed_eda", "rows"))
    pupil_data = pm.MutableData("pupil_data", pupil_train, dims=("observed_pupil", "rows"))

    # Loading matrices: eda and hr use the constrained makeW construction,
    # pupil uses a plain Normal prior (no mu/sigma passed, so PyMC defaults apply).
    W_eda = makeW(d_eda, k, ("observed_eda", "latent_columns"),'W_eda')
    W_hr = makeW(d_hr, k, ("observed_hr", "latent_columns"),'W_hr')
    W_pupil = pm.Normal("W_pupil", dims=("observed_pupil", "latent_columns"))

    # Label loadings and the latent matrix C shared by every likelihood below.
    W_e = pm.Normal("W_e", dims=("observed_label", "latent_columns"))
    C = pm.Normal("C", dims=("latent_columns", "rows"))
    # Gaussian likelihoods: mean W @ C, one scalar noise scale (psi_*) per signal.
    psi_eda = pm.HalfNormal("psi_eda", 1.0)
    X_eda = pm.Normal("X_eda", mu=at.dot(W_eda, C), sigma=psi_eda, observed=eda_data, dims=("observed_eda", "rows"))

    psi_hr = pm.HalfNormal("psi_hr", 1.0)
    X_hr = pm.Normal("X_hr", mu=at.dot(W_hr, C), sigma=psi_hr, observed=hr_data, dims=("observed_hr", "rows"))

    psi_pupil = pm.HalfNormal("psi_pupil", 1.0)
    X_pupil = pm.Normal("X_pupil", mu=at.dot(W_pupil, C), sigma=psi_pupil, observed=pupil_data, dims=("observed_pupil", "rows"))

    # Binary label likelihood: success probability is sigmoid(W_e @ C).
    # The labels are passed as a plain array rather than a MutableData container.
    X_e = pm.Bernoulli("X_e", p=pm.math.sigmoid(at.dot(W_e, C)), dims=("observed_label", "rows"), observed=e_labels_train)

In [None]:
# Build a graphviz object describing the model structure, then call its view()
# with 'PPCA example' as argument.
# NOTE(review): view() presumably renders to a file of that name and opens an
# external viewer — confirm against the graphviz package documentation.
gv = pm.model_to_graphviz(PPCA_identified)
gv.view('PPCA example')

In [None]:
with PPCA_identified:
    # Variational fit with pm.fit's default method, for at most 30000 iterations;
    # the callback stops early once parameters change by less than the tolerance.
    # The fit is seeded with the notebook-wide RANDOM_SEED so reruns are repeatable.
    approx = pm.fit(
        30000,
        callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-4)],
        random_seed=RANDOM_SEED,
    )
    # Draw 500 samples from the fitted approximation.
    trace = approx.sample(500)

In [None]:
with PPCA_identified:
    # Posterior-predictive draws for the label variable only, with a fixed seed.
    posterior_predictive = pm.sample_posterior_predictive(
        trace,
        var_names=["X_e"],
        random_seed=123,
    )

In [None]:
# Posterior-predictive draws of the label variable.
e_pred_train = posterior_predictive.posterior_predictive["X_e"]

# Most frequent predicted label along the leading axis of the first chain,
# reshaped into a single column so it lines up with the transposed training labels.
first_chain_draws = e_pred_train[0]
mode_result = stats.mode(first_chain_draws, keepdims=False)
e_pred_mode_train = np.squeeze(mode_result[0])[:, np.newaxis]

# Training accuracy of the modal prediction; the last expression is displayed.
train_accuracy_exp = accuracy_score(e_labels_train.T, e_pred_mode_train)
train_accuracy_exp