In [1]:
import pymc as pm
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit
from scipy import stats
from sklearn.metrics import accuracy_score
import aesara.tensor as at

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

In [3]:
sub = 2

X = pd.read_csv('../data/features_4_2/hr/'+str(sub)+'.csv')
X = scaler.fit_transform(X)
X = pd.DataFrame(X)
X = X[48:]

df_ = pd.read_csv('../data/LookAtMe_002.csv', sep='\t')
y = np.array(list([int(d > 2) for d in df_['rating']]))[:, np.newaxis]
y = pd.DataFrame(y)
y = y[48:]

sss = StratifiedShuffleSplit(n_splits=3, test_size=0.2, random_state=123)

for i, (train_index, test_index) in enumerate(sss.split(X, y)):
    X_train = X.iloc[train_index,:]
    y_train = y.iloc[train_index,:]

    X_test = X.iloc[test_index,:]
    y_test = y.iloc[test_index,:]
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

    with pm.Model() as GLM:
        x_data = pm.MutableData('x_data', X_train)
        y_data = pm.MutableData('y_data', y_train)

        #intercept = pm.Normal("intercept", 0, 1, shape=y_data.shape)
        slope = pm.Normal("slope", shape=(x_data.shape[1], 1))
        likelihood = pm.Bernoulli('likelihood', p=pm.math.sigmoid(at.dot(x_data, slope)), observed=y_data)
        approx = pm.fit(100000, callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-4)])
        trace = approx.sample(1000)
        posterior_predictive = pm.sample_posterior_predictive(
            trace, var_names=["likelihood"], random_seed=123)
    prediction_y = posterior_predictive.posterior_predictive['likelihood']
    e_pred_mode_train = np.squeeze(stats.mode(prediction_y[0], keepdims=False)[0])[:, np.newaxis]

    train_accuracy_exp = accuracy_score(y_train, e_pred_mode_train)
    print(train_accuracy_exp)

    with GLM:
        pm.set_data({'x_data': X_test})
        pm.set_data({'y_data': y_test})
        posterior_pred = pm.sample_posterior_predictive(trace, var_names=['likelihood'], random_seed=123, predictions=True)

    prediction_y_test = posterior_pred.predictions['likelihood']
    e_pred_mode_test = np.squeeze(stats.mode(prediction_y_test[0], keepdims=False)[0])[:, np.newaxis]

    print(e_pred_mode_test.shape)

    test_accuracy_exp = accuracy_score(y_test, e_pred_mode_test)

    print(test_accuracy_exp)

    break

(89, 24) (89, 1) (23, 24) (23, 1)


Finished [100%]: Average Loss = 94.105
Sampling: [likelihood]


0.5730337078651685


Sampling: [likelihood, slope]


(23, 1)
0.4782608695652174


In [4]:
a_trained = trace.posterior['slope'][0]
print(a_trained.shape)
y_new = at.matmul(X_test, np.array(a_trained))

(1000, 24, 1)


In [5]:
y_new = np.where(y_new.eval() < 0, 0, 1)
y_new_pred = stats.mode(y_new, keepdims=False)[0]
accuracy_score(y_test, y_new_pred)

0.6086956521739131