In [1]:
import pymc as pm
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import extract_correct_csv

print(f"PyMC version: {pm.__version__}")


PyMC version: 4.2.2


## Ridge classifier with sklearn

In [2]:
valid_subject = extract_correct_csv.extract_only_valid_subject()
valid_subject.remove(49)
valid_subject

[2, 4, 10, 41, 43, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55]

In [14]:
columns = ['subject', 'type', 'train accuracy', 'test accuracy']
results = pd.DataFrame(columns = columns)
index = 1
for x in valid_subject:
    string_sub = extract_correct_csv.read_correct_subject_csv(x)
    df_ = pd.read_csv('data/LookAtMe_0'+string_sub+'.csv', sep='\t')
    y = np.array(list([int (d > 2) for d in df_['rating']]))
    y = y[48:]
    types_ = ['hr', 'eda', 'pupil']
    for type_ in types_:
        X = pd.read_csv('data/features_4_2/'+type_+'/45.csv')
        X = X[48:]
        # creating train and test
        X_train, X_test,\
        y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=123, stratify=y)
        clf = RidgeClassifier()
        clf.fit(X_train, y_train)
        train_ = clf.score(X_train, y_train)
        test_ = clf.score(X_test, y_test)
        dict_ = {'subject': x, 'type': type_, 'train accuracy': train_, 'test accuracy': test_}
        results = pd.concat([results, pd.DataFrame(data=dict_, index=np.arange(1))], ignore_index=True)
        index = index + 1

In [15]:
results.loc[results.groupby(by=['subject'])['train accuracy'].idxmax()]

Unnamed: 0,subject,type,train accuracy,test accuracy
1,2,eda,0.707865,0.652174
5,4,pupil,0.685393,0.608696
7,10,eda,0.741573,0.608696
10,41,eda,0.820225,0.782609
13,43,eda,0.820225,0.869565
17,44,pupil,0.696629,0.652174
19,45,eda,0.853933,0.782609
23,46,pupil,0.775281,0.434783
25,47,eda,0.775281,0.608696
28,48,eda,0.752809,0.565217


In [16]:
results.groupby(by='type')['test accuracy'].mean()

type
eda      0.652174
hr       0.518841
pupil    0.571014
Name: test accuracy, dtype: float64

# Multi physio classifier

In [17]:
subjects = extract_correct_csv.extract_only_valid_subject()

df_ = pd.read_csv('data/LookAtMe_002.csv', sep='\t')
y = np.array(list([int (d > 2) for d in df_['rating']]))
y = y[48:]

mean_train = []
mean_test = []
scaler = StandardScaler()

for sub in subjects:
    X1 = pd.read_csv('data/features_4_2/hr/'+str(sub)+'.csv')
    X1 = pd.DataFrame(scaler.fit_transform(X1))
    X2 = pd.read_csv('data/features_4_2/eda/'+str(sub)+'.csv')
    X2 = pd.DataFrame(scaler.fit_transform(X2))
    X3 = pd.read_csv('data/features_4_2/pupil/'+str(sub)+'.csv')
    X3 = pd.DataFrame(scaler.fit_transform(X3))
    X = pd.concat([X1, X2, X3], axis=1)
    X = X[48:]
    X_train, X_test,y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123, stratify=y)

    classifier = RidgeClassifier()
    classifier.fit(X_train, y_train)
    mean_train.append(classifier.score(X_train, y_train))
    mean_test.append(classifier.score(X_test,y_test))

In [18]:
np.mean(np.array(mean_train))

0.8096497025776602

In [19]:
np.mean(np.array(mean_test))

0.6138107416879796

# MLP

In [20]:
from sklearn.neural_network import MLPClassifier
valid_subject = extract_correct_csv.extract_only_valid_subject()
valid_subject.remove(50)
valid_subject.remove(51)

mean_MLP_train = []
mean_MLP_test = []
for sub in valid_subject:
    string_sub = extract_correct_csv.read_correct_subject_csv(sub)
    df_ = pd.read_csv('data/LookAtMe_0'+string_sub+'.csv', sep='\t')
    y = np.array(list([int (d > 2) for d in df_['rating']]))
    y = y[48:]


    X1 = pd.read_csv('data/features_4_2/hr/'+str(sub)+'.csv')
    X1 = pd.DataFrame(scaler.fit_transform(X1))
    X2 = pd.read_csv('data/features_4_2/eda/'+str(sub)+'.csv')
    X2 = pd.DataFrame(scaler.fit_transform(X2))
    X3 = pd.read_csv('data/features_4_2/pupil/'+str(sub)+'.csv')
    X3 = pd.DataFrame(scaler.fit_transform(X3))

    X = pd.concat([X1, X2, X3], axis=1)
    X = X[48:]
    X_train, X_test,y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=123, stratify=y)
    clf = MLPClassifier(hidden_layer_sizes=100, max_iter=700, learning_rate='adaptive', random_state=123).fit(X_train, y_train)
    mean_MLP_train.append(clf.score(X_train, y_train))
    mean_MLP_test.append(clf.score(X_test, y_test))

In [21]:
np.mean(np.array(mean_MLP_train))

1.0

In [22]:
np.mean(np.array(mean_MLP_test))

0.6627450980392156

In [23]:
np.std(np.array(mean_MLP_test))

0.07583952786593612