In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat

In [7]:
# Read the 'Personality' array from the .mat file and wrap it in a DataFrame
# labelled with the five trait column names.
personality = loadmat('data/Dt_Personality.mat')['Personality']
big5 = pd.DataFrame(
    personality,
    columns=['extro', 'agree', 'cons', 'stability', 'open'],
)
big5.head()

Unnamed: 0,extro,agree,cons,stability,open
0,5.4,5.9,5.2,5.1,5.2
1,3.0,5.1,4.3,3.7,4.6
2,3.9,5.4,4.4,4.8,3.9
3,2.9,4.1,5.6,5.7,4.0
4,3.2,4.8,6.1,5.7,4.7


In [104]:
# Load the ECG .mat file and pull out its two arrays by key.
ECG = loadmat('data/Dt_ECGFeatures.mat')
ECG_feats_mat = ECG['ECGFeatures_58']
ECG_fails_mat = ECG['ECGFailures_58'] # Not sure whether this one is needed

In [93]:
def unfold_mat(mat, feats=None):
    """Build one DataFrame per entry of ``mat``.

    Parameters
    ----------
    mat : iterable
        Iterated entry by entry; each entry is passed to ``pd.DataFrame``.
        As used in this notebook, each entry is one participant, with one
        row per video and one column per feature.
    feats : list of str, optional
        Column labels applied to every DataFrame. When ``None`` pandas
        assigns its default integer labels.

    Returns
    -------
    list of pandas.DataFrame
        One DataFrame per entry, in the same order as ``mat``.
    """
    return [pd.DataFrame(participant, columns=feats) for participant in mat]

def statistical_measurements(suffix):
    """Return the six statistic column names, each ending in ``_<suffix>``.

    Parameters
    ----------
    suffix : str
        Text appended (after an underscore) to every statistic name.

    Returns
    -------
    list of str
        Names in fixed order: mean, std, skewness, kurtosis,
        %time_over_mean+std, %time_over_mean-std.
    """
    stat_names = ('mean', 'std', 'skewness', 'kurtosis',
                  '%time_over_mean+std', '%time_over_mean-std')
    return [stat + '_' + suffix for stat in stat_names]

In [105]:
# Isolate ECG (Heart Monitor) features.
# Column labels, in order: 10 x 'low_freq_PSD', 4 x 'slow_response_PSD',
# then the six statistic names for each of IBI, HR and HRV.
ECG_cols = (
    ['low_freq_PSD'] * 10
    + ['slow_response_PSD'] * 4
    + statistical_measurements('IBI')
    + statistical_measurements('HR')
    + statistical_measurements('HRV')
)

# One DataFrame per entry of the first row of the ECG feature array.
ECG_feats = unfold_mat(ECG_feats_mat[0], feats=ECG_cols)
ECG_feats[0].head()

Unnamed: 0,low_freq_PSD,low_freq_PSD.1,low_freq_PSD.2,low_freq_PSD.3,low_freq_PSD.4,low_freq_PSD.5,low_freq_PSD.6,low_freq_PSD.7,low_freq_PSD.8,low_freq_PSD.9,...,skewness_HR,kurtosis_HR,%time_over_mean+std_HR,%time_over_mean-std_HR,mean_HRV,std_HRV,skewness_HRV,kurtosis_HRV,%time_over_mean+std_HRV,%time_over_mean-std_HRV
0,,,,,0.0,0.0,,,,,...,5.462701,5.173039,2.429736,1.114206,1.80892,6.739586,2.242985,-0.512542,-1.452407,0.752147
1,,,,,0.0,0.0,,,,,...,8.288437,6.699335,6.104294,3.37424,3.580793,5.524211,1.667694,1.235405,0.483075,0.979476
2,,,,,0.0,0.0,,,,,...,4.335125,6.910288,3.306808,2.661825,2.705716,6.734335,1.475662,-0.905105,-0.126162,0.996687
3,,,,,0.0,0.0,,,,,...,6.472956,4.408273,2.849756,1.988453,3.265623,6.658276,3.203142,-0.238137,0.429959,1.556095
4,,,,,0.0,0.0,,,,,...,7.806054,9.149439,5.423162,3.358165,3.213917,6.315864,0.41249,0.381669,-0.10542,1.579525


In [123]:
# Load the EEG .mat file.
EEG = loadmat('data/Dt_EEGFeatures.mat')
# Index `EEG` (not `ECG`): the EEG arrays live in the file loaded just above.
# Reading them from `ECG` only works when stale kernel state happens to hold
# EEG data under that name, and fails on a fresh Restart & Run All.
EEG_feats_mat = EEG['EEGFeatures_58']
EEG_fails_mat = EEG['EEGFailures_58']

# Isolate EEG features.
# Column labels, in order: five signal descriptors, the six statistic names
# with the 'EEG' suffix, then each of the seven labels below repeated 11 times.
EEG_cols = (
    ['avg_first_deriv', 'prop_neg_diff_samples', 'mean_num_peaks',
     'mean_deriv_inv_signal', 'avg_num_peaks_inv_signal']
    + statistical_measurements('EEG')
    + [label
       for label in ('attention', 'meditation', 'alpha', 'beta',
                     'delta', 'gamma', 'theta')
       for _ in range(11)]
)

# One DataFrame per entry of the first row of the EEG feature array.
EEG_feats = unfold_mat(EEG_feats_mat[0], feats=EEG_cols)
EEG_feats[0].head()

Unnamed: 0,avg_first_deriv,prop_neg_diff_samples,mean_num_peaks,mean_deriv_inv_signal,avg_num_peaks_inv_signal,mean_EEG,std_EEG,skewness_EEG,kurtosis_EEG,%time_over_mean+std_EEG,...,theta,theta.1,theta.2,theta.3,theta.4,theta.5,theta.6,theta.7,theta.8,theta.9
0,320.191443,82.717586,0.622098,8.653932,0.118051,0.103685,-0.035643,0.490625,0.71955,0.277515,...,57.904773,0.102995,2.389704,0.14366,0.178014,-0.057096,0.565,0.099938,1.121837,0.099938
1,327.382886,72.859283,0.591914,7.812108,0.086196,0.091818,0.032656,0.4775,0.439725,-0.305716,...,26.303046,1.0869,4.00502,0.13554,0.144285,-0.058979,0.816875,0.059963,14.585341,0.059963
2,324.605247,55.29323,-0.068616,11.302303,0.085572,0.093691,0.017319,0.5275,0.579638,-0.135228,...,16.586558,1.009831,3.539354,0.154279,0.123048,-0.003508,0.595625,0.139913,3.494689,0.119925
3,324.935041,71.568548,0.099315,6.802679,0.108682,0.097439,0.009262,0.496875,0.339788,-0.099239,...,28.373115,0.142985,1.522516,0.217364,0.358526,-0.050226,0.69125,0.0,25.02254,0.0
4,324.365084,100.403114,1.107893,9.65445,0.084322,0.111181,-0.018255,0.52625,0.6396,0.108415,...,172.520834,1.180176,3.505648,0.165522,0.061836,-0.311222,0.715,0.059963,1.625181,0.07995


In [51]:
# Show the labels of `inputs` (the recorded output below is a pandas Index).
# NOTE(review): `inputs` is not defined in any cell visible here — confirm it
# is created earlier in the notebook so Restart & Run All still works.
inputs.keys()

Index(['speaking_time', 'avg_len_speech_segs', 'num_turns???', 'mean.pitch',
       'sd.pitch', 'mean.conf.pitch', 'sd.conf.pitch', 'mean.spec.entropy',
       'sd.spec.entropy', 'mean.val.apeak', 'sd.val.apeak', 'mean.loc.apeak',
       'sd.loc.apeak', 'mean.num.apeak', 'sd.num.apeak', 'mean.energy',
       'sd.energy', 'mean.d.energy', 'sd.d.energy', 'avg.len.seg',
       'voice.rate', 'hogv.entropy', 'hogv.median', 'hogv.cogR', 'hogv.cogC',
       'gender'],
      dtype='object')