In [43]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [44]:
# Colour scheme shared by every figure in this notebook (hex RGB strings).
custom_colors = ['#784888', '#94b3c4', '#897cac', '#d8bfd4', '#f6e8e7']

# Make the colours seaborn's active palette.
sns.set_palette(custom_colors)

# Apply the matplotlib defaults in a single call: the colour cycle used for
# successive plot elements, followed by the font sizes for titles, axis
# labels, tick labels and legends.
plt.rcParams.update({
    'axes.prop_cycle': plt.cycler(color=custom_colors),
    'axes.titlesize': 35,
    'axes.labelsize': 30,
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
    'legend.fontsize': 15,
})

In [45]:
def create_dataframe_features(data, eda_sf, acc_sf, bvp_sf, tem_sf):
    """
        @brief: Generates a dataframe that contains all of the features.
                Each modality contributes one column per sample, named
                "<PREFIX>_<i>"; the column groups are ordered EDA, TEMP,
                ACC1, ACC2, ACC3, BVP.
        @param: data (dictionary): Data to create the features dataframe from.
                Must provide 'EDA', 'TEMP', 'ACC' and 'BVP' entries that can
                be indexed as [row, sample, channel]. Channel 0 is read for
                EDA, TEMP and BVP; channels 0, 1 and 2 are read for ACC.
        @param: eda_sf: Sampling frequency of electrodermal activity
                (used as the number of EDA columns)
        @param: acc_sf: Sampling frequency of acceleration
                (used as the number of columns per ACC channel)
        @param: bvp_sf: Sampling frequency of blood volume pulse
                (used as the number of BVP columns)
        @param: tem_sf: Sampling frequency of body temperature
                (used as the number of TEMP columns)
        @return: Dataframe with one row per entry along the first axis of the
                 inputs and eda_sf + tem_sf + 3 * acc_sf + bvp_sf columns
        @raises: ValueError if a modality's sample axis does not have the
                 requested number of samples
    """
    # (key in `data`, channel index, column prefix, samples per row), listed
    # in the order the column groups appear in the result.
    layout = [
        ('EDA', 0, 'EDA', eda_sf),
        ('TEMP', 0, 'TEMP', tem_sf),
        ('ACC', 0, 'ACC1', acc_sf),
        ('ACC', 1, 'ACC2', acc_sf),
        ('ACC', 2, 'ACC3', acc_sf),
        ('BVP', 0, 'BVP', bvp_sf),
    ]

    frames = []
    for key, channel, prefix, width in layout:
        values = data[key][:, :, channel]
        # Fail with a message that names the offending modality instead of
        # relying on pandas' generic shape-mismatch error.
        if values.shape[1] != width:
            raise ValueError(
                f"{key} channel {channel} has {values.shape[1]} samples per row, "
                f"expected {width}"
            )
        names = [f"{prefix}_{i}" for i in range(width)]
        frames.append(pd.DataFrame(values, columns=names))

    return pd.concat(frames, axis=1)

In [46]:
def create_dataframe_labels(data):
    """
        @brief: Wraps the stress labels in a single-column dataframe
        @param: data (dictionary): Source whose 'labels' entry holds the labels
        @return: Dataframe with one column named "stress"
    """
    stress_values = data['labels']
    return pd.DataFrame(data=stress_values, columns=["stress"])

In [47]:
def create_dataframe_ids(data):
    """
        @brief: Wraps the participants' ids in a single-column dataframe
        @param: data (dictionary): Source whose 'id' entry holds the ids
        @return: Dataframe with one column named "id"
    """
    participant_ids = data['id']
    return pd.DataFrame(data=participant_ids, columns=["id"])

In [48]:
# Names of the sensor modalities; they match the keys that
# create_dataframe_features reads from its data dictionary.
modalities = ['ACC', 'BVP', 'EDA', 'TEMP']

#sampling frequencies
# The *_WE values are passed to create_dataframe_features for the WESAD data,
# where each one is used as the number of columns generated for its modality.
ACC_WE = 32
BVP_WE = 64
EDA_WE = 4
TEMP_WE = 4

# NOTE(review): each *_AD value is exactly 60x its *_WE counterpart, so these
# look like samples per longer window rather than raw sampling rates --
# presumably for a second dataset; not used in the visible cells, confirm.
ACC_AD = 1920
BVP_AD = 3840
EDA_AD = 240
TEMP_AD = 240

In [49]:
#Load data
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
wesad = pd.read_pickle('../data/wesad/All_ID.pkl')

#Create the dataframe of the features
wesad_features = create_dataframe_features(wesad["data"], EDA_WE, ACC_WE, BVP_WE, TEMP_WE)

#Create the dataframe of the labels
wesad_labels = create_dataframe_labels(wesad)

#Create the dataframe of the ids
wesad_ids = create_dataframe_ids(wesad)

#Check that the three frames describe the same rows: pd.concat(axis=1) aligns
#on the index and pads a shorter frame with NaN instead of failing
assert len(wesad_ids) == len(wesad_features) == len(wesad_labels), (
    f"row count mismatch: ids={len(wesad_ids)}, "
    f"features={len(wesad_features)}, labels={len(wesad_labels)}"
)

#Concatenate ids, features and labels
wesad_dataset = pd.concat([wesad_ids, wesad_features, wesad_labels], axis = 1)

wesad_dataset

Unnamed: 0,id,EDA_0,EDA_1,EDA_2,EDA_3,TEMP_0,TEMP_1,TEMP_2,TEMP_3,ACC1_0,...,BVP_55,BVP_56,BVP_57,BVP_58,BVP_59,BVP_60,BVP_61,BVP_62,BVP_63,stress
0,2,5452.450365,5366.846848,5264.122627,5115.746165,10279.722091,10279.722091,10279.722091,10279.722091,126.326243,...,-104.371386,-124.493001,-136.216532,-137.408002,-128.387507,-111.782506,-91.536409,-71.450360,-54.013997,0.0
1,2,5264.122627,5115.746165,5127.156998,5013.021944,10279.722091,10279.722091,10279.722091,10279.722091,90.734562,...,40.556707,36.106477,30.326959,25.850055,23.818332,23.653838,23.849453,23.195923,21.515416,0.0
2,2,5127.156998,5013.021944,5372.556718,5235.591090,10279.722091,10279.722091,10279.722091,10279.722091,-27.904372,...,59.620226,48.839201,38.658357,31.562887,28.882079,28.415272,27.312718,22.711332,13.432982,0.0
3,2,5372.556718,5235.591090,5184.228979,5155.692989,10279.722091,10279.722091,10464.748540,10464.748540,-27.904372,...,-92.892373,-97.182554,-94.319469,-83.391734,-65.897576,-45.375840,-26.334550,-12.494826,-5.474934,0.0
4,2,5184.228979,5155.692989,5115.746165,5081.504758,10464.748540,10464.748540,10464.748540,10464.748540,-39.768266,...,-6.795332,-4.105633,-2.803019,-2.358440,-2.225067,-2.042790,-1.833838,-1.958320,-2.878597,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78761,17,-1141.515991,-1141.515991,-1146.439053,-1141.515991,-11412.917628,-11802.683112,-11802.683112,-11802.683112,57.652528,...,13.827887,13.207938,12.701858,12.233733,11.664393,10.917924,9.918415,8.640562,7.097016,0.0
78762,17,-1146.439053,-1141.515991,-1141.515991,-1141.515991,-11802.683112,-11802.683112,-11802.683112,-11802.683112,57.652528,...,-28.632268,-26.405514,-23.444943,-20.092160,-16.562248,-12.918468,-9.186125,-5.339913,-1.519005,0.0
78763,17,-1141.515991,-1141.515991,-1136.589079,-1151.365964,-11802.683112,-11802.683112,-11802.683112,-11802.683112,57.652528,...,12.448817,12.195777,11.816217,11.322788,10.753448,10.146151,9.526202,8.868298,8.045917,0.0
78764,17,-1136.589079,-1151.365964,-1141.515991,-1141.515991,-11802.683112,-11802.683112,-11802.683112,-11412.917628,57.652528,...,-9.110212,-7.427495,-5.491737,-3.189070,-0.544800,2.213339,4.908218,7.350056,9.450290,0.0


In [50]:
# Count how many rows fall into each value of the stress label
wesad_dataset['stress'].value_counts()

stress
0.0    58834
1.0    19932
Name: count, dtype: int64

In [51]:
# Persist the assembled dataset (ids, features, stress label) in the same
# directory as the source pickle
wesad_dataset.to_pickle('../data/wesad/wesad_dataset.pkl')