In [70]:
import json
import pandas as pd
import os
import numpy as np
import scipy.signal as sig
from biosppy.signals.bvp import bvp
from biosppy.signals.eda import eda
import matplotlib.pyplot as plt
%matplotlib qt

### Data Preparation

![dataset_sampling_rates.png](Images/dataset_sampling_rates.png)

Notes About Signal Preparation:
- All Empatica signals were upsampled to match the number of Muse samples so that the data can be fed into a CNN model consistently. This means that the sampling frequency used across those signals is 256 Hz.
- Samsung watch signals are not included at the moment because their sampling rate is inconsistent with that of the Muse headset.
- Important modifications before inputting data into the model:
    - Make sure to center the data at zero
    - Standardize data when appropriate (such as for filtered EEG data)
    - Use timestamps to align the signals to the stimuli, but do not input them into the model
    - Data augmentations to keep in mind: flipping the signal, making the overlap between signals larger

In [3]:
# Load a single Muse JSON recording (participant folder 22, AMUSEMENT stimulus) into
# `data` for interactive inspection.
# NOTE(review): absolute, machine-specific path -- the cell only runs where this file exists.
with open(r"C:\Users\nelso\Desktop\18065 Project Data\Emognition\22\22_AMUSEMENT_STIMULUS_MUSE.json") as json_file:
    data = json.load(json_file)

In [92]:
def eeg_analysis(eeg_data):
    """Turn a parsed Muse recording into a float32 DataFrame.

    Parameters
    ----------
    eeg_data : dict
        Mapping of column name to a sequence of values; it must contain a
        "TimeStamp" entry.

    Returns
    -------
    (pandas.DataFrame, int)
        The frame with every column except "TimeStamp" cast to float32 and
        "TimeStamp" re-attached, unchanged, as the last column; and the
        number of rows in that frame.
    """
    raw_frame = pd.DataFrame.from_dict(eeg_data)
    # The timestamp column is kept out of the cast so that only the remaining
    # columns are converted to float32.
    numeric_frame = raw_frame.drop(columns="TimeStamp").astype("float32")
    numeric_frame["TimeStamp"] = raw_frame["TimeStamp"]
    return numeric_frame, len(numeric_frame)

def bvp_analysis(data,eeg_len,do_plot = False,sampling_rate = 256):
    """Resample the BVP trace to the EEG length and derive a heart-rate trace.

    Parameters
    ----------
    data : dict
        Parsed Empatica JSON. ``data['BVP']`` is read as a 2-D array-like and
        only its last column is used (presumably the remaining column(s) hold
        timestamps -- confirm against the dataset description).
    eeg_len : int
        Number of samples of the matching EEG recording; both returned arrays
        have exactly this length.
    do_plot : bool, optional
        Forwarded to biosppy's ``bvp`` as ``show``; when true an additional
        figure of the resampled heart rate is drawn.
    sampling_rate : float, optional
        Sampling rate in Hz assumed for the resampled signal, used both for
        the biosppy call and for the time axis of the plot. Defaults to 256,
        the value that used to be hard-coded.

    Returns
    -------
    bvp_data : numpy.ndarray
        BVP values resampled to ``eeg_len`` samples.
        NOTE(review): this is the resampled input, not the filtered signal
        that biosppy also returns -- confirm this is what callers expect.
    heart_rate : numpy.ndarray
        biosppy's heart-rate output resampled to ``eeg_len`` samples.
    """
    bvp_data = np.array(data['BVP'])[:,-1].astype('float32')
    bvp_data = sig.resample(bvp_data,eeg_len)
    # Only the last element of biosppy's result (the heart rate) is kept.
    _, _, _, _, heart_rate = bvp(bvp_data,sampling_rate,show=do_plot)
    # NOTE(review): the heart-rate series is stretched to eeg_len samples regardless of
    # the time span it originally covered -- confirm that this alignment is acceptable.
    heart_rate = sig.resample(heart_rate,eeg_len)

    if do_plot:
        plt.figure()
        plt.plot(np.arange(len(heart_rate))/sampling_rate,heart_rate)
        plt.title("Heart Rate Interpolated")
        plt.xlabel("Time (s)")
        plt.ylabel("Heart Rate (bpm)")
    return bvp_data,heart_rate

# EDA Signal Analysis
def eda_analysis(data,eeg_len,do_plot = False,sampling_rate = 256):
    """Resample the EDA trace to the EEG length and return biosppy's filtered signal.

    Parameters
    ----------
    data : dict
        Parsed Empatica JSON. ``data['EDA']`` is read as a 2-D array-like and
        only its last column is used (presumably the remaining column(s) hold
        timestamps -- confirm against the dataset description).
    eeg_len : int
        Number of samples of the matching EEG recording; the EDA trace is
        resampled to this length before filtering.
    do_plot : bool, optional
        Forwarded to biosppy's ``eda`` as ``show``.
    sampling_rate : float, optional
        Sampling rate in Hz assumed for the resampled signal when calling
        biosppy. Defaults to 256, the value that used to be hard-coded.

    Returns
    -------
    filtered_eda
        The second element of biosppy's ``eda`` result (the filtered signal).
    """
    eda_data = np.array(data['EDA'])[:,-1].astype('float32')
    eda_data = sig.resample(eda_data,eeg_len)
    # Only the filtered signal is kept; the other four outputs are discarded.
    _, filtered_eda, _, _, _ = eda(eda_data,sampling_rate,show=do_plot)
    return filtered_eda

# To test eeg_analysis data frame generation
# with open(r"C:\Users\nelso\Desktop\18065 Project Data\Emognition\22\22_AMUSEMENT_STIMULUS_MUSE.json") as json_file:
#     eeg_data = json.load(json_file)
#     eeg_df, eeg_sig_len = eeg_analysis(eeg_data)

# To test the processing of the other physiological signals
# with open(r"C:\Users\nelso\Desktop\18065 Project Data\Emognition\22\22_AMUSEMENT_STIMULUS_EMPATICA.json") as json_file:
#     data = json.load(json_file)
# BVP signal analysis:
#
# bvp_analysis(data,eeg_sig_len,True)
#
# eda_analysis(data,eeg_sig_len,True)

In [94]:
# Root folder of the Emognition data; the loading loop walks this folder and treats its
# sub-folders as participants.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
file_loc = r"C:\Users\nelso\Desktop\18065 Project Data\Emognition"

In [106]:
# Stimulus conditions to load. "BASELINE" used to appear twice in this list, which made
# every participant's baseline files get parsed and processed a second time.
emotions = ["BASELINE","AMUSEMENT","ANGER","AWE","DISGUST","ENTHUSIASM","FEAR","LIKING","NEUTRAL","SADNESS","SURPRISE"]
devices_of_interest = ["MUSE","EMPATICA"] #MUSE must be loaded first!
Testing = False  # when True, the analysis helpers draw their diagnostic plots
per_user_labels = {}        # participant id -> {stimulus -> questionnaire emotion ratings}
per_user_stimulus_arr = {}  # participant id -> {stimulus -> DataFrame of EEG + Empatica signals}

# Participant folders sit directly under file_loc, so only the first level of the walk is
# used. Descending further would treat any nested folder as a participant and would
# typically fail on its missing questionnaire file.
participant_dirs = next(os.walk(file_loc), (file_loc, [], []))[1]
for d in participant_dirs:
    # print("Directory: ", d)
    participant = os.path.join(file_loc,d)

    # Map user to label in dictionary
    emotion_dict = {e:{} for e in emotions}
    labels_loc = os.path.join(participant, d + "_QUESTIONNAIRES.json")
    with open(labels_loc) as json_file:
        quest_results = json.load(json_file)
    for condition in quest_results['questionnaires']:
        cond_type = condition["movie"]
        emotion_dict[cond_type] = condition["emotions"]
    per_user_labels[d] = emotion_dict
    per_user_stimulus_arr[d] = {}

    for emotion in emotions:
        for device in devices_of_interest:
            data_loc = os.path.join(participant, f"{d}_{emotion}_STIMULUS_{device}.json")

            # Only the file access is guarded, so errors raised while processing the
            # signals propagate unchanged.
            try:
                with open(data_loc) as json_file:
                    data = json.load(json_file)
            except FileNotFoundError:
                # A missing recording ends this stimulus: without the MUSE file there is
                # no EEG length to resample the EMPATICA signals to.
                break

            if device == "MUSE":
                per_user_stimulus_arr[d][emotion],eeg_sig_len = eeg_analysis(data)
            elif device == "EMPATICA":
                bvp_data, heart_rate = bvp_analysis(data,eeg_sig_len,do_plot=Testing)
                # NOTE(review): bvp_analysis returns the resampled BVP trace rather than
                # biosppy's filtered output, so the "BVP_Filt" name may be misleading.
                per_user_stimulus_arr[d][emotion]["BVP_Filt"] = bvp_data
                per_user_stimulus_arr[d][emotion]["HR_EMPATICA"] = heart_rate
                filt_eda = eda_analysis(data,eeg_sig_len,do_plot = Testing)
                per_user_stimulus_arr[d][emotion]["EDA_EMPATICA"] = filt_eda

KeyboardInterrupt: 

In [104]:
# Display participant 22's BASELINE frame: the EEG columns plus the BVP_Filt, HR_EMPATICA
# and EDA_EMPATICA columns attached by the loading loop.
per_user_stimulus_arr["22"]["BASELINE"]

Unnamed: 0,Delta_TP9,Delta_AF7,Delta_AF8,Delta_TP10,Theta_TP9,Theta_AF7,Theta_AF8,Theta_TP10,Alpha_TP9,Alpha_AF7,...,HeadBandOn,HSI_TP9,HSI_AF7,HSI_AF8,HSI_TP10,Battery,TimeStamp,BVP_Filt,HR_EMPATICA,EDA_EMPATICA
0,0.647016,-0.085379,0.145839,0.693771,0.72223,-0.219881,-0.246561,0.762460,0.959443,0.131083,...,1.0,1.0,1.0,1.0,1.0,100.0,2020-07-16T09:12:47.018000,294.908569,84.438228,3.290601
1,0.647016,-0.085379,0.145839,0.693771,0.72223,-0.219881,-0.246561,0.762460,0.959443,0.131083,...,1.0,1.0,1.0,1.0,1.0,100.0,2020-07-16T09:12:47.019000,336.434235,84.418796,3.290523
2,0.647016,-0.085379,0.145839,0.693771,0.72223,-0.219881,-0.246561,0.762460,0.959443,0.131083,...,1.0,1.0,1.0,1.0,1.0,100.0,2020-07-16T09:12:47.020000,350.648682,84.398962,3.290446
3,0.647016,-0.085379,0.145839,0.693771,0.72223,-0.219881,-0.246561,0.762460,0.959443,0.131083,...,1.0,1.0,1.0,1.0,1.0,100.0,2020-07-16T09:12:47.021000,345.229523,84.378732,3.290373
4,0.647016,-0.085379,0.145839,0.693771,0.72223,-0.219881,-0.246561,0.762460,0.959443,0.131083,...,1.0,1.0,1.0,1.0,1.0,100.0,2020-07-16T09:12:47.022000,332.881897,84.358110,3.290303
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81537,0.615062,1.191643,0.831269,-0.083804,0.14550,0.610797,0.448910,0.053056,0.425060,0.229382,...,1.0,2.0,1.0,1.0,2.0,100.0,2020-07-16T09:18:04.983000,-29.152262,84.529231,2.384798
81538,0.615062,1.191643,0.831269,-0.083804,0.14550,0.610797,0.448910,0.053056,0.425060,0.229382,...,1.0,2.0,1.0,1.0,2.0,100.0,2020-07-16T09:18:04.984000,3.337784,84.511865,2.389775
81539,0.615062,1.191643,0.831269,-0.083804,0.14550,0.610797,0.448910,0.053056,0.425060,0.229382,...,1.0,2.0,1.0,1.0,2.0,100.0,2020-07-16T09:18:04.985000,64.889992,84.494078,2.394637
81540,0.615062,1.191643,0.831269,-0.083804,0.14550,0.610797,0.448910,0.053056,0.425060,0.229382,...,1.0,2.0,1.0,1.0,2.0,100.0,2020-07-16T09:18:04.985000,145.178665,84.475874,2.399381


In [63]:
# Display participant 22's questionnaire ratings, keyed by stimulus ("movie") and then by
# the rated emotion.
per_user_labels['22']

{'SADNESS': {'AWE': 1,
  'DISGUST': 1,
  'SURPRISE': 2,
  'ANGER': 2,
  'ENTHUSIASM': 1,
  'LIKING': 1,
  'FEAR': 1,
  'AMUSEMENT': 1,
  'SADNESS': 4},
 'NEUTRAL': {'AWE': 1,
  'DISGUST': 1,
  'SURPRISE': 1,
  'ANGER': 1,
  'ENTHUSIASM': 1,
  'LIKING': 1,
  'FEAR': 1,
  'AMUSEMENT': 1,
  'SADNESS': 1},
 'ENTHUSIASM': {'AWE': 3,
  'DISGUST': 1,
  'SURPRISE': 1,
  'ANGER': 1,
  'ENTHUSIASM': 4,
  'LIKING': 4,
  'FEAR': 1,
  'AMUSEMENT': 2,
  'SADNESS': 2},
 'DISGUST': {'AWE': 1,
  'DISGUST': 4,
  'SURPRISE': 2,
  'ANGER': 1,
  'ENTHUSIASM': 1,
  'LIKING': 1,
  'FEAR': 1,
  'AMUSEMENT': 2,
  'SADNESS': 1},
 'AWE': {'AWE': 4,
  'DISGUST': 1,
  'SURPRISE': 2,
  'ANGER': 1,
  'ENTHUSIASM': 4,
  'LIKING': 4,
  'FEAR': 1,
  'AMUSEMENT': 1,
  'SADNESS': 2},
 'SURPRISE': {'AWE': 1,
  'DISGUST': 1,
  'SURPRISE': 4,
  'ANGER': 1,
  'ENTHUSIASM': 1,
  'LIKING': 1,
  'FEAR': 3,
  'AMUSEMENT': 1,
  'SADNESS': 1},
 'LIKING': {'AWE': 3,
  'DISGUST': 1,
  'SURPRISE': 1,
  'ANGER': 1,
  'ENTHUSIASM': 2,


In [54]:
# Scratch version of the per-stimulus label container: one fresh, empty dict per stimulus
# name. A set literal collapses duplicates, so "BASELINE" is written only once here.
emotions = {"BASELINE","AMUSEMENT","ANGER","AWE","DISGUST","ENTHUSIASM","FEAR","LIKING","NEUTRAL","SADNESS","SURPRISE"}
emotion_dict = {stimulus: {} for stimulus in emotions}

In [55]:
# Store each questionnaire entry's emotion ratings in emotion_dict under the name of the
# stimulus ("movie") it belongs to.
# NOTE(review): quest_results is not assigned in this cell; it is whatever questionnaire
# file the participant-loading loop read last, so that loop has to be run first.
for condition in quest_results['questionnaires']:
    cond_type = condition["movie"]
    emotion_dict[cond_type] = condition["emotions"]


In [58]:
# Show the rating dicts currently stored in emotion_dict (one entry per stimulus).
emotion_dict.values()

dict_values([{'AWE': 1, 'DISGUST': 1, 'SURPRISE': 2, 'ANGER': 2, 'ENTHUSIASM': 1, 'LIKING': 1, 'FEAR': 1, 'AMUSEMENT': 1, 'SADNESS': 4}, {'AWE': 1, 'DISGUST': 1, 'SURPRISE': 1, 'ANGER': 1, 'ENTHUSIASM': 1, 'LIKING': 1, 'FEAR': 1, 'AMUSEMENT': 1, 'SADNESS': 1}, {'AWE': 3, 'DISGUST': 1, 'SURPRISE': 1, 'ANGER': 1, 'ENTHUSIASM': 4, 'LIKING': 4, 'FEAR': 1, 'AMUSEMENT': 2, 'SADNESS': 2}, {'AWE': 1, 'DISGUST': 4, 'SURPRISE': 2, 'ANGER': 1, 'ENTHUSIASM': 1, 'LIKING': 1, 'FEAR': 1, 'AMUSEMENT': 2, 'SADNESS': 1}, {'AWE': 4, 'DISGUST': 1, 'SURPRISE': 2, 'ANGER': 1, 'ENTHUSIASM': 4, 'LIKING': 4, 'FEAR': 1, 'AMUSEMENT': 1, 'SADNESS': 2}, {'AWE': 1, 'DISGUST': 1, 'SURPRISE': 4, 'ANGER': 1, 'ENTHUSIASM': 1, 'LIKING': 1, 'FEAR': 3, 'AMUSEMENT': 1, 'SADNESS': 1}, {'AWE': 3, 'DISGUST': 1, 'SURPRISE': 1, 'ANGER': 1, 'ENTHUSIASM': 2, 'LIKING': 3, 'FEAR': 1, 'AMUSEMENT': 2, 'SADNESS': 1}, {'AWE': 1, 'DISGUST': 1, 'SURPRISE': 2, 'ANGER': 1, 'ENTHUSIASM': 2, 'LIKING': 1, 'FEAR': 1, 'AMUSEMENT': 3, 'SADNESS':

In [None]:
# TODO: unfinished cell. The original content was the incomplete statement below, which is
# a SyntaxError and would stop a "Restart Kernel -> Run All". Fill in the file to open and
# the body before re-enabling it.
# with open()

In [10]:
# List every top-level field of the loaded recording with the number of entries it holds.
for name,d in data.items():
    print(name, f"Data Length:  {len(d)}", sep="\n")

TimeStamp
Data Length:  30739
Delta_TP9
Data Length:  30739
Delta_AF7
Data Length:  30739
Delta_AF8
Data Length:  30739
Delta_TP10
Data Length:  30739
Theta_TP9
Data Length:  30739
Theta_AF7
Data Length:  30739
Theta_AF8
Data Length:  30739
Theta_TP10
Data Length:  30739
Alpha_TP9
Data Length:  30739
Alpha_AF7
Data Length:  30739
Alpha_AF8
Data Length:  30739
Alpha_TP10
Data Length:  30739
Beta_TP9
Data Length:  30739
Beta_AF7
Data Length:  30739
Beta_AF8
Data Length:  30739
Beta_TP10
Data Length:  30739
Gamma_TP9
Data Length:  30739
Gamma_AF7
Data Length:  30739
Gamma_AF8
Data Length:  30739
Gamma_TP10
Data Length:  30739
RAW_TP9
Data Length:  30739
RAW_AF7
Data Length:  30739
RAW_AF8
Data Length:  30739
RAW_TP10
Data Length:  30739
AUX_RIGHT
Data Length:  30739
Accelerometer_X
Data Length:  30739
Accelerometer_Y
Data Length:  30739
Accelerometer_Z
Data Length:  30739
Gyro_X
Data Length:  30739
Gyro_Y
Data Length:  30739
Gyro_Z
Data Length:  30739
HeadBandOn
Data Length:  30739
HSI_TP

In [16]:
# Build a DataFrame from the currently loaded `data` dict (one column per top-level key).
# NOTE(review): several cells assign `data`, so the result depends on which one ran last.
df = pd.DataFrame.from_dict(data)

In [18]:
# Summarise the columns of df: dtype and non-null count per column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30739 entries, 0 to 30738
Data columns (total 38 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   TimeStamp        30739 non-null  object 
 1   Delta_TP9        30711 non-null  float64
 2   Delta_AF7        30711 non-null  float64
 3   Delta_AF8        30711 non-null  float64
 4   Delta_TP10       30711 non-null  float64
 5   Theta_TP9        30711 non-null  float64
 6   Theta_AF7        30711 non-null  float64
 7   Theta_AF8        30711 non-null  float64
 8   Theta_TP10       30711 non-null  float64
 9   Alpha_TP9        30711 non-null  float64
 10  Alpha_AF7        30711 non-null  float64
 11  Alpha_AF8        30711 non-null  float64
 12  Alpha_TP10       30711 non-null  float64
 13  Beta_TP9         30711 non-null  float64
 14  Beta_AF7         30711 non-null  float64
 15  Beta_AF8         30711 non-null  float64
 16  Beta_TP10        30711 non-null  float64
 17  Gamma_TP9   

In [30]:
# Native sampling rates (Hz) of the Empatica signals that are resampled below.
sampling_rates_original = {'BVP':64,'TEMP':4,'EDA':4}
# Common rate the signals are brought to (the 256 Hz used for the Muse data in this notebook).
target_sampling_rate = 256
testing_data = True
with open(r"C:\Users\nelso\Desktop\18065 Project Data\Emognition\22\22_AMUSEMENT_STIMULUS_EMPATICA.json") as json_file:
    data = json.load(json_file)

physio_resampled = {}  # signal name -> values resampled to target_sampling_rate
for d_type,physio_and_stamps in data.items():
    # The file also holds entries without a listed native rate ('IBI', 'ACC'); those
    # cannot be resampled by a rate ratio and are skipped.
    if d_type not in sampling_rates_original:
        continue
    physio_data = np.array(physio_and_stamps)[:,-1].astype("float32")
    # scipy.signal.resample requires the output length; the draft call omitted it.
    num_samples = int(round(len(physio_data)*target_sampling_rate/sampling_rates_original[d_type]))
    physio_data = sig.resample(physio_data,num_samples)
    physio_resampled[d_type] = physio_data
    if testing_data:
        # The draft left this branch empty (a SyntaxError); reporting the resulting
        # length is a placeholder check -- replace with the intended diagnostics.
        print(d_type,"resampled length:",len(physio_data))

# df = pd.DataFrame.from_dict(data)
# print(df.to_numpy().shape)
# df = pd.DataFrame.from_dict(data)
data.keys()

dict_keys(['BVP', 'TEMP', 'IBI', 'ACC', 'EDA'])

In [None]:
# Sanity check that the sampling rate and the duration of the recordings agree.
# NOTE(review): the variable names say "subject20" while the files loaded in this
# notebook belong to participant 22 -- confirm which recording these numbers describe.
muse_sample_count = 30739       # number of Muse samples reported for the inspected recording
reference_sample_count = 7680   # presumably the matching 64 Hz BVP sample count -- TODO confirm
approx_samp_rate_subject20 = 64*(muse_sample_count/reference_sample_count)   # implied Muse rate (Hz)
approx_exp_duration_subject20 = muse_sample_count/256/60                     # duration in minutes at 256 Hz