In [1]:
import pandas as pd
from datetime import datetime
import numpy as np
from glom import glom
from datetime import timedelta


## Read, Load, Extract, Save Physioloigcal Signal (Class)

In [2]:
class readloadsignal:
    '''
    This classreads and load physiological signals exported from fitbit. It extracts the heart rate (json file), estimated oxygen variation (csv file)
    and skin temperature (csv file) and saves as seperate folders into specified directory
    '''

    def __init__(self,filenamejson,filenamecsv):
        self.filenamejson = filenamejson
        self.filenamecsv = filenamecsv

    def extractvaluejson(self):

        '''
        This method reads and extracts the heartrate from the input jsonfile for heartrate from fitbit
        '''

        df = pd.read_json(self.filenamejson)
        df_1 = df["value"].apply(lambda row:glom(row,"bpm"))
        df_2 = df["value"].apply(lambda row:glom(row, "confidence"))
        df_3 = df["dateTime"]
        df = pd.concat([df_3, df_2, df_1], axis=1, ignore_index=True)
        df.columns = ["datetime","heartrate","confidence"] #rename column
        df.set_index("datetime",inplace=True)
        df = df.loc["2022-06-15 18:45:00": "2022-06-15 20:00:00"]
        df.reset_index(inplace=True)
        return df

    def extractvaluecsv(self):

        '''
        This method reads and extracts the skintemp and estimated oxygen variation from the input csv files from fitbit
        '''

        column_dict = {"timestamp":"datetime", "recorded_time":"datetime", "dateTime":"datetime"}

        df = pd.read_csv(self.filenamejson)
        df.rename(columns=column_dict, inplace=True)
        df.datetime = pd.to_datetime(df['datetime'],format="%Y-%m-%dT%H:%M:%S", errors = 'coerce' )
        df.set_index("datetime", inplace=True)
        df = df.sort_index().loc["2022-06-15 18:41":"2022-06-15 20:00"]
        df.reset_index(inplace=True)
        
        return df

    def savesignal(self):

        '''
        This method saves each extracted physiological signal as a sepreate csv into specified directory
        '''

        try:
            df = self.extractvaluejson()
        except ValueError:
            df = self.extractvaluecsv()
        df.to_csv('Extracted data 2/{}.csv'.format(self.filenamecsv))
        print("signal successfully saved")
  

    

## Instances

Heart Rate

In [3]:
df_hr = readloadsignal("/Users/olumide/Documents/Dissertation/Sound Stimuli version 1/MyFitbitData version 2/OlumideOdetunde/Physical Activity/heart_rate-2022-06-15.json","df_hr")
df_hr.extractvaluejson()
df_hr.savesignal()


signal successfully saved


Skin Temperature

In [4]:
df_skt = readloadsignal("/Users/olumide/Documents/Dissertation/Sound Stimuli version 1/MyFitbitData version 2/OlumideOdetunde/Sleep/Wrist Temperature - 2022-06-15.csv","df_skt")
df_skt.extractvaluecsv()
df_skt.savesignal()

signal successfully saved


 Estimated O2 Variation

In [5]:
df_eo = readloadsignal("/Users/olumide/Documents/Dissertation/Sound Stimuli version 1/MyFitbitData version 2/OlumideOdetunde/Other/estimated_oxygen_variation-2022-06-15.csv", "df_eo")
df_eo.extractvaluecsv()
df_eo.savesignal()

signal successfully saved


  df = df.sort_index().loc["2022-06-15 18:41":"2022-06-15 20:00"]
  df = df.sort_index().loc["2022-06-15 18:41":"2022-06-15 20:00"]


## Dataset Creation (Class)

In [6]:
class aligndatset:

    '''
    Argument: In positional order takes in extracted csvs of heart rate, estimated oxygen variation, skin temperature from the readloadsignal class.
            Followed by excel file of data label and intended name of combined dataset created 
    '''
    
    def __init__(self,filenamecsv1,filenamecsv2,filenamecsv3, filenameexcel,final_df):
        self.filenamecsv1 = filenamecsv1
        self.filenamecsv2 = filenamecsv2
        self.filenamecsv3 = filenamecsv3
        self.filenameexcel = filenameexcel
        self.final_df = final_df 

    def read_clean_signals (self):

        '''
        This method  reads the first three csv entered and saves created dataframes into a list
        '''

        csv_files = [self.filenamecsv1, self.filenamecsv2, self.filenamecsv3]
        dfs = []

        for csv in csv_files:
            column_droplist = ["Unnamed: 0"]
            df  = pd.read_csv(csv)
            df.drop(column_droplist,axis=1,inplace=True)
            df["datetime"] = pd.to_datetime(df.datetime)
            dfs.append(df)
        return dfs

    def merge_signals(self):

        '''
        This method merges the the three dataframes obatined from the readcleansignal method call on date time using closest key align technique
        '''

        column_renamelist = {"Infrared to Red Signal Ratio":"est_02_variation", "temperature":"skin_temp","heartrate":"heart_rate"}
        column_droplist = ["confidence"]
        df1, df2, df3 = self.read_clean_signals()

        df_total = pd.merge_asof(df1, df2, on="datetime",direction="backward",\
            tolerance=pd.Timedelta(seconds = 60),allow_exact_matches=True)
        df_total = pd.merge_asof(df_total,df3, on="datetime",direction="nearest",\
            tolerance=pd.Timedelta(seconds = 60),allow_exact_matches=True)
        df_total.rename(columns=column_renamelist, inplace=True)
        df_total.drop(column_droplist, inplace=True, axis=1)

        return df_total

    def load_datalabel(self):

        '''
        This method reads the data label excel file and divides into three dataframes using specified datetimes peculiar to this project
        '''
        
        #Load and extract data label
        df_dl = pd.read_excel(self.filenameexcel)
        df_dl.drop(["UserID","Soundgroup","Soundgroupselection"], axis=1, inplace=True)
        df_dl[["Starttime", "Endtime"]] = df_dl[["Starttime","Endtime"]].apply(pd.to_datetime)
        df_dl["datetime"] = df_dl["Starttime"] + timedelta(seconds = 6)
        
        #Create 3 dataframe with different time frames

        #Dataframe with Starttime
        df_dl_start = df_dl.drop(["datetime","Endtime"], axis=1)
        df_dl_start.rename(columns={"Starttime":"datetime"}, inplace=True)

        #Dataframe with starttime plus six seconds
        df_dl_after6sec = df_dl.drop(["Starttime","Endtime"], axis=1)

        #Dataframe with endtime
        df_dl_end = df_dl.drop(["datetime","Starttime"], axis=1)
        df_dl_end.rename(columns={"Endtime":"datetime"},inplace=True)

        return df_dl_start, df_dl_after6sec, df_dl_end

    def initial_dataset_merge (self):

        '''
        This method performs an inital merge using the three dataframes (created by the load_datalabel method call) and combined physiological
        signal dataframe (created by the merge_signal method call). Returns three unique dataframe of physiological signals merged to 
        the three data label dataframes
        '''

        df_dl_start, df_dl_after6sec, df_dl_end = self.load_datalabel()
        df_total = self. merge_signals()

        df_dataset_1 = pd.merge_asof(df_dl_start, df_total, on="datetime",direction="nearest", tolerance=pd.Timedelta(seconds=3),allow_exact_matches=True)

        df_dataset_2 = pd.merge_asof(df_dl_after6sec, df_total, on="datetime",direction="nearest", tolerance=pd.Timedelta(seconds=3),allow_exact_matches=True)
       
        df_dataset_3 = pd.merge_asof(df_dl_end, df_total, on="datetime",direction="nearest", tolerance=pd.Timedelta(seconds=3),allow_exact_matches=True)

        return df_dataset_1, df_dataset_2, df_dataset_3

    def final_dataset_merge (self):

        '''
        This method uses the combines the  three dataframes returned by the initial_dataset_merge method call and groups by selected key columns
        and generates required statistical features.

        '''

        df_dataset_1, df_dataset_2, df_dataset_3 = self.initial_dataset_merge()

        df_dataset = pd.concat([df_dataset_1,df_dataset_2,df_dataset_3])
        df_dataset.sort_values(by=["SoundID"], inplace=True)

        #Groupby soundid and datetime
        df_dataset = df_dataset.groupby(["SoundID","datetime","Emotion"]).mean()
        df_dataset.sort_values("datetime",inplace=True)
        df_dataset.reset_index(inplace=True)

        #derive statistical features
        df_dataset = df_dataset.groupby(["SoundID"]).agg({"datetime":max, "Emotionrating":"mean","Emotion":max, "heart_rate":['mean', 'std'],"skin_temp":['mean', "std"],}) #"est_02_variation":"mean"}) #est 02 variation was empty here mightnot be in other cases
        return df_dataset

    def save_final_dataset (self):

        '''
        This method saves the combined and final dataset created by the final_data_set_merge method call into a specified directory
        '''
        
        df = self.final_dataset_merge()
        df.to_csv('Extracted data 2/{}.csv'.format(self.final_df))
        print ("combined dataset successfully created")
        print(df.head())
        
        
        

## Instances

User 001 - First Experiment

In [7]:
df_combined= aligndatset("/Users/olumide/Documents/Dissertation/Sound Stimuli version 1/Extracted data 2/df_hr.csv",
                            "/Users/olumide/Documents/Dissertation/Sound Stimuli version 1/Extracted data 2/df_eo.csv",
                            "/Users/olumide/Documents/Dissertation/Sound Stimuli version 1/Extracted data 2/df_skt.csv",
                            "/Users/olumide/Documents/Dissertation/Sound Stimuli version 1/Excel Database.xlsx",
                            "df_combinedined_user001_exp001" )
df_combined.read_clean_signals()
df_combined.merge_signals()
df_combined.load_datalabel()
df_combined.initial_dataset_merge()
df_combined.final_dataset_merge()
df_combined.save_final_dataset()

combined dataset successfully created
                   datetime Emotionrating    Emotion heart_rate       \
                        max          mean        max       mean  std   
SoundID                                                                
0085_2  2022-06-15 19:49:53           1.0    Sadness        1.0  0.0   
0109_2  2022-06-15 18:54:15           7.0  Happiness        2.0  0.0   
0123_2  2022-06-15 19:16:38           3.0       Fear        NaN  NaN   
0124_2  2022-06-15 18:52:32           4.0  Happiness        3.0  0.0   
0149_2  2022-06-15 19:27:09           4.0       Fear        1.0  0.0   

        skin_temp       
             mean  std  
SoundID                 
0085_2   1.534936  0.0  
0109_2   0.499936  0.0  
0123_2        NaN  NaN  
0124_2   0.259936  0.0  
0149_2   1.619936  0.0  
