In [1]:
import pandas as pd
from datetime import datetime
import numpy as np
from glom import glom
from datetime import timedelta


In [2]:
# Load the previously exported experiment-1 table for user 002 and show
# its summary statistics as the cell output.
df = pd.read_csv(
    'Extracted data 2/Tableau exp 1 data user 002.csv'
)
df.describe()

Unnamed: 0,Emotionrating,heart_rate,skin_temp,userid
count,166.0,165.0,165.0,166.0
mean,3.03012,74.967677,-0.903594,2.0
std,2.124674,15.39203,0.176319,0.0
min,1.0,54.0,-1.317816,2.0
25%,1.0,60.0,-1.05115,2.0
50%,2.0,70.0,-0.91115,2.0
75%,4.0,91.0,-0.79615,2.0
max,8.0,102.333333,-0.58115,2.0


## Read, Load, Extract, Save Physiological Signal (Class)

In [3]:
class readloadsignal:

    '''
    Reads a physiological signal exported from Fitbit, trims it to a time
    window and saves it as its own csv file.

    The heart rate export is read as json (extractvaluejson); the skin
    temperature and estimated oxygen variation exports are read as csv
    (extractvaluecsv).

    Arguments:
        filename:    path of the Fitbit export to read (json or csv)
        output_name: name (without extension) of the csv written by savesignal
        start, end:  bounds of the time window to keep, both inclusive.
                     The defaults are the window that was originally
                     hard-coded for the 2022-06-21 session.
        output_dir:  directory the csv is written to (created when missing)
    '''

    DEFAULT_START = "2022-06-21 14:33:00"
    DEFAULT_END = "2022-06-21 15:29:05"
    DEFAULT_OUTPUT_DIR = "Extracted data 2"

    def __init__(self, filename, output_name, start=DEFAULT_START, end=DEFAULT_END,
                 output_dir=DEFAULT_OUTPUT_DIR):
        self.filename = filename
        self.output_name = output_name
        self.start = start
        self.end = end
        self.output_dir = output_dir

    def _clip_to_window(self, df):

        '''
        Returns the rows of df whose "datetime" column lies between
        self.start and self.end, with "datetime" as the first column.
        '''

        # Label slicing on a DatetimeIndex is only reliable on a sorted index;
        # a stable sort leaves already ordered data untouched.
        df = df.set_index("datetime").sort_index(kind="mergesort")
        df = df.loc[self.start:self.end]
        return df.reset_index()

    def extractvaluejson(self):

        '''
        This method reads the Fitbit heart rate json file, pulls "bpm" and
        "confidence" out of each "value" record and returns the rows inside
        the time window as a dataframe with the columns
        "datetime", "heartrate", "confidence".
        '''

        raw = pd.read_json(self.filename)
        bpm = raw["value"].apply(lambda row: glom(row, "bpm"))
        confidence = raw["value"].apply(lambda row: glom(row, "confidence"))
        df = pd.concat([raw["dateTime"], confidence, bpm], axis=1, ignore_index=True)

        # NOTE: the labels below are swapped relative to their contents --
        # "heartrate" holds the confidence values and "confidence" holds the
        # bpm values. aligndatset.merge_signals depends on exactly this (it
        # renames "confidence" to "heart_rate" and drops "heartrate"), so the
        # labels are kept as they are; both places must be changed together.
        df.columns = ["datetime", "heartrate", "confidence"]

        # Make the timestamp dtype explicit instead of relying on read_json's
        # automatic date detection (no-op when it is already datetime64).
        df["datetime"] = pd.to_datetime(df["datetime"])
        return self._clip_to_window(df)

    def extractvaluecsv(self):

        '''
        This method reads a Fitbit csv export (skin temperature or estimated
        oxygen variation), renames its timestamp column to "datetime" and
        returns the rows inside the time window.
        '''

        # The timestamp column is named differently depending on the export.
        column_dict = {"timestamp": "datetime", "recorded_time": "datetime", "dateTime": "datetime"}

        df = pd.read_csv(self.filename).rename(columns=column_dict)
        df["datetime"] = pd.to_datetime(df["datetime"])
        return self._clip_to_window(df)

    def savesignal(self):

        '''
        This method extracts the signal and saves it as a separate csv named
        "<output_dir>/<output_name>.csv". The dataframe index is written too,
        so the file contains a leading unnamed index column.
        '''

        import os  # local import keeps this class self-contained

        try:
            df = self.extractvaluejson()
        except ValueError:
            # A file that cannot be parsed as json is expected to surface
            # here as ValueError; treat it as one of the csv exports.
            df = self.extractvaluecsv()

        os.makedirs(self.output_dir, exist_ok=True)
        df.to_csv('{}/{}.csv'.format(self.output_dir, self.output_name))
        print("signal successfully saved")
  

    

# Instances

## Experiment 1

Heart Rate - Exp 1

In [4]:
# Extract the experiment-1 heart rate export and save it as "df_hr.csv".
# savesignal() runs the extraction itself, so no separate extract call is needed.
df_hr = readloadsignal("MyFitbitData_Zihan/Zihan/Physical Activity/heart_rate-2022-06-21.json","df_hr")
df_hr.savesignal()

signal successfully saved


Skin Temperature - Exp 1

In [5]:
# Extract the experiment-1 wrist temperature export and save it as "df_skt.csv".
# savesignal() runs the extraction itself, so no separate extract call is needed.
df_skt = readloadsignal("MyFitbitData_Zihan/Zihan/Sleep/Wrist Temperature - 2022-06-21.csv","df_skt")
df_skt.savesignal()

signal successfully saved


Estimated O2 Variation - Exp 1

In [6]:
# Extract the experiment-1 estimated oxygen variation export and save it as "df_eo.csv".
# savesignal() runs the extraction itself, so no separate extract call is needed.
df_eo = readloadsignal("MyFitbitData_Zihan/Zihan/Other/estimated_oxygen_variation-2022-06-21.csv","df_eo")
df_eo.savesignal()

signal successfully saved


## Dataset Creation (Class)

In [7]:
class aligndatset:

    '''
    Builds the combined dataset: aligns three extracted signal csvs on time,
    attaches the data labels read from an excel file and aggregates the
    result per SoundID.

    Arguments (in positional order):
        filenamecsv1:  heart rate csv written by readloadsignal; it is the
                       base frame and must contain the columns "heartrate"
                       and "confidence"
        filenamecsv2:  second signal csv, matched backward in time
        filenamecsv3:  third signal csv, matched to the nearest timestamp
        filenameexcel: excel file of data labels
        final_df:      name (without extension) of the combined dataset csv
        output_dir:    directory the combined csv is written to

    The signal columns are recognised by name ("temperature",
    "Infrared to Red Signal Ratio"), so which of the two remaining signals
    is passed second or third only changes the matching direction used.
    '''

    def __init__(self,filenamecsv1,filenamecsv2,filenamecsv3, filenameexcel,final_df,
                 output_dir="Extracted data 2"):
        self.filenamecsv1 = filenamecsv1
        self.filenamecsv2 = filenamecsv2
        self.filenamecsv3 = filenamecsv3
        self.filenameexcel = filenameexcel
        self.final_df = final_df
        self.output_dir = output_dir

    @staticmethod
    def _by_time(df):

        '''
        Returns df ordered by its "datetime" column with a fresh index.
        pd.merge_asof refuses unsorted keys; the stable sort leaves frames
        that are already ordered unchanged.
        '''

        return df.sort_values("datetime", kind="mergesort", ignore_index=True)

    def read_clean_signals (self):

        '''
        This method reads the three signal csvs, removes the saved index
        column when present, parses "datetime" and returns the three
        dataframes as a list (same order as the constructor arguments).
        '''

        dfs = []
        for path in (self.filenamecsv1, self.filenamecsv2, self.filenamecsv3):
            df = pd.read_csv(path)
            # "Unnamed: 0" only exists when the csv was saved with its index.
            df = df.drop(columns=["Unnamed: 0"], errors="ignore")
            df["datetime"] = pd.to_datetime(df["datetime"])
            dfs.append(df)
        return dfs

    def merge_signals(self):

        '''
        This method merges the three dataframes obtained from the
        read_clean_signals method call on "datetime" using closest-key
        alignment with a tolerance of 60 seconds: the second frame is matched
        backward in time, the third to the nearest timestamp.
        '''

        # In the heart rate csv written by readloadsignal.extractvaluejson the
        # column labelled "confidence" carries the bpm values and "heartrate"
        # carries the confidence values, hence this rename and the drop below.
        column_renamelist = {"Infrared to Red Signal Ratio":"est_02_variation", "temperature":"skin_temp","confidence":"heart_rate"}
        column_droplist = ["heartrate"]
        tolerance = pd.Timedelta(seconds=60)

        df1, df2, df3 = (self._by_time(df) for df in self.read_clean_signals())

        df_total = pd.merge_asof(df1, df2, on="datetime", direction="backward",
                                 tolerance=tolerance, allow_exact_matches=True)
        df_total = pd.merge_asof(df_total, df3, on="datetime", direction="nearest",
                                 tolerance=tolerance, allow_exact_matches=True)
        df_total = df_total.rename(columns=column_renamelist)
        df_total = df_total.drop(columns=column_droplist)

        return df_total

    def load_datalabel(self):

        '''
        This method reads the data label excel file and returns three copies
        of it whose "datetime" column is, respectively, the start time, the
        start time plus six seconds, and the end time of each label.
        '''

        #Load and extract data label
        df_dl = pd.read_excel(self.filenameexcel)
        df_dl = df_dl.drop(columns=["UserID","Soundgroupselection"])
        df_dl[["Starttime", "Endtime"]] = df_dl[["Starttime","Endtime"]].apply(pd.to_datetime)
        df_dl["datetime"] = df_dl["Starttime"] + timedelta(seconds = 6)

        #Dataframe with starttime
        df_dl_start = df_dl.drop(columns=["datetime","Endtime"]).rename(columns={"Starttime":"datetime"})

        #Dataframe with starttime plus six seconds
        df_dl_after6sec = df_dl.drop(columns=["Starttime","Endtime"])

        #Dataframe with endtime
        df_dl_end = df_dl.drop(columns=["datetime","Starttime"]).rename(columns={"Endtime":"datetime"})

        return df_dl_start, df_dl_after6sec, df_dl_end

    def initial_dataset_merge (self):

        '''
        This method attaches the combined physiological signals (merge_signals
        method call) to each of the three data label dataframes
        (load_datalabel method call), taking the nearest signal row within
        3 seconds. Returns the three merged dataframes in the order
        start, start plus six seconds, end.
        '''

        label_frames = self.load_datalabel()
        df_total = self.merge_signals()

        merged = [
            pd.merge_asof(self._by_time(labels), df_total, on="datetime", direction="nearest",
                          tolerance=pd.Timedelta(seconds=3), allow_exact_matches=True)
            for labels in label_frames
        ]
        df_dataset_1, df_dataset_2, df_dataset_3 = merged
        return df_dataset_1, df_dataset_2, df_dataset_3

    def final_dataset_merge (self):

        '''
        This method combines the three dataframes returned by the
        initial_dataset_merge method call, averages the numeric columns per
        (SoundID, datetime, Emotion, Soundgroup) and then derives the
        statistical features per SoundID.

        Returns the per-SoundID feature table. When a column required for the
        features is missing, a warning is issued and the per-timestamp table
        is returned instead.
        '''

        import warnings  # local import keeps this class self-contained

        df_dataset = pd.concat(self.initial_dataset_merge())
        df_dataset = df_dataset.sort_values(by=["SoundID"])

        #Groupby soundid and datetime
        # numeric_only=True: leftover text columns must be skipped rather than
        # averaged (recent pandas versions raise on them otherwise).
        df_dataset = df_dataset.groupby(["SoundID","datetime","Emotion","Soundgroup"]).mean(numeric_only=True)
        df_dataset = df_dataset.sort_values("datetime").reset_index()
        print(df_dataset)

        #derive statistical features
        feature_spec = {"datetime":"max", "Emotionrating":"mean", "Emotion":"max", "Soundgroup":"max",
                        "heart_rate":["mean", "std"], "skin_temp":["mean", "std"]}
        try:
            df_dataset = df_dataset.groupby(["SoundID"]).agg(feature_spec)
        except KeyError as exc:
            warnings.warn("statistical features not derived, returning the "
                          "per-timestamp table instead: {}".format(exc))
        return df_dataset

    def save_final_dataset (self):

        '''
        This method saves the dataset created by the final_dataset_merge
        method call as "<output_dir>/<final_df>.csv" and prints its first rows.
        '''

        import os  # local import keeps this class self-contained

        df = self.final_dataset_merge()
        os.makedirs(self.output_dir, exist_ok=True)
        df.to_csv('{}/{}.csv'.format(self.output_dir, self.final_df))
        print ("combined dataset successfully created")
        print(df.head())
        
        
        

## Instances

User 002 - First Experiment

In [8]:
# Build and save the combined dataset for user 002, experiment 1.
# save_final_dataset() runs the whole chain itself (read -> merge signals ->
# load labels -> merge -> aggregate), so calling the intermediate steps first
# only repeats the work and prints the intermediate table twice.
df_combined = aligndatset("Extracted data 2/df_hr.csv","Extracted data 2/df_skt.csv","Extracted data 2/df_eo.csv","Excel Database 2.xlsx","df_combinedined_userzihan_exp001" )
df_combined.save_final_dataset()

    SoundID            datetime  Emotion Soundgroup  Emotionrating  \
0    0109_2 2022-06-17 14:58:11     Fear   Practice            7.0   
1    0109_2 2022-06-17 14:58:17     Fear   Practice            7.0   
2    0109_2 2022-06-17 14:58:23     Fear   Practice            7.0   
3    0153_2 2022-06-21 14:33:03     Fear     Animal            3.0   
4    0153_2 2022-06-21 14:33:09     Fear     Animal            3.0   
..      ...                 ...      ...        ...            ...   
493  1366_2 2022-06-21 15:28:44  Sadness  Transport            1.0   
494  1366_2 2022-06-21 15:28:49  Sadness  Transport            1.0   
495  1377_2 2022-06-21 15:28:52  Sadness  Transport            1.0   
496  1377_2 2022-06-21 15:28:58  Sadness  Transport            1.0   
497  1377_2 2022-06-21 15:29:01  Sadness  Transport            1.0   

     heart_rate  skin_temp  
0           NaN        NaN  
1           NaN        NaN  
2           NaN        NaN  
3          60.0   -0.64115  
4          59.