Code for the preprocessing of the stress test files in preparation for the HRV analysis

**Installation of the packages**

In [None]:
pip install "devicely"
pip install "jointly"

**Import necessary packages**

In [None]:
import devicely
import jointly
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Read Empatica data ##

In [None]:
# Build a devicely reader for one subject's Empatica recording.
# NOTE(review): "path" is a placeholder — replace it with the location of the subject's Empatica data.
empatica_reader = devicely.EmpaticaReader("path")

In [None]:
# Preview the first rows of the reader's combined `data` table.
empatica_reader.data.head()

In [None]:
# Preview the first rows of the reader's `IBI` table.
empatica_reader.IBI.head()

**Create a subset to test with the end of the pipeline (EMPATICA)**

In [None]:
# Bind a working name to the reader. This is an alias, not a copy:
# `empatica_test` and `empatica_reader` refer to the same object at this point.
empatica_test = empatica_reader

In [None]:
# Preview the data through the working name; it should match the preview of `empatica_reader.data`.
empatica_test.data.head()

In [None]:
# Move the reader's data index into ordinary columns.
# The frame is read from `empatica_reader` rather than from `empatica_test`: this cell
# rebinds `empatica_test` from the reader to a DataFrame, so reading `.data` from
# `empatica_test` would raise AttributeError when the cell is run a second time.
empatica_test = empatica_reader.data.reset_index()

In [None]:
# Preview the frame after the index was moved into columns.
empatica_test.head()

In [None]:
 # Discard every signal column that is not needed for the IBI analysis.
empatica_test = empatica_test.drop(
    columns=[
        "bvp", "eda", "hr", "temp",
        "acc_x", "acc_y", "acc_z", "acc_mag",
        "timedelta",
    ]
)

In [None]:
# Display the remaining columns to confirm the drop.
empatica_test

In [None]:
# Keep only the rows that actually carry an IBI value (drop rows where "ibi" is NaN).
empatica_test = empatica_test.loc[empatica_test["ibi"].notna()]

In [None]:
 # Remove rows whose IBI is 0: the interval between two subsequent beats cannot be zero.
empatica_test = empatica_test.drop(
    index=empatica_test.loc[empatica_test["ibi"] == 0.0].index
)

In [None]:
 # Save the cleaned Empatica subset as CSV without the row index; datetime values are
 # written with microsecond precision.
 # NOTE(review): "path.csv" is a placeholder, and the Faros save cell uses the same
 # literal — give each file its own name so one does not overwrite the other.
empatica_test.to_csv("path.csv", date_format = "%Y-%m-%d %H:%M:%S.%f", index=False)

## Read Faros data ##

In [None]:
# Build a devicely reader for one subject's Faros recording.
# NOTE(review): "path" is a placeholder — replace it with the location of the subject's Faros data.
faros_reader = devicely.FarosReader("path")

In [None]:
# Preview the first rows of the reader's combined `data` table.
faros_reader.data.head()

**Create a subset to test with the end of the pipeline (FAROS)**

In [None]:
# Bind a working name to the reader. This is an alias, not a copy:
# `faros_test` and `faros_reader` refer to the same object at this point.
faros_test = faros_reader

In [None]:
# Preview the data through the working name; it should match the preview of `faros_reader.data`.
faros_test.data.head()

In [None]:
# Move the reader's data index into ordinary columns.
# The frame is read from `faros_reader` rather than from `faros_test`: this cell
# rebinds `faros_test` from the reader to a DataFrame, so reading `.data` from
# `faros_test` would raise AttributeError when the cell is run a second time.
faros_test = faros_reader.data.reset_index()

In [None]:
# Preview the frame after the index was moved into columns.
faros_test.head()

In [None]:
 # Discard every signal column that is not needed for the IBI analysis.
faros_test = faros_test.drop(
    columns=[
        "ECG",
        "Accelerometer_X", "Accelerometer_Y", "Accelerometer_Z",
        "Marker",
        "acc_mag",
    ]
)

In [None]:
# Display the remaining columns to confirm the drop.
faros_test

In [None]:
# Keep only the rows that actually carry a value in the "HRV" column (drop the NaN rows).
faros_test = faros_test.loc[faros_test["HRV"].notna()]

In [None]:
# Remove rows whose RR value is 0: the interval between two subsequent beats cannot be zero.
faros_test = faros_test.drop(
    index=faros_test.loc[faros_test["HRV"] == 0.0].index
)

In [None]:
 # Save the cleaned Faros subset as CSV without the row index; datetime values are
 # written with microsecond precision.
 # NOTE(review): "path.csv" is a placeholder, and the Empatica save cell uses the same
 # literal — give each file its own name so one does not overwrite the other.
faros_test.to_csv("path.csv", date_format = "%Y-%m-%d %H:%M:%S.%f", index=False)

## Sync the datasets ##

***Implement jointly***

**Sync the data**

In [None]:
# Describe each device for jointly: the table to align and the reference column
# ('acc_mag') that the shake extractor works on.
# The readers are the ones created earlier in this notebook (`faros_reader`,
# `empatica_reader`); the `*_sub1` names used before are never defined here and
# raised NameError on a fresh kernel.
sources = {
    'Faros': {
        'data': faros_reader.data,
        'ref_column': 'acc_mag',
    },
    'Empatica': {
        'data': empatica_reader.data,
        'ref_column': 'acc_mag',
    }
}
# Faros is passed to the Synchronizer as the reference source.
ref_source_name = 'Faros'

extractor = jointly.ShakeExtractor()
synchronizer = jointly.Synchronizer(sources, ref_source_name, extractor)
synced_data = synchronizer.get_synced_data()

**Save the data**

In [None]:
# Map each output table name to the columns taken from each device.
# Once the sources are synced, `save_data` writes the result to the destination folder.
# Per the original author's note it creates the files "ACC", "ECG", "EDA", "HR", "IBI",
# "PPG", "SYNC", "TEMP" and "TOTAL".
# NOTE(review): "path" is a placeholder for the destination folder.
tables = {
    'ACC': {
        'Faros': ['Accelerometer_X', 'Accelerometer_Y', 'Accelerometer_Z'],
        'Empatica': ['acc_x', 'acc_y', 'acc_z'],
    },
    'PPG': {
        'Empatica': ['bvp'],
    },
    'EDA': {
        'Empatica': ['eda'],
    },
    'ECG': {
        'Faros': ['ECG'],
    },
    'TEMP': {
        'Empatica': ['temp'],
    },
    'HR': {
        'Empatica': ['hr'],
    },   
    'IBI': {
        'Faros': ['HRV'],
        'Empatica': ['ibi'],
    }
}


synchronizer.save_data("path", tables=tables)


In [None]:
# Load IBI.csv, the working file that holds the synced RR values of both devices.
# The file's own header row is skipped and replaced by the column names given here.
# The path is a raw string: in a normal string "\I" is an invalid escape sequence,
# which newer Python versions report with a SyntaxWarning. The path value is unchanged.
df1 = pd.read_csv(
    r"path...\IBI.csv",
    names=["date_time", "faros_ibi", "empatica_ibi"],
    dtype={"empatica_ibi": np.float64, "faros_ibi": np.float64},
    skiprows=1,
)

**Plot**

In [None]:
# Plot the synced IBI series of both devices on a shared time axis and save the figure.
# Paths are raw strings: "\I" and "\p" are invalid escape sequences in a normal string
# (SyntaxWarning on newer Python versions). The path values are unchanged.
data_ibi = pd.read_csv(r"path...\IBI.csv", index_col=0)
data_ibi.index = pd.to_datetime(data_ibi.index)
# Scale the Empatica column by 1000 so it is comparable with the Faros column
# (presumably seconds -> milliseconds — TODO confirm the devices' units).
data_ibi['Empatica_ibi'] = data_ibi['Empatica_ibi'] * 1000
# Fill the NaN gaps by time-based interpolation before plotting.
plot = data_ibi.interpolate(method='time').plot(figsize=(15,6), ylim=(300,1500))
plt.savefig(r"path...\plot.pdf")

In [None]:
# Preview the first rows of the merged IBI table.
df1.head()

In [None]:
# Parse the "date_time" column into pandas datetime values.
df1["date_time"] = pd.to_datetime(df1["date_time"])

In [None]:
# Check the column dtypes as proof that "date_time" now has a datetime type.
df1.dtypes

In [None]:
# Preview the table content. Because the two devices record at different sample rates,
# it is highly unlikely that both devices register a value at the same timestamp.
df1.head()

**Create a subset for FAROS**

In [None]:
# Build the Faros subset: keep only the rows where "faros_ibi" is not NaN.
df1_1 = df1.loc[df1["faros_ibi"].notna()]

In [None]:
# Preview the Faros subset.
df1_1.head()

In [None]:
# Remove the Empatica column so the subset holds only the timestamps and the Faros values.
df1_1 = df1_1.drop(columns = ["empatica_ibi"])

In [None]:
# Remove rows whose RR value is 0: the interval between two subsequent beats cannot be zero.
df1_1 = df1_1.drop(
    index=df1_1.loc[df1_1["faros_ibi"] == 0.0].index
)

In [None]:
# Preview the cleaned Faros subset.
df1_1.head()

In [None]:
# Check the column dtypes of the Faros subset.
df1_1.dtypes

In [None]:
# Save the Faros subset to its own file, without the row index and with
# microsecond-precision timestamps.
# The path is a raw string: "\R" is an invalid escape sequence in a normal string
# (SyntaxWarning on newer Python versions). The path value is unchanged.
df1_1.to_csv(r"path...\RR_faros.csv", date_format="%Y-%m-%d %H:%M:%S.%f", index=False)

***Corrected Faros subset FIRST REST***

In [None]:
# Read the saved Faros subset back in.
# The path is a raw string: "\R" is an invalid escape sequence in a normal string
# (SyntaxWarning on newer Python versions). The path value is unchanged.
dfsub1_1 = pd.read_csv(r"path...\RR_faros.csv")

In [None]:
# Use the timestamps as a DatetimeIndex so the rows can be filtered by time of day.
# An index object (not a column label) is passed, so the "date_time" column itself
# stays in the frame.
dfsub1_1 = dfsub1_1.set_index(pd.DatetimeIndex(dfsub1_1['date_time']))

In [None]:
# Keep only the rows whose time of day lies inside the window of interest.
# NOTE(review): "start_time" and "end_time" are placeholders — replace them with
# real times of day, e.g. "14:16:30".
dfsub1_1 = dfsub1_1.between_time("start_time","end_time")

In [None]:
# Save the time-filtered Faros subset to a new file, without the index.
# The path is a raw string: "\R" is an invalid escape sequence in a normal string
# (SyntaxWarning on newer Python versions). The path value is unchanged.
dfsub1_1.to_csv(r"path...\RR_faros_fit.csv", index=False)

**Create a subset for EMPATICA**

In [None]:
# Build the Empatica subset: keep only the rows where "empatica_ibi" is not NaN.
df1_2 = df1.loc[df1["empatica_ibi"].notna()]

In [None]:
# Preview the Empatica subset.
df1_2.head()

In [None]:
# Remove the Faros column so the subset holds only the timestamps and the Empatica values.
df1_2 = df1_2.drop(columns = ["faros_ibi"])

In [None]:
# Remove rows whose RR value is 0: the interval between two subsequent beats cannot be zero.
df1_2 = df1_2.drop(
    index=df1_2.loc[df1_2["empatica_ibi"] == 0.0].index
)

In [None]:
# Preview the cleaned Empatica subset.
df1_2.head()

In [None]:
# Check the column dtypes of the Empatica subset.
df1_2.dtypes

In [None]:
# Save the Empatica subset to its own file, without the row index and with
# microsecond-precision timestamps.
# The path is a raw string: "\R" is an invalid escape sequence in a normal string
# (SyntaxWarning on newer Python versions). The path value is unchanged.
df1_2.to_csv(r"path...\RR_empatica.csv", date_format="%Y-%m-%d %H:%M:%S.%f", index=False)

***Corrected Empatica subset***

In [None]:
# Read the saved Empatica subset back in.
# The path is a raw string: "\R" is an invalid escape sequence in a normal string
# (SyntaxWarning on newer Python versions). The path value is unchanged.
dfsub1_2 = pd.read_csv(r"path...\RR_empatica.csv")

In [None]:
# Use the timestamps as a DatetimeIndex so the rows can be filtered by time of day.
# An index object (not a column label) is passed, so the "date_time" column itself
# stays in the frame.
dfsub1_2 = dfsub1_2.set_index(pd.DatetimeIndex(dfsub1_2['date_time']))

In [None]:
# Keep only the rows whose time of day lies inside the window of interest.
# NOTE(review): "start_time" and "end_time" are placeholders — replace them with
# real times of day, e.g. "14:16:30".
dfsub1_2 = dfsub1_2.between_time("start_time","end_time")

In [None]:
# Scale the Empatica IBI column by 1000 to match the scale of the Faros values
# (presumably seconds -> milliseconds — TODO confirm the devices' units).
# `assign` builds a new frame instead of writing a column into the frame returned by
# `between_time`, which can trigger pandas' SettingWithCopyWarning.
# NOTE: re-running this cell scales the column again; re-run the read cell first.
dfsub1_2 = dfsub1_2.assign(empatica_ibi=dfsub1_2["empatica_ibi"] * 1000)

In [None]:
# Save the time-filtered, rescaled Empatica subset to a new file, without the index.
# The path is a raw string: "\R" is an invalid escape sequence in a normal string
# (SyntaxWarning on newer Python versions). The path value is unchanged.
dfsub1_2.to_csv(r"path...\RR_empatica_fit.csv", index=False)