### E4FileFormatter


##### **Input:** Unzipped files of raw .csv files downloaded from Empatica. (You only need to specify files)
##### **Output:** Properly formatted .csv files compiled from all recordings with correct datatimestamps


**Check:** 
* Time Zone Correction- may need to change this dependent on time zone the data from the watch was uploaded via the E4 Portal

***
How my files are formatted: 
    > Folder for each participant
        > Folder named Empatica
            > Downloaded all folders (originally zipped) containing csv files from Empatica session
            
***

In [220]:
theid = '1663555632_A001FA_2' #This is the subject ID number (name of file)
filesource = './Session/' #This is the source folder that contains all of your participant folders

In [221]:
import csv
import datetime
import math
import time
import collections
from collections import OrderedDict
import os.path
import pandas as pd
import glob

In [222]:
def readFile(file):
    dict = OrderedDict()

    with open(file, 'rt') as csvfile:
        reader = csv.reader(csvfile, delimiter='\n')
        i =0;
        for row in reader:
            if(i==0):
                timestamp=row[0]
                print(timestamp)
                timestamp=float(timestamp)+3600*8 #Time Zone Correction - will need to change depending on time zone! (if +8 UTC use "+"3600*"8")
                #print(timestamp)
            elif(i==1):
                hertz = float(row[0])
            elif(i==2):
                dict[timestamp]=row[0]
            else:
                timestamp = timestamp + 1.0/hertz
                dict[timestamp]=row[0]
            i = i+1.0
    return dict

In [223]:
def formatfile(file, idd, typed):
    EDA = {}
    EDA = readFile(file = file)
    EDA =  {datetime.datetime.utcfromtimestamp(k).strftime('%Y-%m-%d %H:%M:%S.%f'): v for k, v in EDA.items()}
    EDAdf = pd.DataFrame.from_dict(EDA, orient='index', columns=['EDA'])
    EDAdf['EDA'] = EDAdf['EDA'].astype(float)
    
    EDAdf['Datetime'] =EDAdf.index
    EDAdf['Datetime'] = pd.to_datetime(EDAdf['Datetime'], format='%Y-%m-%dT%H:%M:%S.%f')
    EDAdf  = EDAdf.set_index('Datetime')
    
    out_filename = (filesource + idd + '/output_' + typed + '.csv')
    EDAdf.to_csv(out_filename, header=False)
    print('Done')

In [224]:
def importandexport(idd, typed):
    configfiles = glob.glob((filesource + idd + '/' + typed + '.csv'))
    print(configfiles)
    
    [formatfile(file, idd, typed) for file in configfiles]
    print(('Completed Import and Export of:' + typed))

In [225]:
listtyped = ['EDA','TEMP', 'HR','BVP'] 
[importandexport(theid, typed) for typed in listtyped]

['./Session/1663555632_A001FA_2/EDA.csv']
1663555632.000000
Done
Completed Import and Export of:EDA
['./Session/1663555632_A001FA_2/TEMP.csv']
1663555632.000000
Done
Completed Import and Export of:TEMP
['./Session/1663555632_A001FA_2/HR.csv']
1663555642.000000
Done
Completed Import and Export of:HR
['./Session/1663555632_A001FA_2/BVP.csv']
1663555632.00
Done
Completed Import and Export of:BVP


[None, None, None, None]

In [226]:
# configfiles = glob.glob((filesource + theid + "/EDA" + '.csv'))
# print(configfiles)
    

## Import & Format ACC

In [227]:
def processAcceleration(x,y,z):
    x = float(x)
    y = float(y)
    z = float(z) 
    return {'x':x,'y':y,'z':z}

In [228]:
def readAccFile(file):
    dict = OrderedDict()
    
    with open(file, 'rt') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        i=0;
        for row in reader:
            if(i == 0):
                timestamp = float(row[0])+3600*8 #Time Zone Correction
            elif(i == 1):    
                hertz=float(row[0])
            elif(i == 2):
                dict[timestamp]= processAcceleration(row[0],row[1],row[2])
            else:
                timestamp = timestamp + 1.0/hertz 
                dict[timestamp] = processAcceleration(row[0],row[1],row[2])
            i = i + 1
        return dict

In [229]:
def formatAccfile(file, idd, typed):
    EDA = {}
    EDA = readAccFile(file = file)
    EDA =  {datetime.datetime.utcfromtimestamp(k).strftime('%Y-%m-%d %H:%M:%S.%f'): v for k, v in EDA.items()}
    EDAdf = pd.DataFrame.from_dict(EDA, orient='index', columns=['x', 'y', 'z'])
    
    EDAdf['x'] = EDAdf['x'].astype(float)
    EDAdf['y'] = EDAdf['x'].astype(float)
    EDAdf['z'] = EDAdf['x'].astype(float)
    
    EDAdf['Datetime'] =EDAdf.index
    EDAdf['Datetime'] = pd.to_datetime(EDAdf['Datetime'], format='%Y-%m-%dT%H:%M:%S.%f')
    EDAdf  = EDAdf.set_index('Datetime')
    
    out_filename = (filesource + idd + '/output_' + typed + '.csv')
    EDAdf.to_csv(out_filename, header=False)
    print('Done')

In [230]:
def importandexportAcc(idd, typed):
    configfiles = glob.glob((filesource + idd + '/' + typed + '.csv'))
    print(configfiles)
    
    [formatAccfile(file, idd, typed) for file in configfiles]
    print(('Completed Import and Export of:' + typed))

In [231]:
importandexportAcc(theid, 'ACC') 

['./Session/1663555632_A001FA_2/ACC.csv']
Done
Completed Import and Export of:ACC


## Import & Format IBI

In [232]:
def importIBI(file, idd, typed):
    IBI = pd.read_csv(file, header=None)
    timestampstart = float(IBI[0][0])+3600*8
    IBI[0] = (IBI[0][1:len(IBI)]).astype(float)+timestampstart
    IBI = IBI.drop([0])
    IBI[0] = IBI[0].apply(lambda x: datetime.datetime.utcfromtimestamp(x).strftime('%Y-%m-%d %H:%M:%S.%f'))
    IBI  = IBI.set_index(0)
    
    out_filename = (filesource + idd + '/output_' + typed + '.csv')
    IBI.to_csv(out_filename, header=False)
    print('Done')

In [233]:
def importandexportIBI(idd, typed):
    configfiles = glob.glob((filesource + idd + '/' + typed + '.csv'))
    print(configfiles)
    
    [importIBI(file, idd, typed) for file in configfiles]
    print(('Completed Import and Export of:' + typed))

In [234]:
importandexportIBI(theid, 'IBI') 

['./Session/1663555632_A001FA_2/IBI.csv']


EmptyDataError: No columns to parse from file

***
Resources:
* Empatica Timestamp Explanation: https://support.empatica.com/hc/en-us/articles/202800715-Session-start-time-format-and-synchronization-
* GitHub withfunctions modified here: https://github.com/Ev4ngelos/EmpaticaBiophysicalSync/blob/master/E4BioSync.py