In [1]:
#.csv files in this archive are in the following format:
# The first row is the initial time of the session expressed as unix timestamp in UTC.
# The second row is the sample rate expressed in Hz.

# HR.csv
# Average heart rate extracted from the BVP signal. The first row is the initial time of the session expressed as unix timestamp in UTC.
# The second row is the sample rate expressed in Hz.

# EDA.csv
# Data from the electrodermal activity sensor expressed as microsiemens (μS).

# ACC.csv
# Data from 3-axis accelerometer sensor. The accelerometer is configured to measure acceleration in the range [-2g, 2g].
# Therefore the unit in this file is 1/64g.
# Data from x, y, and z axis are respectively in first, second, and third column.


In [2]:
import os
import re
import cv2
import math

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import datetime
import warnings
warnings.filterwarnings("ignore")

In [3]:
def gps_data(gpspath):
    """Load one black-box GPS csv file and return a tidy position table.

    Parameters
    ----------
    gpspath : str
        Path of a comma-separated file that contains at least the columns
        ``subject_id``, ``time``, ``latitude``, ``longitude`` and ``gpsspeed``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Driver``, ``unix_time``, ``timestamp``, ``latitude``,
        ``longitude`` and ``speed`` (in that order).  ``unix_time`` is the
        whole number of seconds between 1970-01-01 and ``timestamp``;
        latitude / longitude values that are exactly 0 are replaced by NaN.
    """
    gps_info = pd.read_csv(gpspath, sep=',')
    gps_info = gps_info[['subject_id', 'time', 'latitude', 'longitude', 'gpsspeed']]
    # index=str is kept on purpose so the returned row labels stay strings,
    # exactly as callers received them before.
    gps_info = gps_info.rename(
        index=str,
        columns={"subject_id": "Driver", "time": "timestamp", "gpsspeed": "speed"},
    )

    # Epoch seconds via timedelta arithmetic.  The integer view of a datetime
    # column is only "nanoseconds" when the column has ns resolution, which
    # newer pandas versions no longer guarantee, so ``astype(int64) // 10**9``
    # can silently produce wrong values.
    parsed = pd.to_datetime(gps_info['timestamp'])
    if parsed.dt.tz is not None:
        # tz-aware input: convert to UTC and drop the zone before subtracting.
        parsed = parsed.dt.tz_convert(None)
    unix_time = (parsed - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)
    gps_info.insert(loc=1, column='unix_time', value=unix_time)

    # A coordinate of exactly 0 is treated as a missing fix.  ``mask`` upcasts
    # integer columns instead of trying to write NaN into them.
    for coord in ('latitude', 'longitude'):
        gps_info[coord] = gps_info[coord].mask(gps_info[coord] == 0)
    return gps_info

In [4]:
def acc_data(accpath):
    """Return the per-second mean of the x-axis accelerometer signal.

    The file at `accpath` is read without a header into the columns
    acc_x / acc_y / acc_z, of which only acc_x is used.  Its first value is
    taken as the session start (unix seconds) and its second value as the
    sampling frequency in Hz; every following row is one sample.

    Returns a DataFrame with the columns `unix_time` (int64, whole seconds)
    and `acc_x` (mean of the samples that fall into that second).
    """
    x_axis = pd.read_csv(accpath, header=None, sep=',', names=['acc_x', 'acc_y', 'acc_z'])[['acc_x']]

    # The two leading rows are metadata, not samples.
    session_start = x_axis.iloc[0, 0]
    seconds_per_sample = 1 / x_axis.iloc[1, 0]

    samples = x_axis.iloc[2:].reset_index(drop=True)
    elapsed = pd.Series(range(len(samples))) * seconds_per_sample
    # Truncate every sample time to its whole second so samples can be grouped.
    samples.insert(loc=0, column='unix_time', value=(elapsed + session_start).astype(np.int64))
    return samples.groupby('unix_time')['acc_x'].mean().reset_index()

In [5]:
def eda_data(edapath):
    """Return the per-second mean of the electrodermal-activity signal.

    The single-column file at `edapath` is read without a header.  Row 0 is
    taken as the session start (unix seconds), row 1 as the sampling
    frequency in Hz, and all remaining rows as consecutive samples.

    Returns a DataFrame with the columns `unix_time` (int64, whole seconds)
    and `eda` (mean of the samples that fall into that second).
    """
    raw = pd.read_csv(edapath, header=None, sep=',', names=['eda'])

    # Metadata rows.
    first_sample_time = raw.iloc[0, 0]
    step = 1 / raw.iloc[1, 0]  # seconds between two samples

    readings = raw.iloc[2:].reset_index(drop=True)
    sample_index = pd.Series(range(0, len(readings)))
    timestamps = sample_index * step + first_sample_time
    # Whole-second bucket of each sample (fractional part is truncated).
    readings.insert(loc=0, column='unix_time', value=timestamps.astype(np.int64))

    per_second = readings.groupby('unix_time')['eda'].mean()
    return per_second.reset_index()

In [6]:
def hr_data(hrpath):
    """Return the per-second mean heart rate stored in `hrpath`.

    The single-column file is read without a header.  Its first value is
    taken as the session start (unix seconds), its second value as the
    sampling frequency in Hz; the rest are consecutive heart-rate samples.

    Returns a DataFrame with the columns `unix_time` (int64, whole seconds)
    and `HR` (mean of the samples that fall into that second).
    """
    table = pd.read_csv(hrpath, header=None, sep=',', names=['HR'])
    t0, frequency = table.iloc[0, 0], table.iloc[1, 0]
    period = 1 / frequency  # seconds per sample

    beats = table.iloc[2:].reset_index(drop=True)
    # Time of sample k is t0 + k * period, truncated to the whole second.
    seconds = (pd.Series(range(0, len(beats))) * period + t0).astype(np.int64)
    beats.insert(loc=0, column='unix_time', value=seconds)

    return beats.groupby('unix_time')['HR'].mean().reset_index()

### Empatica: per-driver data processing

In [64]:
# Driver processed by this section and the root of the raw data.  Both input
# folders are derived from these two constants so that switching driver is a
# one-line change instead of editing every path literal.
DRIVER_ID = 'EMP007'
DATA_ROOT = '/Users/tongge/Box/DriverStress/Data'
empatica_path = DATA_ROOT + '/Empatica/' + DRIVER_ID   # sub-folders holding HR/ACC/EDA csv files
gpsbb_path = DATA_ROOT + '/BlackBox/' + DRIVER_ID      # sub-folders holding the "1hz" GPS files

In [66]:
# emp data
# Walk every session folder of this driver and stack the per-second HR, ACC
# and EDA tables returned by the loader functions.
emp_loaders = {'HR.csv': hr_data, 'ACC.csv': acc_data, 'EDA.csv': eda_data}
emp_frames = {fname: [] for fname in emp_loaders}
for session in sorted(os.listdir(empatica_path)):  # sorted -> reproducible row order
    session_dir = os.path.join(empatica_path, session)
    if not os.path.isdir(session_dir):
        # Stray files (e.g. .DS_Store) are not sessions; os.listdir would raise on them.
        continue
    for fname in os.listdir(session_dir):
        # Exact file-name lookup: re.match('HR.csv', ...) treated '.' as a
        # wildcard and also accepted any trailing text after the name.
        loader = emp_loaders.get(fname)
        if loader is not None:
            emp_frames[fname].append(loader(os.path.join(session_dir, fname)))


def _stack_frames(frames, columns):
    """Concatenate `frames` once; return an empty frame with `columns` when there are none."""
    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)


# One concat per signal: DataFrame.append was removed in pandas 2.0 and
# re-copied the accumulated frame on every call.
hr_df = _stack_frames(emp_frames['HR.csv'], ['unix_time', 'HR'])
acc_df = _stack_frames(emp_frames['ACC.csv'], ['unix_time', 'acc_x'])
eda_df = _stack_frames(emp_frames['EDA.csv'], ['unix_time', 'eda'])
print('Done!')
# Default (inner) merges: only seconds present in all three signals are kept.
emp_df = pd.merge(pd.merge(hr_df, eda_df, on='unix_time'), acc_df, on='unix_time')

Done!


In [67]:
# Sanity check: (rows, columns) of the merged HR/EDA/ACC table.
emp_df.shape

(1240181, 4)

In [68]:
# gps data
# Collect every non-empty "1hz" GPS file found one level below gpsbb_path.
gps_frames = []
gps_skipped = []  # files pandas could not parse
for folder in sorted(os.listdir(gpsbb_path)):  # sorted -> reproducible row order
    folder_path = os.path.join(gpsbb_path, folder)
    if not os.path.isdir(folder_path):
        # Stray files (e.g. .DS_Store) would make os.listdir raise.
        continue
    for fname in os.listdir(folder_path):
        if not re.search('1hz', fname):
            continue
        gps_path = os.path.join(folder_path, fname)
        if os.path.getsize(gps_path) == 0:
            continue
        try:
            gps_frames.append(gps_data(gps_path))
        except pd.errors.ParserError:
            # The bare name ``ParserError`` was never imported, so a malformed
            # file used to surface as a NameError instead of being skipped.
            gps_skipped.append(gps_path)

# Single concat: DataFrame.append was removed in pandas 2.0.
gps_df = pd.concat(gps_frames, ignore_index=True) if gps_frames else pd.DataFrame()
if gps_skipped:
    print('Skipped %d unparsable GPS file(s)' % len(gps_skipped))
print('Done!')

Done!


In [69]:
# Sanity check: (rows, columns) of the stacked GPS table.
gps_df.shape

(133771, 6)

In [70]:
# Join GPS rows with the Empatica signals on the shared epoch second
# (default inner merge), then order the result chronologically.
emp_data_007 = gps_df.merge(emp_df, on='unix_time').sort_values(by='unix_time')

In [71]:
# Sanity check: rows that have both a GPS record and Empatica signals.
emp_data_007.shape

(80713, 9)

In [72]:
# Persist this driver's merged table (row index is not written).
emp_data_007.to_csv(
    '/Users/tongge/Box/driver_stress_tong/Empatica_data/emp_v2/emp_data_007.csv',
    index=False,
)

### empatica data summary

In [74]:
# NOTE: this rebinds `empatica_path`, which earlier pointed at the raw
# per-driver Empatica folder; it now names the folder of merged driver files.
empatica_path = '/Users/tongge/Box/driver_stress_tong/Empatica_data/emp_v2'
# Read every csv in the folder and stack them with a single concat:
# DataFrame.append was removed in pandas 2.0 and copied the data on each call.
# The comprehension also avoids overwriting the earlier `emp_df` variable.
driver_frames = [
    pd.read_csv(os.path.join(empatica_path, fpath), sep=',')
    for fpath in sorted(os.listdir(empatica_path))  # sorted -> reproducible row order
    if fpath.endswith('.csv')
]
empatica_data = pd.concat(driver_frames, ignore_index=True) if driver_frames else pd.DataFrame()
print('Done!')

Done!


In [75]:
# Preview the first rows of the combined table.
empatica_data.head()

Unnamed: 0,Driver,unix_time,timestamp,latitude,longitude,speed,HR,eda,acc_x
0,EMP000,1504903497,2017-09-08 20:44:57,41.255962,-96.007935,0.0,93.0,0.098319,-10.21875
1,EMP000,1504903498,2017-09-08 20:44:58,41.255962,-96.007935,0.0,108.5,0.093195,-22.21875
2,EMP000,1504903499,2017-09-08 20:44:59,41.255962,-96.007935,0.01,100.0,0.088071,-5.6875
3,EMP000,1504903500,2017-09-08 20:45:00,41.255962,-96.007935,0.01,89.25,0.094796,-15.59375
4,EMP000,1504903501,2017-09-08 20:45:01,41.255962,-96.007935,0.01852,87.0,0.093835,-19.875


In [76]:
# The csv round-trip leaves `timestamp` as text; parse it back into datetimes.
parsed_timestamp = pd.to_datetime(empatica_data['timestamp'])
empatica_data['timestamp'] = parsed_timestamp

In [77]:
# Calendar date of each row as a YYYYMMDD string.  The vectorised .dt accessor
# replaces a per-row Python lambda and yields NaN (instead of raising) for NaT.
empatica_data.insert(loc=3, column='date', value=empatica_data['timestamp'].dt.strftime('%Y%m%d'))


In [78]:
# Time of day of each row as an HH:MM:SS string.  The vectorised .dt accessor
# replaces a per-row Python lambda and yields NaN (instead of raising) for NaT.
empatica_data.insert(loc=4, column='time', value=empatica_data['timestamp'].dt.strftime('%H:%M:%S'))


### empatica data v2

In [80]:
# Work on a copy so the columns added below do not modify `empatica_data`.
empatica_data_v2 = empatica_data.copy()

In [81]:
# Order rows by driver, then chronologically, so consecutive rows can be differenced.
empatica_data_v2 = empatica_data_v2.sort_values(by=['Driver','unix_time'])

In [82]:
# Forward difference: each row receives (value of the next row - its own value).
# NOTE(review): the difference is taken over the whole frame, not per driver,
# so the last row of one driver is compared with the first row of the next.
for source_col, gradient_col in (('HR', 'gradient_HR'),
                                 ('eda', 'gradient_eda'),
                                 ('acc_x', 'gradient_acc')):
    empatica_data_v2[gradient_col] = empatica_data_v2[source_col].diff().shift(-1)

In [83]:
# Seconds between each row and the row that follows it.
seconds_to_next = empatica_data_v2['unix_time'].diff().shift(-1)
empatica_data_v2['interval'] = seconds_to_next

In [84]:
# The last row has no following sample, so its forward differences are NaN;
# set them to 0.  The write goes through the frame's own .iloc: the chained
# form `frame.column.iloc[-1] = 0` assigns to a temporary Series and is not
# guaranteed to reach the frame (it never does under pandas Copy-on-Write).
tail_cols = ['gradient_HR', 'gradient_eda', 'gradient_acc', 'interval']
tail_positions = [empatica_data_v2.columns.get_loc(col) for col in tail_cols]
empatica_data_v2.iloc[-1, tail_positions] = 0

In [85]:
# A gradient is only kept when the next row is exactly one second later;
# everywhere else (gaps, duplicates, missing interval) it is reset to 0.
not_consecutive = empatica_data_v2['interval'] != 1
empatica_data_v2.loc[not_consecutive, ['gradient_HR', 'gradient_eda', 'gradient_acc']] = 0

In [86]:
# `interval` was only a helper for the masking above.
empatica_data_v2 = empatica_data_v2.drop(columns=['interval'])

In [87]:
# Preview the table with the three gradient columns added.
empatica_data_v2.head()

Unnamed: 0,Driver,unix_time,timestamp,date,time,latitude,longitude,speed,HR,eda,acc_x,gradient_HR,gradient_eda,gradient_acc
0,EMP000,1504903497,2017-09-08 20:44:57,20170908,20:44:57,41.255962,-96.007935,0.0,93.0,0.098319,-10.21875,15.5,-0.005124,-12.0
1,EMP000,1504903498,2017-09-08 20:44:58,20170908,20:44:58,41.255962,-96.007935,0.0,108.5,0.093195,-22.21875,-8.5,-0.005124,16.53125
2,EMP000,1504903499,2017-09-08 20:44:59,20170908,20:44:59,41.255962,-96.007935,0.01,100.0,0.088071,-5.6875,-10.75,0.006725,-9.90625
3,EMP000,1504903500,2017-09-08 20:45:00,20170908,20:45:00,41.255962,-96.007935,0.01,89.25,0.094796,-15.59375,-2.25,-0.000961,-4.28125
4,EMP000,1504903501,2017-09-08 20:45:01,20170908,20:45:01,41.255962,-96.007935,0.01852,87.0,0.093835,-19.875,-1.5,0.0,0.375


In [88]:
# Sanity check: (rows, columns) before exporting.
empatica_data_v2.shape

(365228, 14)

In [89]:
# Export the table with gradients (row index is not written).
empatica_data_v2.to_csv(
    '/Users/tongge/Box/driver_stress_tong/Empatica_data/summary_new/empatica_data_new.csv',
    index=False,
)

### empatica data v3

In [90]:
# Reload the exported table; date-like columns come back as plain text/ints.
empatica_data_v3 = pd.read_csv('/Users/tongge/Box/driver_stress_tong/Empatica_data/summary_new/empatica_data_new.csv')


In [91]:
# Epoch seconds -> naive datetime.
# NOTE(review): datetime.fromtimestamp without a tz argument converts to the
# local time zone of the machine running the notebook, so this column is not
# reproducible across machines - confirm the intended zone.
value = empatica_data_v3['unix_time'].apply(datetime.datetime.fromtimestamp)

In [92]:
# Place the local timestamps (computed into `value`) at column position 5.
empatica_data_v3.insert(loc=5, column='timestamp_local', value=value)


In [93]:
# Local time-of-day and date strings.  Both are inserted at position 6, so
# `date_local` ends up immediately before `time_local`.  The vectorised .dt
# accessor replaces a per-row Python lambda and yields NaN (instead of
# raising) for NaT.
empatica_data_v3.insert(loc=6, column='time_local', value=empatica_data_v3['timestamp_local'].dt.strftime('%H:%M:%S'))
empatica_data_v3.insert(loc=6, column='date_local', value=empatica_data_v3['timestamp_local'].dt.strftime('%Y%m%d'))


In [95]:
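# NOTE: the "final version" section below reads empatica_data_mod.csv from this
# same folder. With this export commented out, that file must already exist on
# disk; uncomment the line to regenerate it from empatica_data_v3.
#empatica_data_v3.to_csv('/Users/tongge/Box/driver_stress_tong/Empatica_data/summary_new/empatica_data_mod.csv', index=None)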




### final version

In [99]:
# Sanity check: three local-time columns were added to the reloaded table.
empatica_data_v3.shape

(365228, 17)

In [31]:
# Load the previously exported table that includes the local-time columns.
# NOTE(review): no active cell in this notebook writes this file - confirm it exists.
empatica_data_mod = pd.read_csv('/Users/tongge/Box/driver_stress_tong/Empatica_data/summary_new/empatica_data_mod.csv')

In [32]:
# Order rows by driver, then chronologically, so consecutive rows can be differenced.
empatica_data_mod = empatica_data_mod.sort_values(by=['Driver','unix_time'])

In [33]:
# Discard the forward-difference gradients stored in the file; they are
# recomputed below.
stale_gradients = ['gradient_HR', 'gradient_eda', 'gradient_acc']
empatica_data_mod = empatica_data_mod.drop(columns=stale_gradients)

In [34]:
# Backward difference: each row receives (its own value - value of the previous row).
# `interval` is the same difference applied to the epoch second.
for source_col, diff_col in (('HR', 'gradient_HR'),
                             ('eda', 'gradient_eda'),
                             ('acc_x', 'gradient_acc'),
                             ('unix_time', 'interval')):
    empatica_data_mod[diff_col] = empatica_data_mod[source_col].diff()

In [36]:
# The first row has no preceding sample, so its backward differences are NaN;
# set them to 0.  The write goes through the frame's own .iloc: the chained
# form `frame.column.iloc[0] = 0` assigns to a temporary Series and is not
# guaranteed to reach the frame (it never does under pandas Copy-on-Write).
head_cols = ['gradient_HR', 'gradient_eda', 'gradient_acc', 'interval']
head_positions = [empatica_data_mod.columns.get_loc(col) for col in head_cols]
empatica_data_mod.iloc[0, head_positions] = 0

In [38]:
# A gradient is only kept when the previous row is exactly one second earlier;
# everywhere else it is reset to 0.
gap_before = empatica_data_mod['interval'] != 1
empatica_data_mod.loc[gap_before, ['gradient_HR', 'gradient_eda', 'gradient_acc']] = 0

In [40]:
# `interval` was only a helper for the masking above.
empatica_data_mod = empatica_data_mod.drop(columns=['interval'])

In [43]:
# jump_eda keeps only the rises of the EDA signal: negative gradients become 0,
# non-negative gradients are copied unchanged, missing gradients stay missing.
empatica_data_mod['jump_eda'] = empatica_data_mod['gradient_eda'].clip(lower=0)

In [45]:
# Export the final table (row index is not written).
empatica_data_mod.to_csv(
    '/Users/tongge/Box/driver_stress_tong/Empatica_data/summary_new/empatica_data_mod2.csv',
    index=False,
)
