In [83]:
import pandas as pd
import numpy as np
import glob

from os.path import basename
from datetime import datetime

In [84]:
ifolder = "../data/"

ifiles = [f for f in glob.glob(ifolder + "*.csv", recursive=True)]

# not all files have the same columns, so specify which to import 
csv_cols = ["timestamp", "accel_x", "accel_y", "accel_z", "gyro_roll", "gyro_pitch", "gyro_yaw", "label"]

# array of dataframes; used to merge the data from all dataframes
df_array = []

# iterate input files 
for ifile in ifiles:
    
    # used to identify data source
    fname = basename(ifile).split('.')[0]
    print(fname)

    #with open(ifile) as csv_file:  
    data = pd.read_csv(ifile, usecols = csv_cols)
    data["data_source"] = fname
    
    # data transformation
    # Convert Unix time to DateTime object; Unit: microseconds
    data["datetime"] = pd.to_datetime(data['timestamp'], unit='us')

    # making sure the rows are sequential 
    data.sort_values(by="datetime", ascending=True, inplace=True)

    # calc scalar accelleration
    data["accel"] = np.sqrt(pow(data.accel_x, 2) + pow(data.accel_y, 2) + pow(data.accel_z, 2))

    # calc jerk

    # diff returns a timedelta object; converts to microseconds
    data["delta_t"] = data["datetime"].diff().dt.microseconds

    # convert delta_t to seconds
    data["delta_t"] = data["delta_t"] / 1E6

    # delta acceleration 
    data["delta_a"] = data["accel"].diff()

    data["jerk"] = data.delta_a/data.delta_t

    df_array.append(data)

normal_longitudinal_acceleration_1549908723215048
          timestamp   accel_x   accel_y   accel_z  gyro_roll  gyro_pitch  \
0  1549908723215048  0.027003  0.158736  0.753566   0.019349   -0.015032   
1  1549908723265048  0.047686  0.277788  0.757603  -0.019485   -0.004387   
2  1549908723315048  0.023556  0.223406  0.746896  -0.006711   -0.012363   
3  1549908723365048  0.053432  0.191071  0.756023   0.006588    0.005726   
4  1549908723415048  0.037919  0.160206  0.744438  -0.001393   -0.029378   

   gyro_yaw  label                                        data_source  
0  0.044264      0  normal_longitudinal_acceleration_1549908723215048  
1  0.067111      0  normal_longitudinal_acceleration_1549908723215048  
2  0.090479      0  normal_longitudinal_acceleration_1549908723215048  
3  0.121281      0  normal_longitudinal_acceleration_1549908723215048  
4  0.159513      0  normal_longitudinal_acceleration_1549908723215048  
aggressive_bump_1550163148318484
          timestamp   accel_

In [100]:
# concatenate all dataframes; the index needs to be reset 
df = pd.concat(df_array).reset_index(drop=True)

In [101]:
df.head()

Unnamed: 0,timestamp,accel_x,accel_y,accel_z,gyro_roll,gyro_pitch,gyro_yaw,label,data_source,datetime,accel,delta_t,delta_a,jerk
0,1549908723215048,0.027003,0.158736,0.753566,0.019349,-0.015032,0.044264,0,normal_longitudinal_acceleration_1549908723215048,2019-02-11 18:12:03.215048,0.770576,,,
1,1549908723265048,0.047686,0.277788,0.757603,-0.019485,-0.004387,0.067111,0,normal_longitudinal_acceleration_1549908723215048,2019-02-11 18:12:03.265048,0.808333,0.05,0.037757,0.755142
2,1549908723315048,0.023556,0.223406,0.746896,-0.006711,-0.012363,0.090479,0,normal_longitudinal_acceleration_1549908723215048,2019-02-11 18:12:03.315048,0.779948,0.05,-0.028386,-0.567714
3,1549908723365048,0.053432,0.191071,0.756023,0.006588,0.005726,0.121281,0,normal_longitudinal_acceleration_1549908723215048,2019-02-11 18:12:03.365048,0.781623,0.05,0.001675,0.033506
4,1549908723415048,0.037919,0.160206,0.744438,-0.001393,-0.029378,0.159513,0,normal_longitudinal_acceleration_1549908723215048,2019-02-11 18:12:03.415048,0.762425,0.05,-0.019198,-0.383959


In [102]:
# Mean acceleration values per driver event type (example: aggressive bump) and per label;

grouped = df[["data_source", "label", "accel"]].groupby(["data_source", "label"]).mean()

In [103]:
grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,accel
data_source,label,Unnamed: 2_level_1
aggressive_bump_1550163148318484,0,0.797093
aggressive_bump_1550163148318484,1,0.956922
aggressive_longitudinal_acceleration_1549653321089461,0,0.760171
aggressive_longitudinal_acceleration_1549653321089461,1,0.825468
aggressive_turn_1549625320507325,0,0.768733
aggressive_turn_1549625320507325,1,0.832888
normal_longitudinal_acceleration_1549908723215048,0,0.763528
normal_mixed_1549901031015048,0,0.762132
normal_mixed_1550054269957615,0,0.762704
normal_turn_1549626293857325,0,0.763334


In [105]:
# get the maximum 

idx = df['jerk'].idxmax()

print(idx)

print(idx, df["jerk"][idx] )
print(df.head())


print(df.loc[idx]['datetime'])

print(df.loc[idx]['jerk'])

20769
20769 52.769725244872326
          timestamp   accel_x   accel_y   accel_z  gyro_roll  gyro_pitch  \
0  1549908723215048  0.027003  0.158736  0.753566   0.019349   -0.015032   
1  1549908723265048  0.047686  0.277788  0.757603  -0.019485   -0.004387   
2  1549908723315048  0.023556  0.223406  0.746896  -0.006711   -0.012363   
3  1549908723365048  0.053432  0.191071  0.756023   0.006588    0.005726   
4  1549908723415048  0.037919  0.160206  0.744438  -0.001393   -0.029378   

   gyro_yaw  label                                        data_source  \
0  0.044264      0  normal_longitudinal_acceleration_1549908723215048   
1  0.067111      0  normal_longitudinal_acceleration_1549908723215048   
2  0.090479      0  normal_longitudinal_acceleration_1549908723215048   
3  0.121281      0  normal_longitudinal_acceleration_1549908723215048   
4  0.159513      0  normal_longitudinal_acceleration_1549908723215048   

                    datetime     accel  delta_t   delta_a      jerk  
0 2