In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.signal import butter, lfilter, freqz

# Display floats with four decimal places in pandas output.
pd.options.display.float_format = '{:.4f}'.format
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Prototype Dangerous Driving Detection

For this implementation:
- We are not applying any filter
- We are not adjusting for gravity
- Resample + sliding windows
- Within the sliding window, find instances of 'jerk' (first derivative of acceleration) and other relevant features
- Form a feature vector for each bookingID
- Pass the feature vector of all bookingID into a machine learning model
- Refer to https://en.wikipedia.org/wiki/Jerk_(physics)#Physiological_effects_and_human_perception

#### Discomfort indicators 
from https://www.diva-portal.org/smash/get/diva2:950643/FULLTEXT01.pdf
- Uneven driving, sometimes called “pumping” which may cause motion sickness.
- Heavy braking and sharp acceleration
- Recurring sharp cornering with high quasi-static lateral accelerations
- Jerks, which are perceived as uncomfortable, create insecurity and affect the ability of maintaining balance.

### Load data and process basic features

In [2]:
# Load the telematics readings and the label file.
df = pd.read_csv('safety/features/part-00000-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv')
df_labels = pd.read_csv('safety/labels/part-00000-e9445087-aa0a-433b-a7f6-7f4c19d78ad6-c000.csv')

# Merge the label file with the telematics file (pd.merge defaults to an inner join)
df = pd.merge(df, df_labels, on='bookingID')

## Features
# Speed m/s to km/h (vectorized multiply; no per-row Python lambda needed)
df['Speed_kmh'] = df['Speed'] * 3.6

# Convert the 'second' column to a timedelta so it can be used as a resampling index
df['seconds_delta'] = pd.to_timedelta(df['second'], unit='s')


### Convenience Functions

In [3]:
# Convenience functions to filter sensor data
def butter_lowpass(cutoff, fs, order=5):
    """Design a digital Butterworth low-pass filter.

    Parameters
    ----------
    cutoff : cutoff frequency, in the same units as ``fs``.
    fs : sampling frequency.
    order : filter order (default 5).

    Returns
    -------
    The ``(b, a)`` coefficient pair produced by ``scipy.signal.butter``.
    """
    # butter() is given the cutoff as a fraction of the Nyquist frequency (fs / 2).
    nyquist = 0.5 * fs
    return butter(order, cutoff / nyquist, btype='low', analog=False)

def butter_lowpass_filter(data, cutoff, fs, order=4):
    """Run ``data`` through a Butterworth low-pass filter.

    The coefficients come from ``butter_lowpass(cutoff, fs, order=order)`` and are
    applied with ``scipy.signal.lfilter``. Returns the filtered samples.
    """
    numerator, denominator = butter_lowpass(cutoff, fs, order=order)
    return lfilter(numerator, denominator, data)

def lowpassFilterAccelGyro(df, fs=60, accel_cutoff=2, gyro_cutoff=0.5, order=3):
    """Add low-pass filtered copies of the accelerometer and gyroscope columns.

    For each of ``acceleration_x/y/z`` and ``gyro_x/y/z`` a new column with the
    suffix ``_fltr`` is written to ``df`` in place. Nothing is returned.

    Parameters
    ----------
    df : DataFrame containing the six sensor columns named above.
    fs : sample rate in Hz (default 60, the previously hard-coded value).
    accel_cutoff : cutoff frequency in Hz for the accelerometer columns (default 2).
    gyro_cutoff : cutoff frequency in Hz for the gyroscope columns (default 0.5).
    order : Butterworth filter order (default 3).

    NOTE(review): ``fs`` should match the real sampling rate of ``df``.
    buildDrivingFeatures passes a frame resampled with the default '1S' rate
    while ``fs`` stays at 60 -- confirm the defaults are appropriate there.
    """
    # A tuple of pairs (not a dict) keeps the column creation order fixed:
    # acceleration x, y, z first, then gyro x, y, z.
    sensor_cutoffs = (('acceleration', accel_cutoff), ('gyro', gyro_cutoff))
    for sensor, cutoff in sensor_cutoffs:
        for axis in ('x', 'y', 'z'):
            column = sensor + '_' + axis
            df[column + '_fltr'] = butter_lowpass_filter(df[column], cutoff, fs, order)
    
def resampleSensorData(df, period_col = 'seconds_delta', resample_rate = '1S'):
    """Resample ``df`` onto a regular time grid.

    ``period_col`` becomes the index, rows are averaged within each
    ``resample_rate`` bin, and empty bins are filled by ``interpolate()``
    with its default settings. Returns the resampled frame.
    """
    time_indexed = df.set_index(period_col)
    bin_means = time_indexed.resample(resample_rate).mean()
    return bin_means.interpolate()

# Convenience function to plot charts for accelerometer, gyroscope and speed
def drawAccelGyroChart(df):
    """Plot accelerometer, gyroscope and speed traces in three stacked panels.

    ``df`` must provide ``acceleration_x/y/z``, ``gyro_x/y/z`` and ``Speed_kmh``.
    If it still has a ``seconds_delta`` column, that column becomes the x-axis;
    otherwise the frame's existing index is used. Nothing is returned.
    """
    f, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(16, 6))

    # Explicit column check instead of a bare ``except:`` so that unrelated
    # errors (typos, interrupts, ...) are no longer silently swallowed.
    if 'seconds_delta' in df.columns:
        temp_df = df.set_index('seconds_delta')
    else:
        temp_df = df

    axis_colours = (('x', 'red'), ('y', 'blue'), ('z', 'green'))
    # Accelerometer
    for axis, colour in axis_colours:
        temp_df['acceleration_' + axis].plot(c=colour, ax=ax1)
    # Gyroscope
    for axis, colour in axis_colours:
        temp_df['gyro_' + axis].plot(c=colour, ax=ax2)
    # Speed
    temp_df['Speed_kmh'].plot(c='green', ax=ax3)

    ax1.set_title("Accelerometer")
    ax2.set_title("Gyroscope")
    ax3.set_title("Speed (km/h)")
    
def drawAccelFilterChart(df):
    """Plot raw (red) against filtered (blue) acceleration, one panel per axis.

    ``df`` must provide ``acceleration_x/y/z`` and the matching ``*_fltr``
    columns. If it still has a ``seconds_delta`` column, that column becomes
    the x-axis; otherwise the frame's existing index is used. Nothing is returned.
    """
    f, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(16, 6))

    # Explicit column check instead of a bare ``except:`` so that unrelated
    # errors (typos, interrupts, ...) are no longer silently swallowed.
    if 'seconds_delta' in df.columns:
        temp_df = df.set_index('seconds_delta')
    else:
        temp_df = df

    for axis, ax in zip(('x', 'y', 'z'), (ax1, ax2, ax3)):
        raw_column = 'acceleration_' + axis
        temp_df[raw_column].plot(c='red', ax=ax)
        temp_df[raw_column + '_fltr'].plot(c='blue', ax=ax)
        ax.set_title(axis + "-axis")

### Preprocessing for every booking
- Resample observation
- Filter using low pass or band filter
- Compute Singular acceleration vector
- Calculate vector magnitude
- Adjust for gravity (?)


In [4]:
def buildDrivingFeatures(df):
    """Resample one booking's sensor data and add acceleration-magnitude columns.

    Returns the frame produced by ``resampleSensorData(df)`` with the ``*_fltr``
    columns written by ``lowpassFilterAccelGyro`` plus ``accel_magnitude`` and
    ``accel_magnitude_fltr``.
    """
    features = resampleSensorData(df)
    # Adds the *_fltr columns to `features` in place.
    lowpassFilterAccelGyro(features)

    def _magnitude(suffix=''):
        # Euclidean norm of the three acceleration axes for the given column suffix.
        x = features['acceleration_x' + suffix]
        y = features['acceleration_y' + suffix]
        z = features['acceleration_z' + suffix]
        return np.sqrt(x**2 + y**2 + z**2)

    features['accel_magnitude'] = _magnitude()
    features['accel_magnitude_fltr'] = _magnitude('_fltr')
    return features

### Feature Vector

for every id:
    create feature_vector(bookingDF)
    
def feature_vector(bookingDF) 
<br>
    empty_feature_df
<br>
    empty_feature_df['feature1'] = func_add_feature1(bookingDF)
    <br>
    empty_feature_df['feature2'] = func_add_feature2(bookingDF)

In [28]:
def createFeatureVector(df, bookingID):
    """Summarise one booking's speed as a flat feature dictionary.

    Parameters
    ----------
    df : DataFrame with a ``Speed_kmh`` column.
    bookingID : identifier stored unchanged under the ``'bookingID'`` key.

    Returns
    -------
    dict with the keys ``bookingID``, ``average_speed``, ``stdDev_speed``,
    ``max_speed`` and ``variance_speed``.
    """
    speed = df['Speed_kmh']

    # put functions to extract features here
    return {
        'bookingID': bookingID,
        'average_speed': speed.mean(),
        'stdDev_speed': speed.std(),
        'max_speed': speed.max(),
        'variance_speed': speed.var(),
    }

In [None]:
# Test creating feature vectors
temp_feature_arr = []
# groupby splits the frame once instead of re-scanning it for every bookingID;
# sort=False keeps bookings in order of first appearance, like unique() did.
for bookingID, booking_df in df.groupby('bookingID', sort=False):
    print(bookingID)
    # resample to 1 second
    booking_resampled = resampleSensorData(booking_df, period_col = 'seconds_delta', resample_rate = '1S')

    # create feature vector
    temp_feature_arr.append(createFeatureVector(booking_resampled, bookingID))

# Build the frame once from the list of dicts, then index it by bookingID.
feature_df = pd.DataFrame(temp_feature_arr).set_index('bookingID')

1202590843006
274877907034
884763263056
1073741824054
1056561954943
1185410973787
163208757379
884763262976
841813590178
300647710810
1211180777477
1236950581379
1099511627855
1176821039151
1176821039224
1417339207759
1065151889565
163208757322
1108101562481
833223655481
292057776139
249108103315
1211180777501
1288490188931
936302870667
867583393919
94489280599
1357209665649
1494648619075
1683627180179
704374636606
8589934603
627065225263
730144440410
1382979469343
1451698946155
463856467995
712964571171
962072674361
627065225265
635655159892
128849019034
515396075612
1108101562437
1073741824087
592705486863
970662608985
1623497637904
51539607586
188978561041
498216206382
249108103350
412316860583
704374636616
1108101562521
987842478124
1176821039233
996432412804
1374389534854
163208757301
755914244232
1529008357524
979252543584
472446402733
463856468017
661424963629
420906795020
1563368095916
1228360646845
919123001356
498216206387
1159641170047
1503238553748
1657857376336
15719580303

In [35]:
# Preview the first rows rather than dumping every booking.
feature_df.head()

Unnamed: 0_level_0,average_speed,max_speed,stdDev_speed,variance_speed
bookingID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1202590843006,15.8925,63.5499,17.1576,294.382
274877907034,46.1634,97.02,40.079,1606.3284
884763263056,40.8506,96.6673,35.4344,1255.5944
1073741824054,23.8682,67.0928,18.7827,352.7896
1056561954943,47.0158,84.2457,26.1117,681.8184


In [26]:
# First five distinct booking IDs, in order of appearance.
pd.unique(df['bookingID'])[:5]

array([1202590843006,  274877907034,  884763263056, 1073741824054,
       1056561954943])

In [9]:
# Pull out one booking, ordered by time, for manual inspection.
# bookingID holds integers, so compare against an integer literal rather
# than a quoted string (a str/int comparison does not reliably match rows).
df_temp = (df
        .query("bookingID == 1202590843006")
        .sort_values(by='seconds_delta')
        .set_index('seconds_delta')
        )
df_temp

Unnamed: 0_level_0,bookingID,Accuracy,Bearing,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z,second,Speed,label,Speed_kmh
seconds_delta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
00:00:25,1202590843006,6.1810,316.0000,1.3147,9.5030,3.7511,0.0149,-0.0023,-0.0317,25.0000,7.9988,1,28.7957
00:00:29,1202590843006,10.3680,326.0000,5.0354,7.9671,5.2730,-0.0247,-0.4853,-0.2051,29.0000,5.1328,1,18.4779
00:00:31,1202590843006,10.3970,51.0000,4.9702,8.3408,4.8616,-0.1718,-0.8928,-0.3700,31.0000,2.4323,1,8.7562
00:00:43,1202590843006,14.7420,130.0000,1.7143,8.8398,3.2737,-0.0341,-0.0446,0.0739,43.0000,14.5131,1,52.2471
00:00:45,1202590843006,17.8250,134.0000,2.2291,8.3128,5.5607,-0.1777,0.0279,0.0374,45.0000,14.1713,1,51.0166
00:00:46,1202590843006,16.0270,135.0000,1.6185,7.7068,6.3373,-0.0101,-0.0694,0.0142,46.0000,12.4964,1,44.9871
00:00:57,1202590843006,8.6020,35.0000,1.5050,8.8604,4.2590,0.0178,0.0992,0.0328,57.0000,2.1379,1,7.6963
00:00:59,1202590843006,7.9930,31.0000,1.7420,8.8948,3.8663,0.0024,0.0520,0.0294,59.0000,1.1354,1,4.0873
00:01:00,1202590843006,7.5650,42.0000,1.5830,8.8756,3.9962,-0.0062,-0.0328,-0.0242,60.0000,0.6727,1,2.4217
00:01:01,1202590843006,4.8280,42.0000,1.6355,9.0045,4.0722,0.0090,-0.0156,-0.0224,61.0000,0.0000,1,0.0000


In [30]:
# Feature frame for the single booking held in df_temp.
# The ID is passed as an integer to match the dtype of the bookingID column.
temp_feature_arr = [createFeatureVector(df_temp, 1202590843006)]
feature_df = pd.DataFrame(temp_feature_arr)

In [31]:
# Show the current contents of feature_df.
feature_df

Unnamed: 0,average_speed,bookingID,max_speed,stdDev_speed,variance_speed
0,19.1665,1202590843006,63.5499,20.2938,411.839


In [33]:
# Assigning a scalar to a column of an empty DataFrame creates the column but
# no rows, so build the one-row frame directly from one-element lists.
# Speed_kmh is used so the units match createFeatureVector.
speed_kmh = df_temp['Speed_kmh']
feature_df = pd.DataFrame({
    'average_speed': [speed_kmh.mean()],
    'stdDev_speed': [speed_kmh.std()],
})

In [41]:
# Wrap each scalar in a one-element Series (index 0) before assigning it.
speed_kmh = df_temp['Speed_kmh']
feature_df['average_speed'] = pd.Series([speed_kmh.mean()])
feature_df['stdDev_speed'] = pd.Series([speed_kmh.std()])

In [42]:
# Show the current contents of feature_df.
feature_df

Unnamed: 0,average_speed,stdDev_speed
0,19.1665,20.2938
