In [1]:
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np

from datetime import datetime, timedelta

import pylab

In [2]:
datapath = '../data/'

In [3]:
import glob
files = glob.glob(datapath+'*.csv')
files[0]

'../data/FC3DC207-9459-4CBE-8AB5-0E0AC77D5FEC.csv'

In [4]:
dfs = [pd.read_csv(f) for f in files]
dfs[1] = dfs[1].rename(columns={" timestamp ": "timestamp"})

In [5]:
for df in dfs:
    df['mag'] = np.sqrt(df['x']**2+df['y']**2+df['z']**2)
    df['time'] = pd.to_datetime(df['timestamp'], unit='ms')
dfs[0].head(5)

Unnamed: 0,timestamp,type,lat,lon,height,accuracy,speed,bearing,x,y,z,mag,time
0,1431035000000.0,gps,41.978753,-87.893018,194.760063,10.0,14.36,139.570312,,,,,2015-05-07 21:46:58.761430
1,1431035000000.0,accelerometer,,,,,,,-0.075974,-0.448395,-0.867188,0.979206,2015-05-07 21:46:58.834430
2,1431035000000.0,accelerometer,,,,,,,-0.079025,-0.435837,-0.919876,1.020966,2015-05-07 21:46:58.932430
3,1431035000000.0,accelerometer,,,,,,,-0.069183,-0.456039,-0.921722,1.030694,2015-05-07 21:46:59.030430
4,1431035000000.0,accelerometer,,,,,,,-0.059341,-0.456772,-0.896149,1.007593,2015-05-07 21:46:59.129430


In [6]:
def prepare_dfs(dfs,window):
    sorted_dfs = []
    rolling_mean = []
    rolling_std = []
    for df in dfs:
        _temp = df.sort_values(by='time')
        _temp = _temp.set_index(['time'])

        _mean = _temp.rolling(window).mean()
        _std = _temp.rolling(window).std()
    
        sorted_dfs.append(_temp)
        rolling_mean.append(_mean)
        rolling_std.append(_std)
    
    return sorted_dfs, rolling_mean, rolling_std




In [7]:
def Find_events(dfs, variable, window):
    events_df = pd.DataFrame()
    drivers = []
    for i in range(0,25):
        test = dfs[i].sort_values(by='time')
        test = test.set_index(['time'])

        test2 = test.rolling(window).mean()
        test2_std = test.rolling(window).std()

        for j in range(0,len(test.index)):
            if test[variable][j] > (test2[variable][j]+(2.9*test2_std[variable][j])):
                if test[variable][j] > (test[variable].mean()+(5.0*test[variable].std())):
                    print(i,test2.index[j],test[variable][j],test2[variable][j],test2_std[variable][j],test[variable].max(),test[variable].mean(),test[variable].std() )
                    events_df = events_df.append(test.iloc[j])
                    drivers.append(i)
    events_df['driver']=drivers
    return events_df
                

In [8]:
def Gather_info(event, dfs, gps, time):
    
    speed_mean_before=[]
    speed_mean_after =[]
    acc_mean_before=[]
    acc_mean_after =[]    
    distance_sum_after = []
    bearing_range_before = []
    bearing_range_after = []
    
    for i in range(0,len(event.index)):
        
        before = dfs[event.driver[i]][ (dfs[event.driver[i]].index> (event.index[i]-timedelta(seconds=time) )) & 
                       (dfs[event.driver[i]].index< (event.index[i]))]

        after = dfs[event.driver[i]][ (dfs[event.driver[i]].index> (event.index[i])) & 
                       (dfs[event.driver[i]].index< (event.index[i]+timedelta(seconds=time) ))]
        
        speed_mean_before.append(before.speed.mean())
        speed_mean_after.append(after.speed.mean())
        acc_mean_before.append(before.mag.mean())
        acc_mean_after.append(after.mag.mean())
        
        
        gps_after = gps[event.driver[i]][ gps[event.driver[i]].index> (event.index[i])]
        distance_sum_after.append(gps_after.distance.sum())
        
        gps_before = gps[event.driver[i]][ (gps[event.driver[i]].index> (event.index[i]-timedelta(seconds=10) )) & 
                       (gps[event.driver[i]].index< (event.index[i]))]
        
        gps_after = gps[event.driver[i]][ (gps[event.driver[i]].index> (event.index[i])) & 
                       (gps[event.driver[i]].index< (event.index[i]+timedelta(seconds=10) ))]
        
        bearing_range_before.append(gps_before.bearing.max() - gps_before.bearing.min())
        bearing_range_after.append(gps_after.bearing.max() - gps_after.bearing.min())
        if event.driver[i] == 3:
            print(gps_before.bearing.max(),gps_before.bearing.min())
            print(gps_after.bearing.max(),gps_after.bearing.min())
            print(bearing_range_before, bearing_range_after)
        
        
    event['speed_before'] = speed_mean_before
    event['speed_after']  = speed_mean_after     
    event['mag_before']   = acc_mean_before    
    event['mag_after']    = acc_mean_after
    event['distance_after'] = distance_sum_after
    event['bearing_before'] = bearing_range_before
    event['bearing_after'] = bearing_range_after
    
    return event
    
    
    

In [9]:
def Plot_crash(event, raw_profile, rolling_mean):
    f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(9,9))
    
    ax1.plot(raw_profile.index,raw_profile.mag)
    ax1.plot(rolling_mean.index, rolling_mean.mag)
    ax1.plot(event.index,event.mag, marker='*',markersize=20,c='k')
 
    ax2.plot(raw_profile.index,raw_profile.mag)
    ax2.plot(rolling_mean.index, rolling_mean.mag)
    ax2.plot(event.index,event.mag, marker='*',markersize=20,c='k')
    ax2.set_xlim([event.index - timedelta(seconds=30),event.index + timedelta(seconds=30)])
             
    speed_df = raw_profile[['speed','lat','lon','height']].dropna(axis=0)
    ax3.plot(speed_df.index,speed_df.speed)
    ax3.plot([event.index,event.index],[speed_df.speed.min(),speed_df.speed.max()],c='k')
    
    ax4.plot(speed_df.index,speed_df.speed)
    ax4.plot([event.index,event.index],[speed_df.speed.min(),speed_df.speed.max()],c='k')
    ax4.set_xlim([event.index - timedelta(seconds=30),event.index + timedelta(seconds=30)])
    
    ax4.text(event.index+timedelta(seconds=5), speed_df.speed.max()-1, event.speed_before[0])
    ax4.text(event.index+timedelta(seconds=5), speed_df.speed.max()-3, event.speed_after[0])
    ax4.text(event.index+timedelta(seconds=5), speed_df.speed.max()-5, event.mag_before[0])
    ax4.text(event.index+timedelta(seconds=5), speed_df.speed.max()-7, event.mag_after[0])
    
    
    
    
    plt.show()

In [10]:
def Calc_distance(dfs):
    new_df = []
    for df in dfs:
        _df = df[['lat','lon','bearing']].dropna(axis=0)
        distance = []
        distance.append(0.)
        summed_distance = []
        summed_distance.append(0.)
        for i in range(1,len(_df.index)):
            lat1 = _df.lat.iloc[i-1]
            lat2 = _df.lat.iloc[i]
            lon1 = _df.lon.iloc[i-1]
            lon2 = _df.lon.iloc[i]

            a = (np.sin((lat2 - lat1)/2))**2 + (np.cos(lat1) * np.cos(lat2) * (np.sin((lon2 - lon1)/2))**2)
            c = 2 * np.arctan2(np.sqrt(a),np.sqrt(1 - a))
            #sofar = np.sum(distance)
            d = (6371.0*c)
            distance.append(d)
            summed_distance.append(d + summed_distance[i-1])
        _df['distance'] = distance
        _df['summed_distance'] = summed_distance
        new_df.append(_df)
    return new_df
        

In [11]:
new_dfs = Calc_distance(dfs)
sorted_dfs, rolling_mean, rolling_std = prepare_dfs(dfs,'1s')
gps_dfs = Calc_distance(sorted_dfs)
event = Find_events(dfs,'mag','1s')
event = Gather_info(event, sorted_dfs, gps_dfs, 30)

0 2015-05-07 21:51:56.474430 2.54167704146 1.17974123545 0.45567960114 3.07437785147 1.0252136609824674 0.15636141511869553
1 2015-06-20 22:22:47.640070 1.8141591553 1.06003345821 0.257170416316 2.08899488177 1.0000514440092911 0.07058346365538637
3 2015-03-29 16:49:19.847200 6.19279813334 1.39232124893 1.61349417437 8.17820442838 1.0148427262151816 0.22484109289041454
4 2015-04-20 22:48:51.953430 2.03850173609 1.08464944084 0.322948850671 2.03850173609 1.0114027795450182 0.10232779309330343
6 2015-03-19 21:32:49.043870 2.58414834802 1.0708769514 0.512740383164 7.49083739885 1.0310558508151255 0.29910192886533754
6 2015-03-19 21:36:22.383870 2.53515295875 1.13820692773 0.480446886636 7.49083739885 1.0310558508151255 0.29910192886533754
6 2015-03-19 21:39:51.137870 2.61012842349 1.19344632947 0.482099134015 7.49083739885 1.0310558508151255 0.29910192886533754
7 2015-04-13 21:15:43.407980 2.27768156958 1.09696841471 0.397023563022 7.87947406308 1.0154665043115036 0.18138339003605158
7 20

In [12]:
def Plot_crash(event, raw_profile, rolling_mean, gps_dfs):
    f, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(3, 2, figsize=(9,13.5))
    
    ax1.plot(raw_profile.index,raw_profile.mag)
    ax1.plot(rolling_mean.index, rolling_mean.mag)
    ax1.plot(event.index,event.mag, marker='*',markersize=20,c='k')
    ax1.set_title("Acceleration - Full profile")
 
    ax2.plot(raw_profile.index,raw_profile.mag)
    ax2.plot(rolling_mean.index, rolling_mean.mag)
    ax2.plot(event.index,event.mag, marker='*',markersize=20,c='k')
    ax2.set_xlim([event.index - timedelta(seconds=30),event.index + timedelta(seconds=30)])
    ax2.set_title("Acceleration - -30sec to +30sec of event")
             
    speed_df = raw_profile[['speed','lat','lon','height']].dropna(axis=0)
    ax3.plot(speed_df.index,speed_df.speed)
    ax3.plot([event.index,event.index],[speed_df.speed.min(),speed_df.speed.max()],c='k')
    ax3.set_title("Speed - Full profile")
    
    ax4.plot(speed_df.index,speed_df.speed)
    ax4.plot([event.index,event.index],[speed_df.speed.min(),speed_df.speed.max()],c='k')
    ax4.set_xlim([event.index - timedelta(seconds=30),event.index + timedelta(seconds=30)])
    ax4.set_title("Speed - -30sec to +30sec of event")
    
    ax4.text(event.index+timedelta(seconds=5), speed_df.speed.max()-1, event.speed_before[0])
    ax4.text(event.index+timedelta(seconds=5), speed_df.speed.max()-3, event.speed_after[0])
    ax4.text(event.index+timedelta(seconds=5), speed_df.speed.max()-5, event.mag_before[0])
    ax4.text(event.index+timedelta(seconds=5), speed_df.speed.max()-7, event.mag_after[0])
    
    ax5.plot(gps_dfs.index, gps_dfs.distance)
    ax5.plot([event.index,event.index],[gps_dfs.distance.min(),gps_dfs.distance.max()],c='k')
    ax5.set_title("Distance - Full profile")
    ax5.set_xlim([event.index - timedelta(seconds=30),gps_dfs.index.max()])
    ax5.text(event.index+timedelta(seconds=5), gps_dfs.distance.max(), event.distance_after[0])
    
    #ax6.plot(gps_dfs.index, gps_dfs.distance)
    #ax6.plot([event.index,event.index],[gps_dfs.distance.min(),gps_dfs.distance.max()],c='k')
    #ax6.set_xlim([event.index - timedelta(seconds=30),gps_dfs.index.max()])
    
    ax6.plot(gps_dfs.index, gps_dfs.bearing)
    ax6.plot([event.index,event.index],[gps_dfs.bearing.min(),gps_dfs.bearing.max()],c='k')
    ax6.set_xlim([event.index - timedelta(seconds=30),event.index + timedelta(seconds=30)])
    ax6.set_title("Bearing - -30sec to +30sec of event")
    ax6.text(event.index+timedelta(seconds=5), 250, event.bearing_before[0])
    ax6.text(event.index+timedelta(seconds=5), 230, event.bearing_after[0])
    
    pylab.savefig('../figs/'+str(event.iloc[0:1].driver[0])+'_'+str(i)+'.png', bbox_inches=0)
    plt.close()
    
    
    
    
    #plt.show()

In [13]:
#for i in range(0,len(event.index)):
#    Plot_crash(event.iloc[i:i+1],sorted_dfs[event.driver[i]], rolling_mean[event.driver[i]], gps_dfs[event.driver[i]])



In [14]:
event.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 37 entries, 2015-05-07 21:51:56.474430 to 2015-05-17 16:08:21.698600
Data columns (total 20 columns):
accuracy          0 non-null float64
bearing           0 non-null float64
height            0 non-null float64
lat               0 non-null float64
lon               0 non-null float64
mag               37 non-null float64
speed             0 non-null float64
timestamp         37 non-null float64
type              37 non-null object
x                 37 non-null float64
y                 37 non-null float64
z                 37 non-null float64
driver            37 non-null int64
speed_before      35 non-null float64
speed_after       34 non-null float64
mag_before        37 non-null float64
mag_after         37 non-null float64
distance_after    37 non-null float64
bearing_before    33 non-null float64
bearing_after     33 non-null float64
dtypes: float64(18), int64(1), object(1)
memory usage: 7.3+ KB


In [15]:
event[event.speed_before>event.speed_after].info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 17 entries, 2015-05-07 21:51:56.474430 to 2015-05-17 16:04:23.392600
Data columns (total 20 columns):
accuracy          0 non-null float64
bearing           0 non-null float64
height            0 non-null float64
lat               0 non-null float64
lon               0 non-null float64
mag               17 non-null float64
speed             0 non-null float64
timestamp         17 non-null float64
type              17 non-null object
x                 17 non-null float64
y                 17 non-null float64
z                 17 non-null float64
driver            17 non-null int64
speed_before      17 non-null float64
speed_after       17 non-null float64
mag_before        17 non-null float64
mag_after         17 non-null float64
distance_after    17 non-null float64
bearing_before    17 non-null float64
bearing_after     17 non-null float64
dtypes: float64(18), int64(1), object(1)
memory usage: 2.8+ KB


In [16]:
event[(event.speed_before>event.speed_after) & (event.distance_after<10) & ((event.bearing_after - event.bearing_before)>100)].info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3 entries, 2015-03-29 16:49:19.847200 to 2015-06-20 20:55:57.826120
Data columns (total 20 columns):
accuracy          0 non-null float64
bearing           0 non-null float64
height            0 non-null float64
lat               0 non-null float64
lon               0 non-null float64
mag               3 non-null float64
speed             0 non-null float64
timestamp         3 non-null float64
type              3 non-null object
x                 3 non-null float64
y                 3 non-null float64
z                 3 non-null float64
driver            3 non-null int64
speed_before      3 non-null float64
speed_after       3 non-null float64
mag_before        3 non-null float64
mag_after         3 non-null float64
distance_after    3 non-null float64
bearing_before    3 non-null float64
bearing_after     3 non-null float64
dtypes: float64(18), int64(1), object(1)
memory usage: 504.0+ bytes


In [17]:
#event_cut = event[(event.speed_before>event.speed_after) & (event.distance_after<10) & ((event.bearing_after - event.bearing_before)>100)]

In [18]:
event_cut = event[(event.speed_before>event.speed_after) & (event.distance_after<10) & ((event.bearing_after - event.bearing_before)>100)]
for i in range(0,len(event_cut.index)):
    Plot_crash(event_cut.iloc[i:i+1],sorted_dfs[event_cut.driver[i]], rolling_mean[event_cut.driver[i]], gps_dfs[event_cut.driver[i]])


