In [1]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KernelDensity
from sklearn.decomposition import PCA

In [2]:
def timestamp2date(timestamp):
    """Format a Unix timestamp as a ``YYYY-MM-DD HH:MM:SS`` string in local time."""
    # time.localtime() applies the time zone of the machine running the notebook
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp))
def get_tgAcceleration(df):
    """Add a ``tg_acceleration`` column holding the speed change per unit of time.

    The difference between consecutive ``VehSpdLgtA`` values is divided by 3.6
    and then by the difference between consecutive ``t`` values. The first row
    has no predecessor, so its value is NaN.

    ``df`` is modified in place (the new column is written onto it) and the
    same object is returned.
    """
    speed_step = df["VehSpdLgtA"].diff().div(3.6)
    time_step = df["t"].diff()
    df["tg_acceleration"] = speed_step.div(time_step)
    return df
def kde2D(x, y, bandwidth, xbins=10j, ybins=10j, **kwargs):
    """Fit a 2D kernel density estimate and evaluate it on a regular grid.

    Parameters
    ----------
    x, y : 1-D samples; must provide ``.min()`` and ``.max()``.
    bandwidth : forwarded to ``KernelDensity``.
    xbins, ybins : step of the ``np.mgrid`` slices. An imaginary value such
        as the default ``10j`` asks for that many points, end point included.
    **kwargs : any further ``KernelDensity`` arguments.

    Returns
    -------
    xx, yy : grid coordinates produced by ``np.mgrid``.
    zz : density evaluated on the grid, same shape as ``xx``.
    z_norm : flat array of the grid densities divided by their sum.
    """
    grid_x, grid_y = np.mgrid[x.min():x.max():xbins,
                              y.min():y.max():ybins]

    # the estimator is both fitted and queried with columns ordered (y, x)
    train_points = np.vstack([y, x]).T
    grid_points = np.vstack([grid_y.ravel(), grid_x.ravel()]).T

    estimator = KernelDensity(bandwidth=bandwidth, **kwargs)
    estimator.fit(train_points)

    # score_samples() yields log-densities, hence the exponential
    density = np.exp(estimator.score_samples(grid_points))
    density_grid = density.reshape(grid_x.shape)
    density_share = (density / density.sum()).ravel()
    return grid_x, grid_y, density_grid, density_share

In [3]:
# load the raw log (absolute path on the author's machine)
df = pd.read_csv(filepath_or_buffer=r"/Users/liuyihang/Desktop/brakedata.csv")
# preview ten consecutive records
df.iloc[2022:2032]

Unnamed: 0,t,BrkPedlPsdBrkPedlPsd,VehSpdLgtA
2022,1664775501,NoYes1_No,15.11997
2023,1664775502,NoYes1_No,15.1317
2024,1664775503,NoYes1_No,15.11606
2025,1664775504,NoYes1_No,15.04568
2026,1664775505,NoYes1_No,15.16689
2027,1664775506,NoYes1_No,15.28419
2028,1664775507,NoYes1_No,15.31156
2029,1664775508,NoYes1_No,15.27246
2030,1664775509,NoYes1_No,15.16689
2031,1664775510,NoYes1_No,15.04568


In [4]:
# add a human-readable local-time string for every Unix timestamp in "t"
df["date"] = df["t"].map(timestamp2date)

In [5]:
# speed: m/s -> km/h (factor 3.6), then keep only the columns used downstream
# NOTE: re-running this cell multiplies the speed again
df["VehSpdLgtA"] = df["VehSpdLgtA"].mul(3.6)
kept_columns = ["t", "date", "VehSpdLgtA", "BrkPedlPsdBrkPedlPsd"]
df = df.loc[:, kept_columns]
df.iloc[2022:2032]

Unnamed: 0,t,date,VehSpdLgtA,BrkPedlPsdBrkPedlPsd
2022,1664775501,2022-10-03 13:38:21,54.431892,NoYes1_No
2023,1664775502,2022-10-03 13:38:22,54.47412,NoYes1_No
2024,1664775503,2022-10-03 13:38:23,54.417816,NoYes1_No
2025,1664775504,2022-10-03 13:38:24,54.164448,NoYes1_No
2026,1664775505,2022-10-03 13:38:25,54.600804,NoYes1_No
2027,1664775506,2022-10-03 13:38:26,55.023084,NoYes1_No
2028,1664775507,2022-10-03 13:38:27,55.121616,NoYes1_No
2029,1664775508,2022-10-03 13:38:28,54.980856,NoYes1_No
2030,1664775509,2022-10-03 13:38:29,54.600804,NoYes1_No
2031,1664775510,2022-10-03 13:38:30,54.164448,NoYes1_No


In [6]:
# drop duplicate records: keep the first row seen for each timestamp,
# then order chronologically and renumber the rows
df = (
    df.drop_duplicates(subset=["t"])
    .sort_values(by=["t"])
    .reset_index(drop=True)
)

In [7]:
# split the log into journeys: a new journey starts at every row that is not
# exactly 1 after the previous timestamp (the very first row included, since
# its difference is NaN)
df["dt"] = df["t"].diff()
df["stop"] = df["dt"].ne(1)
df["journeyID"] = df["stop"].cumsum()
# show the first row of each of the first journeys
df[df["dt"].ne(1)].head(5)

Unnamed: 0,t,date,VehSpdLgtA,BrkPedlPsdBrkPedlPsd,dt,stop,journeyID
0,1664773468,2022-10-03 13:04:28,20.26944,NoYes1_No,,True,1
1264,1664774743,2022-10-03 13:25:43,27.603036,NoYes1_No,12.0,True,2
9503,1664785018,2022-10-03 16:16:58,0.0,NoYes1_No,2037.0,True,3
14325,1664793376,2022-10-03 18:36:16,0.0,NoYes1_No,3537.0,True,4


In [8]:
# interpolate missing speed values within each journey
# transform() returns a result indexed like df, so it can be assigned straight
# back to the column. groupby(...).apply() prepends the group key to the index
# on pandas >= 2.0, which makes the assignment fail.
df["VehSpdLgtA"] = (
    df.groupby("journeyID")["VehSpdLgtA"]
    .transform(lambda v: v.interpolate(limit_direction="both"))
    # a journey with no speed reading at all cannot be interpolated: use 0
    .fillna(0)
)
# remaining missing values per column
df.isnull().sum()

t                       0
date                    0
VehSpdLgtA              0
BrkPedlPsdBrkPedlPsd    0
dt                      1
stop                    0
journeyID               0
dtype: int64

In [9]:
# calculate tangential acceleration, one journey at a time
# The groups are iterated explicitly instead of using groupby(...).apply():
# each journey reaches the helper as a complete frame (journeyID included) and
# the result keeps df's original row index on every pandas version. apply()
# adds journeyID as an extra index level on pandas >= 2.0, which makes a later
# groupby("journeyID") ambiguous. .copy() keeps the helper's in-place column
# assignment away from a slice of df.
df = pd.concat(
    [get_tgAcceleration(journey.copy()) for _, journey in df.groupby("journeyID")]
)

In [10]:
# omit the first row of every journey (its tg_acceleration is NaN)
# cumcount() numbers the rows of each journey from 0, so a boolean mask does
# the job without groupby(...).apply() on the whole frame, which is deprecated
# in recent pandas when the function touches the grouping column. Rows keep
# their current order; journeys are contiguous and ascending, so this matches
# the group-by-group order.
is_first_row = df.groupby("journeyID").cumcount() == 0
df = df[~is_first_row].reset_index(drop=True)

In [11]:
# omit meaningless journeys: speeds summing to 0, or fewer than 3 * 60 records
# (records inside a journey are spaced 1 apart in "t")
journey_speed = df.groupby("journeyID")["VehSpdLgtA"]
df["is_meaningless"] = journey_speed.transform(
    lambda v: bool(np.sum(v) == 0 or len(v) < 3 * 60)
)
kept_columns = ["t", "date", "journeyID", "VehSpdLgtA", "tg_acceleration", "BrkPedlPsdBrkPedlPsd"]
df = df.loc[~df["is_meaningless"], kept_columns].reset_index(drop=True)
# rows on either side of position 1264
df.iloc[1264 - 5:1264 + 5]

Unnamed: 0,t,date,journeyID,VehSpdLgtA,tg_acceleration,BrkPedlPsdBrkPedlPsd
1259,1664774728,2022-10-03 13:25:28,1,6.19344,0.41837,NoYes1_No
1260,1664774729,2022-10-03 13:25:29,1,11.40156,1.4467,NoYes1_No
1261,1664774730,2022-10-03 13:25:30,1,17.679456,1.74386,NoYes1_No
1262,1664774731,2022-10-03 13:25:31,1,25.364952,2.13486,NoYes1_No
1263,1664774744,2022-10-03 13:25:44,2,32.853384,1.45843,NoYes1_No
1264,1664774745,2022-10-03 13:25:45,2,39.0609,1.72431,NoYes1_No
1265,1664774746,2022-10-03 13:25:46,2,43.734132,1.29812,NoYes1_No
1266,1664774747,2022-10-03 13:25:47,2,46.464876,0.75854,NoYes1_No
1267,1664774748,2022-10-03 13:25:48,2,48.688884,0.61778,NoYes1_No
1268,1664774749,2022-10-03 13:25:49,2,50.53284,0.51221,NoYes1_No


In [12]:
# recognize sudden brakes
#
# Columns written:
#   suddenbrake        1 where the brake pedal is pressed and tg_acceleration is
#                      below the threshold, else 0
#   suddenbrakesec     running length of the current sudden-brake run (0 outside)
#   suddenbrakeseconds total run length, stored on the LAST row of each run only

# tg_acceleration is a speed difference in km/h divided by 3.6, per step of "t"
SUDDEN_BRAKE_THRESHOLD = -1.47

is_sudden_brake = (df["BrkPedlPsdBrkPedlPsd"] == "NoYes1_Yes") & (
    df["tg_acceleration"] < SUDDEN_BRAKE_THRESHOLD
)
df["suddenbrake"] = is_sudden_brake.astype("int64")

# A run is a maximal stretch of consecutive rows with the same flag inside one
# journey. Breaking runs at journey boundaries matters: rows on either side of
# a boundary are not consecutive in time, so a run must not be counted across it.
flag_changed = df["suddenbrake"].ne(df["suddenbrake"].shift())
journey_changed = df["journeyID"].ne(df["journeyID"].shift())
brake_run_id = (flag_changed | journey_changed).cumsum()

# cumulative sum of the 0/1 flag inside each run: 1, 2, 3, ... for braking runs
# and 0 everywhere else
df["suddenbrakesec"] = df["suddenbrake"].groupby(brake_run_id).cumsum()

# a run ends where the next row belongs to another run, or at the last row of
# the data (shift(-1) is NaN there, so a run reaching the end is also reported)
is_run_end = is_sudden_brake & brake_run_id.ne(brake_run_id.shift(-1))
df["suddenbrakeseconds"] = df["suddenbrakesec"].where(is_run_end, 0)

df[2751:2758]

Unnamed: 0,t,date,journeyID,VehSpdLgtA,tg_acceleration,BrkPedlPsdBrkPedlPsd,suddenbrake,suddenbrakesec,suddenbrakeseconds
2751,1664776232,2022-10-03 13:50:32,2,52.967988,-1.30203,NoYes1_Yes,0,0,0
2752,1664776233,2022-10-03 13:50:33,2,46.324116,-1.84552,NoYes1_Yes,1,1,0
2753,1664776234,2022-10-03 13:50:34,2,39.328344,-1.94327,NoYes1_Yes,1,2,0
2754,1664776235,2022-10-03 13:50:35,2,33.247512,-1.68912,NoYes1_Yes,1,3,0
2755,1664776236,2022-10-03 13:50:36,2,26.88516,-1.76732,NoYes1_Yes,1,4,0
2756,1664776237,2022-10-03 13:50:37,2,20.83248,-1.6813,NoYes1_Yes,1,5,5
2757,1664776238,2022-10-03 13:50:38,2,16.736364,-1.13781,NoYes1_Yes,0,0,0


In [13]:
# one row per sudden-brake event: the rows carrying a non-zero run length
df_brake = df.loc[
    df["suddenbrakeseconds"] != 0,
    ["t", "date", "journeyID", "VehSpdLgtA", "tg_acceleration", "suddenbrakeseconds"],
].reset_index(drop=True)

# the event ends at "t" and spans `suddenbrakeseconds` rows, so it started at
# t - suddenbrakeseconds + 1
event_start = (df_brake["t"] - df_brake["suddenbrakeseconds"] + 1).apply(timestamp2date)

# number the events 1, 2, 3, ... and restart at 1 whenever journeyID differs
# from the previous row
journey_block = df_brake["journeyID"].ne(df_brake["journeyID"].shift()).cumsum()
event_number = df_brake.groupby(journey_block).cumcount() + 1

df_brake = df_brake.assign(
    suddenbrakeID=event_number,
    start=event_start,
    end=df_brake["date"],
)
df_brake = df_brake[["journeyID","suddenbrakeID","start","end","suddenbrakeseconds","VehSpdLgtA","tg_acceleration"]]

print(df_brake)

    journeyID  suddenbrakeID                start                  end  \
0           1              1  2022-10-03 13:21:55  2022-10-03 13:21:55   
1           1              2  2022-10-03 13:23:49  2022-10-03 13:23:51   
2           2              1  2022-10-03 13:26:04  2022-10-03 13:26:06   
3           2              2  2022-10-03 13:28:32  2022-10-03 13:28:32   
4           2              3  2022-10-03 13:28:36  2022-10-03 13:28:36   
5           2              4  2022-10-03 13:34:48  2022-10-03 13:34:48   
6           2              5  2022-10-03 13:34:52  2022-10-03 13:34:52   
7           2              6  2022-10-03 13:37:25  2022-10-03 13:37:25   
8           2              7  2022-10-03 13:41:32  2022-10-03 13:41:35   
9           2              8  2022-10-03 13:50:33  2022-10-03 13:50:37   
10          2              9  2022-10-03 13:50:41  2022-10-03 13:50:41   
11          2             10  2022-10-03 13:53:10  2022-10-03 13:53:12   
12          2             11  2022-10-

In [14]:
# recognize rapid speeding-ups
#
# Columns written:
#   speedingup        1 where tg_acceleration is above the threshold, else 0
#   speedingupsec     running length of the current speeding-up run (0 outside)
#   speedingupseconds total run length, stored on the LAST row of each run only

# tg_acceleration is a speed difference in km/h divided by 3.6, per step of "t"
SPEEDING_UP_THRESHOLD = 1.47

is_speeding_up = df["tg_acceleration"] > SPEEDING_UP_THRESHOLD
df["speedingup"] = is_speeding_up.astype("int64")

# A run is a maximal stretch of consecutive rows with the same flag inside one
# journey. Breaking runs at journey boundaries matters: rows on either side of
# a boundary are not consecutive in time, so a run must not be counted across it.
flag_changed = df["speedingup"].ne(df["speedingup"].shift())
journey_changed = df["journeyID"].ne(df["journeyID"].shift())
speedingup_run_id = (flag_changed | journey_changed).cumsum()

# cumulative sum of the 0/1 flag inside each run: 1, 2, 3, ... for speeding-up
# runs and 0 everywhere else. The very first row is counted like any other.
df["speedingupsec"] = df["speedingup"].groupby(speedingup_run_id).cumsum()

# a run ends where the next row belongs to another run, or at the last row of
# the data (shift(-1) is NaN there, so a run reaching the end is also reported)
is_run_end = is_speeding_up & speedingup_run_id.ne(speedingup_run_id.shift(-1))
df["speedingupseconds"] = df["speedingupsec"].where(is_run_end, 0)

# keep only the columns relevant to speeding-up analysis
df = df[["t","date","journeyID","VehSpdLgtA","tg_acceleration","BrkPedlPsdBrkPedlPsd","speedingup","speedingupsec","speedingupseconds"]]

df[7553:7560]

Unnamed: 0,t,date,journeyID,VehSpdLgtA,tg_acceleration,BrkPedlPsdBrkPedlPsd,speedingup,speedingupsec,speedingupseconds
7553,1664781034,2022-10-03 15:10:34,2,6.19344,1.20428,NoYes1_No,0,0,0
7554,1664781035,2022-10-03 15:10:35,2,12.330576,1.70476,NoYes1_No,1,1,0
7555,1664781036,2022-10-03 15:10:36,2,20.48058,2.26389,NoYes1_No,1,2,0
7556,1664781037,2022-10-03 15:10:37,2,27.335592,1.90417,NoYes1_No,1,3,0
7557,1664781038,2022-10-03 15:10:38,2,32.740776,1.50144,NoYes1_No,1,4,4
7558,1664781039,2022-10-03 15:10:39,2,37.611072,1.35286,NoYes1_No,0,0,0
7559,1664781040,2022-10-03 15:10:40,2,41.890176,1.18864,NoYes1_No,0,0,0


In [15]:
# one row per speeding-up event: the rows carrying a non-zero run length
df_speedingup = df.loc[
    df["speedingupseconds"] != 0,
    ["t", "date", "journeyID", "VehSpdLgtA", "tg_acceleration", "speedingupseconds"],
].reset_index(drop=True)

# the event ends at "t" and spans `speedingupseconds` rows, so it started at
# t - speedingupseconds + 1
event_start = (df_speedingup["t"] - df_speedingup["speedingupseconds"] + 1).apply(timestamp2date)

# number the events 1, 2, 3, ... and restart at 1 whenever journeyID differs
# from the previous row
journey_block = df_speedingup["journeyID"].ne(df_speedingup["journeyID"].shift()).cumsum()
event_number = df_speedingup.groupby(journey_block).cumcount() + 1

df_speedingup = df_speedingup.assign(
    speedingupID=event_number,
    start=event_start,
    end=df_speedingup["date"],
)
df_speedingup = df_speedingup[["journeyID","speedingupID","start","end","speedingupseconds","VehSpdLgtA","tg_acceleration"]]

print(df_speedingup)

    journeyID  speedingupID                start                  end  \
0           1             1  2022-10-03 13:22:04  2022-10-03 13:22:04   
1           1             2  2022-10-03 13:22:26  2022-10-03 13:22:26   
2           1             3  2022-10-03 13:23:57  2022-10-03 13:23:58   
3           1             4  2022-10-03 13:24:11  2022-10-03 13:24:11   
4           1             5  2022-10-03 13:25:00  2022-10-03 13:25:00   
..        ...           ...                  ...                  ...   
92          4            15  2022-10-03 19:24:16  2022-10-03 19:24:16   
93          4            16  2022-10-03 19:24:20  2022-10-03 19:24:20   
94          4            17  2022-10-03 19:25:08  2022-10-03 19:25:09   
95          4            18  2022-10-03 19:29:23  2022-10-03 19:29:24   
96          4            19  2022-10-03 19:30:30  2022-10-03 19:30:30   

    speedingupseconds  VehSpdLgtA  tg_acceleration  
0                   1   12.907692          2.58842  
1                