In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import datetime as dt
import plotly.express as px
import plotly.figure_factory as ff
from tqdm.notebook import tqdm
import chart_studio.tools
import chart_studio.plotly as py

In [2]:
#Read in J1939Faults. low_memory = false because of warning about mixed type columns.
faults = pd.read_csv('../data/J1939Faults.csv',
                     index_col = 'RecordID',
                     parse_dates = ['EventTimeStamp', 'LocationTimeStamp'],
                     low_memory = False)
#remove faults that occur at service locations. Had to remove the last digit because the rows dont have it.
service_location1 = faults[(faults["Latitude"] == 36.066666) & (faults["Longitude"] == -86.434722)]

service_location2 = faults[(faults["Latitude"] == 35.588333) & (faults["Longitude"] == -86.443888)]

service_location3 = faults[(faults["Latitude"] == 36.1950) & (faults["Longitude"] ==  -83.174722)]

#concat service_locations for removal
service_locations = pd.concat([service_location1, service_location2, service_location3])

#drop service locations
faults_cleaned = faults.drop(service_locations.index)

#remove EquipmentIDs that are longer than 5 Characters per the README
faults_cleaned = faults_cleaned[faults_cleaned['EquipmentID'].map(len) <= 5]

#removed columns that only contained null values or would not be valuable because they're unique to the truck.
faults_cleaned = faults_cleaned.drop(['actionDescription', 'faultValue', 'ecuSerialNumber',
                                      'ecuSource', 'MCTNumber'], axis = 1).reset_index()

  mask |= (ar1 == a)


In [3]:
#Limit to years 2015-2020, Had to add down to seconds because it kept changing the datatype after filtering
faults_cleaned = faults_cleaned[(faults_cleaned['EventTimeStamp'] >= '2015-01-01 00:00:01') & (faults_cleaned['EventTimeStamp'] <= '2020-12-31 12:59:59')]




In [4]:
#prepare derates for dummization
faults_cleaned.loc[faults_cleaned['spn'] != 1569, 'spn_derate'] = 'neither'
faults_cleaned.loc[faults_cleaned['spn'] != 5246, 'spn_derate'] = 'neither'
faults_cleaned.loc[faults_cleaned['spn'] == 5246, 'spn_derate'] = 'full'
faults_cleaned.loc[faults_cleaned['spn'] == 1569, 'spn_derate'] = 'partial'  


faults_cleaned = faults_cleaned.set_index('RecordID')

#get dummies
derates = pd.get_dummies(faults_cleaned['spn_derate'])
derates

#merge back with original dataset
faults_cleaned = faults_cleaned.merge(derates, left_on = 'RecordID', right_on = 'RecordID') 

In [5]:
#sorts and groups by truck. then sorts by event timestamp. this made the diff() calculate correctly 
#    ¯\_(ツ)_/¯
#code shamelessly stolen from https://arccoder.medium.com/pandas-sort-within-groups-e1f3b6a10a3f
faults_cleaned = faults_cleaned.sort_values(['EquipmentID'], ascending=True) \
    .groupby(['EquipmentID'], sort=False) \
    .apply(lambda x: x.sort_values(['EventTimeStamp'], ascending=True)) \
    .reset_index(drop=True)


faults_cleaned['timedelta'] = faults_cleaned.groupby('EquipmentID')['EventTimeStamp'].diff()

In [7]:
#I want to return all of the data for the trucks that derate not just the first 10 rows before a derate
#Create list for trucks that have a full derate
full = faults_cleaned[(faults_cleaned['spn'] == 5246)]
full = full['EquipmentID'].unique()
#create an empty list to hold the derated_trucks
derated_trucks = []
#iterate through the dataframe and return the trucks that derate and the preceeding 10 rows. 
for truck_num in tqdm(full):
    truck = faults_cleaned[faults_cleaned['EquipmentID']==truck_num]
    first_full_index = truck[truck['spn']==5246].index[0]
    last_full_index = truck[truck['spn']==5246].index[-1]
    derated_trucks.append(truck.loc[first_full_index:last_full_index])
#concat the list into a new dataframe   
derated_trucks = pd.concat(derated_trucks)

  0%|          | 0/210 [00:00<?, ?it/s]

In [8]:
both = faults_cleaned[((faults_cleaned['spn']== 5246) | (faults_cleaned['spn']==1569))]

both = both['EquipmentID'].unique()
no_derates = faults_cleaned[~faults_cleaned['EquipmentID'].isin(both)]
#no_derates = no_derates['EquipmentID'].unique()
len(no_derates)

#bring in diagnostics and pivot
diagnostics = pd.read_csv('../data/VehicleDiagnosticOnboardData.csv') \
    .pivot(index = 'FaultId', columns = 'Name', values = 'Value')

faults_all = faults_cleaned.merge(diagnostics, left_index = True, right_on = 'FaultId')

faults_all['DistanceLtd'] = faults_all['DistanceLtd'].str.replace(',','.').astype('float64')

In [9]:
derated_trucks = derated_trucks.merge(diagnostics, left_index = True, right_on = 'FaultId')

derated_trucks['DistanceLtd'] = derated_trucks['DistanceLtd'].str.replace(',','.').astype('float64')

In [24]:
pd.set_option('display.max_rows', 400)

In [25]:
no_derates.value_counts(subset = ['spn'])

spn   
111       82750
829       11579
96        11437
639        8631
97         7438
596        6649
50353      5157
37         4653
629        4389
412        4139
929        3481
2863       3281
1807       3275
886        2805
791        2693
1059       2536
51923      2412
1231       2255
1761       1951
802        1754
793        1442
1067       1386
4096       1367
807        1307
641        1185
611        1008
792         981
3216        966
627         949
798         949
790         935
4364        918
1068        875
1481        838
3464        824
3556        754
171         748
1483        732
3226        720
70          701
5939        694
118         669
168         658
3251        642
789         635
103         623
3610        615
4276        611
0           602
788         593
248         570
1808        547
2659        529
101         493
84          489
91          484
3695        450
1028        434
2623        422
157         418
3241        414
184         408
1

In [26]:
derated_trucks.value_counts(subset = ['spn'])

spn   
111       27450
929       10326
96         2930
829        2080
1569       1545
5246       1157
3226        765
639         663
3821        584
793         543
94          442
789         432
1761        382
791         374
4334        333
3216        311
5394        308
907         255
1045        252
2863        238
3362        233
4094        231
411         190
611         182
110         174
3251        169
50353       154
1231        154
641         147
91          143
3031        139
627         137
175         136
1172        121
3361        111
4340        107
4364         95
596          94
5742         91
1787         91
3364         88
1483         87
245          83
5848         82
4376         80
2623         80
1668         80
1127         78
6802         76
4339         71
792          71
5319         66
3245         66
5743         65
4765         65
4375         64
3610         58
2791         55
1068         55
157          50
171          47
790          46
1

In [36]:
counts = derated_trucks['spn'].value_counts(dropna=False)
top_6 = derated_trucks[derated_trucks['spn'].map(counts)>1156]
top_6.value_counts(subset = 'spn')

spn
111     27450
929     10326
96       2930
829      2080
1569     1545
5246     1157
dtype: int64

In [16]:
y = derated_trucks['DistanceLtd']

In [None]:
top_5_derates
top_5_no_derates

In [10]:
#Merge the trucks that don't derate with the diagnostics data.

no_derates = no_derates.merge(diagnostics, left_index = True, right_on = 'FaultId')

no_derates['DistanceLtd'] = no_derates['DistanceLtd'].str.replace(',','.').astype('float64')

In [17]:
fig = px.scatter(derated_trucks, x=x, y=y, animation_frame="spn",
                 color="spn", hover_name="spn")

ValueError: All arguments should have the same length. The length of argument `y` is 57742, whereas the length of  previously-processed arguments ['x'] is 218

In [42]:
top_6['DistanceLtd'] =top_6['DistanceLtd'].fillna(method='backfill')
top_6['DistanceLtd']= top_6['DistanceLtd'].dropna()
top_6['DistanceLtd'].isna().sum()

0

if a derated truck has to get towed every time it derates, sum the derates grouped by equipmentID and multiply by the amount it costs to have the truck towed. 

In [52]:
derated_111 = derated_trucks[derated_trucks['spn'] == 111]
derated_929 = derated_trucks[derated_trucks['spn'] == 929]
derated_96 = derated_trucks[derated_trucks['spn'] == 96]
derated_829 = derated_trucks[derated_trucks['spn'] == 829]
partial_derate_dist = derated_trucks[derated_trucks['spn'] == 1569]
full_derate_dist = derated_trucks[derated_trucks['spn'] == 5246]
no_111 = no_derates[no_derates['spn'] == 111]
no_829 = no_derates[no_derates['spn'] == 829]
no_96 = no_derates[no_derates['spn'] == 96]
no_639 = no_derates[no_derates['spn'] == 639]
no_97 = no_derates[no_derates['spn'] == 97]
no_596 = no_derates[no_derates['spn'] == 596]
no_929 = no_derates[no_derates['spn'] == 929]

derated_111['DistanceLtd'].fillna(method = 'ffill').dropna()
derated_929['DistanceLtd'].fillna(method = 'ffill').dropna()
derated_96['DistanceLtd'].fillna(method = 'ffill').dropna()
derated_829['DistanceLtd'].fillna(method = 'ffill').dropna()
partial_derate_dist['DistanceLtd'].fillna(method = 'ffill').dropna()
full_derate_dist['DistanceLtd'].fillna(method = 'ffill').dropna()
no_111['DistanceLtd'].fillna(method = 'ffill').dropna()
no_829['DistanceLtd'].fillna(method = 'ffill').dropna()
no_96['DistanceLtd'].fillna(method = 'ffill').dropna()
no_639['DistanceLtd'].fillna(method = 'ffill').dropna()
no_97['DistanceLtd'].fillna(method = 'ffill').dropna()
no_596['DistanceLtd'].fillna(method = 'ffill').dropna()
no_929['DistanceLtd'].fillna(method = 'ffill').dropna()

Derated SPN
111       27450
929       10326
96         2930
829        2080
1569       1545
5246       1157

In [53]:
x1 = derated_111
x2 = derated_929
x3 = derated_96
x4 = derated_829
x5 = partial_derate_dist
x6 = full_derate_dist
x7 = no_111
x8 = no_829
x9 = no_96
x10 = no_639
x11 = no_97
x12 = no_596
x13 = no_929

Unnamed: 0_level_0,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,...,FuelTemperature,IgnStatus,IntakeManifoldTemperature,LampStatus,ParkingBrake,ServiceDistance,Speed,SwitchedBatteryVoltage,Throttle,TurboBoostPressure
FaultId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16519,3166440,2015-06-26 10:17:48,Low (Severity Low) Engine Coolant Level,04993120*00000194*042114185815*07700062*I0*BBZ*,6X1u10D1500000000,CMMNS,111,17,True,1,...,,False,,17407,,,,,,
16520,3166461,2015-06-26 10:20:15,Low (Severity Low) Engine Coolant Level,04993120*00000194*042114185815*07700062*I0*BBZ*,6X1u10D1500000000,CMMNS,111,17,False,1,...,32,True,129.2,1279,False,,6.631196,3276.75,0,3.19
16533,3294859,2015-07-04 00:31:59,Low (Severity Low) Engine Coolant Level,04993120*00000194*042114185815*07700062*I0*BBZ*,6X1u10D1500000000,CMMNS,111,17,True,1,...,32,True,75.2,1023,False,,82.15692,3276.75,0,0.87
16534,3294862,2015-07-04 00:35:52,Low (Severity Low) Engine Coolant Level,04993120*00000194*042114185815*07700062*I0*BBZ*,6X1u10D1500000000,CMMNS,111,17,False,1,...,,,,1023,,,,,,
16538,3297602,2015-07-04 15:55:51,Low (Severity Low) Engine Coolant Level,04993120*00000194*042114185815*07700062*I0*BBZ*,6X1u10D1500000000,CMMNS,111,17,True,1,...,,,,255,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1173715,67756530,2018-11-08 15:23:25,Low (Severity Medium) Engine Coolant Level,PC4__1284P4C_6*,MX16U13D13,PCAR,111,18,False,2,...,,,,22527,,,,,,
1173748,67785402,2018-11-08 22:11:08,Low (Severity Medium) Engine Coolant Level,PC4__1284P4C_6*,MX16U13D13,PCAR,111,18,True,1,...,,True,109.4,17407,False,,59.25357,,100,4.93
1173757,67785421,2018-11-08 22:12:32,Low (Severity Medium) Engine Coolant Level,PC4__1284P4C_6*,MX16U13D13,PCAR,111,18,False,1,...,32,True,140,17407,False,,4.922425,,100,2.9
1173776,68971716,2018-11-20 14:12:25,Low (Severity Medium) Engine Coolant Level,PC4__1284P4C_6*,MX16U13D13,PCAR,111,18,True,1,...,,,,255,,,,,,


No Derates SPN
111       82750
829       11579
96        11437
639        8631
97         7438
596        6649

In [59]:
x1 = derated_111['DistanceLtd'].fillna(method = 'ffill').dropna()
x2 = derated_929['DistanceLtd'].fillna(method = 'ffill').dropna()
x3 = derated_96['DistanceLtd'].fillna(method = 'ffill').dropna()
x4 = derated_829['DistanceLtd'].fillna(method = 'ffill').dropna()
x5 = partial_derate_dist['DistanceLtd'].fillna(method = 'ffill').dropna()
x6 = full_derate_dist['DistanceLtd'].fillna(method = 'ffill').dropna()

hist_data = [x1, x2, x3, x4, x5, x6]

group_labels = ['111', '929', '96', '829', '1569', '5246']

fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug = False)

fig.show()

In [68]:
py.plot(fig, filename = 'top_6_derates', auto_open=True)

'https://plotly.com/~James.a.gilbert/8/'

In [69]:
chart_studio.tools.get_embed('https://plotly.com/~James.a.gilbert/8/')

'<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plotly.com/~James.a.gilbert/8.embed" height="525" width="100%"></iframe>'

In [67]:
x7 = no_111['DistanceLtd'].fillna(method = 'ffill').dropna()
x8 = no_829['DistanceLtd'].fillna(method = 'ffill').dropna()
x9 = no_96['DistanceLtd'].fillna(method = 'ffill').dropna()
x10 = no_639['DistanceLtd'].fillna(method = 'ffill').dropna()
x11 = no_97['DistanceLtd'].fillna(method = 'ffill').dropna()
x12 = no_596['DistanceLtd'].fillna(method = 'ffill').dropna()
#x13 = no_929['DistanceLtd'].fillna(method = 'ffill').dropna()

hist_data_no = [x7, x8, x9, x10, x11, x12]
group_labels_no = ['111', '829', '96', '639', '97', '595']


fig_no = ff.create_distplot(hist_data_no, group_labels_no, show_hist=False, show_rug=False)


fig_no.show()

In [70]:
py.plot(fig_no, filename = 'top6_no_derates', auto_open=True)

'https://plotly.com/~James.a.gilbert/10/'

In [71]:
chart_studio.tools.get_embed('https://plotly.com/~James.a.gilbert/10/')

'<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plotly.com/~James.a.gilbert/10.embed" height="525" width="100%"></iframe>'

In [64]:
x2 = derated_929['DistanceLtd'].fillna(method = 'ffill').dropna()
x13 = no_929['DistanceLtd'].fillna(method = 'ffill').dropna()

hist_data_929 = [x2, x13]
group_labels_929 = ['derated_929','Not_derated929']


fig_929 = ff.create_distplot(hist_data_929, group_labels_no, show_hist=False, show_rug=False)


fig_929.show()

PlotlyError: Oops! Your data lists or ndarrays should be the same length.

In [65]:
len(x2)

10326

In [63]:
len(x13)

3479

In [72]:
derated_trucks.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 57742 entries, 100 to 1174631
Data columns (total 43 columns):
 #   Column                     Non-Null Count  Dtype          
---  ------                     --------------  -----          
 0   ESS_Id                     57742 non-null  int64          
 1   EventTimeStamp             57742 non-null  datetime64[ns] 
 2   eventDescription           54128 non-null  object         
 3   ecuSoftwareVersion         47940 non-null  object         
 4   ecuModel                   54107 non-null  object         
 5   ecuMake                    54107 non-null  object         
 6   spn                        57742 non-null  int64          
 7   fmi                        57742 non-null  int64          
 8   active                     57742 non-null  bool           
 9   activeTransitionCount      57742 non-null  int64          
 10  EquipmentID                57742 non-null  object         
 11  Latitude                   57742 non-null  float64

In [75]:
derated_trucks['EngineTimeLtd'] = derated_trucks['EngineTimeLtd'].str.replace(',','.').astype('float64')


In [80]:
derated_trucks['EngineTimeLtd'].describe()

count     28224.000000
mean       7207.191268
std        3486.728492
min           0.100000
25%        5263.912500
50%        7398.375000
75%        9519.500000
max      112770.200000
Name: EngineTimeLtd, dtype: float64