I will be working with fault code data and vehicle onboard diagnostic data to try to predict an upcoming full derate. A full derate is indicated by a fault with SPN 5246.

In [1]:
import pandas as pd
from datetime import datetime
import geopandas as gpd
from geopy.distance import distance
import numpy as np

In [2]:
# Name of the timestamp column to be parsed as a datetime.
# NOTE(review): the read_csv call in the following cell passes its own literal
# list instead of this variable — confirm whether this cell is still needed.
parse_dates=['EventTimeStamp']

In [3]:
# J1939 fault records; EventTimeStamp is converted to datetime while reading.
faults = pd.read_csv(
    'data/J1939Faults.csv',
    parse_dates=['EventTimeStamp'],
    low_memory=False,
)

# Service fault code table (Excel workbook).
service_fault = pd.read_excel('data/ServiceFaultCodes.xlsx')

# Onboard diagnostic readings (Name/Value rows that carry a FaultId).
vehicle_Diagnostics = pd.read_csv('data/VehicleDiagnosticOnboardData.csv')

  for idx, row in parser.parse():


In [4]:
# Keep only rows whose 'EquipmentID' is at most 5 characters long.
equipment_id_length = faults['EquipmentID'].str.len()
faults = faults[equipment_id_length <= 5]

Basic EDA to check what the data looks like:

In [5]:
# Drop faultValue and actionDescription because they have not been filled in.
# Note: FaultId (diagnostics data) corresponds to RecordID (faults data).

In [6]:
# Remove the two columns that were never populated.
unused_columns = ["actionDescription", "faultValue"]
faults = faults.drop(columns=unused_columns)

In [7]:
# (rows, columns) of the faults table at this point in the notebook.
faults.shape

(1185166, 18)

In [8]:
# Number of missing values in each column of the faults table.
faults.isna().sum()

RecordID                      0
ESS_Id                        0
EventTimeStamp                0
eventDescription          60366
ecuSoftwareVersion       295827
ecuSerialNumber          342772
ecuModel                  64649
ecuMake                   64649
ecuSource                     0
spn                           0
fmi                           0
active                        0
activeTransitionCount         0
EquipmentID                   0
MCTNumber                     0
Latitude                      0
Longitude                     0
LocationTimeStamp             0
dtype: int64

Remove faults occurring in the vicinity of the service locations at (36.0666667, -86.4347222), (35.5883333, -86.4438888), and (36.1950, -83.174722)

In [9]:
# Remove faults logged exactly at one of the three service-location coordinates.
#
# Latitude/Longitude are treated as numeric elsewhere in this notebook, so the
# comparison must be against floats: comparing a numeric column with a string
# literal never matches and silently filters nothing. A row is dropped only
# when BOTH coordinates match the same service location; a row that merely
# shares a latitude or a longitude with a service location is kept.
for service_lat, service_lon in [(36.0666667, -86.4347222),
                                 (35.5883333, -86.4438888),
                                 (36.1950, -83.174722)]:
    at_service_location = ((faults['Latitude'] == service_lat) &
                           (faults['Longitude'] == service_lon))
    faults = faults[~at_service_location]

To filter out the events near the service stations:

In [10]:
# Drop every fault whose latitude AND longitude are both within 0.01 degrees
# of a service location.
for lat, lon in [(36.0666667, -86.4347222), (35.5883333, -86.4438888), (36.1950, -83.174722)]:
    near_lat = (faults['Latitude'] - lat).abs() <= 0.01
    near_lon = (faults['Longitude'] - lon).abs() <= 0.01
    faults = faults.loc[~(near_lat & near_lon)]

In [11]:
# Show the fault rows with SPN 5246, the code this notebook uses to identify a full derate.
faults.loc[faults['spn'] == 5246]

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuSerialNumber,ecuModel,ecuMake,ecuSource,spn,fmi,active,activeTransitionCount,EquipmentID,MCTNumber,Latitude,Longitude,LocationTimeStamp
2089,2090,1011009,2015-02-23 05:05:44,,05290170*03015749*051914190353*09400015*G1*BDR*,79642446,6X1u13D1500000000,CMMNS,0,5246,0,True,1,1630,105329900,40.733009,-74.087777,2015-02-23 05:08:23.000
2971,2972,1026305,2015-02-23 15:54:22,,unknown,unknown,unknown,unknown,0,5246,0,True,1,1487,105369355,28.077361,-81.897083,2015-02-23 15:54:58.000
5713,5714,1070646,2015-02-25 13:53:08,,unknown,unknown,unknown,unknown,0,5246,0,True,1,1329,105400037,39.399583,-82.974768,2015-02-25 13:56:31.000
5809,5810,1071907,2015-02-25 14:47:00,,unknown,unknown,unknown,unknown,0,5246,0,False,1,1329,105400037,39.399629,-82.974814,2015-02-25 14:46:56.000
6534,6535,1097942,2015-02-26 22:24:29,,04993120*00021657*082113134117*07700053*I0*BBZ*,79466573,6X1u10D1500000000,CMMNS,0,5246,0,True,1,1419,105355995,37.596805,-85.865555,2015-02-26 22:25:05.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1179423,1239564,119961467,2020-02-06 08:03:09,,04358814*06005963*051718174436*09401683*G1*BDR*,79897320,6X1u13D1500000000,CMMNS,0,5246,0,False,1,1854,105385876,35.943472,-83.823240,2020-02-06 08:03:05.000
1181700,1241841,120905759,2020-02-13 13:32:39,,04358814*06026985*051718174436*09401683*G1*BDR*,79903054,6X1u13D1500000000,CMMNS,0,5246,0,True,1,1872,105301976,35.707268,-81.397037,2020-02-13 13:33:15.000
1181717,1241858,120910417,2020-02-13 14:01:40,,04358814*06026985*051718174436*09401683*G1*BDR*,79903054,6X1u13D1500000000,CMMNS,0,5246,0,False,1,1872,105301976,35.708101,-81.395648,2020-02-13 13:59:51.000
1181996,1242137,121038018,2020-02-14 11:21:54,,,,,,49,5246,19,True,88,302,105418777,38.349490,-85.708425,2020-02-14 11:22:30.000


I selected the unique trucks with a partial derate and those with a complete derate (called a ‘total’ derate here; the name can be changed), and then compared the groups.

In [12]:
# Unique truck IDs, grouped by which derate fault codes each truck has logged.
all_trucks = faults['EquipmentID'].unique()
# Partial derate: SPN 1569 together with FMI 31.
partial_derate = faults.loc[(faults['spn'] == 1569) & (faults['fmi'] == 31), 'EquipmentID'].unique()
# Total (full) derate: SPN 5246.
total_derate = faults.loc[faults['spn'] == 5246, 'EquipmentID'].unique()

# Trucks that appear in exactly one of the two derate groups.
partial_derate_only = partial_derate[np.isin(partial_derate, total_derate, invert=True)]
total_derate_only = total_derate[np.isin(total_derate, partial_derate, invert=True)]
# Trucks that appear in both groups.
partial_and_total_derate = np.intersect1d(partial_derate, total_derate)

# Trucks with neither kind of derate. The membership test must be against the
# full partial/total sets combined: OR-ing two inverted tests against the
# disjoint "*_only" sets is true for every truck and would return all_trucks.
had_any_derate = np.isin(all_trucks, partial_derate) | np.isin(all_trucks, total_derate)
no_derate = all_trucks[~had_any_derate]

In [22]:
# Display the faults table as it stands after the filtering steps.
faults

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuSerialNumber,ecuModel,ecuMake,ecuSource,spn,fmi,active,activeTransitionCount,EquipmentID,MCTNumber,Latitude,Longitude,LocationTimeStamp
0,1,990349,2015-02-21 10:47:13,Low (Severity Low) Engine Coolant Level,unknown,unknown,unknown,unknown,0,111,17,True,2,1439,105354361,38.857638,-84.626851,2015-02-21 11:34:25.000
1,2,990360,2015-02-21 11:34:34,,unknown,unknown,unknown,unknown,11,629,12,True,127,1439,105354361,38.857638,-84.626851,2015-02-21 11:35:10.000
2,3,990364,2015-02-21 11:35:31,Incorrect Data Steering Wheel Angle,unknown,unknown,unknown,unknown,11,1807,2,False,127,1369,105336226,41.421250,-87.767361,2015-02-21 11:35:26.000
3,4,990370,2015-02-21 11:35:33,Incorrect Data Steering Wheel Angle,unknown,unknown,unknown,unknown,11,1807,2,True,127,1369,105336226,41.421018,-87.767361,2015-02-21 11:36:08.000
4,5,990416,2015-02-21 11:39:41,,22281684P01*22357957P01*22362082P01*,13063430,0USA13_13_0415_2238A,VOLVO,0,4364,17,False,2,1674,105427130,38.416481,-89.442638,2015-02-21 11:39:37.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1187330,1248454,123904424,2020-03-06 14:00:26,Low (Severity Low) Catalyst Tank Level,04384413*22383729*082218154102*60701732*G1*BGT*,80156139,6X1u17D1500000000,CMMNS,0,1761,17,False,3,2282,105439740,37.094768,-85.897407,2020-03-06 14:00:21.000
1187331,1248455,123905139,2020-03-06 14:04:23,Condition Exists Engine Protection Torque Derate,04358814*06099720*030816202706*09400153*G1*BDR*,79932020,6X1u13D1500000000,CMMNS,0,1569,31,True,5,1994,105354084,34.390740,-79.461805,2020-03-06 14:04:59.000
1187332,1248456,123905996,2020-03-06 14:13:38,Abnormal Rate of Change Aftertreatment 1 Intak...,05317106*05100987*050719120655*09401585*G1*BDR*,79880653,6X1u13D1500000000,CMMNS,0,3216,10,True,1,1850,105336308,34.430370,-84.920509,2020-03-06 14:14:14.000
1187333,1248457,123906113,2020-03-06 14:14:13,Low (Severity Medium) Engine Coolant Level,04384413*22544852*090619141107*60701756*G1*BGT*,,,,0,111,18,True,8,2377,108605700,35.030925,-85.321527,2020-03-06 14:14:49.000


#  **Rolling window of data - options to use EventTimeStamp to find patterns in the data.**

In [14]:
# Display the service fault code table read from ServiceFaultCodes.xlsx.
service_fault

Unnamed: 0,Published in CES 14602,Cummins Fault Code,Revision,PID,SID,MID,J1587 FMI,SPN,J1939 FMI,J2012 Pcode,Lamp Color,Lamp Device,Cummins Description,Algorithm Description
0,Y,111,167,Not Mapped,254,0,12,629,12,P0606,Red,Stop / Shutdown,Engine Control Module Critical Internal Failur...,Error internal to the ECM related to memory ha...
1,Y,112,167,Not Mapped,20,128,7,635,7,Not Mapped,Red,Stop / Shutdown,Engine Timing Actuator Driver Circuit - Mechan...,Mechanical failure in the engine timing actuat...
2,Y,113,167,Not Mapped,20,128,3,635,3,Not Mapped,Amber,Warning,Engine Timing Actuator Driver Circuit - Voltag...,High signal voltage detected at the engine tim...
3,Y,114,167,Not Mapped,20,128,4,635,4,Not Mapped,Amber,Warning,Engine Timing Actuator Driver Circuit - Voltag...,Low voltage detected at the engine timing actu...
4,Y,115,167,190,Not Mapped,Not Mapped,2,612,2,P0008,Red,Stop / Shutdown,Engine Magnetic Speed/Position Lost Both of Tw...,The ECM has detected that the primary and back...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7119,Y,9996,167,Not Mapped,155,0,11,524286,31,Not Mapped,Amber,Warning,Reserved for temporary use - Condition Exists,
7120,Y,9997,167,Not Mapped,155,0,11,524286,31,Not Mapped,Amber,Warning,Reserved for temporary use - Condition Exists,
7121,Y,9998,167,Not Mapped,155,0,11,524286,31,Not Mapped,Amber,Warning,Reserved for temporary use - Condition Exists,
7122,Y,9999,167,Not Mapped,155,0,11,524286,31,Not Mapped,Amber,Warning,Reserved for temporary use - Condition Exists,


In [15]:
# (rows, columns) of the service fault code table.
service_fault.shape

(7124, 14)

In [16]:
# Number of missing values in each column of the service fault code table.
service_fault.isna().sum()

Published in CES 14602       0
Cummins Fault Code           0
Revision                     0
PID                          0
SID                          0
MID                          0
J1587 FMI                    0
SPN                          0
J1939 FMI                    0
J2012 Pcode                  0
Lamp Color                   0
Lamp Device                  0
Cummins Description          0
Algorithm Description     5119
dtype: int64

In [17]:
# Display the onboard diagnostic readings read from VehicleDiagnosticOnboardData.csv.
vehicle_Diagnostics

Unnamed: 0,Id,Name,Value,FaultId
0,1,IgnStatus,False,1
1,2,EngineOilPressure,0,1
2,3,EngineOilTemperature,96.74375,1
3,4,TurboBoostPressure,0,1
4,5,EngineLoad,11,1
...,...,...,...,...
12821621,12864020,EngineCoolantTemperature,181.4,1248457
12821622,12864021,ParkingBrake,False,1248457
12821623,12864022,SwitchedBatteryVoltage,14.1,1248457
12821624,12864023,DistanceLtd,28606.65625,1248457


In [18]:
# (rows, columns) of the onboard diagnostics table.
vehicle_Diagnostics.shape

(12821626, 4)

In [19]:
# Number of missing values in each column of the onboard diagnostics table.
vehicle_Diagnostics.isna().sum()

Id         0
Name       0
Value      0
FaultId    0
dtype: int64