I will be working with fault code data and vehicle onboard diagnostic data to try and predict an upcoming full derate. These are indicated by an SPN 5246.

In [24]:
import pandas as pd
from datetime import datetime
import geopandas as gpd
from geopy.distance import distance
import numpy as np

In [25]:
parse_dates=['EventTimeStamp']

In [26]:
faults = pd.read_csv('data/J1939Faults.csv', low_memory=False, parse_dates=['EventTimeStamp'])
service_fault = pd.read_excel('data/ServiceFaultCodes.xlsx')
vehicle_Diagnostics = pd.read_csv('data/VehicleDiagnosticOnboardData.csv')

  for idx, row in parser.parse():


In [27]:
# filter out rows that have more than 5 characters in the 'EquipmentID' column
faults =faults[faults['EquipmentID'].str.len() <= 5]

Basic EDA to check how the data looks like:

In [28]:
#get rid of faultValue and actionDescription since they haven't been filled in. 
#FaultId = RecordID

In [29]:
faults = faults.drop(columns = ["actionDescription", "faultValue"])

In [30]:
faults.shape
#service_fault.shape
#vehicle_Diagnostics.shape

(1185166, 18)

In [31]:
faults.isna().sum()
#service_fault.isna().sum()
#vehicle_Diagnostics.isna().sum()

RecordID                      0
ESS_Id                        0
EventTimeStamp                0
eventDescription          60366
ecuSoftwareVersion       295827
ecuSerialNumber          342772
ecuModel                  64649
ecuMake                   64649
ecuSource                     0
spn                           0
fmi                           0
active                        0
activeTransitionCount         0
EquipmentID                   0
MCTNumber                     0
Latitude                      0
Longitude                     0
LocationTimeStamp             0
dtype: int64

Remove faults occurring in the vicinity of the service locations at (36.0666667, -86.4347222), (35.5883333, -86.4438888), and (36.1950, -83.174722)

In [32]:
faults = faults[(faults['Latitude'] != '36.0666667') & (faults['Longitude'] != '-86.4347222')]
faults = faults[(faults['Latitude'] != '35.5883333') & (faults['Longitude'] != '-86.4438888')]
faults = faults[(faults['Latitude'] != '36.1950') & (faults['Longitude'] != '-83.174722')]

To filter out the events near the service stations:

In [33]:
for lat, lon in [(36.0666667, -86.4347222), (35.5883333, -86.4438888), (36.1950, -83.174722)]:
    
    faults = faults.loc[~((abs(lat - faults['Latitude']) <= 0.01) &
                          (abs(lon - faults['Longitude']) <= 0.01))]

In [34]:
faults.loc[faults['spn'] == 5246]

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuSerialNumber,ecuModel,ecuMake,ecuSource,spn,fmi,active,activeTransitionCount,EquipmentID,MCTNumber,Latitude,Longitude,LocationTimeStamp
2089,2090,1011009,2015-02-23 05:05:44,,05290170*03015749*051914190353*09400015*G1*BDR*,79642446,6X1u13D1500000000,CMMNS,0,5246,0,True,1,1630,105329900,40.733009,-74.087777,2015-02-23 05:08:23.000
2971,2972,1026305,2015-02-23 15:54:22,,unknown,unknown,unknown,unknown,0,5246,0,True,1,1487,105369355,28.077361,-81.897083,2015-02-23 15:54:58.000
5713,5714,1070646,2015-02-25 13:53:08,,unknown,unknown,unknown,unknown,0,5246,0,True,1,1329,105400037,39.399583,-82.974768,2015-02-25 13:56:31.000
5809,5810,1071907,2015-02-25 14:47:00,,unknown,unknown,unknown,unknown,0,5246,0,False,1,1329,105400037,39.399629,-82.974814,2015-02-25 14:46:56.000
6534,6535,1097942,2015-02-26 22:24:29,,04993120*00021657*082113134117*07700053*I0*BBZ*,79466573,6X1u10D1500000000,CMMNS,0,5246,0,True,1,1419,105355995,37.596805,-85.865555,2015-02-26 22:25:05.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1179423,1239564,119961467,2020-02-06 08:03:09,,04358814*06005963*051718174436*09401683*G1*BDR*,79897320,6X1u13D1500000000,CMMNS,0,5246,0,False,1,1854,105385876,35.943472,-83.823240,2020-02-06 08:03:05.000
1181700,1241841,120905759,2020-02-13 13:32:39,,04358814*06026985*051718174436*09401683*G1*BDR*,79903054,6X1u13D1500000000,CMMNS,0,5246,0,True,1,1872,105301976,35.707268,-81.397037,2020-02-13 13:33:15.000
1181717,1241858,120910417,2020-02-13 14:01:40,,04358814*06026985*051718174436*09401683*G1*BDR*,79903054,6X1u13D1500000000,CMMNS,0,5246,0,False,1,1872,105301976,35.708101,-81.395648,2020-02-13 13:59:51.000
1181996,1242137,121038018,2020-02-14 11:21:54,,,,,,49,5246,19,True,88,302,105418777,38.349490,-85.708425,2020-02-14 11:22:30.000


selected unique trucks with partial derate and complete derate (I can change the name, currently I went with ‘total’ derate) and then I compared them.

In [35]:
all_trucks = faults['EquipmentID'].unique()
partial_derate = faults.loc[(faults['spn'] == 1569) & (faults['fmi'] == 31)]['EquipmentID'].unique()
total_derate = faults.loc[faults['spn'] == 5246]['EquipmentID'].unique()

partial_derate_only = partial_derate[np.isin(partial_derate, total_derate, invert=True)]
total_derate_only = total_derate[np.isin(total_derate, partial_derate, invert=True)]
partial_and_total_derate = np.intersect1d(partial_derate, total_derate)
no_derate = all_trucks[np.isin(all_trucks, partial_derate_only, invert=True) | np.isin(all_trucks, total_derate_only, invert=True)]

Dropping dates after 2011 

In [36]:
faults = faults.loc[faults['EventTimeStamp'].dt.year > 2011]

- Creating a copy of Faults dataframe to be able to make changes without affecting the original data. 

In [37]:
faults_copy = faults 
Diagnostics = vehicle_Diagnostics

Changing the shape of Diagnostics to be able to merge it. 

In [38]:
# Create a wider table from long table, and drop the Name
Diagnostics = Diagnostics.pivot(index="FaultId", columns="Name", values="Value").reset_index()

In [39]:
Diagnostics.isna().sum()

Name
FaultId                            0
AcceleratorPedal              655446
BarometricPressure            601359
CruiseControlActive           612419
CruiseControlSetSpeed         610877
DistanceLtd                   601516
EngineCoolantTemperature      601264
EngineLoad                    601714
EngineOilPressure             601091
EngineOilTemperature          603423
EngineRpm                     600414
EngineTimeLtd                 605969
FuelLevel                     684540
FuelLtd                       602140
FuelRate                      602098
FuelTemperature               888225
IgnStatus                     578881
IntakeManifoldTemperature     601044
LampStatus                         0
ParkingBrake                  787363
ServiceDistance              1187120
Speed                         603419
SwitchedBatteryVoltage       1073276
Throttle                      766832
TurboBoostPressure            603984
dtype: int64

In [42]:
# convert Series
#cols = ['AcceleratorPedal', 'BarometricPressure', 'CruiseControlSetSpeed', 'DistanceLtd', 'EngineCoolantTemperature', 'EngineLoad', 'EngineOilPressure', 'EngineOilTemperature', 'FuelTemperature', 'IntakeManifoldTemperature', 'IntakeManifoldTemperature','ServiceDistance', 'Speed', 'SwitchedBatteryVoltage', 'Throttle', 'TurboBoostPressure']
# convert column "a" of a DataFrame
#Diagnostics[cols] = pd.to_numeric(Diagnostics[cols])

In [46]:
Diagnostics[cols] = Diagnostics[cols].apply(pd.to_numeric, errors='coerce')

In [47]:
#Droping nan and replacing it with avg. 
#cols = ['AcceleratorPedal', 'BarometricPressure', 'CruiseControlSetSpeed', 'DistanceLtd', 'EngineCoolantTemperature', 'EngineLoad', 'EngineOilPressure', 'EngineOilTemperature', 'FuelTemperature', 'IntakeManifoldTemperature', 'IntakeManifoldTemperature','ServiceDistance', 'Speed', 'SwitchedBatteryVoltage', 'Throttle', 'TurboBoostPressure']
cols = Diagnostics.select_dtypes(np.number).columns
Diagnostics[cols] = Diagnostics[cols].fillna(Diagnostics[cols].mean())

Diagnostics

Name,FaultId,AcceleratorPedal,BarometricPressure,CruiseControlActive,CruiseControlSetSpeed,DistanceLtd,EngineCoolantTemperature,EngineLoad,EngineOilPressure,EngineOilTemperature,...,FuelTemperature,IgnStatus,IntakeManifoldTemperature,LampStatus,ParkingBrake,ServiceDistance,Speed,SwitchedBatteryVoltage,Throttle,TurboBoostPressure
0,1,0.000000,14.210000,False,66.486720,423178.700000,100.400000,11.000000,0.000000,96.743750,...,36.877488,False,78.8,1023,True,,0,3276.75,,0
1,2,26.078669,14.205793,,60.466953,360582.050315,166.375439,29.764037,32.180492,184.504485,...,36.877488,True,,1279,,,,,,
2,3,26.078669,14.205793,,60.466953,360582.050315,166.375439,29.764037,32.180492,184.504485,...,36.877488,,,1279,,,,,,
3,4,26.078669,14.205793,,60.466953,360582.050315,166.375439,29.764037,32.180492,184.504485,...,36.877488,True,,1279,,,,,,
4,5,26.078669,14.205793,,60.466953,360582.050315,166.375439,29.764037,32.180492,184.504485,...,36.877488,,,16639,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1187330,1248454,26.078669,14.205793,,60.466953,360582.050315,166.375439,29.764037,32.180492,184.504485,...,36.877488,,,1023,,,,,,
1187331,1248455,100.000000,14.500000,True,64.622600,423937.900000,185.000000,51.000000,37.120000,211.493700,...,32.000000,True,98.6,18431,False,,65.01096,,73.2,7.83
1187332,1248456,0.000000,14.355000,True,66.486720,465925.400000,186.800000,62.000000,41.180000,212.843800,...,36.877488,True,91.4,17407,,,66.5741,,100,6.96
1187333,1248457,1.600000,14.427500,False,67.729460,28606.656250,181.400000,0.000000,27.260000,221.731200,...,36.877488,True,100.4,1023,False,,11.84489,14.1,100,1.74


In [48]:
Diagnostics.dtypes

Name
FaultId                        int64
AcceleratorPedal             float64
BarometricPressure           float64
CruiseControlActive           object
CruiseControlSetSpeed        float64
DistanceLtd                  float64
EngineCoolantTemperature     float64
EngineLoad                   float64
EngineOilPressure            float64
EngineOilTemperature         float64
EngineRpm                     object
EngineTimeLtd                 object
FuelLevel                     object
FuelLtd                       object
FuelRate                      object
FuelTemperature              float64
IgnStatus                     object
IntakeManifoldTemperature     object
LampStatus                    object
ParkingBrake                  object
ServiceDistance               object
Speed                         object
SwitchedBatteryVoltage        object
Throttle                      object
TurboBoostPressure            object
dtype: object

In [49]:
#Merge faults and vehicle dignostic tables
faults_copy = pd.merge(faults_copy, Diagnostics, left_on='RecordID', right_on='FaultId')
faults_copy

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuSerialNumber,ecuModel,ecuMake,ecuSource,spn,...,FuelTemperature_y,IgnStatus_y,IntakeManifoldTemperature_y,LampStatus_y,ParkingBrake_y,ServiceDistance_y,Speed_y,SwitchedBatteryVoltage_y,Throttle_y,TurboBoostPressure_y
0,1,990349,2015-02-21 10:47:13,Low (Severity Low) Engine Coolant Level,unknown,unknown,unknown,unknown,0,111,...,36.877488,False,78.8,1023,True,,0,3276.75,,0
1,2,990360,2015-02-21 11:34:34,,unknown,unknown,unknown,unknown,11,629,...,36.877488,True,,1279,,,,,,
2,3,990364,2015-02-21 11:35:31,Incorrect Data Steering Wheel Angle,unknown,unknown,unknown,unknown,11,1807,...,36.877488,,,1279,,,,,,
3,4,990370,2015-02-21 11:35:33,Incorrect Data Steering Wheel Angle,unknown,unknown,unknown,unknown,11,1807,...,36.877488,True,,1279,,,,,,
4,5,990416,2015-02-21 11:39:41,,22281684P01*22357957P01*22362082P01*,13063430,0USA13_13_0415_2238A,VOLVO,0,4364,...,36.877488,,,16639,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1053359,1248454,123904424,2020-03-06 14:00:26,Low (Severity Low) Catalyst Tank Level,04384413*22383729*082218154102*60701732*G1*BGT*,80156139,6X1u17D1500000000,CMMNS,0,1761,...,36.877488,,,1023,,,,,,
1053360,1248455,123905139,2020-03-06 14:04:23,Condition Exists Engine Protection Torque Derate,04358814*06099720*030816202706*09400153*G1*BDR*,79932020,6X1u13D1500000000,CMMNS,0,1569,...,32.000000,True,98.6,18431,False,,65.01096,,73.2,7.83
1053361,1248456,123905996,2020-03-06 14:13:38,Abnormal Rate of Change Aftertreatment 1 Intak...,05317106*05100987*050719120655*09401585*G1*BDR*,79880653,6X1u13D1500000000,CMMNS,0,3216,...,36.877488,True,91.4,17407,,,66.5741,,100,6.96
1053362,1248457,123906113,2020-03-06 14:14:13,Low (Severity Medium) Engine Coolant Level,04384413*22544852*090619141107*60701756*G1*BGT*,,,,0,111,...,36.877488,True,100.4,1023,False,,11.84489,14.1,100,1.74


#  **Rolling window of data - options to use EventTimeStamp to find patterns in the data.**

In [None]:
service_fault

In [None]:
service_fault[service_fault['SPN']== 111]