In [1]:
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt 
import seaborn as sns 

In [2]:
pd.set_option('display.max_columns', None)

faults = pd.read_csv('../data/J1939Faults.csv', low_memory=False, parse_dates=['EventTimeStamp', 'LocationTimeStamp']) #1187335

### Big G Express: Predicting Derates
In this project, you will be working with fault code data and vehicle onboard diagnostic data to try and predict an upcoming full derate. These are indicated by an SPN 5246.

You have been provided with a two files containing the data you will use to make these predictions (J1939Faults.csv and VehicleDiagnosticOnboardData.csv) as well as two files describing some of the contents (DataInfo.docx and Service Fault Codes_1_0_0_167.xlsx)

Note that in its raw form the data does not have "labels", so you must define what labels you are going to use and create those labels in your dataset. Also, you will likely need to perform some significant feature engineering in order to build an accurate predictor.

Additional cleaning tasks:

1. Remove faults occurring in the vicinity of the service locations at (36.0666667, -86.4347222), (35.5883333, -86.4438888), and (36.1950, -83.174722)
2. Remove faults where the EquipmentID has more than 5 characters.

In [3]:
for lat, lon in [(36.0666667, -86.4347222), (35.5883333, -86.4438888), (36.1950, -83.174722)]:
    
    faults = faults.loc[~((abs(lat - faults['Latitude']) <= 0.01) &
                          (abs(lon - faults['Longitude']) <= 0.01))]

In [4]:

#Remove faults where the EquipmentID has more than 5 characters.
faults =faults[faults['EquipmentID'].str.len() <= 5] #1185166 rows 

In [5]:
faults

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,actionDescription,ecuSoftwareVersion,ecuSerialNumber,ecuModel,ecuMake,ecuSource,spn,fmi,active,activeTransitionCount,faultValue,EquipmentID,MCTNumber,Latitude,Longitude,LocationTimeStamp
0,1,990349,2015-02-21 10:47:13,Low (Severity Low) Engine Coolant Level,,unknown,unknown,unknown,unknown,0,111,17,True,2,,1439,105354361,38.857638,-84.626851,2015-02-21 11:34:25
1,2,990360,2015-02-21 11:34:34,,,unknown,unknown,unknown,unknown,11,629,12,True,127,,1439,105354361,38.857638,-84.626851,2015-02-21 11:35:10
2,3,990364,2015-02-21 11:35:31,Incorrect Data Steering Wheel Angle,,unknown,unknown,unknown,unknown,11,1807,2,False,127,,1369,105336226,41.421250,-87.767361,2015-02-21 11:35:26
3,4,990370,2015-02-21 11:35:33,Incorrect Data Steering Wheel Angle,,unknown,unknown,unknown,unknown,11,1807,2,True,127,,1369,105336226,41.421018,-87.767361,2015-02-21 11:36:08
4,5,990416,2015-02-21 11:39:41,,,22281684P01*22357957P01*22362082P01*,13063430,0USA13_13_0415_2238A,VOLVO,0,4364,17,False,2,,1674,105427130,38.416481,-89.442638,2015-02-21 11:39:37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1187330,1248454,123904424,2020-03-06 14:00:26,Low (Severity Low) Catalyst Tank Level,,04384413*22383729*082218154102*60701732*G1*BGT*,80156139,6X1u17D1500000000,CMMNS,0,1761,17,False,3,,2282,105439740,37.094768,-85.897407,2020-03-06 14:00:21
1187331,1248455,123905139,2020-03-06 14:04:23,Condition Exists Engine Protection Torque Derate,,04358814*06099720*030816202706*09400153*G1*BDR*,79932020,6X1u13D1500000000,CMMNS,0,1569,31,True,5,,1994,105354084,34.390740,-79.461805,2020-03-06 14:04:59
1187332,1248456,123905996,2020-03-06 14:13:38,Abnormal Rate of Change Aftertreatment 1 Intak...,,05317106*05100987*050719120655*09401585*G1*BDR*,79880653,6X1u13D1500000000,CMMNS,0,3216,10,True,1,,1850,105336308,34.430370,-84.920509,2020-03-06 14:14:14
1187333,1248457,123906113,2020-03-06 14:14:13,Low (Severity Medium) Engine Coolant Level,,04384413*22544852*090619141107*60701756*G1*BGT*,,,,0,111,18,True,8,,2377,108605700,35.030925,-85.321527,2020-03-06 14:14:49


In [None]:
faults_1569 = faults.loc[(faults['spn'] == 1569) & (faults['fmi'] == 31)]

faults_1569
#faults.loc[faults['spn'] == 5246]
#faults.loc[faults['EquipmentID'] == 1395] #1721


In [26]:
faults_5246 = faults.loc[faults['spn'] == 5246]
faults_5246

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,actionDescription,ecuSoftwareVersion,ecuSerialNumber,ecuModel,ecuMake,ecuSource,spn,fmi,active,activeTransitionCount,faultValue,EquipmentID,MCTNumber,Latitude,Longitude,LocationTimeStamp
2089,2090,1011009,2015-02-23 05:05:44,,,05290170*03015749*051914190353*09400015*G1*BDR*,79642446,6X1u13D1500000000,CMMNS,0,5246,0,True,1,,1630,105329900,40.733009,-74.087777,2015-02-23 05:08:23
2971,2972,1026305,2015-02-23 15:54:22,,,unknown,unknown,unknown,unknown,0,5246,0,True,1,,1487,105369355,28.077361,-81.897083,2015-02-23 15:54:58
5713,5714,1070646,2015-02-25 13:53:08,,,unknown,unknown,unknown,unknown,0,5246,0,True,1,,1329,105400037,39.399583,-82.974768,2015-02-25 13:56:31
5809,5810,1071907,2015-02-25 14:47:00,,,unknown,unknown,unknown,unknown,0,5246,0,False,1,,1329,105400037,39.399629,-82.974814,2015-02-25 14:46:56
6534,6535,1097942,2015-02-26 22:24:29,,,04993120*00021657*082113134117*07700053*I0*BBZ*,79466573,6X1u10D1500000000,CMMNS,0,5246,0,True,1,,1419,105355995,37.596805,-85.865555,2015-02-26 22:25:05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1179423,1239564,119961467,2020-02-06 08:03:09,,,04358814*06005963*051718174436*09401683*G1*BDR*,79897320,6X1u13D1500000000,CMMNS,0,5246,0,False,1,,1854,105385876,35.943472,-83.823240,2020-02-06 08:03:05
1181700,1241841,120905759,2020-02-13 13:32:39,,,04358814*06026985*051718174436*09401683*G1*BDR*,79903054,6X1u13D1500000000,CMMNS,0,5246,0,True,1,,1872,105301976,35.707268,-81.397037,2020-02-13 13:33:15
1181717,1241858,120910417,2020-02-13 14:01:40,,,04358814*06026985*051718174436*09401683*G1*BDR*,79903054,6X1u13D1500000000,CMMNS,0,5246,0,False,1,,1872,105301976,35.708101,-81.395648,2020-02-13 13:59:51
1181996,1242137,121038018,2020-02-14 11:21:54,,,,,,,49,5246,19,True,88,,302,105418777,38.349490,-85.708425,2020-02-14 11:22:30


In [27]:
faults_5246["EquipmentID"].nunique()

191

In [30]:
#df1=faults_5246.loc[faults_5246['EquipmentID'] == '1872']

df1 = faults.loc[( faults['EquipmentID'] == '1419')]
df1= df1.loc[(df1['LocationTimeStamp'] > '2016-03-01') ]
df1= df1.loc[(df1['LocationTimeStamp'] <= '2016-04-25') ].sort_values(by=['LocationTimeStamp'])
#df1['spn']
df1.tail(30)
#df1["EquipmentID"].value_counts().to_frame().reset_index().head(30)

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,actionDescription,ecuSoftwareVersion,ecuSerialNumber,ecuModel,ecuMake,ecuSource,spn,fmi,active,activeTransitionCount,faultValue,EquipmentID,MCTNumber,Latitude,Longitude,LocationTimeStamp
394113,401614,7752219,2016-03-01 09:40:35,High (Severity Medium) Aftertreatment 1 Partic...,,04993120*00021657*082113134117*07700053*I0*BBZ*,79466573,6X1u10D1500000000,CMMNS,0,3251,16,False,1,,1419,105355995,28.561111,-81.855416,2016-03-01 09:40:31
396097,403598,7783445,2016-03-02 14:21:48,High (Severity Medium) Aftertreatment 1 Partic...,,04993120*00021657*082113134117*07700053*I0*BBZ*,79466573,6X1u10D1500000000,CMMNS,0,3251,16,True,1,,1419,105355995,34.052453,-83.08324,2016-03-02 14:22:24
396990,404491,7799478,2016-03-03 09:05:15,High (Severity Medium) Aftertreatment 1 Partic...,,04993120*00021657*082113134117*07700053*I0*BBZ*,79466573,6X1u10D1500000000,CMMNS,0,3251,16,False,1,,1419,105355995,32.866203,-81.960277,2016-03-03 09:05:10
397073,404574,7801047,2016-03-03 10:10:51,,,04993120*00021657*082113134117*07700053*I0*BBZ*,79466573,6X1u10D1500000000,CMMNS,0,4363,0,True,1,,1419,105355995,32.232361,-81.651157,2016-03-03 10:11:28
397083,404584,7801294,2016-03-03 10:21:06,High (Severity Medium) Aftertreatment 1 Partic...,,04993120*00021657*082113134117*07700053*I0*BBZ*,79466573,6X1u10D1500000000,CMMNS,0,3251,16,True,1,,1419,105355995,32.190787,-81.473009,2016-03-03 10:21:42
397130,404631,7802206,2016-03-03 10:58:59,,,04993120*00021657*082113134117*07700053*I0*BBZ*,79466573,6X1u10D1500000000,CMMNS,0,4363,0,False,1,,1419,105355995,31.960416,-81.331805,2016-03-03 10:58:55
397252,404753,7804937,2016-03-03 12:43:13,High (Severity Medium) Aftertreatment 1 Partic...,,04993120*00021657*082113134117*07700053*I0*BBZ*,79466573,6X1u10D1500000000,CMMNS,0,3251,16,False,1,,1419,105355995,30.476527,-81.644907,2016-03-03 12:43:09
397314,404815,7805953,2016-03-03 13:22:14,,,04993120*00021657*082113134117*07700053*I0*BBZ*,79466573,6X1u10D1500000000,CMMNS,0,4363,0,True,1,,1419,105355995,29.987268,-81.462731,2016-03-03 13:22:50
397388,404889,7807126,2016-03-03 14:14:06,,,04993120*00021657*082113134117*07700053*I0*BBZ*,79466573,6X1u10D1500000000,CMMNS,0,4363,0,False,1,,1419,105355995,29.340185,-81.133888,2016-03-03 14:14:02
397447,404948,7808070,2016-03-03 14:49:32,High (Severity Medium) Aftertreatment 1 Partic...,,04993120*00021657*082113134117*07700053*I0*BBZ*,79466573,6X1u10D1500000000,CMMNS,0,3251,16,True,1,,1419,105355995,29.317037,-81.131712,2016-03-03 14:50:08


In [32]:
df1[df1.isin({'spn': [1569, 5246]})]

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,actionDescription,ecuSoftwareVersion,ecuSerialNumber,ecuModel,ecuMake,ecuSource,spn,fmi,active,activeTransitionCount,faultValue,EquipmentID,MCTNumber,Latitude,Longitude,LocationTimeStamp
394113,,,NaT,,,,,,,,,,,,,,,,,NaT
396097,,,NaT,,,,,,,,,,,,,,,,,NaT
396990,,,NaT,,,,,,,,,,,,,,,,,NaT
397073,,,NaT,,,,,,,,,,,,,,,,,NaT
397083,,,NaT,,,,,,,,,,,,,,,,,NaT
397130,,,NaT,,,,,,,,,,,,,,,,,NaT
397252,,,NaT,,,,,,,,,,,,,,,,,NaT
397314,,,NaT,,,,,,,,,,,,,,,,,NaT
397388,,,NaT,,,,,,,,,,,,,,,,,NaT
397447,,,NaT,,,,,,,,,,,,,,,,,NaT


In [None]:
faults["actionDescription"].value_counts()


In [None]:
faults.corr()

In [None]:
print(faults.columns)

In [None]:
faults.describe()

In [None]:
faults.info()
faults.isnull().sum()

In [None]:
for colname in faults.columns[:19]:
    print('0s in "{variable}": {count}'.format(
        variable=colname,
        count=np.count_nonzero(faults[colname] == 0)))

In [None]:
faults.boxplot(figsize = [10, 7])
plt.show()

In [None]:
vdo_data = pd.read_csv('../data/VehicleDiagnosticOnboardData.csv')
vdo_data

In [None]:
vdo_data.info()

In [None]:
vdo_data["Value"].value_counts().to_frame().reset_index().head(30)

In [6]:
fc = pd.read_excel('../data/Service Fault Codes_1_0_0_167.xlsx')
 

dfc = fc.loc[(fc['SPN'] == 3362)]
#fc.loc[( faults['spn'] == '3362')]
dfc

  warn(msg)


Unnamed: 0,Published in CES 14602,Cummins Fault Code,Revision,PID,SID,MID,J1587 FMI,SPN,J1939 FMI,J2012 Pcode,Lamp Color,Lamp Device,Cummins Description,Algorithm Description
1078,Y,1682,167,Not Mapped,Not Mapped,Not Mapped,11,3362,31,Not Mapped,Amber,Warning,Aftertreatment 1 Diesel Exhaust Fluid Dosing U...,Lack of urea or air to the doser unit
2408,Y,3569,167,Not Mapped,Not Mapped,Not Mapped,7,3362,7,Not Mapped,Amber,Warning,Aftertreatment 1 Diesel Exhaust Fluid Dosing U...,
2917,Y,4295,167,Not Mapped,Not Mapped,Not Mapped,5,3362,5,Not Mapped,Amber,Warning,Aftertreatment 1 Diesel Exhaust Fluid Dosing U...,


In [35]:
fc

Unnamed: 0,Published in CES 14602,Cummins Fault Code,Revision,PID,SID,MID,J1587 FMI,SPN,J1939 FMI,J2012 Pcode,Lamp Color,Lamp Device,Cummins Description,Algorithm Description
0,Y,111,167,Not Mapped,254,0,12,629,12,P0606,Red,Stop / Shutdown,Engine Control Module Critical Internal Failur...,Error internal to the ECM related to memory ha...
1,Y,112,167,Not Mapped,20,128,7,635,7,Not Mapped,Red,Stop / Shutdown,Engine Timing Actuator Driver Circuit - Mechan...,Mechanical failure in the engine timing actuat...
2,Y,113,167,Not Mapped,20,128,3,635,3,Not Mapped,Amber,Warning,Engine Timing Actuator Driver Circuit - Voltag...,High signal voltage detected at the engine tim...
3,Y,114,167,Not Mapped,20,128,4,635,4,Not Mapped,Amber,Warning,Engine Timing Actuator Driver Circuit - Voltag...,Low voltage detected at the engine timing actu...
4,Y,115,167,190,Not Mapped,Not Mapped,2,612,2,P0008,Red,Stop / Shutdown,Engine Magnetic Speed/Position Lost Both of Tw...,The ECM has detected that the primary and back...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7119,Y,9996,167,Not Mapped,155,0,11,524286,31,Not Mapped,Amber,Warning,Reserved for temporary use - Condition Exists,
7120,Y,9997,167,Not Mapped,155,0,11,524286,31,Not Mapped,Amber,Warning,Reserved for temporary use - Condition Exists,
7121,Y,9998,167,Not Mapped,155,0,11,524286,31,Not Mapped,Amber,Warning,Reserved for temporary use - Condition Exists,
7122,Y,9999,167,Not Mapped,155,0,11,524286,31,Not Mapped,Amber,Warning,Reserved for temporary use - Condition Exists,


In [None]:
all_trucks = faults['EquipmentID'].unique()
partial_derate = faults.loc[(faults['spn'] == 1569) & (faults['fmi'] == 31)]['EquipmentID'].unique()
total_derate = faults.loc[faults['spn'] == 5246]['EquipmentID'].unique()

partial_derate_only = partial_derate[np.isin(partial_derate, total_derate, invert=True)]
total_derate_only = total_derate[np.isin(total_derate, partial_derate, invert=True)]
partial_and_total_derate = np.intersect1d(partial_derate, total_derate)
no_derate = all_trucks[np.isin(all_trucks, partial_derate_only, invert=True) | np.isin(all_trucks, total_derate_only, invert=True)]

In [None]:
len(all_trucks)

In [None]:
len(partial_derate)

In [None]:
len(total_derate)

In [None]:
faults_spn = faults['spn'].unique()

In [None]:
pd.DataFrame(total_derate)