In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import datetime as dt

In [112]:
#Read in J1939Faults. low_memory = false because of warning about mixed type columns.
faults = pd.read_csv('../data/J1939Faults.csv',
                     index_col = 'RecordID',
                     parse_dates = ['EventTimeStamp', 'LocationTimeStamp'],
                     low_memory = False)
#remove faults that occur at service locations. Had to remove the last digit because the rows dont have it.
service_location1 = faults[(faults["Latitude"] == 36.066666) & (faults["Longitude"] == -86.434722)]

service_location2 = faults[(faults["Latitude"] == 35.588333) & (faults["Longitude"] == -86.443888)]

service_location3 = faults[(faults["Latitude"] == 36.1950) & (faults["Longitude"] ==  -83.174722)]

#concat service_locations for removal
service_locations = pd.concat([service_location1, service_location2, service_location3])

#drop service locations
faults_cleaned = faults.drop(service_locations.index)

#remove EquipmentIDs that are longer than 5 Characters per the README
faults_cleaned = faults_cleaned[faults_cleaned['EquipmentID'].map(len) <= 5]

#removed columns that only contained null values or would not be valuable because they're unique to the truck.
faults_cleaned = faults_cleaned.drop(['actionDescription', 'faultValue', 'ecuSerialNumber',
                                      'ecuSource', 'MCTNumber'], axis = 1).reset_index()

  mask |= (ar1 == a)


In [113]:
#Limit to years 2015-2020
faults_cleaned = faults_cleaned[(faults_cleaned['EventTimeStamp'] >= '2015-01-01 00:00:01') & (faults_cleaned['EventTimeStamp'] <= '2020-12-31 12:59:59')]



In [114]:
#prepare derates for dummization
faults_cleaned.loc[faults_cleaned['spn'] != 1569, 'spn_derate'] = 'neither'
faults_cleaned.loc[faults_cleaned['spn'] != 5246, 'spn_derate'] = 'neither'
faults_cleaned.loc[faults_cleaned['spn'] == 5246, 'spn_derate'] = 'full'
faults_cleaned.loc[faults_cleaned['spn'] == 1569, 'spn_derate'] = 'partial'  


faults_cleaned = faults_cleaned.set_index('RecordID')

#get dummies
derates = pd.get_dummies(faults_cleaned['spn_derate'])
derates

#merge back with original dataset
faults_cleaned = faults_cleaned.merge(derates, left_on = 'RecordID', right_on = 'RecordID') 

In [120]:
#sorts and groups by truck. then sorts by event timestamp. this made the diff() calculate correctly ¯\_(ツ)_/¯
faults_cleaned = faults_cleaned.sort_values(['EquipmentID'], ascending=True) \
    .groupby(['EquipmentID'], sort=False) \
    .apply(lambda x: x.sort_values(['EventTimeStamp'], ascending=True)) \
    .reset_index(drop=True)

In [55]:
faults_cleaned['EventTimeStamp'].dt.to_period('Y').value_counts()

2016    332175
2015    325536
2017    254680
2018    143289
2019    111321
2020     17179
Freq: A-DEC, Name: EventTimeStamp, dtype: int64

### Based on the distribution of years I think it would be best to limit the dataset to years 2015-2020 

#### 2015 -2020 are consecutive while before 2015 it skips years 2014, 2013, 2012 and picks back up at 2011. It's possible that some trucks from 2014 carry over into 2015, but unlikely any from 2011 are carried over since they limit their lease to 4 years.

# In the full dataset
### How many trucks have a full derate/partial derate? 

#### 210 have full derates 

#### 498 have partial

#### 182 have both. 

Interestingly, when filtering down for years 2015-2020, Full derates went down from 211 to 210 but the partial remained unchanged. 

In [65]:
full = faults_cleaned[(faults_cleaned['spn'] == 5246)]
full = full['EquipmentID'].unique()

len(full)

210

In [66]:
partial = faults_cleaned[(faults_cleaned['spn'] == 1569)]
partial = partial['EquipmentID'].unique()

len(partial)

498

In [67]:
intersection = np.intersect1d(full, partial)

len(intersection)

182

In [124]:
faults_cleaned['timedelta'] = faults_cleaned.groupby('EquipmentID')['EventTimeStamp'].diff()

In [125]:
faults_cleaned

Unnamed: 0,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,LocationTimeStamp,spn_derate,full,neither,partial,timedelta
0,1059893,2015-02-25 06:17:50,Low (Severity Medium) Battery Potential / Powe...,04993120*00001782*082113134117*07700053*I0*BBZ*,6X1u10D1500000000,CMMNS,444,18,True,1,1327,36.066805,-86.433981,2015-02-25 06:22:31,neither,0,1,0,NaT
1,1059892,2015-02-25 06:17:50,Low (Severity Low) Engine Coolant Level,04993120*00001782*082113134117*07700053*I0*BBZ*,6X1u10D1500000000,CMMNS,111,17,True,1,1327,36.066805,-86.433981,2015-02-25 06:22:30,neither,0,1,0,0 days 00:00:00
2,1061595,2015-02-25 07:40:59,Low (Severity Low) Engine Coolant Level,04993120*00001782*082113134117*07700053*I0*BBZ*,6X1u10D1500000000,CMMNS,111,17,False,1,1327,36.067083,-86.434722,2015-02-25 06:24:30,neither,0,1,0,0 days 01:23:09
3,1062652,2015-02-25 08:24:49,Low (Severity Medium) Battery Potential / Powe...,04993120*00001782*082113134117*07700053*I0*BBZ*,6X1u10D1500000000,CMMNS,444,18,False,1,1327,36.067083,-86.434722,2015-02-25 06:24:30,neither,0,1,0,0 days 00:43:50
4,2022693,2015-04-22 09:10:12,Low Voltage (Particulate Trap Outlet Pressure 1),04993120*00001782*082113134117*07700053*I0*BBZ*,6X1u10D1500000000,CMMNS,3610,4,True,1,1327,36.194861,-83.174768,2015-04-22 09:10:48,neither,0,1,0,56 days 00:45:23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1184175,1052146,2015-02-24 16:24:05,Low (Severity Medium) Catalyst Tank Level,05317106*04119044*051914190353*09400015*G1*BDR*,6X1u13D1500000000,CMMNS,1761,18,False,11,R1762,39.551851,-86.045925,2015-02-24 16:24:00,neither,0,1,0,0 days 00:52:09
1184176,1089561,2015-02-26 13:12:11,,05317106*04119044*051914190353*09400015*G1*BDR*,6X1u13D1500000000,CMMNS,5848,9,True,1,R1762,39.952870,-81.936990,2015-02-26 13:12:48,neither,0,1,0,1 days 20:48:06
1184177,1090499,2015-02-26 13:50:59,,05317106*04119044*051914190353*09400015*G1*BDR*,6X1u13D1500000000,CMMNS,5848,9,False,1,R1762,39.953379,-81.937407,2015-02-26 13:50:54,neither,0,1,0,0 days 00:38:48
1184178,1059704,2015-02-25 06:08:43,Incorrect Data J1939 Network #1 Primary Vehicl...,unknown,unknown,unknown,639,2,True,127,R1764,36.001296,-86.501435,2015-02-25 06:10:53,neither,0,1,0,NaT


In [89]:
faults_cleaned

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,LocationTimeStamp,spn_derate,full,neither,partial,timedelta
0,1,990349,2015-02-21 10:47:13,Low (Severity Low) Engine Coolant Level,unknown,unknown,unknown,111,17,True,2,1439,38.857638,-84.626851,2015-02-21 11:34:25,neither,0,1,0,NaT
1,2,990360,2015-02-21 11:34:34,,unknown,unknown,unknown,629,12,True,127,1439,38.857638,-84.626851,2015-02-21 11:35:10,neither,0,1,0,0 days 00:47:21
2,3,990364,2015-02-21 11:35:31,Incorrect Data Steering Wheel Angle,unknown,unknown,unknown,1807,2,False,127,1369,41.421250,-87.767361,2015-02-21 11:35:26,neither,0,1,0,NaT
3,4,990370,2015-02-21 11:35:33,Incorrect Data Steering Wheel Angle,unknown,unknown,unknown,1807,2,True,127,1369,41.421018,-87.767361,2015-02-21 11:36:08,neither,0,1,0,0 days 00:00:02
4,5,990416,2015-02-21 11:39:41,,22281684P01*22357957P01*22362082P01*,0USA13_13_0415_2238A,VOLVO,4364,17,False,2,1674,38.416481,-89.442638,2015-02-21 11:39:37,neither,0,1,0,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1184175,1248454,123904424,2020-03-06 14:00:26,Low (Severity Low) Catalyst Tank Level,04384413*22383729*082218154102*60701732*G1*BGT*,6X1u17D1500000000,CMMNS,1761,17,False,3,2282,37.094768,-85.897407,2020-03-06 14:00:21,neither,0,1,0,8 days 04:29:12
1184176,1248455,123905139,2020-03-06 14:04:23,Condition Exists Engine Protection Torque Derate,04358814*06099720*030816202706*09400153*G1*BDR*,6X1u13D1500000000,CMMNS,1569,31,True,5,1994,34.390740,-79.461805,2020-03-06 14:04:59,partial,0,0,1,0 days 00:56:49
1184177,1248456,123905996,2020-03-06 14:13:38,Abnormal Rate of Change Aftertreatment 1 Intak...,05317106*05100987*050719120655*09401585*G1*BDR*,6X1u13D1500000000,CMMNS,3216,10,True,1,1850,34.430370,-84.920509,2020-03-06 14:14:14,neither,0,1,0,3 days 00:34:59
1184178,1248457,123906113,2020-03-06 14:14:13,Low (Severity Medium) Engine Coolant Level,04384413*22544852*090619141107*60701756*G1*BGT*,,,111,18,True,8,2377,35.030925,-85.321527,2020-03-06 14:14:49,neither,0,1,0,3 days 06:25:26


In [99]:
faults_cleaned[faults_cleaned['EquipmentID']=='1439'].sort_values(by = 'EventTimeStamp')

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,LocationTimeStamp,spn_derate,full,neither,partial,timedelta
0,1,990349,2015-02-21 10:47:13,Low (Severity Low) Engine Coolant Level,unknown,unknown,unknown,111,17,True,2,1439,38.857638,-84.626851,2015-02-21 11:34:25,neither,0,1,0,NaT
1,2,990360,2015-02-21 11:34:34,,unknown,unknown,unknown,629,12,True,127,1439,38.857638,-84.626851,2015-02-21 11:35:10,neither,0,1,0,0 days 00:47:21
11,12,990462,2015-02-21 11:43:18,Low (Severity Low) Engine Coolant Level,unknown,unknown,unknown,111,17,False,2,1439,38.857592,-84.626805,2015-02-21 11:43:13,neither,0,1,0,0 days 00:08:44
360,361,994976,2015-02-21 16:45:27,,unknown,unknown,unknown,629,12,False,127,1439,36.975416,-84.106712,2015-02-21 16:45:23,neither,0,1,0,0 days 05:02:09
362,363,994985,2015-02-21 16:45:31,,unknown,unknown,unknown,629,12,True,127,1439,36.975462,-84.106666,2015-02-21 16:46:07,neither,0,1,0,0 days 00:00:04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
452790,462272,8891628,2016-04-28 12:17:53,,unknown,unknown,unknown,50353,0,True,2,1439,35.608796,-80.518240,2016-04-28 12:18:29,neither,0,1,0,0 days 02:25:16
452800,462282,8891934,2016-04-28 12:31:06,,unknown,unknown,unknown,50353,0,False,2,1439,35.608657,-80.518148,2016-04-28 12:31:02,neither,0,1,0,0 days 00:13:13
458439,467930,9010565,2016-05-04 18:22:38,,unknown,unknown,unknown,36017,0,True,2,1439,35.657453,-81.963657,2016-05-04 18:23:21,neither,0,1,0,6 days 05:51:32
458440,467931,9010581,2016-05-04 18:24:23,,unknown,unknown,unknown,36017,0,False,2,1439,35.658101,-81.964027,2016-05-04 18:24:19,neither,0,1,0,0 days 00:01:45


In [94]:
faults_cleaned['EquipmentID']

0          1439
1          1439
2          1369
3          1369
4          1674
           ... 
1184175    2282
1184176    1994
1184177    1850
1184178    2377
1184179    2377
Name: EquipmentID, Length: 1184180, dtype: object