In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import datetime as dt

In [2]:
# Read in J1939Faults. low_memory=False avoids the pandas warning about mixed-type columns.
faults = pd.read_csv('../data/J1939Faults.csv',
                     index_col='RecordID',
                     parse_dates=['EventTimeStamp', 'LocationTimeStamp'],
                     low_memory=False)

# (Latitude, Longitude) of the service locations whose faults are excluded.
# The last digit of each coordinate was removed because the rows don't have it,
# so the exact float comparison below relies on these literals matching the CSV values.
SERVICE_LOCATION_COORDS = [
    (36.066666, -86.434722),
    (35.588333, -86.443888),
    (36.1950, -83.174722),
]

# Build a single positional boolean mask that is True for rows at any service location.
at_service_location = np.zeros(len(faults), dtype=bool)
for service_lat, service_lon in SERVICE_LOCATION_COORDS:
    at_service_location |= (
        (faults["Latitude"] == service_lat) & (faults["Longitude"] == service_lon)
    ).to_numpy()

# Fault rows recorded at a service location (kept for inspection; rows stay in file order).
service_locations = faults[at_service_location]

# Remove service-location rows by position rather than by RecordID label, so a
# repeated RecordID can never cause an unrelated row to be dropped.
faults_cleaned = faults[~at_service_location]

# Remove EquipmentIDs that are longer than 5 characters per the README.
# .str.len() gives NaN for a missing ID (the comparison is then False and the row
# is dropped) instead of raising TypeError the way len() does on NaN.
faults_cleaned = faults_cleaned[faults_cleaned['EquipmentID'].str.len() <= 5]

# Remove columns that only contained null values or would not be valuable because
# they're unique to the truck. reset_index() turns RecordID back into a regular column.
faults_cleaned = faults_cleaned.drop(
    columns=['actionDescription', 'faultValue', 'ecuSerialNumber', 'ecuSource', 'MCTNumber']
).reset_index()

  mask |= (ar1 == a)


In [58]:
# Number of fault records per calendar year of EventTimeStamp, largest count first.
event_year = faults_cleaned['EventTimeStamp'].dt.to_period('Y')
event_year.value_counts()

2016    332175
2015    325536
2017    254680
2018    143289
2019    111321
2020     17179
2011       227
2000       219
2010        47
2009        24
2026         3
2002         1
Freq: A-DEC, Name: EventTimeStamp, dtype: int64

### Limit to years 2015-2020 (TODO: the cell below only shows each record's year; it does not filter any rows)

In [61]:
# Year (as an annual Period) of every fault record's EventTimeStamp.
# NOTE(review): this only displays the years; no rows are filtered to 2015-2020 here.
faults_cleaned['EventTimeStamp'].dt.to_period('Y') 

0          2015
1          2015
2          2015
3          2015
4          2015
           ... 
1184696    2020
1184697    2020
1184698    2020
1184699    2020
1184700    2020
Name: EventTimeStamp, Length: 1184701, dtype: period[A-DEC]

In [4]:
# Display the cleaned fault table (pandas truncates the rendering to the first and last rows).
faults_cleaned

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,LocationTimeStamp
0,1,990349,2015-02-21 10:47:13,Low (Severity Low) Engine Coolant Level,unknown,unknown,unknown,111,17,True,2,1439,38.857638,-84.626851,2015-02-21 11:34:25
1,2,990360,2015-02-21 11:34:34,,unknown,unknown,unknown,629,12,True,127,1439,38.857638,-84.626851,2015-02-21 11:35:10
2,3,990364,2015-02-21 11:35:31,Incorrect Data Steering Wheel Angle,unknown,unknown,unknown,1807,2,False,127,1369,41.421250,-87.767361,2015-02-21 11:35:26
3,4,990370,2015-02-21 11:35:33,Incorrect Data Steering Wheel Angle,unknown,unknown,unknown,1807,2,True,127,1369,41.421018,-87.767361,2015-02-21 11:36:08
4,5,990416,2015-02-21 11:39:41,,22281684P01*22357957P01*22362082P01*,0USA13_13_0415_2238A,VOLVO,4364,17,False,2,1674,38.416481,-89.442638,2015-02-21 11:39:37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1184696,1248454,123904424,2020-03-06 14:00:26,Low (Severity Low) Catalyst Tank Level,04384413*22383729*082218154102*60701732*G1*BGT*,6X1u17D1500000000,CMMNS,1761,17,False,3,2282,37.094768,-85.897407,2020-03-06 14:00:21
1184697,1248455,123905139,2020-03-06 14:04:23,Condition Exists Engine Protection Torque Derate,04358814*06099720*030816202706*09400153*G1*BDR*,6X1u13D1500000000,CMMNS,1569,31,True,5,1994,34.390740,-79.461805,2020-03-06 14:04:59
1184698,1248456,123905996,2020-03-06 14:13:38,Abnormal Rate of Change Aftertreatment 1 Intak...,05317106*05100987*050719120655*09401585*G1*BDR*,6X1u13D1500000000,CMMNS,3216,10,True,1,1850,34.430370,-84.920509,2020-03-06 14:14:14
1184699,1248457,123906113,2020-03-06 14:14:13,Low (Severity Medium) Engine Coolant Level,04384413*22544852*090619141107*60701756*G1*BGT*,,,111,18,True,8,2377,35.030925,-85.321527,2020-03-06 14:14:49


# In the full dataset
### How many trucks have a full derate/partial derate? 

#### 211 trucks have at least one full derate (SPN 5246) in the cleaned 'J1939Faults.csv' data

#### 498 trucks have at least one partial derate (SPN 1569)

#### 182 trucks have both a full and a partial derate.

In [67]:
# Distinct EquipmentIDs with at least one SPN 5246 fault (the "full derate" set in this notebook).
is_full_derate = faults_cleaned['spn'] == 5246
full = faults_cleaned.loc[is_full_derate, 'EquipmentID'].unique()

In [78]:
# Number of distinct trucks with at least one SPN 5246 fault.
len(full)

211

In [68]:
# Distinct EquipmentIDs with at least one SPN 1569 fault (the "partial derate" set in this notebook).
is_partial_derate = faults_cleaned['spn'] == 1569
partial = faults_cleaned.loc[is_partial_derate, 'EquipmentID'].unique()

In [79]:
# Number of distinct trucks with at least one SPN 1569 fault.
len(partial)

498

In [80]:
# EquipmentIDs present in both `full` and `partial`; np.intersect1d returns them sorted and de-duplicated.
intersection = np.intersect1d(full, partial)

In [77]:
# Number of trucks that appear in both the SPN 5246 and SPN 1569 sets.
len(intersection)

182

### Filter down to the trucks that have both

In [83]:
# Keep every fault record (any SPN) belonging to a truck that appears in `intersection`.
has_both_derates = faults_cleaned['EquipmentID'].isin(intersection)
both = faults_cleaned.loc[has_both_derates]

In [86]:
# Summarize the filtered frame: row count plus per-column non-null counts and dtypes.
both.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 357347 entries, 5 to 1184681
Data columns (total 15 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   RecordID               357347 non-null  int64         
 1   ESS_Id                 357347 non-null  int64         
 2   EventTimeStamp         357347 non-null  datetime64[ns]
 3   eventDescription       346863 non-null  object        
 4   ecuSoftwareVersion     287808 non-null  object        
 5   ecuModel               347610 non-null  object        
 6   ecuMake                347610 non-null  object        
 7   spn                    357347 non-null  int64         
 8   fmi                    357347 non-null  int64         
 9   active                 357347 non-null  bool          
 10  activeTransitionCount  357347 non-null  int64         
 11  EquipmentID            357347 non-null  object        
 12  Latitude               357347 non-null  flo

In [98]:
# Number of fault records per calendar year for the trucks in `both`, largest count first.
both_event_year = both['EventTimeStamp'].dt.to_period('Y')
both_event_year.value_counts()

2016    117106
2015    105387
2017     74788
2018     37820
2019     18624
2020      3534
2011        81
2010         4
2000         3
Freq: A-DEC, Name: EventTimeStamp, dtype: int64