In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import datetime as dt

In [112]:
# Read in J1939Faults. low_memory=False because of the warning about mixed-type columns.
faults = pd.read_csv('../data/J1939Faults.csv',
                     index_col='RecordID',
                     parse_dates=['EventTimeStamp', 'LocationTimeStamp'],
                     low_memory=False)

# (latitude, longitude) of the service locations whose faults are excluded.
# Per the original note, the last digit was dropped because the rows don't carry it.
# NOTE(review): 36.1950 has only four decimals while the other coordinates have six --
# confirm this value against the README, otherwise this location may never match.
service_coordinates = [
    (36.066666, -86.434722),
    (35.588333, -86.443888),
    (36.1950, -83.174722),
]

# Exact float equality on coordinates is brittle (a parsed value can differ from the
# literal in its last bit). Half of the sixth decimal place still only accepts the
# same six-decimal reading, but tolerates that representation noise.
coordinate_tolerance = 5e-7

# One boolean mask for all locations instead of three filtered copies + concat + drop.
at_service_location = pd.Series(False, index=faults.index)
for service_lat, service_lon in service_coordinates:
    at_service_location |= (
        np.isclose(faults['Latitude'], service_lat, rtol=0, atol=coordinate_tolerance)
        & np.isclose(faults['Longitude'], service_lon, rtol=0, atol=coordinate_tolerance)
    )

# Kept for inspection: the rows that are being removed.
service_locations = faults[at_service_location]

# Drop faults that occurred at a service location.
faults_cleaned = faults[~at_service_location]

# Remove EquipmentIDs that are longer than 5 characters per the README.
# .str.len() yields NaN for a missing ID, so such rows are dropped instead of
# raising a TypeError the way .map(len) would.
faults_cleaned = faults_cleaned[faults_cleaned['EquipmentID'].str.len() <= 5]

# Remove columns that only contained null values or would not be valuable because
# they're unique to the truck. reset_index() turns RecordID back into a regular column.
faults_cleaned = faults_cleaned.drop(
    columns=['actionDescription', 'faultValue', 'ecuSerialNumber', 'ecuSource', 'MCTNumber']
).reset_index()

  mask |= (ar1 == a)


In [113]:
# Limit to calendar years 2015-2020 using a half-open interval [2015-01-01, 2021-01-01).
# The previous upper bound of '2020-12-31 12:59:59' silently dropped every event after
# 1 PM on the last day, and the '00:00:01' lower bound dropped an event at exactly midnight.
# Comparing against Timestamp objects keeps the comparison datetime-to-datetime.
window_start = pd.Timestamp('2015-01-01')
window_end = pd.Timestamp('2021-01-01')
in_window = (
    (faults_cleaned['EventTimeStamp'] >= window_start)
    & (faults_cleaned['EventTimeStamp'] < window_end)
)
faults_cleaned = faults_cleaned[in_window]



In [114]:
# Prepare derates for dummization: label each fault by the derate its SPN signals.
# A single mapping replaces four overlapping .loc assignments whose result depended
# on the order in which they ran; every SPN not listed here becomes 'neither'.
derate_labels = {5246: 'full', 1569: 'partial'}

# .assign returns a new frame, so no column is written into a filtered slice.
faults_cleaned = (
    faults_cleaned
    .assign(spn_derate=lambda frame: frame['spn'].map(derate_labels).fillna('neither'))
    .set_index('RecordID')
)

# Get dummies: one indicator column per label ('full', 'neither', 'partial').
derates = pd.get_dummies(faults_cleaned['spn_derate'])

# Attach the indicator columns to the original dataset; both frames share the RecordID index.
faults_cleaned = faults_cleaned.join(derates)

In [124]:
# Order the faults by truck, then chronologically within each truck.
# groupby(...).diff() subtracts the previous row *within each group in row order*, so the
# rows of every truck must already be in time order for the gaps to be meaningful.
# A single two-key sort replaces the per-truck groupby.apply(sort_values): it avoids a
# Python-level loop over every truck, and multi-column sorts are stable, so events with
# identical timestamps keep their existing relative order.
# NOTE(review): tie order among equal timestamps may differ from the previous
# apply-based version; re-check any result that depends on the "row above".
faults_cleaned = (
    faults_cleaned
    .sort_values(['EquipmentID', 'EventTimeStamp'], ascending=True)
    .reset_index(drop=True)
)

# Time elapsed since the same truck's previous fault (NaT for each truck's first event).
faults_cleaned['timedelta'] = faults_cleaned.groupby('EquipmentID')['EventTimeStamp'].diff()

In [55]:
# Count fault events per calendar year (EventTimeStamp converted to a yearly period).
faults_cleaned['EventTimeStamp'].dt.to_period('Y').value_counts()

2016    332175
2015    325536
2017    254680
2018    143289
2019    111321
2020     17179
Freq: A-DEC, Name: EventTimeStamp, dtype: int64

### Based on the distribution of years I think it would be best to limit the dataset to years 2015-2020 

#### The years 2015–2020 are consecutive, whereas before 2015 the data skips 2014, 2013 and 2012 and only picks back up at 2011. It's possible that some trucks from 2014 carry over into 2015, but it is unlikely that any from 2011 do, since they limit their lease to 4 years.

# In the full dataset
### How many trucks have a full derate/partial derate? 

#### 210 have full derates 

#### 498 have partial

#### 182 have both. 

Interestingly, when filtering down to the years 2015-2020, the number of trucks with full derates went down from 211 to 210, while the number with partial derates remained unchanged.

In [126]:
# Distinct trucks (EquipmentIDs) that logged at least one full derate (spn 5246).
full = faults_cleaned.loc[faults_cleaned['spn'].eq(5246), 'EquipmentID'].unique()

len(full)

210

In [127]:
# Distinct trucks (EquipmentIDs) that logged at least one partial derate (spn 1569).
partial = faults_cleaned.loc[faults_cleaned['spn'].eq(1569), 'EquipmentID'].unique()

len(partial)

498

In [128]:
# Trucks present in both lists, i.e. with a full and a partial derate on record.
intersection = np.intersect1d(ar1=full, ar2=partial)

intersection.shape[0]

182

In [132]:
# Summary statistics of the gap between consecutive faults on the same truck.
faults_cleaned['timedelta'].describe()

count                      1183135
mean     0 days 17:46:01.603820358
std      6 days 05:11:16.571210166
min                0 days 00:00:00
25%                0 days 00:01:07
50%                0 days 00:15:15
75%         0 days 01:40:33.500000
max             1184 days 05:41:37
Name: timedelta, dtype: object

In [134]:
# Show the per-truck gaps themselves; NaT marks a truck's first event (nothing to diff against).
faults_cleaned['timedelta']

0                      NaT
1          0 days 00:00:00
2          0 days 01:23:09
3          0 days 00:43:50
4         56 days 00:45:23
                ...       
1184175    0 days 00:52:09
1184176    1 days 20:48:06
1184177    0 days 00:38:48
1184178                NaT
1184179    0 days 00:29:57
Name: timedelta, Length: 1184180, dtype: timedelta64[ns]

In [None]:
# TODO: for every row where faults_cleaned['spn'] == 5246, take the row directly above it
# (index - 1?) and plot the distribution of those preceding rows: .value_counts() -> .hist()



In [144]:
def get_number(n, df=None, spn=5246, occurrence=0):
    """Return the ``n`` rows ending at a row whose ``spn`` matches.

    With the defaults this reproduces the original behaviour: the window ends at
    the first row of ``faults_cleaned`` whose ``spn`` is 5246.

    Parameters
    ----------
    n : int
        Window size, counting the matching row itself. ``n < 1`` returns an
        empty frame. A window that would start before the first row is
        truncated at the top of the frame instead of raising ``KeyError``.
    df : pandas.DataFrame, optional
        Frame to search; must have an ``spn`` column. Defaults to the
        notebook-level ``faults_cleaned``.
    spn : int, optional
        SPN code that marks the end of the window (default 5246).
    occurrence : int, optional
        Which matching row to use, in row order (0 = first, -1 = last).

    Returns
    -------
    pandas.DataFrame
        A copy of the selected rows, in their existing order.

    Raises
    ------
    IndexError
        If ``df`` has no such occurrence of ``spn``.

    Notes
    -----
    Rows are selected by position, so the window can reach into the rows of a
    different EquipmentID when the match sits near the start of a truck's
    records -- inspect the EquipmentID column of the result.
    """
    if df is None:
        df = faults_cleaned  # notebook-level frame built in the cells above

    # Positions (not labels) of the matching rows, so any index type works.
    hits = np.flatnonzero((df['spn'] == spn).to_numpy())
    try:
        position = int(hits[occurrence])
    except IndexError:
        raise IndexError(
            f"no occurrence {occurrence} of spn {spn} (found {len(hits)} matching rows)"
        ) from None

    if n < 1:
        return df.iloc[0:0].copy()

    start = max(position - (n - 1), 0)  # clamp so the window never runs off the top
    return df.iloc[start:position + 1].copy()



In [147]:
# Show the first spn 5246 row together with the row directly above it.
get_number(2)

Unnamed: 0,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,LocationTimeStamp,spn_derate,full,neither,partial,timedelta
100,1070647,2015-02-25 13:53:08,Condition Exists Engine Protection Torque Derate,unknown,unknown,unknown,1569,31,True,1,1329,39.399583,-82.974768,2015-02-25 13:56:31,partial,0,0,1,0 days
101,1070646,2015-02-25 13:53:08,,unknown,unknown,unknown,5246,0,True,1,1329,39.399583,-82.974768,2015-02-25 13:56:31,full,1,0,0,0 days


In [148]:
# Preview the first 100 rows of faults_cleaned.
faults_cleaned.head(100)

Unnamed: 0,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,LocationTimeStamp,spn_derate,full,neither,partial,timedelta
0,1059893,2015-02-25 06:17:50,Low (Severity Medium) Battery Potential / Powe...,04993120*00001782*082113134117*07700053*I0*BBZ*,6X1u10D1500000000,CMMNS,444,18,True,1,1327,36.066805,-86.433981,2015-02-25 06:22:31,neither,0,1,0,NaT
1,1059892,2015-02-25 06:17:50,Low (Severity Low) Engine Coolant Level,04993120*00001782*082113134117*07700053*I0*BBZ*,6X1u10D1500000000,CMMNS,111,17,True,1,1327,36.066805,-86.433981,2015-02-25 06:22:30,neither,0,1,0,0 days 00:00:00
2,1061595,2015-02-25 07:40:59,Low (Severity Low) Engine Coolant Level,04993120*00001782*082113134117*07700053*I0*BBZ*,6X1u10D1500000000,CMMNS,111,17,False,1,1327,36.067083,-86.434722,2015-02-25 06:24:30,neither,0,1,0,0 days 01:23:09
3,1062652,2015-02-25 08:24:49,Low (Severity Medium) Battery Potential / Powe...,04993120*00001782*082113134117*07700053*I0*BBZ*,6X1u10D1500000000,CMMNS,444,18,False,1,1327,36.067083,-86.434722,2015-02-25 06:24:30,neither,0,1,0,0 days 00:43:50
4,2022693,2015-04-22 09:10:12,Low Voltage (Particulate Trap Outlet Pressure 1),04993120*00001782*082113134117*07700053*I0*BBZ*,6X1u10D1500000000,CMMNS,3610,4,True,1,1327,36.194861,-83.174768,2015-04-22 09:10:48,neither,0,1,0,56 days 00:45:23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1895753,2015-04-15 08:14:35,Condition Exists Cruise Control Enable Switch,unknown,unknown,unknown,596,31,True,73,1328,41.283101,-87.836712,2015-04-15 08:15:12,neither,0,1,0,21 days 12:09:32
96,1896784,2015-04-15 08:58:11,Condition Exists Cruise Control Enable Switch,unknown,unknown,unknown,596,31,False,73,1328,40.750370,-88.000787,2015-04-15 08:58:07,neither,0,1,0,0 days 00:43:36
97,1924499,2015-04-16 12:33:48,,unknown,unknown,unknown,50353,0,True,2,1328,36.067407,-86.435046,2015-04-16 12:34:25,neither,0,1,0,1 days 03:35:37
98,1925126,2015-04-16 12:59:07,,unknown,unknown,unknown,50353,0,False,2,1328,36.066712,-86.434537,2015-04-16 12:59:03,neither,0,1,0,0 days 00:25:19
