In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import datetime as dt

In [2]:
#Read in J1939Faults. low_memory = false because of warning about mixed type columns.
faults = pd.read_csv('../data/J1939Faults.csv',
                     index_col = 'RecordID',
                     parse_dates = ['EventTimeStamp', 'LocationTimeStamp'],
                     low_memory = False)
#remove faults that occur at service locations. Had to remove the last digit because the rows dont have it.
service_location1 = faults[(faults["Latitude"] == 36.066666) & (faults["Longitude"] == -86.434722)]

service_location2 = faults[(faults["Latitude"] == 35.588333) & (faults["Longitude"] == -86.443888)]

service_location3 = faults[(faults["Latitude"] == 36.1950) & (faults["Longitude"] ==  -83.174722)]

#concat service_locations for removal
service_locations = pd.concat([service_location1, service_location2, service_location3])

#drop service locations
faults_cleaned = faults.drop(service_locations.index)

#remove EquipmentIDs that are longer than 5 Characters per the README
faults_cleaned = faults_cleaned[faults_cleaned['EquipmentID'].map(len) <= 5]

#removed columns that only contained null values or would not be valuable because they're unique to the truck.
faults_cleaned = faults_cleaned.drop(['actionDescription', 'faultValue', 'ecuSerialNumber',
                                      'ecuSource', 'MCTNumber'], axis = 1).reset_index()

  mask |= (ar1 == a)


In [3]:
#Limit to years 2015-2020, Had to add down to seconds because it kept changing the datatype after filtering
faults_cleaned = faults_cleaned[(faults_cleaned['EventTimeStamp'] >= '2015-01-01 00:00:01') & (faults_cleaned['EventTimeStamp'] <= '2020-12-31 12:59:59')]



In [4]:
#prepare derates for dummization
faults_cleaned.loc[faults_cleaned['spn'] != 1569, 'spn_derate'] = 'neither'
faults_cleaned.loc[faults_cleaned['spn'] != 5246, 'spn_derate'] = 'neither'
faults_cleaned.loc[faults_cleaned['spn'] == 5246, 'spn_derate'] = 'full'
faults_cleaned.loc[faults_cleaned['spn'] == 1569, 'spn_derate'] = 'partial'  


faults_cleaned = faults_cleaned.set_index('RecordID')

#get dummies
derates = pd.get_dummies(faults_cleaned['spn_derate'])
derates

#merge back with original dataset
faults_cleaned = faults_cleaned.merge(derates, left_on = 'RecordID', right_on = 'RecordID') 

In [5]:
#sorts and groups by truck. then sorts by event timestamp. this made the diff() calculate correctly 
#    ¯\_(ツ)_/¯
#code shamelessly stolen from https://arccoder.medium.com/pandas-sort-within-groups-e1f3b6a10a3f
faults_cleaned = faults_cleaned.sort_values(['EquipmentID'], ascending=True) \
    .groupby(['EquipmentID'], sort=False) \
    .apply(lambda x: x.sort_values(['EventTimeStamp'], ascending=True)) \
    .reset_index(drop=True)


faults_cleaned['timedelta'] = faults_cleaned.groupby('EquipmentID')['EventTimeStamp'].diff()

In [20]:
faults_cleaned['timedelta']

0                      NaT
1          0 days 00:00:00
2          0 days 01:23:09
3          0 days 00:43:50
4         56 days 00:45:23
                ...       
1184175    0 days 00:52:09
1184176    1 days 20:48:06
1184177    0 days 00:38:48
1184178                NaT
1184179    0 days 00:29:57
Name: timedelta, Length: 1184180, dtype: timedelta64[ns]

In [6]:
faults_cleaned['EventTimeStamp'].dt.to_period('Y').value_counts()

2016    332175
2015    325536
2017    254680
2018    143289
2019    111321
2020     17179
Freq: A-DEC, Name: EventTimeStamp, dtype: int64

### Based on the distribution of years I think it would be best to limit the dataset to years 2015-2020 

#### 2015 -2020 are consecutive while before 2015 it skips years 2014, 2013, 2012 and picks back up at 2011. It's possible that some trucks from 2014 carry over into 2015, but unlikely any from 2011 are carried over since they limit their lease to 4 years.

# In the full dataset
### How many trucks have a full derate/partial derate? 

#### 210 have full derates 

#### 498 have partial

#### 182 have both. 

Interestingly, when filtering down for years 2015-2020, Full derates went down from 211 to 210 but the partial remained unchanged. 

In [7]:
full = faults_cleaned[(faults_cleaned['spn'] == 5246)]
full = full['EquipmentID'].unique()

len(full)

210

In [8]:
partial = faults_cleaned[(faults_cleaned['spn'] == 1569)]
partial = partial['EquipmentID'].unique()

len(partial)

498

In [9]:
intersection = np.intersect1d(full, partial)

len(intersection)

182

In [10]:
intersection

array(['1329', '1339', '1366', '1373', '1375', '1378', '1383', '1384',
       '1389', '1391', '1395', '1396', '1398', '1399', '1401', '1403',
       '1407', '1417', '1418', '1419', '1431', '1437', '1440', '1443',
       '1444', '1452', '1453', '1457', '1458', '1467', '1472', '1473',
       '1477', '1486', '1487', '1488', '1490', '1492', '1501', '1508',
       '1509', '1519', '1549', '1551', '1552', '1554', '1556', '1557',
       '1559', '1560', '1561', '1563', '1564', '1566', '1567', '1571',
       '1572', '1573', '1575', '1579', '1581', '1582', '1584', '1585',
       '1586', '1590', '1591', '1592', '1594', '1595', '1598', '1599',
       '1600', '1601', '1602', '1603', '1604', '1605', '1621', '1623',
       '1630', '1637', '1643', '1654', '1657', '1659', '1661', '1663',
       '1665', '1668', '1669', '1683', '1686', '1689', '1691', '1692',
       '1696', '1698', '1704', '1711', '1718', '1731', '1732', '1739',
       '1743', '1751', '1757', '1758', '1764', '1768', '1772', '1778',
      

In [None]:
faults_cleaned['timedelta'].describe()

In [17]:
def get_number(n):
    position = faults_cleaned[faults_cleaned['spn']==5246].index[0]
    find = range(position - (n-1), position + 1)
    return faults_cleaned.loc[find]

In [21]:
faults_cleaned[faults_cleaned['spn']==5246]

Unnamed: 0,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,LocationTimeStamp,spn_derate,full,neither,partial,timedelta
100,1070646,2015-02-25 13:53:08,,unknown,unknown,unknown,5246,0,True,1,1329,39.399583,-82.974768,2015-02-25 13:56:31,full,1,0,0,0 days 00:00:00
103,1071907,2015-02-25 14:47:00,,unknown,unknown,unknown,5246,0,False,1,1329,39.399629,-82.974814,2015-02-25 14:46:56,full,1,0,0,0 days 00:53:52
2049,2928718,2015-06-12 15:35:22,,unknown,unknown,unknown,5246,0,True,1,1339,37.035324,-86.336018,2015-06-12 15:35:58,full,1,0,0,0 days 07:11:07
2057,2958788,2015-06-15 11:04:15,,unknown,unknown,unknown,5246,0,False,1,1339,36.066620,-86.434675,2015-06-15 11:04:10,full,1,0,0,0 days 00:05:50
16478,2918010,2015-06-12 06:13:27,,unknown,unknown,unknown,5246,0,True,1,1366,35.146018,-86.578888,2015-06-12 07:17:28,full,1,0,0,0 days 02:15:38
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1173781,69085173,2018-11-21 13:48:29,,unknown,unknown,unknown,5246,19,False,42,305,38.192546,-85.707685,2018-11-20 23:30:06,full,1,0,0,0 days 22:02:11
1174617,60949742,2018-09-07 11:22:40,,05317106*04075152*092613211021*09300006*G1*BDR*,6X1u13D1500000000,CMMNS,5246,0,True,1,306,35.997500,-86.595092,2018-09-07 11:23:15,full,1,0,0,0 days 04:35:35
1174620,61135212,2018-09-09 15:03:53,,05317106*04075152*092613211021*09300006*G1*BDR*,6X1u13D1500000000,CMMNS,5246,0,False,1,306,36.067037,-86.434675,2018-09-09 15:04:28,full,1,0,0,0 days 01:18:17
1174625,61162256,2018-09-10 04:41:48,,05317106*04075152*092613211021*09300006*G1*BDR*,6X1u13D1500000000,CMMNS,5246,0,False,1,306,36.066157,-86.435138,2018-09-10 04:41:58,full,1,0,0,0 days 00:00:00


In [24]:
pd.set_option('display.max_rows', 100)

In [36]:
truck_num = '1373'
truck = faults_cleaned[faults_cleaned['EquipmentID']==truck_num]

In [37]:
truck[truck['spn']==5246].index[0]

21373

In [38]:
first_full_index = truck[truck['spn']==5246].index[0]

In [39]:
truck.loc[first_full_index]

ESS_Id                                                           6733196
EventTimeStamp                                       2016-01-08 07:18:31
eventDescription                                                     NaN
ecuSoftwareVersion       04993120*00016941*051215183709*07700066*I0*BBZ*
ecuModel                                               6X1u10D1500000000
ecuMake                                                            CMMNS
spn                                                                 5246
fmi                                                                    0
active                                                              True
activeTransitionCount                                                  1
EquipmentID                                                         1373
Latitude                                                       33.938333
Longitude                                                     -81.291759
LocationTimeStamp                                  

In [40]:
truck.loc[first_full_index-10:first_full_index]

Unnamed: 0,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,LocationTimeStamp,spn_derate,full,neither,partial,timedelta
21363,6509561,2015-12-24 09:34:30,Condition Exists Catalyst Dosing Unit Input Lines,04993120*00016941*051215183709*07700066*I0*BBZ*,6X1u10D1500000000,CMMNS,3362,31,True,1,1373,33.809907,-84.498194,2015-12-24 09:35:06,neither,0,1,0,0 days 00:20:16
21364,6510017,2015-12-24 10:30:40,Condition Exists Catalyst Dosing Unit Input Lines,04993120*00016941*051215183709*07700066*I0*BBZ*,6X1u10D1500000000,CMMNS,3362,31,False,1,1373,33.809907,-84.498287,2015-12-24 10:30:36,neither,0,1,0,0 days 00:56:10
21365,6612318,2015-12-31 20:04:00,Data May Be Invalid Accelerator Pedal Position 1,04993120*00016941*051215183709*07700066*I0*BBZ*,6X1u10D1500000000,CMMNS,91,19,True,1,1373,32.521342,-83.743611,2015-12-31 20:04:36,neither,0,1,0,7 days 09:33:20
21366,6612319,2015-12-31 20:04:01,Incorrect Data J1939 Network #1 Primary Vehicl...,unknown,unknown,unknown,639,2,True,127,1373,32.521342,-83.743611,2015-12-31 20:04:36,neither,0,1,0,0 days 00:00:01
21367,6612349,2015-12-31 20:09:29,Incorrect Data J1939 Network #1 Primary Vehicl...,unknown,unknown,unknown,639,2,False,127,1373,32.447407,-83.754629,2015-12-31 20:09:25,neither,0,1,0,0 days 00:05:28
21368,6612350,2015-12-31 20:09:29,Data May Be Invalid Accelerator Pedal Position 1,04993120*00016941*051215183709*07700066*I0*BBZ*,6X1u10D1500000000,CMMNS,91,19,False,1,1373,32.447407,-83.754629,2015-12-31 20:09:25,neither,0,1,0,0 days 00:00:00
21369,6686614,2016-01-06 07:00:13,Low (Severity Medium) Engine Injector Metering...,04993120*00016941*051215183709*07700066*I0*BBZ*,6X1u10D1500000000,CMMNS,157,18,True,1,1373,41.44574,-87.758148,2016-01-06 07:00:49,neither,0,1,0,5 days 10:50:44
21370,6692754,2016-01-06 10:43:58,Low (Severity Medium) Engine Injector Metering...,04993120*00016941*051215183709*07700066*I0*BBZ*,6X1u10D1500000000,CMMNS,157,18,False,1,1373,41.825,-87.651064,2016-01-06 10:43:53,neither,0,1,0,0 days 03:43:45
21371,6723578,2016-01-07 15:41:31,,04993120*00016941*051215183709*07700066*I0*BBZ*,6X1u10D1500000000,CMMNS,4340,5,True,1,1373,35.611435,-83.011481,2016-01-07 15:42:06,neither,0,1,0,1 days 04:57:33
21372,6724754,2016-01-07 16:38:34,Condition Exists Engine Protection Torque Derate,04993120*00016941*051215183709*07700066*I0*BBZ*,6X1u10D1500000000,CMMNS,1569,31,True,1,1373,35.275416,-82.38662,2016-01-07 16:39:09,partial,0,0,1,0 days 00:57:03


In [41]:
from tqdm.notebook import tqdm

In [42]:
derated_trucks = []
for truck_num in tqdm(full):
    truck = faults_cleaned[faults_cleaned['EquipmentID']==truck_num]
    first_full_index = truck[truck['spn']==5246].index[0]
    derated_trucks.append(truck.loc[first_full_index-10:first_full_index])
    
derated_trucks = pd.concat(derated_trucks)


  0%|          | 0/210 [00:00<?, ?it/s]

In [43]:
derated_trucks

Unnamed: 0,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,LocationTimeStamp,spn_derate,full,neither,partial,timedelta
99,1070647,2015-02-25 13:53:08,Condition Exists Engine Protection Torque Derate,unknown,unknown,unknown,1569,31,True,1,1329,39.399583,-82.974768,2015-02-25 13:56:31,partial,0,0,1,NaT
100,1070646,2015-02-25 13:53:08,,unknown,unknown,unknown,5246,0,True,1,1329,39.399583,-82.974768,2015-02-25 13:56:31,full,1,0,0,0 days 00:00:00
2039,2865286,2015-06-09 15:40:51,,unknown,unknown,unknown,50353,0,True,2,1339,35.749027,-78.868333,2015-06-09 15:41:28,neither,0,1,0,0 days 05:41:26
2040,2865323,2015-06-09 15:43:36,,unknown,unknown,unknown,50353,0,False,2,1339,35.745833,-78.905185,2015-06-09 15:43:32,neither,0,1,0,0 days 00:02:45
2041,2881610,2015-06-10 11:42:08,,unknown,unknown,unknown,5394,7,True,1,1339,35.756712,-77.869444,2015-06-10 11:42:43,neither,0,1,0,0 days 19:58:32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1174613,60855543,2018-09-06 15:38:45,Incorrect Data J1939 Network #1 Primary Vehicl...,BB41103* BB41104*,EC60-adv,BNDWS,639,2,False,127,306,36.066435,-86.433518,2018-09-06 15:38:40,neither,0,1,0,0 days 00:25:40
1174614,60904453,2018-09-07 05:45:39,Data Drifted High Aftertreatment 1 Outlet NOx,05317106*04075152*092613211021*09300006*G1*BDR*,6X1u13D1500000000,CMMNS,3226,20,True,1,306,36.167777,-86.529120,2018-09-07 05:46:16,neither,0,1,0,0 days 14:06:54
1174615,60904613,2018-09-07 05:48:43,Condition Exists NOx limits exceeded due to In...,05317106*04075152*092613211021*09300006*G1*BDR*,6X1u13D1500000000,CMMNS,4094,31,True,1,306,36.169907,-86.587731,2018-09-07 05:49:20,neither,0,1,0,0 days 00:03:04
1174616,60909415,2018-09-07 06:47:05,Condition Exists Engine Protection Torque Derate,05317106*04075152*092613211021*09300006*G1*BDR*,6X1u13D1500000000,CMMNS,1569,31,True,1,306,36.173240,-86.776990,2018-09-07 06:47:46,partial,0,0,1,0 days 00:58:22


In [19]:
get_number(3)

Unnamed: 0,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,LocationTimeStamp,spn_derate,full,neither,partial,timedelta
98,1925126,2015-04-16 12:59:07,,unknown,unknown,unknown,50353,0,False,2,1328,36.066712,-86.434537,2015-04-16 12:59:03,neither,0,1,0,0 days 00:25:19
99,1070647,2015-02-25 13:53:08,Condition Exists Engine Protection Torque Derate,unknown,unknown,unknown,1569,31,True,1,1329,39.399583,-82.974768,2015-02-25 13:56:31,partial,0,0,1,NaT
100,1070646,2015-02-25 13:53:08,,unknown,unknown,unknown,5246,0,True,1,1329,39.399583,-82.974768,2015-02-25 13:56:31,full,1,0,0,0 days 00:00:00


In [15]:
grouped = faults_cleaned.groupby('EquipmentID')

groups = []

for group in grouped:
    grouped.apply(lambda group: get_number(3))
    groups.append()

TypeError: append() takes exactly one argument (0 given)

In [11]:
faults_cleaned[faults_cleaned['EquipmentID']==1329]

Unnamed: 0,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,LocationTimeStamp,spn_derate,full,neither,partial,timedelta


In [None]:
grouped.apply(lambda grouped: get_number(5))