# Big G Express Model Data Prep

## Imports

In [97]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt

## Read in

In [89]:
faults = pd.read_pickle('../data/faults_df.pickle')
diag = pd.read_pickle('../data/diag_df.pickle')
fdwide = pd.read_pickle('../data/fdwide_df.pickle')

## Add grouping column to fdwide: eventGroup

Note that this event grouper includes 75% derate events. If we choose not to include those events, that part of the logic can easily be removed.

In [90]:
fdwide = fdwide.sort_values(['EquipmentID', 'EventTimeStamp']).reset_index()

In [91]:
increment_check_full_only = (

                (fdwide['spn'].shift() == 5246)  # full derate in above row
                | 
                (fdwide['EquipmentID'] != fdwide['EquipmentID'].shift())    # Current row is different truck from previous row

    )

increment_check_either = (
    
                (fdwide['spn'].shift() == 5246)  # full derate in above row
                | 
                ((fdwide['spn'].shift() == 1596) & (fdwide['spn'].shift() == 31))   # partial derate in above row
                | 
                (fdwide['EquipmentID'] != fdwide['EquipmentID'].shift())    # Current row is different truck from previous row

    )

fdwide['eventGroupFull'] = increment_check_full_only.cumsum()
fdwide['eventGroupEither'] = increment_check_full_only.cumsum()

In [95]:
fdwide['timeTillFullLast'] = fdwide.groupby('eventGroupFull')['EventTimeStamp'].transform(max) - fdwide['EventTimeStamp']
fdwide['timeTillEitherLast'] = fdwide.groupby('eventGroupFull')['EventTimeStamp'].transform(max) - fdwide['EventTimeStamp']

In [100]:
fdwide['within24HoursFull'] = fdwide['timeTillFullLast'] < dt.timedelta(hours = 24)
fdwide['within24HoursEither'] = fdwide['timeTillEitherLast'] < dt.timedelta(hours = 24)

In [101]:
fdwide

Unnamed: 0,index,RecordID,ESS_Id,EventTimeStamp,eventDescription,actionDescription,ecuSoftwareVersion,ecuSerialNumber,ecuModel,ecuMake,...,Speed,SwitchedBatteryVoltage,Throttle,TurboBoostPressure,eventGroupFull,eventGroupEither,timeTillFullLast,timeTillEitherLast,within24HoursFull,within24HoursEither
0,33355,34467,2092800,2015-04-26 06:16:16,High (Severity High) Engine Speed,,unknown,unknown,unknown,unknown,...,,,,,1,1,442 days 04:37:00,442 days 04:37:00,False,False
1,35080,36192,2123154,2015-04-28 05:29:21,High (Severity High) Engine Speed,,unknown,unknown,unknown,unknown,...,,,,,1,1,440 days 05:23:55,440 days 05:23:55,False,False
2,47186,48298,2346346,2015-05-10 07:11:34,Incorrect Data J1939 Network #1 Primary Vehicl...,,unknown,unknown,unknown,unknown,...,4.728246,3276.75,8.8,0.87,1,1,428 days 03:41:42,428 days 03:41:42,False,False
3,47213,48325,2346621,2015-05-10 07:59:25,Incorrect Data J1939 Network #1 Primary Vehicl...,,unknown,unknown,unknown,unknown,...,,,,,1,1,428 days 02:53:51,428 days 02:53:51,False,False
4,48303,49415,2363162,2015-05-11 13:11:20,Incorrect Data J1939 Network #1 Primary Vehicl...,,unknown,unknown,unknown,unknown,...,4.475814,3276.75,14.4,0.58,1,1,426 days 21:41:56,426 days 21:41:56,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1187330,4494,4495,1052146,2015-02-24 16:24:05,Low (Severity Medium) Catalyst Tank Level,,05317106*04119044*051914190353*09400015*G1*BDR*,79751302,6X1u13D1500000000,CMMNS,...,,,,,3115,3115,1 days 21:26:54,1 days 21:26:54,False,False
1187331,6438,6439,1089561,2015-02-26 13:12:11,,,05317106*04119044*051914190353*09400015*G1*BDR*,79751302,6X1u13D1500000000,CMMNS,...,2.058292,3276.75,0.0,0.29,3115,3115,0 days 00:38:48,0 days 00:38:48,True,True
1187332,6446,6447,1090499,2015-02-26 13:50:59,,,05317106*04119044*051914190353*09400015*G1*BDR*,79751302,6X1u13D1500000000,CMMNS,...,,,,,3115,3115,0 days 00:00:00,0 days 00:00:00,True,True
1187333,4952,4953,1059704,2015-02-25 06:08:43,Incorrect Data J1939 Network #1 Primary Vehicl...,,unknown,unknown,unknown,unknown,...,4.378725,3276.75,0.0,1.74,3116,3116,0 days 00:29:57,0 days 00:29:57,True,True
