In [2]:
import pandas as pd
from datetime import datetime
from shapely.geometry import Point
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
from wordcloud import WordCloud, STOPWORDS

# Big G notebook Part 3: The Seasonal Filtering
This notebook looks at the trucks that experienced a full derate from two perspectives: the season in which each fault occurred and the distance to the nearest hub.

In [3]:
# Load the fault records pickle (1,057,461 entries) and print a column/dtype summary.
# NOTE(review): unpickling can execute arbitrary code - only load pickles from a trusted source.
faults_pickle_path = '../data/on_faults.pkl'
on_faults = pd.read_pickle(faults_pickle_path)
on_faults.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1057461 entries, 0 to 1057460
Data columns (total 49 columns):
 #   Column                     Non-Null Count    Dtype         
---  ------                     --------------    -----         
 0   RecordID                   1057461 non-null  int64         
 1   ESS_Id                     1057461 non-null  int64         
 2   EventTimeStamp             1057461 non-null  datetime64[ns]
 3   eventDescription           1006873 non-null  object        
 4   ecuSoftwareVersion         793094 non-null   object        
 5   ecuSerialNumber            751418 non-null   object        
 6   ecuModel                   1001485 non-null  object        
 7   ecuMake                    1001485 non-null  object        
 8   ecuSource                  1057461 non-null  int64         
 9   spn                        1057461 non-null  int64         
 10  fmi                        1057461 non-null  int64         
 11  active                     1057461 no

For each fault record, find the closest service center and the distance to it.

In [4]:
# Per-service-center distance columns; the text after 'dist_' is the center's label.
sc_distance_cols = ['dist_A', 'dist_B', 'dist_C']

# Only derive the features when they are not there yet, so re-running this cell
# after the distance columns have been dropped does not raise a KeyError.
if not {'dist_to_nearest_sc', 'nearest_sc'}.issubset(on_faults.columns):
    sc_distances = on_faults[sc_distance_cols]

    # Distance to the closest service center (row-wise minimum).
    on_faults['dist_to_nearest_sc'] = sc_distances.min(axis=1)

    # Label of the closest service center: name of the column holding the row minimum,
    # with the literal 'dist_' text stripped (regex=False -> plain substring replacement).
    on_faults['nearest_sc'] = sc_distances.idxmin(axis=1).str.replace('dist_', '', regex=False)

    # The raw per-center distances are no longer needed once the two features exist.
    on_faults.drop(columns=sc_distance_cols, inplace=True)

Finding the season in which the fault occurred

In [5]:
# Month number -> season name for the non-winter months; anything else is 'Winter'.
_SEASON_BY_MONTH = {
    3: 'Spring', 4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
    9: 'Fall', 10: 'Fall', 11: 'Fall',
}


def get_season(month):
    """Return the name of the season in which a fault occurred.

    Parameters
    ----------
    month : month number to classify.

    Returns
    -------
    str
        'Spring' for 3-5, 'Summer' for 6-8, 'Fall' for 9-11 and 'Winter'
        for every other value (12, 1, 2 and anything outside 1-12).
    """
    return _SEASON_BY_MONTH.get(month, 'Winter')

# Month number of each fault, taken from EventTimeStamp. It is kept as a standalone
# Series instead of a scratch 'month' column, so the frame is never mutated with a
# temporary column that then has to be dropped in place.
event_month = on_faults['EventTimeStamp'].dt.month

# Creating the season feature: get_season is applied to every month value.
on_faults['season'] = event_month.map(get_season)

In [6]:
# EquipmentIDs of every truck that has at least one record with SPN 5246.
is_spn_5246 = on_faults['spn'].eq(5246)
experienced_full_derate = on_faults.loc[is_spn_5246, 'EquipmentID'].unique()

# Keep all fault records (whatever their SPN) that belong to one of those trucks.
belongs_to_derated_truck = on_faults['EquipmentID'].isin(experienced_full_derate)
full_derates = on_faults[belongs_to_derated_truck]

In [7]:
# Display the filtered frame; the rendered output shows only the first and last rows.
full_derates

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuSerialNumber,ecuModel,ecuMake,ecuSource,spn,...,LampStatus,ParkingBrake,ServiceDistance,Speed,SwitchedBatteryVoltage,Throttle,TurboBoostPressure,dist_to_nearest_sc,nearest_sc,season
39,53,991047,2015-02-21 12:18:36,Special Instructions System Diagnostic Code #1,unknown,unknown,unknown,unknown,11,611,...,1279,,,,,,,178.290444,B,Winter
40,54,991048,2015-02-21 12:18:36,Special Instructions Wheel Sensor ABS Axle 2 Left,unknown,unknown,unknown,unknown,11,791,...,1279,,,,,,,178.290444,B,Winter
41,55,991056,2015-02-21 12:19:17,Data May Be Invalid Relative Speed; Rear Axle ...,unknown,unknown,unknown,unknown,0,907,...,17407,,,,,,,178.266015,B,Winter
42,56,991057,2015-02-21 12:18:38,Special Instructions System Diagnostic Code #1,unknown,unknown,unknown,unknown,11,611,...,1279,,,,,,,178.266015,B,Winter
43,57,991058,2015-02-21 12:18:38,Special Instructions Wheel Sensor ABS Axle 2 Left,unknown,unknown,unknown,unknown,11,791,...,1279,,,,,,,178.266015,B,Winter
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1057432,1248429,123886552,2020-03-06 12:11:54,High Voltage (Left Fuel Level Sensor),,,CECU3B-NAMUX4,PACCR,49,829,...,65535,,,,,,,719.818733,C,Spring
1057434,1248431,123891846,2020-03-06 12:20:36,High Voltage (Fuel Level),,,CECU3B-NAMUX4,PACCR,49,96,...,1279,True,,0,,100,0.58,719.923890,C,Spring
1057435,1248432,123891847,2020-03-06 12:20:36,High Voltage (Left Fuel Level Sensor),,,CECU3B-NAMUX4,PACCR,49,829,...,1279,True,,0,,100,0.58,719.923890,C,Spring
1057441,1248438,123893761,2020-03-06 12:51:53,High Voltage (Fuel Level),,,CECU3B-NAMUX4,PACCR,49,96,...,65535,,,,,,,719.897775,C,Spring


In [10]:
# One indicator column per distinct SPN value, named 'SPN_<value>'.
# get_dummies keeps the index of the Series it is given, so the rows already
# line up with on_faults.
spn_one_hot = pd.get_dummies(on_faults['spn'], prefix='SPN')

# Replace the raw 'spn' column with its indicator columns: the remaining original
# columns come first, followed by the SPN_* columns.
# NOTE(review): this encodes all of on_faults, not the full_derates subset - confirm that is intended.
on_faults_one_hot = pd.concat([on_faults.drop(columns='spn'), spn_one_hot], axis=1)
on_faults_one_hot

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuSerialNumber,ecuModel,ecuMake,ecuSource,fmi,...,SPN_520413,SPN_520953,SPN_521032,SPN_523530,SPN_523531,SPN_523543,SPN_524033,SPN_524037,SPN_524071,SPN_524287
0,1,990349,2015-02-21 10:47:13,Low (Severity Low) Engine Coolant Level,unknown,unknown,unknown,unknown,0,17,...,0,0,0,0,0,0,0,0,0,0
1,2,990360,2015-02-21 11:34:34,,unknown,unknown,unknown,unknown,11,12,...,0,0,0,0,0,0,0,0,0,0
2,3,990364,2015-02-21 11:35:31,Incorrect Data Steering Wheel Angle,unknown,unknown,unknown,unknown,11,2,...,0,0,0,0,0,0,0,0,0,0
3,4,990370,2015-02-21 11:35:33,Incorrect Data Steering Wheel Angle,unknown,unknown,unknown,unknown,11,2,...,0,0,0,0,0,0,0,0,0,0
4,5,990416,2015-02-21 11:39:41,,22281684P01*22357957P01*22362082P01*,13063430,0USA13_13_0415_2238A,VOLVO,0,17,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1057456,1248454,123904424,2020-03-06 14:00:26,Low (Severity Low) Catalyst Tank Level,04384413*22383729*082218154102*60701732*G1*BGT*,80156139,6X1u17D1500000000,CMMNS,0,17,...,0,0,0,0,0,0,0,0,0,0
1057457,1248455,123905139,2020-03-06 14:04:23,Condition Exists Engine Protection Torque Derate,04358814*06099720*030816202706*09400153*G1*BDR*,79932020,6X1u13D1500000000,CMMNS,0,31,...,0,0,0,0,0,0,0,0,0,0
1057458,1248456,123905996,2020-03-06 14:13:38,Abnormal Rate of Change Aftertreatment 1 Intak...,05317106*05100987*050719120655*09401585*G1*BDR*,79880653,6X1u13D1500000000,CMMNS,0,10,...,0,0,0,0,0,0,0,0,0,0
1057459,1248457,123906113,2020-03-06 14:14:13,Low (Severity Medium) Engine Coolant Level,04384413*22544852*090619141107*60701756*G1*BGT*,,,,0,18,...,0,0,0,0,0,0,0,0,0,0
