In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Raise the pandas display limits so wide/long frames are shown without truncation
for option_name in ('display.max_rows', 'display.max_columns'):
    pd.set_option(option_name, 500)

In [None]:
# Read ../data/faults_diagnostics.csv, parsing EventTimeStamp as datetimes and using it as the index
faults_diagnostics = pd.read_csv(
    '../data/faults_diagnostics.csv',
    index_col='EventTimeStamp',
    parse_dates=['EventTimeStamp'],
    low_memory=False,
)

In [None]:
# Tag every row with a run id for each of the two derate codes, based on a 7-day rolling window.

# Boolean flags: True on rows whose spn is 1569 (PartialDerate) or 5246 (FullDerate).
faults_diagnostics['PartialDerate'] = (faults_diagnostics['spn'] == 1569)
faults_diagnostics['FullDerate'] = (faults_diagnostics['spn'] == 5246)

# Reverse the row order, take a per-EquipmentID time-based ('7D') rolling sum of both flags,
# then reverse the result back.
# NOTE(review): the reversal presumably makes the window look forward in time (derates in the
# following 7 days) — this depends on the rows being time-ordered within each EquipmentID,
# which this cell does not establish. TODO confirm.
rolling_7D = faults_diagnostics.loc[::-1].groupby('EquipmentID', as_index = False)[['PartialDerate', 'FullDerate']].rolling('7D').sum()[::-1]
# Collapse the rolling counts to a 0/1 indicator: any positive count becomes 1.
rolling_7D.loc[rolling_7D['PartialDerate'] > 0, 'PartialDerate'] = 1
rolling_7D.loc[rolling_7D['FullDerate'] > 0, 'FullDerate'] = 1

# Replace each indicator with a run id: the cumulative sum increments whenever EquipmentID or the
# indicator differs from the previous row, so consecutive rows with the same equipment and the
# same indicator value share one id.
# These lines read rolling_7D['EquipmentID'] as a column, so the rolling result must carry it.
rolling_7D['PartialDerate'] = ((rolling_7D['EquipmentID'].shift() != rolling_7D['EquipmentID']) | (rolling_7D['PartialDerate'].shift() != rolling_7D['PartialDerate'])).cumsum()
rolling_7D['FullDerate'] = ((rolling_7D['EquipmentID'].shift() != rolling_7D['EquipmentID']) | (rolling_7D['FullDerate'].shift() != rolling_7D['FullDerate'])).cumsum()

# Drop the temporary boolean flags and bring the run ids back in under the same column names.
faults_diagnostics = faults_diagnostics.drop(columns = ['PartialDerate', 'FullDerate'])
# NOTE(review): if several rows share one (EventTimeStamp, EquipmentID) key, this merge pairs every
# left row with every right row for that key; the drop_duplicates() below presumably removes the
# resulting repeats — confirm.
faults_diagnostics = faults_diagnostics.merge(rolling_7D, on = ['EventTimeStamp', 'EquipmentID'])
# Order rows by equipment, then time. drop_duplicates() compares column values only (the index is
# not part of the comparison).
faults_diagnostics = faults_diagnostics.sort_values(['EquipmentID', 'EventTimeStamp']).drop_duplicates()

In [None]:
# Rows whose PartialDerate run id also appears on at least one spn 1569 row
partial_cluster_ids = faults_diagnostics.loc[faults_diagnostics['spn'] == 1569, 'PartialDerate']
in_partial_cluster = faults_diagnostics['PartialDerate'].isin(partial_cluster_ids)
partial_derate_clusters = faults_diagnostics.loc[in_partial_cluster]


def _until_first_partial_derate(spn_values):
    """Return True for every row that has no earlier spn 1569 row in the given series."""
    return spn_values.shift().eq(1569).cumsum().eq(0)


# Per EquipmentID, keep rows up to and including the first spn 1569 row.
# The mask is applied by position, so it relies on the rows already being grouped by EquipmentID.
partial_derate_mask = (
    partial_derate_clusters
    .groupby('EquipmentID')['spn']
    .apply(_until_first_partial_derate)
    .to_list()
)
partial_derates = partial_derate_clusters[partial_derate_mask].reset_index()

partial_cluster_keys = ['EquipmentID', 'PartialDerate']

# Absolute gap in hours between each row and the following row of the same group
# (NaN on the last row of each group).
partial_derates['DeltaT'] = (
    partial_derates
    .groupby(partial_cluster_keys)['EventTimeStamp']
    .diff(-1)
    .dt.total_seconds()
    .div(3600)
    .abs()
)
# Accumulate the gaps from the end of each group backwards: hours from a row to its group's last row.
partial_derates['DeltaT'] = (
    partial_derates
    .iloc[::-1]
    .groupby(partial_cluster_keys)['DeltaT']
    .cumsum()
)

# Drop rows with a DeltaT of exactly 0, keep the last row per (equipment, run id, spn),
# and give the remaining NaN rows (the last row of each group) a DeltaT of 0.
partial_nonzero_delta = partial_derates['DeltaT'].ne(0)
partial_derates = (
    partial_derates
    .loc[partial_nonzero_delta]
    .set_index('EventTimeStamp')
    .drop_duplicates(subset=['EquipmentID', 'PartialDerate', 'spn'], keep='last')
    .assign(DeltaT=lambda frame: frame['DeltaT'].fillna(0))
)

# Summary statistics of DeltaT per spn, most frequent spn first
(
    partial_derates
    .groupby('spn')['DeltaT']
    .describe()
    .sort_values('count', ascending=False)
)

In [None]:
# Rows whose FullDerate run id also appears on at least one spn 5246 row
full_cluster_ids = faults_diagnostics.loc[faults_diagnostics['spn'] == 5246, 'FullDerate']
in_full_cluster = faults_diagnostics['FullDerate'].isin(full_cluster_ids)
full_derate_clusters = faults_diagnostics.loc[in_full_cluster]


def _until_first_full_derate(spn_values):
    """Return True for every row that has no earlier spn 5246 row in the given series."""
    return spn_values.shift().eq(5246).cumsum().eq(0)


# Per EquipmentID, keep rows up to and including the first spn 5246 row.
# The mask is applied by position, so it relies on the rows already being grouped by EquipmentID.
full_derate_mask = (
    full_derate_clusters
    .groupby('EquipmentID')['spn']
    .apply(_until_first_full_derate)
    .to_list()
)
full_derates = full_derate_clusters[full_derate_mask].reset_index()

full_cluster_keys = ['EquipmentID', 'FullDerate']

# Absolute gap in hours between each row and the following row of the same group
# (NaN on the last row of each group).
full_derates['DeltaT'] = (
    full_derates
    .groupby(full_cluster_keys)['EventTimeStamp']
    .diff(-1)
    .dt.total_seconds()
    .div(3600)
    .abs()
)
# Accumulate the gaps from the end of each group backwards: hours from a row to its group's last row.
full_derates['DeltaT'] = (
    full_derates
    .iloc[::-1]
    .groupby(full_cluster_keys)['DeltaT']
    .cumsum()
)

# Drop rows with a DeltaT of exactly 0, keep the last row per (equipment, run id, spn),
# and give the remaining NaN rows (the last row of each group) a DeltaT of 0.
full_nonzero_delta = full_derates['DeltaT'].ne(0)
full_derates = (
    full_derates
    .loc[full_nonzero_delta]
    .set_index('EventTimeStamp')
    .drop_duplicates(subset=['EquipmentID', 'FullDerate', 'spn'], keep='last')
    .assign(DeltaT=lambda frame: frame['DeltaT'].fillna(0))
)

# Summary statistics of DeltaT per spn, most frequent spn first
(
    full_derates
    .groupby('spn')['DeltaT']
    .describe()
    .sort_values('count', ascending=False)
)

In [None]:
# One row per distinct (EventTimeStamp, EquipmentID) pair, with the absolute gap in hours
# to the following row of the same EquipmentID.
event_gaps = (
    faults_diagnostics
    .reset_index()[['EventTimeStamp', 'EquipmentID']]
    .drop_duplicates()
    .assign(
        DeltaT=lambda pairs: (
            pairs.groupby('EquipmentID')['EventTimeStamp']
            .diff(-1)
            .dt.total_seconds()
            .div(3600)
            .abs()
        )
    )
)

# Attach the gap to every original row, then keep only gaps of at most 168 hours
event_timestamps = faults_diagnostics.merge(event_gaps, on=['EventTimeStamp', 'EquipmentID'])
event_timestamps = event_timestamps.loc[event_timestamps['DeltaT'] <= 168]

In [None]:
# Histogram of DeltaT for every row of event_timestamps, counts on a log2 axis
fig, ax = plt.subplots(figsize=(8, 5), dpi=300)
sns.histplot(event_timestamps['DeltaT'], ax=ax)
ax.set_xticks(list(range(0, 169, 24)))  # a tick every 24 units from 0 to 168
ax.set_yscale('log', base=2)
plt.show()

In [None]:
# Histogram of DeltaT restricted to rows with spn 1569, counts on a log2 axis
fig, ax = plt.subplots(figsize=(8, 5), dpi=300)
delta_spn_1569 = event_timestamps.loc[event_timestamps['spn'] == 1569, 'DeltaT']
sns.histplot(delta_spn_1569, ax=ax)
ax.set_xticks(list(range(0, 169, 24)))  # a tick every 24 units from 0 to 168
ax.set_yscale('log', base=2)
plt.show()

In [None]:
# Histogram of DeltaT restricted to rows with spn 5246, counts on a log2 axis
fig, ax = plt.subplots(figsize=(8, 5), dpi=300)
delta_spn_5246 = event_timestamps.loc[event_timestamps['spn'] == 5246, 'DeltaT']
sns.histplot(delta_spn_5246, ax=ax)
ax.set_xticks(list(range(0, 169, 24)))  # a tick every 24 units from 0 to 168
ax.set_yscale('log', base=2)
plt.show()

In [None]:
# Cross-tabulate each row's spn against the spn of the adjacent row for the same EquipmentID.
#
# Work on a narrow copy: assigning the helper column through a plain alias of faults_diagnostics
# would silently add 'prev_spn' to the shared frame and leak into any cell that is (re)run later.
spn_pairs = faults_diagnostics[['EquipmentID', 'spn']].copy()

# NOTE(review): shift(-1) pulls the spn from the *following* row of each EquipmentID group, even
# though the column is called 'prev_spn' — confirm the row ordering and intent. The name is kept
# so the crosstab's axis labels are unchanged.
spn_pairs['prev_spn'] = spn_pairs.groupby('EquipmentID')['spn'].shift(-1)

# The last row of each EquipmentID group has no adjacent value; drop it before casting to int.
# subset is passed as a list because older pandas versions do not accept a bare label here.
spn_pairs = spn_pairs.dropna(subset=['prev_spn'])

spn_crosstab = pd.crosstab(spn_pairs['spn'], spn_pairs['prev_spn'].astype(int))
spn_crosstab.style.background_gradient(cmap='Blues')