In [None]:
# Import libraries
import pandas as pd

In [None]:
# Widen the pandas display limits so large frames are shown without truncation
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 500)

In [None]:
# Load the faults/diagnostics CSV: EventTimeStamp becomes the (datetime) index
# and LocationTimeStamp is parsed as a datetime column.
faults_diagnostics = pd.read_csv(
    '../data/faults_diagnostics.csv',
    index_col = 'EventTimeStamp',
    parse_dates = ['EventTimeStamp', 'LocationTimeStamp'],
)

In [None]:
# Helper columns of ones: a rolling count over them gives the number of events
# that fall inside each trailing time window.
faults_diagnostics['EventCluster_3H'] = 1
faults_diagnostics['EventCluster_6H'] = 1
faults_diagnostics['EventCluster_12H'] = 1

# Trailing 3-, 6- and 12-hour event counts, computed separately per EquipmentID
# on the frame's datetime index.
# The window strings use the lowercase 'h' alias: the uppercase 'H' alias is
# deprecated in pandas >= 2.2, while 'h' is accepted by older versions as well.
# NOTE(review): time-based rolling needs a monotonic datetime index within each
# group — presumably the CSV is already sorted by EventTimeStamp; confirm.
equipment_groups = faults_diagnostics.groupby('EquipmentID', as_index = False)
rolling_window_3H = equipment_groups['EventCluster_3H'].rolling('3h').count()
rolling_window_6H = equipment_groups['EventCluster_6H'].rolling('6h').count()
rolling_window_12H = equipment_groups['EventCluster_12H'].rolling('12h').count()

In [None]:
# Turn the trailing-window counts into cluster ids: a count of exactly 1 means the
# window holds only the event itself, so that event opens a new cluster; the running
# total of such "opening" events is used as the cluster id.
for window_counts, cluster_col in (
    (rolling_window_3H, 'EventCluster_3H'),
    (rolling_window_6H, 'EventCluster_6H'),
    (rolling_window_12H, 'EventCluster_12H'),
):
    window_counts[cluster_col] = window_counts[cluster_col].eq(1).cumsum()

In [None]:
# Swap the helper columns of ones for the real cluster ids by joining each
# rolling-window result back on (EventTimeStamp, EquipmentID).
cluster_merge_keys = ['EventTimeStamp', 'EquipmentID']
faults_diagnostics = (
    faults_diagnostics
    .drop(columns = ['EventCluster_3H', 'EventCluster_6H', 'EventCluster_12H'])
    .merge(rolling_window_3H, on = cluster_merge_keys)
    .merge(rolling_window_6H, on = cluster_merge_keys)
    .merge(rolling_window_12H, on = cluster_merge_keys)
    # NOTE(review): presumably needed because repeated (EventTimeStamp, EquipmentID)
    # keys multiply rows in the merges above — TODO confirm. drop_duplicates compares
    # column values only (the index is not considered).
    .drop_duplicates()
)

In [None]:
# Row masks for the derate fault codes. Going by the variable names, spn 1569 is
# treated as a partial derate and spn 5246 as a full derate.
any_derate_mask = faults_diagnostics['spn'].isin([1569, 5246])
partial_derate_mask = faults_diagnostics['spn'].isin([1569])
full_derate_mask = faults_diagnostics['spn'].isin([5246])

# Ids of the clusters that contain at least one derate event of either kind
any_derate_clusters_3h = faults_diagnostics.loc[any_derate_mask, 'EventCluster_3H'].unique()
any_derate_clusters_6h = faults_diagnostics.loc[any_derate_mask, 'EventCluster_6H'].unique()
any_derate_clusters_12h = faults_diagnostics.loc[any_derate_mask, 'EventCluster_12H'].unique()

# Ids of the clusters that contain at least one partial derate event
partial_derate_clusters_3h = faults_diagnostics.loc[partial_derate_mask, 'EventCluster_3H'].unique()
partial_derate_clusters_6h = faults_diagnostics.loc[partial_derate_mask, 'EventCluster_6H'].unique()
partial_derate_clusters_12h = faults_diagnostics.loc[partial_derate_mask, 'EventCluster_12H'].unique()

# Ids of the clusters that contain at least one full derate event
full_derate_clusters_3h = faults_diagnostics.loc[full_derate_mask, 'EventCluster_3H'].unique()
full_derate_clusters_6h = faults_diagnostics.loc[full_derate_mask, 'EventCluster_6H'].unique()
full_derate_clusters_12h = faults_diagnostics.loc[full_derate_mask, 'EventCluster_12H'].unique()

# Ids of the clusters that contain both a partial and a full derate event
both_derate_clusters_3h = list(set(partial_derate_clusters_3h) & set(full_derate_clusters_3h))
both_derate_clusters_6h = list(set(partial_derate_clusters_6h) & set(full_derate_clusters_6h))
both_derate_clusters_12h = list(set(partial_derate_clusters_12h) & set(full_derate_clusters_12h))

In [None]:
# Expand each collection of cluster ids into the full set of event rows that belong
# to those clusters.
# NOTE: every *_derate_clusters_* name is rebound here from a collection of ids to a
# DataFrame, so this cell must be re-run together with the cell that builds the ids;
# re-running it on its own would pass DataFrames to isin().
# The non-derate frames are built first because they need the id arrays before the
# any_derate_* names are rebound.

# 3-hour clusters
cluster_ids_3h = faults_diagnostics['EventCluster_3H']
non_derate_clusters_3h = faults_diagnostics.loc[~cluster_ids_3h.isin(any_derate_clusters_3h)]
any_derate_clusters_3h = faults_diagnostics.loc[cluster_ids_3h.isin(any_derate_clusters_3h)]
partial_derate_clusters_3h = faults_diagnostics.loc[cluster_ids_3h.isin(partial_derate_clusters_3h)]
full_derate_clusters_3h = faults_diagnostics.loc[cluster_ids_3h.isin(full_derate_clusters_3h)]
both_derate_clusters_3h = faults_diagnostics.loc[cluster_ids_3h.isin(both_derate_clusters_3h)]

# 6-hour clusters
cluster_ids_6h = faults_diagnostics['EventCluster_6H']
non_derate_clusters_6h = faults_diagnostics.loc[~cluster_ids_6h.isin(any_derate_clusters_6h)]
any_derate_clusters_6h = faults_diagnostics.loc[cluster_ids_6h.isin(any_derate_clusters_6h)]
partial_derate_clusters_6h = faults_diagnostics.loc[cluster_ids_6h.isin(partial_derate_clusters_6h)]
full_derate_clusters_6h = faults_diagnostics.loc[cluster_ids_6h.isin(full_derate_clusters_6h)]
both_derate_clusters_6h = faults_diagnostics.loc[cluster_ids_6h.isin(both_derate_clusters_6h)]

# 12-hour clusters
cluster_ids_12h = faults_diagnostics['EventCluster_12H']
non_derate_clusters_12h = faults_diagnostics.loc[~cluster_ids_12h.isin(any_derate_clusters_12h)]
any_derate_clusters_12h = faults_diagnostics.loc[cluster_ids_12h.isin(any_derate_clusters_12h)]
partial_derate_clusters_12h = faults_diagnostics.loc[cluster_ids_12h.isin(partial_derate_clusters_12h)]
full_derate_clusters_12h = faults_diagnostics.loc[cluster_ids_12h.isin(full_derate_clusters_12h)]
both_derate_clusters_12h = faults_diagnostics.loc[cluster_ids_12h.isin(both_derate_clusters_12h)]

In [None]:
# Report how many event rows carry a derate spn: either code, 1569 only, 5246 only
derate_occurrence_labels = (
    ([1569, 5246], ' derate occurrences'),
    ([1569], ' partial derate occurrences'),
    ([5246], ' full derate occurrences'),
)
for spn_codes, label_suffix in derate_occurrence_labels:
    matching_events = faults_diagnostics[faults_diagnostics['spn'].isin(spn_codes)]
    print(str(len(matching_events)) + label_suffix)

In [None]:
# Events per cluster for the clusters without any derate event, one entry per window.
# Each window is summarised from its own frame: the 6H/12H figures must come from
# non_derate_clusters_6h / non_derate_clusters_12h, because a derate-free 3H cluster
# can sit inside a 6H or 12H cluster that does contain a derate.
non_derate_cluster_sizes = {
    'EventCluster_3H': non_derate_clusters_3h.groupby('EventCluster_3H').size(),
    'EventCluster_6H': non_derate_clusters_6h.groupby('EventCluster_6H').size(),
    'EventCluster_12H': non_derate_clusters_12h.groupby('EventCluster_12H').size(),
}

# Number of clusters per window
for cluster_col, cluster_sizes in non_derate_cluster_sizes.items():
    print(cluster_col + ': ' + str(len(cluster_sizes)) + ' non-derate fault event clusters')

# Mean number of events per cluster, per window
for cluster_col, cluster_sizes in non_derate_cluster_sizes.items():
    print(cluster_col + ': ' + str(round(cluster_sizes.mean(), ndigits = 2)) + ' events per non-derate fault cluster')

In [None]:
# Events per cluster for the clusters containing at least one derate event, one entry
# per window. Each window is summarised from its own frame, so the cluster counts and
# the means printed below describe the same set of clusters.
any_derate_cluster_sizes = {
    'EventCluster_3H': any_derate_clusters_3h.groupby('EventCluster_3H').size(),
    'EventCluster_6H': any_derate_clusters_6h.groupby('EventCluster_6H').size(),
    'EventCluster_12H': any_derate_clusters_12h.groupby('EventCluster_12H').size(),
}

# Number of clusters per window
for cluster_col, cluster_sizes in any_derate_cluster_sizes.items():
    print(cluster_col + ': ' + str(len(cluster_sizes)) + ' derate fault event clusters')

# Mean number of events per cluster, per window
for cluster_col, cluster_sizes in any_derate_cluster_sizes.items():
    print(cluster_col + ': ' + str(round(cluster_sizes.mean(), ndigits = 2)) + ' events per derate fault cluster')

In [None]:
# Events per cluster for the clusters containing at least one partial derate event,
# one entry per window. Each window is summarised from its own frame, so the cluster
# counts and the means printed below describe the same set of clusters.
partial_derate_cluster_sizes = {
    'EventCluster_3H': partial_derate_clusters_3h.groupby('EventCluster_3H').size(),
    'EventCluster_6H': partial_derate_clusters_6h.groupby('EventCluster_6H').size(),
    'EventCluster_12H': partial_derate_clusters_12h.groupby('EventCluster_12H').size(),
}

# Number of clusters per window
for cluster_col, cluster_sizes in partial_derate_cluster_sizes.items():
    print(cluster_col + ': ' + str(len(cluster_sizes)) + ' partial derate fault event clusters')

# Mean number of events per cluster, per window
for cluster_col, cluster_sizes in partial_derate_cluster_sizes.items():
    print(cluster_col + ': ' + str(round(cluster_sizes.mean(), ndigits = 2)) + ' events per partial derate fault cluster')

In [None]:
# Events per cluster for the clusters containing at least one full derate event,
# one entry per window. Each window is summarised from its own frame, so the cluster
# counts and the means printed below describe the same set of clusters.
full_derate_cluster_sizes = {
    'EventCluster_3H': full_derate_clusters_3h.groupby('EventCluster_3H').size(),
    'EventCluster_6H': full_derate_clusters_6h.groupby('EventCluster_6H').size(),
    'EventCluster_12H': full_derate_clusters_12h.groupby('EventCluster_12H').size(),
}

# Number of clusters per window
for cluster_col, cluster_sizes in full_derate_cluster_sizes.items():
    print(cluster_col + ': ' + str(len(cluster_sizes)) + ' full derate fault event clusters')

# Mean number of events per cluster, per window
for cluster_col, cluster_sizes in full_derate_cluster_sizes.items():
    print(cluster_col + ': ' + str(round(cluster_sizes.mean(), ndigits = 2)) + ' events per full derate fault cluster')

In [None]:
# Events per cluster for the clusters containing both a partial and a full derate
# event, one entry per window.
# Each window is summarised from its own frame: a 6H or 12H cluster can contain both
# derate types even when no single 3H cluster does, so the 3H frame cannot stand in
# for the wider windows.
both_derate_cluster_sizes = {
    'EventCluster_3H': both_derate_clusters_3h.groupby('EventCluster_3H').size(),
    'EventCluster_6H': both_derate_clusters_6h.groupby('EventCluster_6H').size(),
    'EventCluster_12H': both_derate_clusters_12h.groupby('EventCluster_12H').size(),
}

# Number of clusters per window
for cluster_col, cluster_sizes in both_derate_cluster_sizes.items():
    print(cluster_col + ': ' + str(len(cluster_sizes)) + ' both derate fault event clusters')

# Mean number of events per cluster, per window
for cluster_col, cluster_sizes in both_derate_cluster_sizes.items():
    print(cluster_col + ': ' + str(round(cluster_sizes.mean(), ndigits = 2)) + ' events per both derate fault cluster')

In [None]:
# For every event, look up the spn of the following row of the same EquipmentID.
# shift(-1) pulls values from the next row; the default shift() (periods=1) would
# return the preceding row's spn instead. The last row of each EquipmentID gets NaN.
# NOTE(review): "next" means next in row order — assumes rows are in chronological
# order within each EquipmentID; confirm.
faults_diagnostics['next_spn'] = faults_diagnostics.groupby(['EquipmentID'])['spn'].shift(-1)

# Transition table: rows are the current spn, columns the spn that follows it
pd.crosstab(faults_diagnostics['spn'], faults_diagnostics['next_spn'])