In [None]:
import pandas as pd

In [None]:
# Notebook display limits: show up to 500 rows and 50 columns before truncating
pd.options.display.max_rows = 500
pd.options.display.max_columns = 50

In [None]:
# Fault events indexed by RecordID, with both timestamp columns parsed as
# datetimes; the actionDescription and faultValue columns are discarded.
faults = (
    pd.read_csv(
        '../data/J1939Faults.csv',
        index_col = 'RecordID',
        parse_dates = ['EventTimeStamp', 'LocationTimeStamp'],
    )
    .drop(columns = ['actionDescription', 'faultValue'])
)

# Onboard diagnostics reshaped from long to wide:
# one row per FaultId, one column per diagnostic Name, cells hold Value.
diagnostics = (
    pd.read_csv('../data/VehicleDiagnosticOnboardData.csv')
    .pivot(index = 'FaultId', columns = 'Name', values = 'Value')
)

# Inner merge (the default): keep only faults whose RecordID matches a FaultId,
# then index the result by event time and sort it chronologically.
faults_diagnostics = (
    faults
    .merge(diagnostics, left_on = 'RecordID', right_on = 'FaultId')
    .set_index('EventTimeStamp')
    .sort_index()
)

In [None]:
# Trailing window length behind each cluster column. Timedelta objects are used
# rather than '3H'-style strings because the upper-case hour alias is
# deprecated in recent pandas releases.
EVENT_CLUSTER_WINDOWS = {
    'EventCluster_3H': pd.Timedelta(hours = 3),
    'EventCluster_6H': pd.Timedelta(hours = 6),
    'EventCluster_12H': pd.Timedelta(hours = 12),
}


def label_event_clusters(events: pd.DataFrame, window, equipment_col: str = 'EquipmentID') -> pd.Series:
    """Assign a cluster id to every row of a time-indexed event frame.

    For each equipment, the number of its events inside the trailing `window`
    is counted at every row. A row stays in the current cluster while that
    count grows compared with the same equipment's previous row; as soon as
    the count stops growing, the row opens a new cluster.

    Parameters
    ----------
    events : pandas.DataFrame
        Events indexed by timestamp. Rows must already be in time order
        (time-based rolling windows require it) and `equipment_col` must not
        contain missing values.
    window : str, datetime.timedelta or pandas offset
        Length of the trailing window, forwarded to ``Series.rolling``.
    equipment_col : str, default 'EquipmentID'
        Column identifying the equipment each event belongs to.

    Returns
    -------
    pandas.Series
        Integer ids starting at 1, aligned row-for-row with `events`. Ids are
        numbered by equipment first and by cluster order within an equipment
        second, so two equipments never share an id.
    """
    if events.empty:
        return pd.Series(dtype = 'int64', index = events.index)

    # Group by a raw array so every step below is aligned by row position:
    # timestamps can repeat, so they are not usable as a join key.
    equipment = events[equipment_col].to_numpy()
    marker = pd.Series(1, index = events.index)

    # Rolling count of a constant marker == events in the trailing window.
    window_counts = marker.groupby(equipment).transform(
        lambda ones: ones.rolling(window).count()
    )
    previous_counts = window_counts.groupby(equipment).shift()

    # The first row of an equipment has no previous count (NaN), so the
    # comparison is False there and the row belongs to the first cluster.
    opens_cluster = (previous_counts >= window_counts).astype('int64')
    cluster_in_equipment = opens_cluster.groupby(equipment).cumsum()

    # Number the observed (equipment, cluster) pairs globally, starting at 1.
    return marker.groupby([equipment, cluster_in_equipment.to_numpy()]).ngroup() + 1


# Events without an equipment id cannot be grouped, so they are excluded from
# the clustered frame.
faults_diagnostics = faults_diagnostics.loc[faults_diagnostics['EquipmentID'].notna().to_numpy()]

# Ids are attached by position (no merge on timestamp/equipment), so several
# faults logged by one equipment at the same instant never multiply rows.
faults_diagnostics = faults_diagnostics.assign(**{
    cluster_col: label_event_clusters(faults_diagnostics, window).to_numpy()
    for cluster_col, window in EVENT_CLUSTER_WINDOWS.items()
})

# Collapse rows that are identical in every column.
faults_diagnostics = faults_diagnostics.drop_duplicates()

In [None]:
# Mean number of rows per cluster, shown once for each window length
for cluster_column in ('EventCluster_3H', 'EventCluster_6H', 'EventCluster_12H'):
    display(faults_diagnostics.groupby(cluster_column).size().mean())