In [None]:
import pandas as pd
import numpy as np
import pickle

import plotly.express as px

In [None]:
# Load the data frame that the rest of this notebook reads from.
# NOTE: pd.read_pickle can execute arbitrary code while unpickling, so only
# point it at files that come from a trusted source.
#final_faults = pd.read_pickle('../data/final_faults.pkl')
final = pd.read_pickle('../data/final_data.pkl')

In [None]:
# Express the time remaining until the derate as fractional days, rounded to
# two decimals. This is vectorised through the timedelta accessor instead of a
# per-row Python loop; pd.to_timedelta also covers the case where the column
# is stored as objects rather than a native timedelta dtype.
final['timeUntilDerate_dec'] = (
    pd.to_timedelta(final['timeUntilDerate'])
    .dt.total_seconds()
    .div(86400)  # seconds in one day
    .round(2)
)

In [None]:
# Show the column labels available in the loaded frame.
final.columns

### First look at partial derates

In [None]:
# Keep only the rows whose SPN is 1569, which this notebook treats as
# partial derates, then summarise the resulting frame.
partial_derates = final[final['spn'].eq(1569)]
partial_derates.info()

In [None]:
# Number of partial-derate rows per derate status.
pd1 = (
    partial_derates
    .groupby('derate')['spn']
    .count()
    .rename('total')
    .reset_index()
    .assign(variable='Partial Derates')
)

# Number of distinct trucks per derate status.
pd2 = (
    partial_derates
    .groupby('derate')['EquipmentID']
    .nunique()
    .rename('total')
    .reset_index()
    .assign(variable='Trucks')
)

# Stack both summaries into one long table for plotting.
pd_totals = pd.concat([pd1, pd2])

pd_totals

In [None]:
df = pd_totals

# Grouped bars: one group per category, one bar per derate status.
fig = px.histogram(
    df,
    x="variable",
    y="total",
    color='derate',
    barmode='group',
    text_auto=True,
    template="plotly_white",
    labels={'derate': 'Derate Status', 'variable': 'Category'},
    color_discrete_map={'No Derate': '#9900ff', 'Derate': '#00ff99'},
)

# Title the figure and blank out the y-axis title, then render.
fig.update_layout(
    yaxis_title='',
    title_text="Total Partial Derate Faults and Total Trucks Experiencing Partial Derates by Derate Status",
).show()

In [None]:
# Per derate status: partial-derate rows, distinct trucks, and their ratio
# rounded to one decimal.
pd_per = (
    partial_derates
    .groupby('derate')
    .agg(
        total_pds=('spn', 'count'),
        trucks=('EquipmentID', 'nunique'),
    )
    .reset_index()
    .assign(
        pd_per_truck=lambda counts: (counts['total_pds'] / counts['trucks']).round(1)
    )
)

pd_per

In [None]:
df = pd_per

# One bar per derate status showing partial derates per truck; the hover box
# shows the truck and partial-derate totals instead of the plotted values.
fig = px.bar(
    df,
    x="derate",
    y="pd_per_truck",
    color='derate',
    text_auto=True,
    template="plotly_white",
    labels={
        'derate': 'Derate Status',
        'pd_per_truck': 'Partial  Derates per Truck',
        'trucks': 'Total Trucks',
        'total_pds': 'Total Partial Derates',
    },
    color_discrete_map={'No Derate': '#9900ff', 'Derate': '#00ff99'},
    hover_data={
        'trucks': True,
        'total_pds': True,
        'pd_per_truck': False,
        'derate': False,
    },
)

# The x axis is hidden entirely; the color legend identifies each bar.
fig.update_layout(
    title_text="Partial Derates per Truck by Derate Status",
    xaxis={'visible': False, 'showticklabels': False},
).show()

In [None]:
#fig.write_image("../images/pd_per_truck.jpeg")

In [None]:
# Partial derates on trucks that went on to a full derate. The mask is built
# from `partial_derates` itself so it always lines up with the frame being
# filtered, instead of depending on index alignment with `final`.
df = partial_derates.loc[partial_derates['derate'] == 'Derate']

# Passing the column by name (rather than as a Series) means the `labels`
# entry below is keyed on the real column name and is applied to the axis.
# Add histnorm='percent' here to plot proportions instead of raw counts.
fig = px.histogram(
    df,
    x='timeUntilDerate_dec',
    labels={'timeUntilDerate_dec': ' Days Before Derate'},
    template="plotly_white",
)

# Quarter-day bins covering the seven days before the derate.
fig.update_traces(xbins={'start': 0.0, 'end': 7, 'size': .25})

# NOTE(review): with histnorm disabled the hover text presumably reads
# "count" rather than "percent", so this replacement likely has no effect
# until histnorm='percent' is turned on. Confirm before relying on it.
fig.for_each_trace(
    lambda t: t.update(
        hovertemplate=t.hovertemplate.replace("percent", "Proportion of Partial")
    )
)

# Leave a little room to the left of zero so the first bin is not clipped.
fig.update_xaxes(range=[-1, 7])

fig.update_layout(yaxis_title="")

fig.show()

In [None]:
# Same per-status summary as before, restricted to partial derates recorded
# less than 0.34 days before the derate.
pd_per_pre = (
    partial_derates[partial_derates['timeUntilDerate_dec'].lt(.34)]
    .groupby('derate')
    .agg(
        total_pds=('spn', 'count'),
        trucks=('EquipmentID', 'nunique'),
    )
    .reset_index()
    .assign(
        pd_per_truck=lambda counts: (counts['total_pds'] / counts['trucks']).round(1)
    )
)

pd_per_pre

Partial Derates are not especially useful predictors:
- Just under 1/3 of trucks that experience partial derates go on to experience full derates
- Of the trucks that experience partial derates, the ones that go on to have full derates do have a slightly higher number of partial derates on average
- However, the number of partial derates per truck becomes very small in the week leading up to the full derate, so the higher overall count mostly reflects partial derates persisting over time rather than a build-up just before the derate

### Engine Coolant Temperature

In [None]:
# Summary statistics for the non-missing coolant temperature readings.
final.loc[final['EngineCoolantTemperature'].notna(), 'EngineCoolantTemperature'].describe()

In [None]:
# Rows that actually carry a coolant temperature reading.
df = final.dropna(subset=['EngineCoolantTemperature'])

# Percent-normalised distribution, split by derate status.
fig = px.histogram(df, x='EngineCoolantTemperature', color='derate', histnorm='percent')

fig.show()

In [None]:
# Rows recorded less than 0.05 days before a derate.
df = final[final['timeUntilDerate_dec'].lt(.05)]

# Percent-normalised coolant temperature distribution for that window.
fig = px.histogram(df, x='EngineCoolantTemperature', color='derate', histnorm='percent')

fig.show()

No real change in the distribution of Engine Coolant Temp ahead of derates

### Engine RPM

In [None]:
# Summary statistics for the non-missing engine RPM readings.
final.loc[final['EngineRpm'].notna(), 'EngineRpm'].describe()

In [None]:
# Rows that actually carry an engine RPM reading.
df = final.dropna(subset=['EngineRpm'])

# Percent-normalised RPM distribution over all rows.
fig = px.histogram(df, x='EngineRpm', nbins=100, histnorm='percent')

# Fixed axis ranges keep this figure comparable with the RPM plots that follow.
fig.update_yaxes(range=[0, 30]).update_xaxes(range=[0, 2200]).show()

In [None]:
# Rows labelled 'No Derate'.
df = final[final['derate'].eq('No Derate')]

# Percent-normalised RPM distribution for the no-derate group.
fig = px.histogram(
    df,
    x='EngineRpm',
    nbins=100,
    histnorm='percent',
    template="plotly_white",
    color_discrete_sequence=['#9900ff'],
)

fig.update_yaxes(range=[0, 30]).update_xaxes(range=[0, 2200]).show()

In [None]:
# Rows labelled 'Derate'.
df = final[final['derate'].eq('Derate')]

# Percent-normalised RPM distribution for the derate group.
fig = px.histogram(
    df,
    x='EngineRpm',
    nbins=100,
    histnorm='percent',
    template="plotly_white",
    color_discrete_sequence=['#00ff99'],
)

fig.update_yaxes(range=[0, 30]).update_xaxes(range=[0, 2200]).show()

In [None]:
import plotly.graph_objects as go

In [None]:
# Overlay the Engine RPM distributions of rows labelled 'Derate' and
# 'No Derate'. EquipmentID 1692 is left out of the derate side.
# NOTE(review): the reason for excluding 1692 is not recorded here.
df = final.loc[final['derate'].eq('Derate') & final['EquipmentID'].ne(1692)]
df1 = final.loc[final['derate'].eq('No Derate')]

fig = go.Figure(
    data=[
        go.Histogram(
            x=df['EngineRpm'],
            histnorm='percent',
            name='Derate',  # shown in the legend and hover labels
            marker_color='#00ff99',
            opacity=0.75,
        ),
        go.Histogram(
            x=df1['EngineRpm'],
            histnorm='percent',
            name='No Derate',
            marker_color='#9900ff',
            opacity=0.75,
        ),
    ]
)

fig.update_layout(
    template='plotly_white',
    title_text='Distribution of Engine RPM Readings, Derate Status Comparison',
    xaxis_title_text='Engine RPM',
    bargap=0.2,        # gap between bars at adjacent positions
    bargroupgap=0.05,  # gap between bars sharing a position
)

# Identical 50-RPM bins on both traces so the bars line up.
fig.update_traces(xbins=dict(start=0, end=2750, size=50))

fig.update_xaxes(range=[-10, 2750])
fig.update_yaxes(ticksuffix="%", range=[0, 20])

fig.show()

In [None]:
#fig.write_html("../images/engine_rpm_comparison.html")

In [None]:
# Row count per truck for readings taken strictly between 3 and 5 days before
# a derate; the rows are ordered by descending RPM before counting.
(
    final[final['timeUntilDerate_dec'].lt(5) & final['timeUntilDerate_dec'].gt(3)]
    .sort_values('EngineRpm', ascending=False)
    ['EquipmentID']
    .value_counts()
)

In [None]:
# Readings taken strictly between 2 and 5 days before a derate, leaving out
# EquipmentID 1692.
df = final[
    final['timeUntilDerate_dec'].lt(5)
    & final['timeUntilDerate_dec'].gt(2)
    & final['EquipmentID'].ne(1692)
]

fig = px.histogram(
    df,
    x='EngineRpm',
    nbins=100,
    histnorm='percent',
    template="plotly_white",
    color_discrete_sequence=['#00ff99'],
)

fig.update_layout(
    title_text='Distribution of Engine RPM Readings, 2 - 5 Days Prior to Derate',
    xaxis_title_text='Engine RPM',
    yaxis_title_text='',
)

# 50-RPM bins from 0 to 2750.
fig.update_traces(xbins=dict(start=0, end=2750, size=50))

fig.update_yaxes(range=[0, 20], ticksuffix="%").update_xaxes(range=[-10, 2750]).show()

In [None]:
#fig.write_html("../images/engine_rpm_prederate.html")

In [None]:
# Readings taken less than 2 days before a derate, leaving out
# EquipmentID 1692.
df = final[final['timeUntilDerate_dec'].lt(2) & final['EquipmentID'].ne(1692)]

fig = px.histogram(
    df,
    x='EngineRpm',
    nbins=80,
    histnorm='percent',
    template="plotly_white",
    color_discrete_sequence=['#00ff99'],
)

fig.update_layout(
    title_text='Distribution of Engine RPM Readings, 2 Days Prior to Derate',
    xaxis_title_text='Engine RPM',
    yaxis_title_text='',
)

# 50-RPM bins from 0 to 2750.
fig.update_traces(xbins=dict(start=0, end=2750, size=50))

fig.update_yaxes(range=[0, 20], ticksuffix="%").update_xaxes(range=[-10, 2750]).show()

In [None]:
#fig.write_html("../images/engine_rpm_2prederate.html")

In [None]:
# Readings taken less than 1 day before a derate.
df = final[final['timeUntilDerate_dec'].lt(1)]

fig = px.histogram(df, x='EngineRpm', nbins=80, histnorm='percent')

fig.update_yaxes(range=[0, 25]).update_xaxes(range=[0, 2200]).show()

In [None]:
# Readings taken less than 0.5 days before a derate.
df = final[final['timeUntilDerate_dec'].lt(.5)]

fig = px.histogram(df, x='EngineRpm', histnorm='percent')

fig.update_yaxes(range=[0, 25]).update_xaxes(range=[0, 2200]).show()

In [None]:
# Readings taken less than 0.25 days before a derate.
df = final[final['timeUntilDerate_dec'].lt(.25)]

fig = px.histogram(df, x='EngineRpm', nbins=80, histnorm='percent')

fig.update_yaxes(range=[0, 30]).update_xaxes(range=[0, 2200]).show()

In [None]:
# Readings taken less than 0.125 days before a derate.
df = final[final['timeUntilDerate_dec'].lt(.125)]

fig = px.histogram(df, x='EngineRpm', histnorm='percent')

fig.update_yaxes(range=[0, 26]).update_xaxes(range=[0, 2200]).show()

In [None]:
# Readings taken less than 0.0625 days before a derate.
df = final[final['timeUntilDerate_dec'].lt(.0625)]

fig = px.histogram(df, x='EngineRpm', histnorm='percent')

fig.update_yaxes(range=[0, 26]).update_xaxes(range=[0, 2200]).show()

In [None]:
# Readings taken less than 0.028 days before a derate.
df = final[final['timeUntilDerate_dec'].lt(.028)]

fig = px.histogram(df, x='EngineRpm', histnorm='percent')

fig.update_yaxes(range=[0, 26]).update_xaxes(range=[0, 2200]).show()

### Fault codes that only occur in trucks that experience derates

In [None]:
# All rows labelled 'Derate', and the SPN values seen on rows labelled
# 'No Derate'.
derates = final[final['derate'].eq('Derate')]
no_derates_faults = final.loc[final['derate'].eq('No Derate'), 'spn']

In [None]:
# Frequency of each SPN that appears on 'Derate' rows but never on
# 'No Derate' rows.
derates.loc[~derates['spn'].isin(no_derates_faults), 'spn'].value_counts()

In [None]:
# Inspect every row carrying SPN 907.
final[final['spn'].eq(907)]