In [None]:
import pandas as pd
import numpy as np
import pickle

import plotly.express as px

In [None]:
# Load the two pickled fault tables from the data directory.
# NOTE(review): unpickling can execute arbitrary code — only read files from a trusted source.
final_faults, on_faults = (
    pd.read_pickle(pkl_path)
    for pkl_path in ('../data/final_faults.pkl', '../data/on_faults.pkl')
)

In [None]:
# Keep only the rows whose `active` flag equals True (i.e. drop the rows where
# the fault light is being turned off) and renumber the index from 0.
final_faults = final_faults[final_faults['active'].eq(True)].reset_index(drop=True)

on_faults = on_faults[on_faults['active'].eq(True)].reset_index(drop=True)

In [None]:
# Full derates are the fault rows whose `spn` is 5246; keep just those rows
# and give the result a fresh 0-based index.
full_derates = (
    final_faults[final_faults['spn'].eq(5246)]
    .reset_index(drop=True)
)

In [None]:
# Tag every fault row whose truck (EquipmentID) appears in full_derates.
# Both tables are updated in place; the 'derate' column is created if missing.
for fault_table in (final_faults, on_faults):
    has_derate = fault_table['EquipmentID'].isin(full_derates['EquipmentID'])
    fault_table.loc[has_derate, 'derate'] = 'Trucks with Derate'

In [None]:
# Tag every fault row whose truck (EquipmentID) never appears in full_derates.
# Both tables are updated in place; the 'derate' column is created if missing.
for fault_table in (final_faults, on_faults):
    lacks_derate = ~fault_table['EquipmentID'].isin(full_derates['EquipmentID'])
    fault_table.loc[lacks_derate, 'derate'] = 'Trucks with No Derate'

In [None]:
# Sanity check: print the column / non-null count / dtype summary of final_faults.
final_faults.info()

In [None]:
# Label each full derate by its position among rows sharing the same
# (EquipmentID, EventDate): the first such row is 'First', later ones 'Subsequent'.
# NOTE(review): "first" means first in the current row order of full_derates —
# confirm the table is sorted the way this is meant to be read.
repeat_mask = full_derates.duplicated(subset=['EquipmentID', 'EventDate'], keep='first')

full_derates.loc[~repeat_mask, 'first?'] = 'First'
full_derates.loc[repeat_mask, 'first?'] = 'Subsequent'

In [None]:
# Monthly totals for the "faults vs. trucks" line chart, in long format:
# one row per (MonthYear, Category) with the value in `total`.

# Group once and reuse the grouping for both aggregates.
faults_by_month = on_faults.groupby('MonthYear')['EquipmentID']

# Distinct trucks (EquipmentID) appearing in on_faults per month.
df1 = (
    faults_by_month
    .nunique()
    .rename('total')
    .reset_index()
    .assign(Category = 'Trucks')
)

# Number of on_faults rows (non-null EquipmentID) per month.
df2 = (
    faults_by_month
    .count()
    .rename('total')
    .reset_index()
    .assign(Category = 'Faults')
)

# ignore_index=True gives the stacked table a unique 0..N-1 index; without it
# every label would appear twice (once per category), making .loc lookups ambiguous.
totals = pd.concat([df1, df2], ignore_index = True)

totals

In [None]:
# Line chart of the monthly totals: one line (with markers) per Category.
df = totals

axis_labels = {'MonthYear': 'Time Period', 'total': 'Total Number'}
line_colors = {'Trucks': '#ff9900', 'Faults': '#00e6ff'}

fig = px.line(
    df,
    x='MonthYear',
    y='total',
    color='Category',
    labels=axis_labels,
    markers=True,
    color_discrete_map=line_colors,
    template="plotly_white",
)

fig.update_layout(title_text="Total Faults and Total Trucks with Faults by Month")

fig.show()

In [None]:
#fig.write_html("../data/total_faults.html")

In [None]:
# Monthly totals restricted to full derates (spn == 5246), in long format:
# one row per (MonthYear, Category) with the value in `total`.

# Filter and group once, then reuse the grouping for both aggregates.
derates_by_month = (
    on_faults
    .loc[on_faults['spn'] == 5246]
    .groupby('MonthYear')['EquipmentID']
)

# Distinct trucks (EquipmentID) with a derate row per month.
td_df1 = (
    derates_by_month
    .nunique()
    .rename('total')
    .reset_index()
    .assign(Category = 'Trucks')
)

# Number of derate rows (non-null EquipmentID) per month.
td_df2 = (
    derates_by_month
    .count()
    .rename('total')
    .reset_index()
    .assign(Category = 'Derates')
)

# ignore_index=True gives the stacked table a unique 0..N-1 index; without it
# every label would appear twice (once per category), making .loc lookups ambiguous.
td_totals = pd.concat([td_df1, td_df2], ignore_index = True)

td_totals

In [None]:
# Line chart of the monthly derate totals: one line (with markers) per Category.
df = td_totals

axis_labels = {'MonthYear': 'Time Period', 'total': 'Total Number'}
line_colors = {'Trucks': '#ff9900', 'Derates': '#00ff99'}

fig = px.line(
    df,
    x='MonthYear',
    y='total',
    color='Category',
    labels=axis_labels,
    markers=True,
    color_discrete_map=line_colors,
    template="plotly_white",
)

fig.update_layout(title_text="Total Derates and Total Trucks with Derates by Month")

fig.show()

In [None]:
#fig.write_html("../data/total_derates.html")

In [None]:
# Per derate-status group: number of fault records, number of distinct trucks,
# and the ratio of the two (average faults per truck).
comparisons = (
    on_faults
    .groupby('derate')
    .agg(
        total_faults=('RecordID', 'count'),
        total_trucks=('EquipmentID', 'nunique'),
    )
    .reset_index()
    .assign(faults_per_truck=lambda tbl: tbl['total_faults'] / tbl['total_trucks'])
)

comparisons

In [None]:
# Bar chart of faults per truck, one coloured bar per derate status.
df = comparisons

axis_labels = {'faults_per_truck': 'Faults per Truck', 'derate': 'Derate Status'}
bar_colors = {
    'Trucks with No Derate': '#9900ff',
    'Trucks with Derate': '#00ff99',
}

fig = px.bar(
    df,
    x='derate',
    y='faults_per_truck',
    color='derate',
    labels=axis_labels,
    color_discrete_map=bar_colors,
    template="plotly_white",
)

# The x axis (line and tick labels) is hidden; bars are told apart by colour.
fig.update_layout(
    title_text="Overall Faults per Truck by Derate Status",
    xaxis=dict(visible=False, showticklabels=False),
)

fig.show()

In [None]:
#fig.write_image("../data/faults_per_truck.jpeg")

In [None]:
# Count the full-derate records (non-null RecordID) in each 'first?' group.
firsts = (
    full_derates
    .groupby('first?')['RecordID']
    .count()
    .rename('totals')
    .reset_index()
)

firsts

In [None]:
# Bar chart of derate counts split into 'First' vs. 'Subsequent'.
# NOTE(review): the title hard-codes the "nearly 1/4" finding — re-check it if the data changes.
df = firsts

axis_labels = {'totals': 'Total Derates', 'first?': 'Derate Sequence'}
bar_colors = {'First': '#00ff59', 'Subsequent': '#00ffd9'}

fig = px.bar(
    df,
    x='first?',
    y='totals',
    color='first?',
    labels=axis_labels,
    color_discrete_map=bar_colors,
    template="plotly_white",
)

# The x axis (line and tick labels) is hidden; bars are told apart by colour.
fig.update_layout(
    title_text="Nearly 1/4 of All Derate Faults Occur In Sequence (Same Truck, Same Day)",
    xaxis=dict(visible=False, showticklabels=False),
)

fig.show()

In [None]:
#fig.write_image("../data/derate_sequence.jpeg")

In [None]:
# Open question: is the difference in derates per truck related to the individual truck,
# or do a lot of other faults also happen at the same time as a derate?