In [55]:
# Import Libraries

In [56]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import plotly.express as px
import plotly.io as pio
import plotly
from ydata_profiling import  ProfileReport
import PyQt5 as qt
from IPython.display import display, Markdown
# Enable inline plotting inside Jupyter
#pip install PyQt5 #Install it if not installed
# Run the `%matplotlib inline` line magic so matplotlib figures render in the cell output.
get_ipython().run_line_magic('matplotlib', 'inline')
# Echo the active backend name as the cell output to confirm which backend is in use.
matplotlib.get_backend()

'module://matplotlib_inline.backend_inline'

In [57]:
# Load Data

In [58]:
# Read the maintenance workbook into `df`, the DataFrame the analysis cells below group and plot.
# The path is relative, so the file is resolved against the kernel's working directory.
df = pd.read_excel('maintenance_cleaned_extended.xlsx')

In [59]:
# Visualization Functions

In [60]:
## Bar, Scatter, Line charts

In [61]:
def myPlot(data,plotType,title):
    """Plot a value-sorted series as a bar, scatter or line chart and show it.

    Parameters
    ----------
    data :
        Expected to be a pandas Series (``sort_values`` is called without a
        ``by`` argument). Its index, cast to ``str``, is used for the x-axis
        and its values for the y-axis.
    plotType : str
        One of ``'bar'``, ``'scatter'`` or ``'line'``.
    title : str
        Title prefix; ``' Analysis'`` is appended.

    Raises
    ------
    ValueError
        If ``plotType`` is not one of the supported chart types.
    """
    supported_types = ('bar', 'scatter', 'line')
    # Fail fast with a clear message: an unknown type used to fall through
    # every branch and crash later on the never-assigned ``fig``.
    if plotType not in supported_types:
        raise ValueError(
            "Unsupported plotType %r; expected one of %s" % (plotType, ', '.join(supported_types))
        )

    data = data.sort_values(ascending=True)
    xs = data.index.astype(str)
    ys = data.values

    chart_args = dict(data_frame=data, x=xs, y=ys, title=title+' Analysis')
    if plotType != 'line':
        # Bars and markers are colored by value; the line chart is left uncolored.
        chart_args['color'] = ys

    # The validated name matches the Plotly Express function of the same name.
    fig = getattr(px, plotType)(**chart_args)
    fig.update_layout(title_x=0.45)
    fig.show()

In [62]:
def myPlot1(data,xs,ys,clr,plotType,title, sort_by=None, ascending=True):
    """Plot two columns of a DataFrame as a bar, scatter or line chart and show it.

    Parameters
    ----------
    data :
        Table holding the columns named by ``xs``, ``ys`` and ``clr``.
    xs, ys :
        Column names for the x- and y-axis. Their string forms are also used
        as the axis titles. The x values are cast to ``str`` before plotting.
    clr :
        Column name whose values (cast to ``str``) drive the color grouping.
    plotType : str
        One of ``'bar'``, ``'scatter'`` or ``'line'``.
    title : str
        Title prefix; ``' Analysis'`` is appended.
    sort_by : optional
        Passed to ``data.sort_values(by=...)``; when ``None`` the rows are
        plotted in their existing order.
    ascending : bool
        Sort direction, used only when ``sort_by`` is given.

    Raises
    ------
    ValueError
        If ``plotType`` is not one of the supported chart types.
    """
    supported_types = ('bar', 'scatter', 'line')
    # Fail fast with a clear message: an unknown type used to fall through
    # every branch and crash later on the never-assigned ``fig``.
    if plotType not in supported_types:
        raise ValueError(
            "Unsupported plotType %r; expected one of %s" % (plotType, ', '.join(supported_types))
        )

    if sort_by is not None:
        data_sorted = data.sort_values(by=sort_by, ascending=ascending)
    else:
        data_sorted = data

    # Capture the column names for the axis titles before the parameters are
    # rebound to the actual column values.
    xt = str(xs)
    yt = str(ys)
    xs = data_sorted[xs].astype(str)
    ys = data_sorted[ys]
    clr = data_sorted[clr].astype(str)

    # The validated name matches the Plotly Express function of the same name.
    fig = getattr(px, plotType)(
        data_frame=data_sorted, x=xs, y=ys, color=clr, title=title+' Analysis'
    )
    fig.update_layout(title_x=0.5, xaxis_title=xt, yaxis_title=yt)
    fig.show()

In [63]:
def myPlot2(data, plotType, title):
    """Plot a series in its existing order as a bar, scatter or line chart and show it.

    Parameters
    ----------
    data :
        Object exposing ``.index`` and ``.values`` (presumably a pandas
        Series). The index, cast to ``str``, is used for the x-axis and the
        values for the y-axis. No sorting is applied.
    plotType : str
        One of ``'bar'``, ``'scatter'`` or ``'line'``.
    title : str
        Title prefix; ``' Analysis'`` is appended.

    Raises
    ------
    ValueError
        If ``plotType`` is not one of the supported chart types.
    """
    supported_types = ('bar', 'scatter', 'line')
    # Fail fast with a clear message: an unknown type used to fall through
    # every branch and crash later on the never-assigned ``fig``.
    if plotType not in supported_types:
        raise ValueError(
            "Unsupported plotType %r; expected one of %s" % (plotType, ', '.join(supported_types))
        )

    xs = data.index.astype(str)  # Index (x-axis)
    ys = data.values             # Values (y-axis)

    chart_args = dict(x=xs, y=ys, title=title + ' Analysis')
    if plotType != 'line':
        # Bars and markers are colored by value; the line chart is left uncolored.
        chart_args['color'] = ys

    # The validated name matches the Plotly Express function of the same name.
    fig = getattr(px, plotType)(**chart_args)
    # Center the title
    fig.update_layout(title_x=0.5)
    fig.show()

In [64]:
def myBoxPlot(data,x,y,color,title):
    """Draw a Plotly Express box plot and display it.

    ``x``, ``y`` and ``color`` are forwarded to ``px.box``. The string forms
    of ``x`` and ``y`` become the axis titles, and the chart title is
    centered horizontally.
    """
    layout_options = {
        'title_x': 0.5,
        'xaxis_title': str(x),
        'yaxis_title': str(y),
    }
    box_figure = px.box(data, x=x, y=y, color=color, title=title)
    box_figure.update_layout(**layout_options)
    box_figure.show()

In [65]:
## Sunburst chart

In [66]:
def mySunBurst(data, name, value, title):
    """Draw a Plotly Express sunburst chart and display it.

    ``name`` is forwarded as the ``path`` argument (the hierarchy levels,
    e.g. a list of column names) and ``value`` as the ``values`` argument
    that sizes the sectors. ``' Analysis'`` is appended to ``title``.
    """
    chart_title = title+' Analysis'
    sunburst_figure = px.sunburst(
        data_frame=data,
        path=name,
        values=value,
        title=chart_title,
    )
    sunburst_figure.update_layout(title_x=0.45)
    sunburst_figure.show()

In [67]:
## Pie chart

In [68]:
def myPie(data,title_prefix):
    """Draw a pie chart from the index (slice names) and values (slice sizes) of ``data``.

    The title is always built as ``'Top 5 ' + title_prefix + ' Analysis'``;
    the function itself does not limit ``data`` to five entries, so callers
    are expected to pass an already-truncated series.
    """
    pie_figure = px.pie(
        data_frame=data,
        names=data.index,
        values=data.values,
        title='Top 5 '+ title_prefix +' Analysis',
    )
    pie_figure.update_layout(title_x=0.5)
    pie_figure.show()

In [69]:
## Combine DataFrames

In [70]:
def combine(data,first_field,first_field_count,field_grouped_on,resulting_field_value):
    """Summarise ``data`` per distinct value of ``first_field``.

    Builds one row per ``first_field`` value holding
    * the sum of ``field_grouped_on`` (column named ``resulting_field_value``), and
    * the number of rows with that value (column named ``first_field_count``).

    Returns the tuple ``(first_field_count, resulting_field_value, summary)``
    so callers get the two generated column names alongside the frame.
    """
    # Per-group totals of the aggregated column.
    totals = (
        data.groupby([first_field])[field_grouped_on]
        .sum()
        .reset_index(name=resulting_field_value)
    )

    # Per-group row counts, with both columns renamed explicitly so the
    # result does not depend on what value_counts() calls them.
    occurrences = (
        data[first_field]
        .value_counts()
        .reset_index()
        .set_axis([first_field, first_field_count], axis=1)
    )

    summary = totals.merge(occurrences, on=first_field)
    return first_field_count, resulting_field_value, summary

In [71]:
## Bivariate Analysis

In [72]:
### Car Models Insights

In [73]:
#### Most frequently serviced car models

In [74]:
# Number of service records per car model, most-serviced first.
most_frequent_Serviced_car = (
    df.groupby('car')
    .size()
    .sort_values(ascending=False)
    .reset_index(name='Frequecy of servecing')
)
# sort_by=None keeps the descending order computed above.
myPlot1(
    most_frequent_Serviced_car,
    xs='car',
    ys='Frequecy of servecing',
    clr='car',
    plotType='bar',
    title='Most frequently serviced car models',
    sort_by=None,
    ascending=True,
)

In [75]:
#### Average repair cost per car model

In [76]:
# Mean repair cost per car model, most expensive first.
average_cost_car = (
    df.groupby('car')['cost']
    .mean()
    .sort_values(ascending=False)
    .reset_index(name='Average Repait Cost')
)
# sort_by=None keeps the descending order computed above.
myPlot1(
    average_cost_car,
    xs='car',
    ys='Average Repait Cost',
    clr='car',
    plotType='bar',
    title='Average repair cost per car model',
    sort_by=None,
    ascending=True,
)

In [77]:
#### Total costs per car model across all damage types

In [78]:
# Total repair cost for every (car model, damage type) pair, largest first.
total_cost_damage_car = (
    df.groupby(['car','damage type'])['cost']
    .sum()
    .sort_values(ascending=False)
    .reset_index(name='Total Costs')
)
# Bar chart: one bar segment per damage type within each car model.
myPlot1(total_cost_damage_car,xs='car',ys='Total Costs',clr='damage type',plotType='bar',title='Total costs per car model across all damage types', sort_by=None, ascending=True)
# Plot SunBurst chart from the same frame. The call previously referenced
# `damage_total_cost_car` / 'Total Cost', neither of which this notebook
# defines, so it failed on a fresh kernel.
mySunBurst(data=total_cost_damage_car, name=['car','damage type'], value='Total Costs', title='Total costs per car model across all damage types')

In [79]:
#### Most common damage types for each car model

In [80]:
# Count how often each damage type occurs for every car model.
damage_Type_car = (
    df.groupby(['damage type','car'])['damage type']
    .count()
    .reset_index(name='Frequency')
)

# Draw the same data twice: first as a stacked bar chart, then as a grouped
# bar chart. Only `barmode` differs between the two figures.
for bar_layout in ('stack', 'group'):
    fig = px.bar(
        damage_Type_car,
        x='car',
        y='Frequency',
        color='damage type',
        barmode=bar_layout,
        title="Most common damage types for each car model",
        labels={'car': 'Car Model', 'Frequency': 'Damage Frequency', 'damage type': 'Damage Type'},
    )
    fig.update_layout(title_x=0.5)
    fig.show()

In [81]:
#### Service duration per car model

In [83]:
# Distribution of service duration per car model, split by damage type.
myBoxPlot(
    df,
    x='car',
    y='service_duration',
    color='damage type',
    title='Service duration per car model',
)

In [84]:
#### Kilometers driven (KMs diff) by car model

In [86]:
# Total kilometers driven per car model, highest first.
# `.sum()` aggregates the 'KMs Diff' values; the previous `.size()` only
# counted rows per model, which reproduced the service-frequency chart
# instead of showing distance.
# NOTE(review): assumes 'KMs Diff' is numeric — confirm against the cleaned data.
KMsDiff_car = (
    df.groupby(['car'])['KMs Diff']
    .sum()
    .sort_values(ascending=False)
    .reset_index(name='KMs Difference')
)
myPlot1(KMsDiff_car,xs='car',ys='KMs Difference',clr='car',plotType='bar',title='Kilometers driven (KMs diff) by car model', sort_by=None, ascending=True)

In [None]:
#### Car model distribution by service location

In [108]:
# Group by location and car model, and count the occurrences
# Group by location and car model, and count the occurrences
location_car = df.groupby(['location', 'car']).size().reset_index(name='Count')

# Plot stacked bar chart
fig = px.bar(location_car,
             x='location',
             y='Count',
             color='car',
             barmode='stack',
             title='Car Model Distribution by Service Location',
             # Label keys must match the column names exactly; the previous
             # lowercase 'count' key never matched 'Count', so the y-axis
             # label was silently not applied.
             labels={'Count': 'Number of Cars', 'location': 'Service Location'}
            )

fig.update_layout(title_x=0.5)
fig.show()