# Import Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import plotly.express as px
import plotly.io as pio
import plotly
from ydata_profiling import  ProfileReport
import PyQt5 as qt
from IPython.display import display, Markdown
# Enable inline plotting inside Jupyter
#pip install PyQt5 #Install it if not installed
get_ipython().run_line_magic('matplotlib', 'inline')
matplotlib.get_backend()
from wordcloud import WordCloud
import arabic_reshaper
from bidi.algorithm import get_display

# Load Data

In [4]:
# Read the cleaned maintenance workbook into the notebook-wide DataFrame `df`.
df = pd.read_excel(io='maintenance_cleaned_extended.xlsx')

# Visualization Functions

## Bar, Scatter, Line charts

In [7]:
def myPlot(data, plotType, title, x_label, y_label):
    """Plot a Series with its extreme values highlighted relative to the mean.

    The values are sorted ascending and each point gets a colour:
    blue for the (up to) three largest values above the mean, red for the
    (up to) three smallest values below the mean, gray for everything else.
    The colours are applied to 'bar', 'scatter' and 'pie' charts; a 'line'
    chart is drawn without them.

    Parameters
    ----------
    data : pandas.Series
        Values to plot. The index (converted to strings) supplies the
        categories on the x axis / the pie sector names.
    plotType : str
        One of 'bar', 'scatter', 'pie' or 'line'.
    title : str
        Chart title; the suffix ' Analysis' is appended.
    x_label, y_label : str
        Axis titles written into the figure layout.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.

    Raises
    ------
    ValueError
        If ``plotType`` is not one of the supported chart types.
    """
    supported_types = ('bar', 'scatter', 'pie', 'line')
    if plotType not in supported_types:
        # Fail early with a clear message instead of hitting an unbound `fig` later.
        raise ValueError(
            f"Unsupported plotType {plotType!r}; expected one of {supported_types}"
        )

    # Series.sort_values() has no `by` argument (that is DataFrame-only), so the
    # Series is sorted on its own values.
    data = data.sort_values(ascending=True)
    xs = data.index.astype(str)  # Convert index to strings for x-axis
    ys = data.values  # y-axis values
    mean_value = ys.mean()

    # Three largest values strictly above the mean.
    top3_above_mean = data[data > mean_value].sort_values(ascending=False).head(3)
    # Three smallest values strictly below the mean.
    bot3_below_mean = data[data < mean_value].sort_values(ascending=False).tail(3)

    # One colour per point, in the same (sorted) order as xs / ys.
    colors = ['blue' if index in top3_above_mean.index
              else 'red' if index in bot3_below_mean.index
              else 'gray'
              for index in data.index]

    chart_title = title + ' Analysis'
    if plotType == 'bar':
        fig = px.bar(x=xs, y=ys, title=chart_title)
        fig.update_traces(marker_color=colors)  # Bar-specific color update
    elif plotType == 'scatter':
        fig = px.scatter(x=xs, y=ys, title=chart_title)
        fig.update_traces(marker=dict(color=colors))  # Scatter-specific color update
    elif plotType == 'pie':
        fig = px.pie(names=xs, values=ys, title=chart_title)
        fig.update_traces(marker=dict(colors=colors))  # Pie-specific color update
    else:  # 'line' -- drawn without the per-point colours
        fig = px.line(x=xs, y=ys, title=chart_title)

    # Left-align the title and apply the custom axis labels.
    fig.update_layout(
        title_x=0.0,
        xaxis_title=x_label,
        yaxis_title=y_label
    )

    fig.show()

In [8]:
def myPlot1(data, xs, ys, clr, plotType, title, sort_by=None, ascending=True):
    """Plot two DataFrame columns against each other, optionally coloured by a third.

    Parameters
    ----------
    data : pandas.DataFrame
        Source table.
    xs, ys : column label
        Columns used for the x and y axes; their string form is also used as
        the axis titles.
    clr : column label or falsy
        Column used for colouring (its values are converted to strings so they
        are passed to Plotly as text). Any falsy value disables colouring.
    plotType : str
        One of 'bar', 'scatter' or 'line'.
    title : str
        Chart title; the suffix ' Analysis' is appended.
    sort_by : column label or list of labels, optional
        If given, ``data`` is sorted by it before plotting.
    ascending : bool, default True
        Sort direction used together with ``sort_by``.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.

    Raises
    ------
    ValueError
        If ``plotType`` is not one of the supported chart types.
    """
    supported_types = ('bar', 'scatter', 'line')
    if plotType not in supported_types:
        # Fail early with a clear message instead of hitting an unbound `fig` later.
        raise ValueError(
            f"Unsupported plotType {plotType!r}; expected one of {supported_types}"
        )

    if sort_by is not None:
        data_sorted = data.sort_values(by=sort_by, ascending=ascending)
    else:
        data_sorted = data

    # Keep the column labels for the axis titles before the names are rebound
    # to the column data.
    xt = str(xs)
    yt = str(ys)
    xs = data_sorted[xs]
    ys = data_sorted[ys]
    clr = data_sorted[clr].astype(str) if clr else None

    # plotType was validated above, so it names one of px.bar / px.scatter / px.line.
    make_figure = getattr(px, plotType)
    fig = make_figure(x=xs, y=ys, color=clr, title=title + ' Analysis')

    fig.update_layout(title_x=0.0)
    fig.update_layout(xaxis_title=xt, yaxis_title=yt)
    fig.show()

In [9]:
def myPlot2(data, plotType, title):
    """Plot a Series against its index with a centred title.

    Parameters
    ----------
    data : pandas.Series
        Values to plot; the index (converted to strings) is the x axis.
    plotType : str
        One of 'bar', 'scatter' or 'line'. Bar and scatter charts are coloured
        by the plotted values; line charts are not.
    title : str
        Chart title; the suffix ' Analysis' is appended.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.

    Raises
    ------
    ValueError
        If ``plotType`` is not one of the supported chart types.
    """
    supported_types = ('bar', 'scatter', 'line')
    if plotType not in supported_types:
        # Fail early with a clear message instead of hitting an unbound `fig` later.
        raise ValueError(
            f"Unsupported plotType {plotType!r}; expected one of {supported_types}"
        )

    xs = data.index.astype(str)  # Index (x-axis)
    ys = data.values             # Values (y-axis)
    chart_title = title + ' Analysis'

    if plotType == 'bar':
        fig = px.bar(x=xs, y=ys, color=ys, title=chart_title)
    elif plotType == 'scatter':
        fig = px.scatter(x=xs, y=ys, color=ys, title=chart_title)
    else:  # 'line' -- no colour mapping is passed for line charts
        fig = px.line(x=xs, y=ys, title=chart_title)

    # Center the title
    fig.update_layout(title_x=0.5)
    fig.show()

In [10]:
def myBoxPlot(data,x,y,color,title):
    """Show a Plotly box plot of column ``y`` per ``x``, split by ``color``.

    The title is centred and the axis titles are the string forms of the
    ``x`` and ``y`` arguments. Returns None; the figure is shown directly.
    """
    axis_titles = {'xaxis_title': str(x), 'yaxis_title': str(y)}
    box_options = dict(x=x, y=y, color=color, title=title)

    fig = px.box(data, **box_options)
    fig.update_layout(title_x=0.5, **axis_titles)
    fig.show()

## Sunburst chart

In [12]:
def mySunBurst(data, name1,name2, value, title):
    """Show a two-level Plotly sunburst chart.

    ``name1`` is the outer level of the hierarchy and ``name2`` the level
    nested under it; ``value`` is the column that sizes the sectors. The title
    gets the suffix ' Analysis' and is left-aligned. Returns None.
    """
    sunburst_options = {
        'data_frame': data,
        'path': [name1, name2],   # hierarchy, from the root level outwards
        'values': value,
        'title': title +' Analysis',
    }
    fig = px.sunburst(**sunburst_options)
    fig.update_layout(title_x=0.0)
    fig.show()

## Pie chart

In [14]:
def myPie(data,title_prefix):
    """Show a pie chart of a Series: the index names the sectors, the values size them.

    The title is 'Top 5 <title_prefix> Analysis', centred. The function plots
    whatever it is given -- it does not itself cut the data down to five rows.
    Returns None.
    """
    chart_title = 'Top 5 '+ title_prefix +' Analysis'
    fig = px.pie(
        data_frame=data,
        names=data.index,
        values=data.values,
        title=chart_title,
    )
    fig.update_layout(title_x=0.5)
    fig.show()

## Combine DataFrames

In [16]:
def combine(data,first_field,first_field_count,field_grouped_on,resulting_field_value):
    """Summarise ``data`` per category: summed value and number of rows.

    For every distinct value of ``first_field`` the result holds the sum of
    ``field_grouped_on`` (column named ``resulting_field_value``) and the
    number of occurrences of that value (column named ``first_field_count``).

    Returns the tuple ``(first_field_count, resulting_field_value, merged)``
    where ``merged`` is the DataFrame with columns
    ``[first_field, resulting_field_value, first_field_count]``.
    """
    # Sum of the measured column per category.
    totals = (
        data.groupby([first_field])[field_grouped_on]
        .sum()
        .reset_index(name=resulting_field_value)
    )
    # Number of rows per category, relabelled to the requested column names.
    occurrences = (
        data[first_field]
        .value_counts()
        .reset_index()
        .set_axis([first_field, first_field_count], axis=1)
    )
    merged = totals.merge(occurrences, on=first_field)
    return first_field_count, resulting_field_value, merged

## Bivariate Analysis

### Service Duration Efficiency

#### Service duration 

In [139]:
# Sum the service_duration values that share the same duration, then order the
# result from the smallest total to the largest.
Service_Duration = (
    df.groupby(['service_duration'])['service_duration']
    .sum()
    .reset_index(name='Total_service_duration')
    .sort_values('Total_service_duration', ascending=True)
)

# Bar chart: one bar per distinct duration, bar height = summed duration.
fig = px.bar(
    data_frame=Service_Duration,
    x='service_duration',
    y='Total_service_duration',
    title="Service duration for each service type",
    labels=dict(service_duration='Service Duration',
                Total_service_duration='Total Service Duration'),
)
fig.update_layout(
    height=500,
    xaxis_tickangle=-45,
    xaxis_title="Service Duration",
    yaxis_title="Total Service Duration",
)
fig.show()


#### Service duration per damage type

In [141]:
# Sum service_duration per (duration, damage type) pair, then order the result
# from the smallest total to the largest.
Service_Duration = (
    df.groupby(['service_duration', 'damage type'])['service_duration']
    .sum()
    .reset_index(name='Total_service_duration')
    .sort_values('Total_service_duration', ascending=True)
)

# Bar chart: one bar per distinct duration, segments coloured by damage type.
fig = px.bar(
    data_frame=Service_Duration,
    x='service_duration',
    y='Total_service_duration',
    color='damage type',
    title="Service duration for each service type",
    labels=dict(service_duration='Service Duration',
                Total_service_duration='Total Service Duration'),
)
fig.update_layout(
    height=500,
    xaxis_tickangle=-45,
    xaxis_title="Service Duration",
    yaxis_title="Total Service Duration",
)
fig.show()

#### Service duration per damage type per location

In [149]:
# Aggregate service_duration per (duration, damage type, location) combination
# and order the result from the smallest aggregate to the largest.
# NOTE(review): the column is named 'service_duration_count' and labelled
# "Service Duration Count", but the aggregation is .sum(), not a count --
# confirm which of the two is intended.
Service_Duration = df.groupby(['service_duration','damage type','location'])['service_duration'].sum().reset_index(name='service_duration_count')
Service_Duration = Service_Duration.sort_values('service_duration_count',ascending=True)

# Scatter chart: one marker per combination, placed by location, coloured by
# damage type and sized by the service duration.
fig = px.scatter(Service_Duration, 
             x='location', 
             y='service_duration_count',
             color='damage type',
             size='service_duration',
             title="Service duration for each service type for each location",
             labels={'service_duration': 'Service Duration Days', 'service_duration_count': 'Service Duration Count','damage type': 'Damage Type'})

# The x axis shows locations, so it is titled "Service Location" (it previously
# read "Service Location Days", which does not describe the plotted column).
fig.update_layout(xaxis_title="Service Location", yaxis_title="Service Duration Count", 
                  xaxis_tickangle=-45, height=500)
fig.show()

In [160]:
# Print the written recommendations that accompany the service-duration charts.
# The text is a single string literal: every source line ends with an explicit
# "\n" escape followed by a backslash line continuation, so the line breaks in
# the output come only from the "\n" escapes.
print("\
Service Duration Efficiency\n\
Recommendations for Investigation:\n\
==================================\n\
1.Investigate Parts and Material Availability:\n\
  * Look into whether the locations with long service durations are facing delays in receiving necessary spare parts or \n\
    materials.\n\
  * Recommendation: Track the time taken to order and receive parts at each location. This could help identify if certain\n\
    service delays are due to supply chain issues rather than inefficiency in the repair process.\n\
\n\
2.Damage Complexity:\n\
  * Compare the complexity of damage types with the service duration. Some repairs may take longer naturally, so identifying\n\
    these complex services could help avoid false conclusions about inefficiency.\n\
  * Recommendation: Categorize damage types by complexity and analyze whether more complex services are taking\n\
    disproportionately longer than expected.\n\
\n\
3.Inventory Management:\n\
  * Look into inventory levels at each service location. Locations with low stock of commonly needed parts may face delays\n\
    due to restocking times.\n\
  * Recommendation: Implement an optimized inventory management system at service locations to ensure that critical parts\n\
    are always in stock and available when needed.\n\
\n\
4.Client-Specific Preferences:\n\
  * Corporate clients might require specific handling procedures or custom parts, which could add to the repair duration.\n\
  * Recommendation: Review the service agreements with major clients to determine if any special requests or conditions\n\
    could be contributing to delays. Consider whether those requests are necessary or if there are ways to expedite services\n\
    while maintaining client satisfaction.\n\
\n\
5.Worker and Resource Allocation:\n\
  * If certain locations face consistent delays, consider whether they are understaffed or if mechanic expertise does not\n\
    match the complexity of repairs. It’s not just about inefficiency but also about matching the right resources to the job.\n\
  * Recommendation: Evaluate whether worker allocation, skill levels, or tool availability in specific locations could be\n\
    contributing to delays and adjust resource deployment accordingly.\n\
\n\
Final Thoughts:\n\
  The longer service durations observed in the analysis could be a result of multiple factors, not just inefficiency at the\n\
  worker level. By broadening the scope of investigation to include parts availability, damage complexity, client demands, \n\
  and inventory management, Ahmad could gain deeper insights into why certain locations or damage types experience delays.\
")

Service Duration Efficiency
Recommendations for Investigation:
1.Investigate Parts and Material Availability:
  * Look into whether the locations with long service durations are facing delays in receiving necessary spare parts or 
    materials.
  * Recommendation: Track the time taken to order and receive parts at each location. This could help identify if certain
    service delays are due to supply chain issues rather than inefficiency in the repair process.

2.Damage Complexity:
  * Compare the complexity of damage types with the service duration. Some repairs may take longer naturally, so identifying
    these complex services could help avoid false conclusions about inefficiency.
  * Recommendation: Categorize damage types by complexity and analyze whether more complex services are taking
    disproportionately longer than expected.

3.Inventory Management:
  * Look into inventory levels at each service location. Locations with low stock of commonly needed parts may face delays
 