# Import Libraries

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import plotly.express as px
import plotly.io as pio
import plotly
from ydata_profiling import  ProfileReport
import PyQt5 as qt
from IPython.display import display, Markdown
# Enable inline plotting inside Jupyter.
# pip install PyQt5  # Install it if it is not already installed.
# Switch matplotlib to the inline backend via the IPython line magic.
get_ipython().run_line_magic('matplotlib', 'inline')
# Evaluate the active backend name so the notebook cell displays it.
matplotlib.get_backend()

'module://matplotlib_inline.backend_inline'

# Load Data (Cleaned and Extended)

In [13]:
# Read the Excel workbook into the module-level DataFrame `df`, which the
# analysis cells below iterate over column by column.
df = pd.read_excel('maintenance_cleaned_extended.xlsx')

# Functions

## Filter Data Above and Below the Mean

In [14]:
def TopTailMean(data, Count):
    """Return the largest above-mean and smallest below-mean entries of *data*.

    Args:
        data: A pandas Series of numeric values.
        Count: Maximum number of entries to keep on each side of the mean.

    Returns:
        A tuple ``(top, tail)`` of Series, both sorted in descending order:
        ``top`` holds up to ``Count`` of the largest values strictly above
        the mean, ``tail`` holds up to ``Count`` of the smallest values
        strictly below the mean. Values equal to the mean appear in neither.
    """
    # Series.mean() skips missing values; the raw ndarray mean would turn
    # into NaN as soon as one entry is missing, making both comparisons
    # below all-False and both results empty.
    mean_value = data.mean()
    # Strictly above / strictly below the mean.
    above_mean = data[data > mean_value]
    below_mean = data[data < mean_value]
    # Largest `Count` values above the mean, in descending order.
    top_filtered_data = above_mean.sort_values(ascending=False).head(Count)
    # Descending sort followed by tail() keeps the `Count` smallest values
    # below the mean while preserving the descending presentation order.
    tail_filtered_data = below_mean.sort_values(ascending=False).tail(Count)
    return top_filtered_data, tail_filtered_data

## Assign Colors

In [15]:
def assign_color(index, top, tail):
    """Pick a color name for *index* based on which group contains it.

    Returns ``'blue'`` when *index* is a label of ``top.index``, ``'red'``
    when it is a label of ``tail.index`` (and not of ``top.index``), and
    ``'gray'`` otherwise.
    """
    # Membership in `top` takes precedence over membership in `tail`.
    if index in top.index:
        return 'blue'
    return 'red' if index in tail.index else 'gray'

## Coloring

In [16]:
def coloring(data, top, tail):
    """Build the list of color names for every label in ``data.index``.

    Each label is classified by ``assign_color`` against *top* and *tail*;
    the returned list follows the order of ``data.index``.
    """
    palette = []
    for label in data.index:
        palette.append(assign_color(label, top, tail))
    return palette

## Group by two fields and sort by the first field's total, then by the pair total

In [17]:
def sumOfsum(data, groupField1, groupField2, sumField):
    """Sum *sumField* per (groupField1, groupField2) pair, ordered by group size.

    Args:
        data: DataFrame containing the three named columns.
        groupField1: Column name of the outer grouping key.
        groupField2: Column name of the inner grouping key.
        sumField: Column name of the values to sum.

    Returns:
        A DataFrame with columns ``groupField1``, ``groupField2`` and
        ``'Total Sum'`` (the per-pair sum). Rows are sorted in descending
        order of the outer group's overall sum, then of the per-pair sum.
    """
    # Per-pair totals: these are the rows of the final result.
    pair_totals = (
        data.groupby([groupField1, groupField2])[sumField]
        .sum()
        .reset_index(name='Total Sum')
    )
    # Overall total of each outer group, used only as the primary sort key.
    outer_totals = (
        data.groupby(groupField1)[sumField]
        .sum()
        .reset_index(name='First Sum')
    )
    ranked = pair_totals.merge(outer_totals, on=groupField1).sort_values(
        by=['First Sum', 'Total Sum'], ascending=False
    )
    # The helper sort key is not part of the returned table.
    return ranked.drop(columns=['First Sum'])

## Visualization Functions

## Bar, Scatter, Line charts

In [18]:
def myPlot(data, plotType, title, x_label, y_label):
    """Render *data* as an interactive Plotly chart and display it.

    The data is sorted in ascending order before plotting. For bar, scatter
    and pie charts each point is colored with the list built by ``coloring``
    from ``TopTailMean(data, 3)``: blue for the top group, red for the tail
    group and gray for everything else. Line charts are drawn without
    per-point colors.

    Args:
        data: Values to plot; expected to be a pandas Series (it is sorted
            with ``sort_values(ascending=True)`` and its index and values
            are read as the x and y coordinates).
        plotType: One of ``'bar'``, ``'scatter'``, ``'pie'`` or ``'line'``.
        title: Chart title; the suffix ``' Analysis'`` is appended.
        x_label: Label for the x axis.
        y_label: Label for the y axis.

    Raises:
        ValueError: If *plotType* is not one of the supported chart types.
    """
    # Fail fast with a clear message; previously an unknown type left `fig`
    # unassigned and surfaced as an UnboundLocalError further down.
    supported_types = ('bar', 'scatter', 'pie', 'line')
    if plotType not in supported_types:
        raise ValueError(
            f"Unsupported plotType {plotType!r}; expected one of {supported_types}"
        )

    data = data.sort_values(ascending=True)
    xs = data.index.astype(str)  # Convert index to strings for the x axis
    ys = data.values  # y-axis values
    top_filtered_data, tail_filtered_data = TopTailMean(data, 3)
    colors = coloring(data, top_filtered_data, tail_filtered_data)
    chart_title = title + ' Analysis'

    # NOTE: per-point '> Avg' / '< Avg' annotations were prototyped for the
    # bar and scatter charts but are currently disabled.
    if plotType == 'bar':
        fig = px.bar(x=xs, y=ys, title=chart_title)
        fig.update_traces(marker_color=colors)  # Bar-specific color update
    elif plotType == 'scatter':
        fig = px.scatter(x=xs, y=ys, title=chart_title)
        fig.update_traces(marker=dict(color=colors))  # Scatter-specific color update
    elif plotType == 'pie':
        fig = px.pie(names=xs, values=ys, title=chart_title)
        fig.update_traces(marker=dict(colors=colors))  # Pie-specific color update
    else:  # 'line'
        fig = px.line(x=xs, y=ys, title=chart_title)

    # Left-align the title and apply the custom axis labels.
    fig.update_layout(
        title_x=0.0,
        xaxis_title=x_label,
        yaxis_title=y_label,
    )

    fig.show()


# Exploratory Data Analysis (EDA)

## Univariate Analysis

### Fields Exploration

In [19]:
print('================================= Fields Exploration =================================')
# Plot the value counts of every column except those whose values are all
# distinct (number of unique values equal to the number of rows).
row_count = len(df)
for column in df.columns:
    if df[column].nunique() == row_count:
        continue
    field_label = column.capitalize()
    myPlot(df[column].value_counts(), 'bar', 'Field: ' + field_label, field_label, 'Count')



In [20]:
### Fields Value Counts Analysis

In [21]:
def uni(column):
    """Plot a bar chart of the value counts of ``df[column]``.

    The column is looked up on the module-level DataFrame ``df`` and the
    chart is drawn through ``myPlot``; nothing is returned.
    """
    counts = df.get(column).value_counts()
    label = column.capitalize()
    myPlot(counts, 'bar', label + ' Count Values ', label, 'Count Values')

In [30]:
# Chart the value counts of every column except the two row-number
# columns named 'Unnamed: 0' and '#'.
for column in df.columns:
    if column in ('Unnamed: 0', '#'):
        continue
    uni(column)