<a href="https://colab.research.google.com/github/tasosnikitakis/Data_Science_Notebooks/blob/main/pharmacy_bonuses_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install dash

Collecting dash
  Downloading dash-2.13.0-py3-none-any.whl (10.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
Collecting Werkzeug<2.3.0 (from dash)
  Downloading Werkzeug-2.2.3-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.6/233.6 kB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0m
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Collecting ansi2html (from dash)
  Downloading ansi2html-1.8.0-py3-none-any.whl (16 kB)
Installing collected packages: dash-table, dash-html-components, dash-core-components, W

In [2]:
from dash import Dash, dcc, html
from dash.dependencies import Input, Output
from dash.exceptions import PreventUpdate
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
import plotly.express as ptx
import plotly.graph_objects as go

# Dataframe Creation

In [3]:
# Load the raw bonuses workbook into a DataFrame.
# NOTE(review): the path sits under /content/drive, so this presumably needs
# Google Drive to be mounted in Colab first -- no mount call is visible in this
# part of the notebook; confirm before a fresh run.
bonuses_df = pd.read_excel("/content/drive/MyDrive/pharmacy_data/Bonuses.xlsx")

In [5]:
# Preview the first rows to sanity-check that the sheet loaded as expected.
bonuses_df.head()

Unnamed: 0,DATE,BONUS CATEGORY,BONUS
0,20/01/2021,ΤΑΜΕΙΑΚΗ ΕΚΠΤΩΣΗ,107.32
1,20/06/2021,BONUS ΣΤΟΧΩΝ,193.15
2,20/11/2021,BONUS ΣΤΟΧΩΝ,99.64
3,20/12/2021,BONUS ΣΤΟΧΩΝ,275.6
4,20/03/2022,BONUS ΣΤΟΧΩΝ,169.6


## Data Exploration

In [7]:
# List the column labels available in the loaded frame.
bonuses_df.columns

Index(['DATE', 'BONUS CATEGORY', 'BONUS'], dtype='object')

# Data Cleaning

In [9]:
# Parse the day-first date strings into real datetimes so that the .dt
# accessors used further down work on this column.
bonuses_df["DATE"] = pd.to_datetime(bonuses_df["DATE"], format="%d/%m/%Y")

# Display only: render each date back as dd/mm/YYYY. The Styler changes how the
# frame is shown, not the datetime values stored in it.
bonuses_df.style.format({"DATE": lambda stamp: stamp.strftime("%d/%m/%Y")})

Unnamed: 0,DATE,BONUS CATEGORY,BONUS
0,20/01/2021,ΤΑΜΕΙΑΚΗ ΕΚΠΤΩΣΗ,107.32
1,20/06/2021,BONUS ΣΤΟΧΩΝ,193.15
2,20/11/2021,BONUS ΣΤΟΧΩΝ,99.64
3,20/12/2021,BONUS ΣΤΟΧΩΝ,275.6
4,20/03/2022,BONUS ΣΤΟΧΩΝ,169.6
5,20/04/2022,ΤΑΜΕΙΑΚΗ ΕΚΠΤΩΣΗ,237.55
6,20/07/2022,BONUS ΣΤΟΧΩΝ,153.21
7,20/09/2022,ΤΑΜΕΙΑΚΗ ΕΚΠΤΩΣΗ,83.96
8,20/10/2022,BONUS ΣΤΟΧΩΝ,102.4
9,20/12/2022,BONUS ΣΤΟΧΩΝ,288.28


In [10]:
def aggregate_by_year_sums(bonuses_df):
  """Sum the 'BONUS' column per calendar year.

  Side effects: the frame passed in is modified in place -- its 'DATE' column
  is replaced by parsed datetimes and a 'YEAR' column is added to it.

  Args:
    bonuses_df: DataFrame with a 'DATE' column (datetimes or 'dd/mm/YYYY'
      strings) and a 'BONUS' column.

  Returns:
    A new DataFrame with one row per year and the columns 'YEAR' and 'BONUS'.
  """
  parsed_dates = pd.to_datetime(bonuses_df['DATE'], format='%d/%m/%Y')
  bonuses_df['DATE'] = parsed_dates
  bonuses_df['YEAR'] = parsed_dates.dt.year

  yearly_totals = bonuses_df.groupby('YEAR')['BONUS'].sum()
  # reset_index turns the 'YEAR' group labels back into an ordinary column.
  return yearly_totals.reset_index()

In [11]:
# Total bonus per calendar year. Note that this call also adds a 'YEAR' column
# to bonuses_df itself (the function assigns it on the frame it is given).
yearly_total_bonuses_df = aggregate_by_year_sums(bonuses_df)

In [12]:
# Show the per-year totals.
yearly_total_bonuses_df

Unnamed: 0,YEAR,BONUS
0,2021,675.71
1,2022,1035.0
2,2023,149.46


In [17]:
def aggregate_by_year_category_sums(expenses_df):
  """Sum the 'BONUS' column per (year, bonus category) pair.

  Side effects: the frame passed in is modified in place -- its 'DATE' column
  is replaced by parsed datetimes and a 'YEAR' column is added to it.

  Args:
    expenses_df: DataFrame with a 'DATE' column (datetimes or 'dd/mm/YYYY'
      strings), a 'BONUS CATEGORY' column and a 'BONUS' column. Despite the
      parameter name, the columns read here are the bonus ones.

  Returns:
    A new DataFrame with one row per (year, category) combination present in
    the data and the columns 'YEAR', 'BONUS CATEGORY' and 'BONUS'.
  """
  parsed_dates = pd.to_datetime(expenses_df['DATE'], format='%d/%m/%Y')
  expenses_df['DATE'] = parsed_dates
  expenses_df['YEAR'] = parsed_dates.dt.year

  group_keys = ['YEAR', 'BONUS CATEGORY']
  category_totals = expenses_df.groupby(group_keys)['BONUS'].sum()
  # reset_index flattens the two-level group index back into plain columns.
  return category_totals.reset_index()

In [18]:
# Total bonus per (year, bonus category) pair.
aggregated_categories_by_year = aggregate_by_year_category_sums(bonuses_df)

In [19]:
# Show the per-year, per-category totals.
aggregated_categories_by_year

Unnamed: 0,YEAR,BONUS CATEGORY,BONUS
0,2021,BONUS ΣΤΟΧΩΝ,568.39
1,2021,ΤΑΜΕΙΑΚΗ ΕΚΠΤΩΣΗ,107.32
2,2022,BONUS ΣΤΟΧΩΝ,713.49
3,2022,ΤΑΜΕΙΑΚΗ ΕΚΠΤΩΣΗ,321.51
4,2023,BONUS ΣΤΟΧΩΝ,149.46


In [22]:
def calculate_bonuses_for_year(expenses_df, target_year):
    """Return the total of the 'BONUS' column for rows dated in ``target_year``.

    The input frame is left untouched: dates are parsed into a local Series
    instead of being written back to the caller's 'DATE' column.

    Args:
        expenses_df: DataFrame with a 'DATE' column (datetimes or 'dd/mm/YYYY'
            strings) and a 'BONUS' column. The parameter name is kept for
            backward compatibility; the column summed is 'BONUS'.
        target_year: Calendar year to total, e.g. 2021.

    Returns:
        The sum of 'BONUS' over the matching rows; 0 when no row falls in
        that year.
    """
    # A read-only total should not rewrite the caller's data, so the parsed
    # dates stay local to this function.
    dates = pd.to_datetime(expenses_df['DATE'], format='%d/%m/%Y')
    in_target_year = dates.dt.year == target_year
    return expenses_df.loc[in_target_year, 'BONUS'].sum()

In [23]:
# Grand total of all bonuses dated in 2021.
bonuses_2021 = calculate_bonuses_for_year(bonuses_df, 2021)

In [24]:
# Show the 2021 total.
bonuses_2021

675.71

In [25]:
# Pie chart of one year's bonuses split by category. The year is defined once
# and drives both the row filter and the title, so the two cannot drift apart
# (the title previously said 2020 while the data shown was 2021).
chart_year = 2021

# Filter on the parsed 'DATE' column directly rather than on a 'YEAR' column,
# which only exists as a side effect of an earlier aggregation call.
df = bonuses_df[bonuses_df['DATE'].dt.year == chart_year]
fig = ptx.pie(
    df,
    values='BONUS',
    names='BONUS CATEGORY',
    title=f'{chart_year} Bonus Categories Pie Chart',
)
fig.show()