<a href="https://colab.research.google.com/github/tasosnikitakis/Data_Science_Notebooks/blob/main/pharmacy_expenses_data_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
!pip install dash

Collecting dash
  Downloading dash-2.13.0-py3-none-any.whl (10.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m51.3 MB/s[0m eta [36m0:00:00[0m
Collecting Werkzeug<2.3.0 (from dash)
  Downloading Werkzeug-2.2.3-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.6/233.6 kB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Collecting ansi2html (from dash)
  Downloading ansi2html-1.8.0-py3-none-any.whl (16 kB)
Installing collected packages: dash-table, dash-html-components, dash-core-components, W

In [2]:
from dash import Dash, dcc, html
from dash.dependencies import Input, Output
from dash.exceptions import PreventUpdate
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
import plotly.express as ptx
import plotly.graph_objects as go

# DataFrame Creation

In [4]:
# Load the raw expense records from an Excel workbook into `expenses_df`.
# NOTE(review): the absolute path assumes Google Drive is already mounted at
# /content/drive; no mount cell is visible in this notebook — confirm before
# relying on Restart & Run All.
expenses_df = pd.read_excel("/content/drive/MyDrive/pharmacy_data/Expenses_2022.xlsx")

In [None]:
expenses_df.head()

Unnamed: 0,DATE,EXPENSE CATEGORY,EXPENSE SUBCATEGORY,EXPENSE
0,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΕΝΟΙΚΙΟ,400.0
1,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΤΗΛΕΦΩΝΟ,51.5
2,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,KINHTO,23.67
3,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΡΕΥΜΑ,89.76
4,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΜΙΣΘΟΣ ΥΠΑΛΛΗΛΟΥ,523.25


# Data Exploration

In [5]:
expenses_df.describe()

Unnamed: 0,EXPENSE
count,515.0
mean,182.646874
std,182.789277
min,0.0
25%,54.915
50%,100.0
75%,275.575
max,1324.0


In [6]:
expenses_df.columns

Index(['DATE', 'EXPENSE CATEGORY', 'EXPENSE SUBCATEGORY', 'EXPENSE'], dtype='object')

# Data Cleaning

In [7]:
# Parse the day/month/year strings in DATE into real datetimes. Bracket
# assignment is used because it is the unambiguous way to (re)assign a column.
expenses_df["DATE"] = pd.to_datetime(expenses_df["DATE"], format="%d/%m/%Y")

# Preview only the first rows: a Styler renders every row it is given, so
# styling the whole frame would dump all records into the cell output.
# The formatter shows DATE back in day/month/year form for display only;
# the column itself stays datetime.
expenses_df.head(10).style.format({"DATE": lambda t: t.strftime("%d/%m/%Y")})

Unnamed: 0,DATE,EXPENSE CATEGORY,EXPENSE SUBCATEGORY,EXPENSE
0,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΕΝΟΙΚΙΟ,400.0
1,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΤΗΛΕΦΩΝΟ,51.5
2,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,KINHTO,23.67
3,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΡΕΥΜΑ,89.76
4,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΜΙΣΘΟΣ ΥΠΑΛΛΗΛΟΥ,523.25
5,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΠΡΟΜΗΘΕΙΑ POS,25.93
6,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΛΟΓΙΣΤΗΣ,100.0
7,20/01/2020,ΕΚΤΑΚΤΑ ΕΞΟΔΑ,ΣΑΚΟΥΛΕΣ,33.53
8,20/01/2020,ΑΣΦΑΛΙΣΤΙΚΕΣ ΕΙΣΦΟΡΕΣ,ΤΑΣΟΣ,263.62
9,20/01/2020,ΑΣΦΑΛΙΣΤΙΚΕΣ ΕΙΣΦΟΡΕΣ,ΒΙΚΥ (ΙΚΑ+ΕΠΙΚΟΥΡΙΚΑ),301.01


## Monthly per-category expense sums, aggregated from the main dataframe of daily data


In [8]:
def aggregate_by_year_month_category_sums(expenses_df):
    """Sum 'EXPENSE' per (year, month, expense category).

    Side effects on ``expenses_df`` (modified in place):
      * 'DATE' is replaced by its datetime-parsed form ('%d/%m/%Y').
      * 'YEAR' and 'MONTH' columns are added (or overwritten) from 'DATE'.

    Returns:
        A new DataFrame with columns 'YEAR', 'MONTH', 'EXPENSE CATEGORY'
        and 'EXPENSE', one row per group, where 'EXPENSE' is the group total.
    """
    # Normalise DATE first; already-parsed datetimes pass through unchanged.
    parsed_dates = pd.to_datetime(expenses_df['DATE'], format='%d/%m/%Y')
    expenses_df['DATE'] = parsed_dates

    # Calendar parts used as grouping keys; written back onto the input frame.
    expenses_df['YEAR'] = parsed_dates.dt.year
    expenses_df['MONTH'] = parsed_dates.dt.month

    group_keys = ['YEAR', 'MONTH', 'EXPENSE CATEGORY']
    category_totals = expenses_df.groupby(group_keys)['EXPENSE'].sum()

    # Turn the group keys back into ordinary columns.
    return category_totals.reset_index()

In [9]:
# Build the per-(year, month, category) totals table.
# Note: the call also modifies `expenses_df` in place — the function parses
# DATE and adds YEAR and MONTH columns to the frame it is given.
monthly_category_expenses = aggregate_by_year_month_category_sums(expenses_df)

In [10]:
monthly_category_expenses

Unnamed: 0,YEAR,MONTH,EXPENSE CATEGORY,EXPENSE
0,2020,1,ΑΣΦΑΛΙΣΤΙΚΕΣ ΕΙΣΦΟΡΕΣ,564.63
1,2020,1,ΕΚΤΑΚΤΑ ΕΞΟΔΑ,33.53
2,2020,1,ΕΠΕΝΔΥΣΕΙΣ,99.00
3,2020,1,ΠΑΓΙΑ ΕΞΟΔΑ,1214.11
4,2020,2,ΑΣΦΑΛΙΣΤΙΚΕΣ ΕΙΣΦΟΡΕΣ,754.63
...,...,...,...,...
136,2023,6,ΠΑΓΙΑ ΕΞΟΔΑ,1041.58
137,2023,7,ΑΣΦΑΛΙΣΤΙΚΕΣ ΕΙΣΦΟΡΕΣ,584.01
138,2023,7,ΠΑΓΙΑ ΕΞΟΔΑ,1124.81
139,2023,8,ΑΣΦΑΛΙΣΤΙΚΕΣ ΕΙΣΦΟΡΕΣ,584.01


## Aggregation function by year and month, creating a dataframe with the total expenses of each month

In [11]:
def aggregate_by_year_month_sums(expenses_df):
    """Sum 'EXPENSE' per (year, month).

    Side effects on ``expenses_df`` (modified in place):
      * 'DATE' is replaced by its datetime-parsed form ('%d/%m/%Y').
      * 'YEAR' and 'MONTH' columns are added (or overwritten) from 'DATE'.

    Returns:
        A new DataFrame with columns 'YEAR', 'MONTH' and 'EXPENSE', one row
        per (year, month) pair, where 'EXPENSE' is that month's total.
    """
    # Normalise DATE first; already-parsed datetimes pass through unchanged.
    parsed_dates = pd.to_datetime(expenses_df['DATE'], format='%d/%m/%Y')
    expenses_df['DATE'] = parsed_dates

    # Calendar parts used as grouping keys; written back onto the input frame.
    expenses_df['YEAR'] = parsed_dates.dt.year
    expenses_df['MONTH'] = parsed_dates.dt.month

    monthly_totals = expenses_df.groupby(['YEAR', 'MONTH'])['EXPENSE'].sum()

    # Turn the group keys back into ordinary columns.
    return monthly_totals.reset_index()

In [12]:
monthly_total_expenses_df = aggregate_by_year_month_sums(expenses_df)

In [13]:
monthly_total_expenses_df

Unnamed: 0,YEAR,MONTH,EXPENSE
0,2020,1,1911.27
1,2020,2,2637.73
2,2020,3,1456.82
3,2020,4,1897.78
4,2020,5,1624.0
5,2020,6,1303.49
6,2020,7,1773.51
7,2020,8,1232.1
8,2020,9,2616.87
9,2020,10,2086.44


## Aggregation function for total yearly expenses

In [14]:
def aggregate_by_year_sums(expenses_df):
    """Sum 'EXPENSE' per year.

    Side effects on ``expenses_df`` (modified in place):
      * 'DATE' is replaced by its datetime-parsed form ('%d/%m/%Y').
      * A 'YEAR' column is added (or overwritten) from 'DATE'.

    Returns:
        A new DataFrame with columns 'YEAR' and 'EXPENSE', one row per year,
        where 'EXPENSE' is that year's total.
    """
    # Normalise DATE first; already-parsed datetimes pass through unchanged.
    parsed_dates = pd.to_datetime(expenses_df['DATE'], format='%d/%m/%Y')
    expenses_df['DATE'] = parsed_dates

    # Only the year is needed as a grouping key here.
    expenses_df['YEAR'] = parsed_dates.dt.year

    yearly_totals = expenses_df.groupby('YEAR')['EXPENSE'].sum()

    # Turn the group key back into an ordinary column.
    return yearly_totals.reset_index()

In [15]:
yearly_total_expenses_df = aggregate_by_year_sums(expenses_df)

In [16]:
yearly_total_expenses_df

Unnamed: 0,YEAR,EXPENSE
0,2020,24398.75
1,2021,30333.16
2,2022,23974.98
3,2023,15356.25


## Aggregation function that aggregates by year and expense subcategory, summing the expenses

In [17]:
def aggregate_by_year_subcategory_sums(expenses_df):
    """Sum 'EXPENSE' per (year, expense subcategory).

    Side effects on ``expenses_df`` (modified in place):
      * 'DATE' is replaced by its datetime-parsed form ('%d/%m/%Y').
      * A 'YEAR' column is added (or overwritten) from 'DATE'.

    Returns:
        A new DataFrame with columns 'YEAR', 'EXPENSE SUBCATEGORY' and
        'EXPENSE', one row per group, where 'EXPENSE' is the group total.
    """
    # Normalise DATE first; already-parsed datetimes pass through unchanged.
    parsed_dates = pd.to_datetime(expenses_df['DATE'], format='%d/%m/%Y')
    expenses_df['DATE'] = parsed_dates

    # Only the year is needed as a calendar grouping key here.
    expenses_df['YEAR'] = parsed_dates.dt.year

    group_keys = ['YEAR', 'EXPENSE SUBCATEGORY']
    subcategory_totals = expenses_df.groupby(group_keys)['EXPENSE'].sum()

    # Turn the group keys back into ordinary columns.
    return subcategory_totals.reset_index()

## Function that calculates the total expenses for the current month based on expenses_df DataFrame

In [None]:
def current_month_total_expenses(expenses_df):
    """Return the total of 'EXPENSE' for rows dated in the current month.

    "Current" is taken from the local system clock at call time.

    Side effect: the 'DATE' column of ``expenses_df`` is replaced in place by
    its datetime-parsed form ('%d/%m/%Y').

    Args:
        expenses_df: DataFrame with a 'DATE' column (datetimes or
            'dd/mm/yyyy' strings) and a numeric 'EXPENSE' column.

    Returns:
        The sum of 'EXPENSE' over rows whose DATE falls in the current month
        and year; 0 when no row matches.
    """
    # Convert the DATE column to datetime if it's not already in that format
    expenses_df['DATE'] = pd.to_datetime(expenses_df['DATE'], format='%d/%m/%Y')

    # The notebook imports the module as `import datetime as dt`, so the class
    # must be reached as `dt.datetime`; a bare `datetime.now()` is a NameError.
    # Read the clock once so month and year come from the same instant and
    # cannot straddle a month/year boundary between two calls.
    today = dt.datetime.now()

    # Rows whose date lies in the current month of the current year.
    in_current_month = (
        (expenses_df['DATE'].dt.month == today.month) &
        (expenses_df['DATE'].dt.year == today.year)
    )

    return expenses_df.loc[in_current_month, 'EXPENSE'].sum()


## Function that calculates the total expenses in the expenses_df DataFrame for a specific year and month passed as arguments

In [36]:
def calculate_expenses_for_month_and_year(expenses_df, target_year, target_month):
    """Return the total of 'EXPENSE' for rows dated in the given year and month.

    Side effect: the 'DATE' column of ``expenses_df`` is replaced in place by
    its datetime-parsed form ('%d/%m/%Y').

    Args:
        expenses_df: DataFrame with 'DATE' and 'EXPENSE' columns.
        target_year: Calendar year to match against DATE.
        target_month: Calendar month number to match against DATE.

    Returns:
        The sum of 'EXPENSE' over the matching rows.
    """
    # Normalise DATE so the .dt accessor below is available.
    expenses_df['DATE'] = pd.to_datetime(expenses_df['DATE'], format='%d/%m/%Y')

    dates = expenses_df['DATE']
    year_matches = dates.dt.year == target_year
    month_matches = dates.dt.month == target_month

    # Keep only rows satisfying both conditions, then total their expenses.
    return expenses_df.loc[year_matches & month_matches, 'EXPENSE'].sum()

In [37]:
# Example: total expenses for one chosen month (January 2020).
year_to_calculate = 2020
month_to_calculate = 1
total_expenses = calculate_expenses_for_month_and_year(expenses_df, year_to_calculate, month_to_calculate)
# Report the total rounded to two decimal places.
print(f'Total expenses for {month_to_calculate}/{year_to_calculate}: {total_expenses:.2f}')

Total expenses for 1/2020: 1911.27


In [18]:
aggregate_by_year_subcategory_sums(expenses_df)

Unnamed: 0,YEAR,EXPENSE SUBCATEGORY,EXPENSE
0,2020,ANIMAPS,148.80
1,2020,BUYCHOICE,101.85
2,2020,CLICK.GR,39.70
3,2020,COPYSHOP,15.88
4,2020,DEALSTORE,8.86
...,...,...,...
124,2023,ΡΕΥΜΑ,763.49
125,2023,ΣΑΚΟΥΛΕΣ,116.07
126,2023,ΤΑΣΟΣ,2142.71
127,2023,ΤΗΛΕΦΩΝΟ,807.03


In [21]:
# Share of each expense category in the 2020 total.
# Filter on the DATE column directly (as the other pie-chart cells do) rather
# than on YEAR: the YEAR column only exists after one of the aggregate_*
# functions has been called on `expenses_df`, so querying it makes this cell
# depend on hidden state from earlier cells.
df = expenses_df[expenses_df['DATE'].dt.year == 2020]
fig = ptx.pie(df, values='EXPENSE', names='EXPENSE CATEGORY', title='2020 Expenses Categories Pie Chart')
fig.show()

In [30]:
# 2020 rows in the 'ΠΑΓΙΑ ΕΞΟΔΑ' category, split by subcategory.
filtered_df = expenses_df.loc[
    lambda frame: (frame['DATE'].dt.year == 2020)
    & (frame['EXPENSE CATEGORY'] == 'ΠΑΓΙΑ ΕΞΟΔΑ')
]
fig = ptx.pie(
    filtered_df,
    names='EXPENSE SUBCATEGORY',
    values='EXPENSE',
    title='2020 Operating Expenses Pie Chart',
)
fig.show()

In [31]:
# 2020 rows in the 'ΕΚΤΑΚΤΑ ΕΞΟΔΑ' category, split by subcategory.
filtered_df = expenses_df.loc[
    lambda frame: (frame['DATE'].dt.year == 2020)
    & (frame['EXPENSE CATEGORY'] == 'ΕΚΤΑΚΤΑ ΕΞΟΔΑ')
]
fig = ptx.pie(
    filtered_df,
    names='EXPENSE SUBCATEGORY',
    values='EXPENSE',
    title='2020 Non Repeating Operating Expenses Pie Chart',
)
fig.show()

In [32]:
# 2020 rows in the 'ΕΠΕΝΔΥΣΕΙΣ' category, split by subcategory.
filtered_df = expenses_df.loc[
    lambda frame: (frame['DATE'].dt.year == 2020)
    & (frame['EXPENSE CATEGORY'] == 'ΕΠΕΝΔΥΣΕΙΣ')
]
fig = ptx.pie(
    filtered_df,
    names='EXPENSE SUBCATEGORY',
    values='EXPENSE',
    title='2020 Investment Expenses Pie Chart',
)
fig.show()

In [34]:
# 2020 rows in the 'ΑΣΦΑΛΙΣΤΙΚΕΣ ΕΙΣΦΟΡΕΣ' category, split by subcategory.
filtered_df = expenses_df.loc[
    lambda frame: (frame['DATE'].dt.year == 2020)
    & (frame['EXPENSE CATEGORY'] == 'ΑΣΦΑΛΙΣΤΙΚΕΣ ΕΙΣΦΟΡΕΣ')
]
fig = ptx.pie(
    filtered_df,
    names='EXPENSE SUBCATEGORY',
    values='EXPENSE',
    title='2020 Social Security Expenses Pie Chart',
)
fig.show()