<a href="https://colab.research.google.com/github/tasosnikitakis/Data_Science_Notebooks/blob/main/pharmacy_data_expenses_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
import plotly.express as ptx

# DataFrame Creation

In [4]:
# Location of the expense ledger workbook on the mounted Google Drive.
EXPENSES_XLSX_PATH = "/content/drive/MyDrive/pharmacy_data/Expenses_2022.xlsx"

# Load the workbook into the frame every later cell works from.
expenses_df = pd.read_excel(io=EXPENSES_XLSX_PATH)

In [5]:
# Peek at the first five ledger rows to check the columns loaded as expected.
expenses_df.head(n=5)

Unnamed: 0,DATE,EXPENSE CATEGORY,EXPENSE SUBCATEGORY,EXPENSE
0,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΕΝΟΙΚΙΟ,400.0
1,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΤΗΛΕΦΩΝΟ,51.5
2,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,KINHTO,23.67
3,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΡΕΥΜΑ,89.76
4,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΜΙΣΘΟΣ ΥΠΑΛΛΗΛΟΥ,523.25


In [9]:
# Summary statistics (count, mean, spread, quartiles) for the ledger's
# numeric data; in the rendered output that is the EXPENSE column.
expenses_df.describe()

Unnamed: 0,EXPENSE
count,464.0
mean,178.950022
std,185.173209
min,0.0
25%,42.435
50%,99.44
75%,263.62
max,1324.0


## Convert the DATE column from string to datetime

In [10]:
# Parse the day/month/year strings in DATE into real datetimes so the column
# can later be filtered through the `.dt` accessor.
# Bracket assignment is the unambiguous way to write a column; attribute-style
# assignment only targets a column when one with that name already exists.
expenses_df["DATE"] = pd.to_datetime(expenses_df["DATE"], format="%d/%m/%Y")

# Preview the converted column rendered back in the sheet's dd/mm/YYYY layout.
# A Styler is not shortened by the usual DataFrame display truncation, so the
# frame is sliced first instead of styling every ledger row.
expenses_df.head(10).style.format({"DATE": lambda t: t.strftime("%d/%m/%Y")})

Unnamed: 0,DATE,EXPENSE CATEGORY,EXPENSE SUBCATEGORY,EXPENSE
0,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΕΝΟΙΚΙΟ,400.0
1,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΤΗΛΕΦΩΝΟ,51.5
2,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,KINHTO,23.67
3,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΡΕΥΜΑ,89.76
4,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΜΙΣΘΟΣ ΥΠΑΛΛΗΛΟΥ,523.25
5,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΠΡΟΜΗΘΕΙΑ POS,25.93
6,20/01/2020,ΠΑΓΙΑ ΕΞΟΔΑ,ΛΟΓΙΣΤΗΣ,100.0
7,20/01/2020,ΕΚΤΑΚΤΑ ΕΞΟΔΑ,ΣΑΚΟΥΛΕΣ,33.53
8,20/01/2020,ΑΣΦΑΛΙΣΤΙΚΕΣ ΕΙΣΦΟΡΕΣ,ΤΑΣΟΣ,263.62
9,20/01/2020,ΑΣΦΑΛΙΣΤΙΚΕΣ ΕΙΣΦΟΡΕΣ,ΒΙΚΥ (ΙΚΑ+ΕΠΙΚΟΥΡΙΚΑ),301.01


In [12]:
# Total spend per top-level expense category across the whole ledger.
# NOTE(review): the rendered result lists 'ΕΚΤΑΚΤΑ ΕΞΟΔΑ' twice, i.e. two
# visually identical labels are grouped separately. Possibly stray whitespace
# or look-alike Latin/Greek letters in the sheet — TODO confirm and normalise.
expenses_category_df = (
    expenses_df
    .groupby('EXPENSE CATEGORY')[['EXPENSE']]
    .sum()
)

In [13]:
# Display the summed EXPENSE per EXPENSE CATEGORY.
expenses_category_df

Unnamed: 0_level_0,EXPENSE
EXPENSE CATEGORY,Unnamed: 1_level_1
ΕΚΤΑΚΤΑ ΕΞΟΔΑ,147.74
ΑΣΦΑΛΙΣΤΙΚΕΣ ΕΙΣΦΟΡΕΣ,19181.99
ΕΚΤΑΚΤΑ ΕΞΟΔΑ,1779.25
ΕΠΕΝΔΥΣΕΙΣ,10160.3
ΠΑΓΙΑ ΕΞΟΔΑ,51763.53


In [18]:
# Donut chart of each expense category's share of total spend.
# Plotly Express' `labels` argument is a {column name: display label} mapping,
# not the sector labels, so the index is no longer passed there; `names`
# provides the sector labels. The grouped frame is flattened with
# reset_index() so both arguments can simply name columns.
fig = ptx.pie(
    expenses_category_df.reset_index(),
    names='EXPENSE CATEGORY',
    values='EXPENSE',
    title="Expense Categories",
    hole=0.6,
)

# Show only the percentage, inside each sector; update_traces returns the
# figure, so it is rendered as the cell output.
fig.update_traces(textposition='inside', textfont_size=15, textinfo='percent')


Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.



In [20]:
# Keep only the ledger rows filed under the 'ΠΑΓΙΑ ΕΞΟΔΑ' category, which the
# notebook later charts as operating expenses.
operating_expenses_df = expenses_df.loc[
    lambda ledger: ledger['EXPENSE CATEGORY'] == 'ΠΑΓΙΑ ΕΞΟΔΑ'
]

In [21]:
# Display the rows kept by the 'ΠΑΓΙΑ ΕΞΟΔΑ' category filter.
operating_expenses_df

Unnamed: 0,DATE,EXPENSE CATEGORY,EXPENSE SUBCATEGORY,EXPENSE
0,2020-01-20,ΠΑΓΙΑ ΕΞΟΔΑ,ΕΝΟΙΚΙΟ,400.00
1,2020-01-20,ΠΑΓΙΑ ΕΞΟΔΑ,ΤΗΛΕΦΩΝΟ,51.50
2,2020-01-20,ΠΑΓΙΑ ΕΞΟΔΑ,KINHTO,23.67
3,2020-01-20,ΠΑΓΙΑ ΕΞΟΔΑ,ΡΕΥΜΑ,89.76
4,2020-01-20,ΠΑΓΙΑ ΕΞΟΔΑ,ΜΙΣΘΟΣ ΥΠΑΛΛΗΛΟΥ,523.25
...,...,...,...,...
457,2023-02-20,ΠΑΓΙΑ ΕΞΟΔΑ,KINHTO,61.17
458,2023-02-20,ΠΑΓΙΑ ΕΞΟΔΑ,ΡΕΥΜΑ,61.95
459,2023-02-20,ΠΑΓΙΑ ΕΞΟΔΑ,ΜΙΣΘΟΣ ΥΠΑΛΛΗΛΟΥ,343.00
460,2023-02-20,ΠΑΓΙΑ ΕΞΟΔΑ,ΛΟΓΙΣΤΗΣ,100.00


In [26]:
# Total spend per subcategory within the 'ΠΑΓΙΑ ΕΞΟΔΑ' category.
# The totals are derived straight from the ledger rather than from
# `operating_expenses_df` itself: overwriting a frame with an aggregate of
# itself makes the cell fail on a second run, because 'EXPENSE SUBCATEGORY'
# has by then become the index instead of a column.
operating_expenses_df = (
    expenses_df
    .loc[expenses_df['EXPENSE CATEGORY'] == 'ΠΑΓΙΑ ΕΞΟΔΑ']
    .groupby('EXPENSE SUBCATEGORY')[['EXPENSE']]
    .sum()
)

In [27]:
# Display the summed EXPENSE per EXPENSE SUBCATEGORY.
operating_expenses_df

Unnamed: 0_level_0,EXPENSE
EXPENSE SUBCATEGORY,Unnamed: 1_level_1
KINHTO,2858.7
SECURITY,1678.4
ΕΝΟΙΚΙΟ,15200.0
ΛΟΓΙΣΤΗΣ,3615.0
ΜΙΣΘΟΣ ΥΠΑΛΛΗΛΟΥ,16869.58
ΝΕΡΟ,26.0
ΠΡΟΜΗΘΕΙΑ POS,1043.28
ΡΕΥΜΑ,7321.79
ΤΗΛΕΦΩΝΟ,3150.78


In [28]:
# Donut chart of each operating-expense subcategory's share of the total.
# Plotly Express' `labels` argument is a {column name: display label} mapping,
# not the sector labels, so the index is no longer passed there; `names`
# provides the sector labels. The grouped frame is flattened with
# reset_index() so both arguments can simply name columns.
fig = ptx.pie(
    operating_expenses_df.reset_index(),
    names='EXPENSE SUBCATEGORY',
    values='EXPENSE',
    title="Operating Expense Categories",
    hole=0.6,
)

# Show only the percentage, inside each sector; update_traces returns the
# figure, so it is rendered as the cell output.
fig.update_traces(textposition='inside', textfont_size=15, textinfo='percent')


Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.



In [31]:
# Restrict the ledger to rows dated in calendar year 2022.
# Relies on DATE already being a datetime column (the `.dt` accessor).
yearly_expenses_df = expenses_df.loc[
    lambda ledger: ledger['DATE'].dt.year == 2022
]

In [32]:
# Display the ledger rows whose DATE falls in 2022.
yearly_expenses_df

Unnamed: 0,DATE,EXPENSE CATEGORY,EXPENSE SUBCATEGORY,EXPENSE
324,2022-01-20,ΠΑΓΙΑ ΕΞΟΔΑ,ΕΝΟΙΚΙΟ,400.00
325,2022-01-20,ΠΑΓΙΑ ΕΞΟΔΑ,ΤΗΛΕΦΩΝΟ,107.00
326,2022-01-20,ΠΑΓΙΑ ΕΞΟΔΑ,KINHTO,81.19
327,2022-01-20,ΠΑΓΙΑ ΕΞΟΔΑ,ΡΕΥΜΑ,116.24
328,2022-01-20,ΠΑΓΙΑ ΕΞΟΔΑ,SECURITY,240.68
...,...,...,...,...
437,2022-12-20,ΠΑΓΙΑ ΕΞΟΔΑ,ΠΡΟΜΗΘΕΙΑ POS,32.00
438,2022-12-20,ΠΑΓΙΑ ΕΞΟΔΑ,ΛΟΓΙΣΤΗΣ,100.00
439,2022-12-20,ΑΣΦΑΛΙΣΤΙΚΕΣ ΕΙΣΦΟΡΕΣ,ΤΑΣΟΣ,248.00
440,2022-12-20,ΑΣΦΑΛΙΣΤΙΚΕΣ ΕΙΣΦΟΡΕΣ,ΒΙΚΥ (ΙΚΑ+ΕΠΙΚΟΥΡΙΚΑ),286.34


In [33]:
# Total spend per top-level expense category for the 2022 rows only.
# NOTE(review): the rendered result lists 'ΕΚΤΑΚΤΑ ΕΞΟΔΑ' twice, i.e. two
# visually identical labels are grouped separately. Possibly stray whitespace
# or look-alike Latin/Greek letters in the sheet — TODO confirm and normalise.
yearly_expenses_category_df = (
    yearly_expenses_df
    .groupby('EXPENSE CATEGORY')[['EXPENSE']]
    .sum()
)

In [34]:
# Display the 2022 summed EXPENSE per EXPENSE CATEGORY.
yearly_expenses_category_df

Unnamed: 0_level_0,EXPENSE
EXPENSE CATEGORY,Unnamed: 1_level_1
ΕΚΤΑΚΤΑ ΕΞΟΔΑ,147.74
ΑΣΦΑΛΙΣΤΙΚΕΣ ΕΙΣΦΟΡΕΣ,6688.93
ΕΚΤΑΚΤΑ ΕΞΟΔΑ,14.26
ΕΠΕΝΔΥΣΕΙΣ,685.6
ΠΑΓΙΑ ΕΞΟΔΑ,16438.45


In [35]:
# From the 2022 rows, keep only those filed under the 'ΠΑΓΙΑ ΕΞΟΔΑ' category.
yearly_operating_expenses_df = yearly_expenses_df.loc[
    lambda ledger: ledger['EXPENSE CATEGORY'] == 'ΠΑΓΙΑ ΕΞΟΔΑ'
]

In [36]:
# 2022 spend per subcategory within the 'ΠΑΓΙΑ ΕΞΟΔΑ' category.
# The totals are derived from `yearly_expenses_df` rather than from
# `yearly_operating_expenses_df` itself: overwriting a frame with an aggregate
# of itself makes the cell fail on a second run, because 'EXPENSE SUBCATEGORY'
# has by then become the index instead of a column.
yearly_operating_expenses_df = (
    yearly_expenses_df
    .loc[yearly_expenses_df['EXPENSE CATEGORY'] == 'ΠΑΓΙΑ ΕΞΟΔΑ']
    .groupby('EXPENSE SUBCATEGORY')[['EXPENSE']]
    .sum()
)

In [37]:
# Display the 2022 summed EXPENSE per EXPENSE SUBCATEGORY.
yearly_operating_expenses_df

Unnamed: 0_level_0,EXPENSE
EXPENSE SUBCATEGORY,Unnamed: 1_level_1
KINHTO,863.07
SECURITY,437.44
ΕΝΟΙΚΙΟ,4800.0
ΛΟΓΙΣΤΗΣ,1000.0
ΜΙΣΘΟΣ ΥΠΑΛΛΗΛΟΥ,5046.69
ΠΡΟΜΗΘΕΙΑ POS,341.7
ΡΕΥΜΑ,2854.54
ΤΗΛΕΦΩΝΟ,1095.01
