# import modules and processed csv

In [26]:
# import relevant modules
import plotly.express as px
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pathlib import Path
from PyPDF2 import PdfMerger
import re
import shutil

# set the width of the notebook's .container element to 80%
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

# load the processed bank statement (semicolon-separated csv) into a DataFrame
finances = pd.read_csv(
    'bank_statement_processed.csv',
    sep=';',
)

# create dfs for the charts

## group and colors dict

In [27]:
# maps each maingroup name to the hex color code assigned to it;
# the data-preparation cells look these names up in the 'maingroup' column
groups_and_colors = {
    'apartment': '#e5d2b3',
    'contracts': '#9c917f',
    'dog': '#543400',
    'cost of living': '#244c13',
    'car': '#b2996e',
    'insurances': '#93c47d',
    'savings': '#9e722d',
    'other expenses': '#c79057',
    'quality time': '#e06666',
    'salary': '#668326',
    'other income': '#7f9a73',
}

## sunburst chart 1 - expenses

In [28]:
# select the expense rows, then drop the 'transfers' subgroup
finances_expenses = (
    finances
    .loc[finances['category'] == 'expenses']
    .loc[lambda rows: rows['subgroup'] != 'transfers']
)

# sum the amounts per (maingroup, subgroup) pair
finances_expenses_grouped = (
    finances_expenses
    .groupby(['maingroup', 'subgroup'])['amount']
    .sum()
    .reset_index(name='total_amount')
)
# store the totals as absolute values rounded to two decimals
finances_expenses_grouped['total_amount'] = finances_expenses_grouped['total_amount'].abs().round(2)

# assign every row the hex code of the group whose name occurs in its maingroup
# (case-insensitive regex search); rows matching no group keep the empty string
finances_expenses_grouped['color'] = ''
for group_name, hex_code in groups_and_colors.items():
    matches_group = finances_expenses_grouped['maingroup'].str.contains(
        group_name, flags=re.I, regex=True, na=False)
    finances_expenses_grouped.loc[matches_group, 'color'] = hex_code

# identity mapping color -> color, passed to plotly as color_discrete_map
colorMapSubset1 = {c: c for c in finances_expenses_grouped['color']}

# print df for testing
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
#     display(finances_expenses_grouped)

## sunburst chart 2 - income

In [29]:
# select the income rows, then drop the 'transfers' subgroup
finances_income = (
    finances
    .loc[finances['category'] == 'income']
    .loc[lambda rows: rows['subgroup'] != 'transfers']
)

# sum the amounts per (maingroup, subgroup) pair
finances_income_grouped = (
    finances_income
    .groupby(['maingroup', 'subgroup'])['amount']
    .sum()
    .reset_index(name='total_amount')
)
# store the totals as absolute values rounded to two decimals
finances_income_grouped['total_amount'] = finances_income_grouped['total_amount'].abs().round(2)

# assign every row the hex code of the group whose name occurs in its maingroup
# (case-insensitive regex search); rows matching no group keep the empty string
finances_income_grouped['color'] = ''
for group_name, hex_code in groups_and_colors.items():
    matches_group = finances_income_grouped['maingroup'].str.contains(
        group_name, flags=re.I, regex=True, na=False)
    finances_income_grouped.loc[matches_group, 'color'] = hex_code

# identity mapping color -> color, passed to plotly as color_discrete_map
colorMapSubset2 = {c: c for c in finances_income_grouped['color']}

# print df for testing
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
#     display(finances_income_grouped)

## bar chart - all transactions

In [31]:
# drop the 'transfers' subgroup from the full transaction list
# (note: this rebinds `finances` itself to the filtered frame)
finances = finances.loc[finances['subgroup'] != 'transfers']

# sum the amounts per (category, maingroup) pair
finances_grouped = (
    finances
    .groupby(['category', 'maingroup'])['amount']
    .sum()
    .reset_index(name='total_amount')
)
# store the totals as absolute values rounded to two decimals
finances_grouped['total_amount'] = finances_grouped['total_amount'].abs().round(2)

# add a string-typed copy of the totals, used to build the bar labels
convert_dict = {'total_amount_str': str}
finances_grouped = (
    finances_grouped
    .assign(total_amount_str=finances_grouped['total_amount'])
    .astype(convert_dict)
)

# label shown inside each bar segment: '<maingroup>: <total>'
finances_grouped['label'] = finances_grouped['maingroup'] + ': ' + finances_grouped['total_amount_str']

# assign every row the hex code of the group whose name occurs in its maingroup
# (case-insensitive regex search); rows matching no group keep the empty string
finances_grouped['color'] = ''
for group_name, hex_code in groups_and_colors.items():
    matches_group = finances_grouped['maingroup'].str.contains(
        group_name, flags=re.I, regex=True, na=False)
    finances_grouped.loc[matches_group, 'color'] = hex_code

# identity mapping color -> color, passed to plotly as color_discrete_map
colorMapSubset3 = {c: c for c in finances_grouped['color']}
    
# print df for testing
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
#     display(finances_grouped)

# create charts

## sunburst charts

In [32]:
# chart 1 - expenses: sunburst over maingroup -> subgroup, sized by total_amount;
# update_traces / update_layout return the figure, so the calls are chained
fig_expenses = (
    px.sunburst(
        finances_expenses_grouped,
        path=['maingroup', 'subgroup'],
        values='total_amount',
        color='color',
        color_discrete_map=colorMapSubset1,
    )
    .update_traces(textinfo='label+value+percent parent')
    .update_layout(
        autosize=False,
        height=1500,
        width=1500,
        title_text='expenses in €',
        title_x=0.5,
        title_font_size=33,
    )
)

# chart 2 - income: same construction on the income frame
fig_income = (
    px.sunburst(
        finances_income_grouped,
        path=['maingroup', 'subgroup'],
        values='total_amount',
        color='color',
        color_discrete_map=colorMapSubset2,
    )
    .update_traces(textinfo='label+value+percent parent')
    .update_layout(
        autosize=False,
        height=1500,
        width=1500,
        title_text='income in €',
        title_x=0.5,
        title_font_size=33,
    )
)

# write each chart as a pdf into the temp folder (created on demand)
filepath_expenses = Path('results/temp/expenses_sunburst_diagramm.pdf')
filepath_expenses.parent.mkdir(parents=True, exist_ok=True)
fig_expenses.write_image(filepath_expenses)

filepath_income = Path('results/temp/income_sunburst_diagramm.pdf')
filepath_income.parent.mkdir(parents=True, exist_ok=True)
fig_income.write_image(filepath_income)

# render both charts in the notebook
fig_expenses.show()
fig_income.show()

## bar chart

In [33]:
# bar chart - all transactions: one bar per category, one labelled segment per
# maingroup; update_traces / update_layout return the figure, so they are chained
fig_bar = (
    px.bar(
        finances_grouped,
        x="category",
        y="total_amount",
        title="expenses vs. income in €",
        height=1300,
        width=1500,
        text='label',
        color='color',
        color_discrete_map=colorMapSubset3,
    )
    .update_traces(textposition='inside')
    .update_layout(
        font=dict(size=11),
        showlegend=False,
        title_x=0.5,
        title_font_size=33,
        # fixed y-axis ticks: start at 0, one tick every 100
        yaxis=dict(tickmode='linear', tick0=0, dtick=100),
    )
)

# write the bar chart as a pdf into the temp folder (created on demand)
filepath_bar = Path('results/temp/expenses_vs_income_barplot.pdf')
filepath_bar.parent.mkdir(parents=True, exist_ok=True)
fig_bar.write_image(filepath_bar)

# render the bar chart in the notebook
fig_bar.show()

# create pdf with all three charts

In [34]:
# Merge the three temporary chart pdfs into a single file, results/plots.pdf,
# and remove the temp folder afterwards.

# merger collects the pages of every appended pdf
merger = PdfMerger()

# individual chart pdfs, in the order they should appear in the merged file
pdf_files = [filepath_expenses, filepath_income, filepath_bar]

try:
    # append every individual chart to 'merger'
    for pdf_file in pdf_files:
        merger.append(pdf_file)

    # save merged pdf file containing all three pdf files
    merger.write("results/plots.pdf")
finally:
    # close the merger even when appending or writing raised,
    # so it is never left open after a failure
    merger.close()

# delete temp folder with the three individual charts; this line is only
# reached when the merge succeeded, so the temp pdfs survive a failed merge
shutil.rmtree('results/temp')