In [None]:
import piecash
import os
import numpy as np
import pandas as pd
import inspect
from datetime import datetime

In [1]:
#bokeh imports

from bokeh.plotting import figure, output_notebook, show

In [None]:
def checkNameForSubCategory(stringList, names=['Maciek', 'Justyna']):
    '''Look for a person's name among the middle components of an account path.

    Only ``stringList[2:-1]`` is inspected, i.e. the third element up to (but
    not including) the last one. Those components are joined with "," and
    every entry of ``names`` is tested as a substring of the joined text.

    Returns the last entry of ``names`` that occurs in the text, or np.nan
    when none of them does.
    '''
    # keep the matches in the order of `names`; the final one is the result
    hits = [candidate for candidate in names
            if candidate in ",".join(stringList[2:-1])]
    return hits[-1] if hits else np.nan

In [None]:
# Collect every split booked on an EXPENSE account as one row (a list of
# strings); the rows are later used as the body of a DataFrame.

# Build the book path with os.getcwd()/os.path.join rather than the IPython-only
# `%pwd` magic and hard-coded "\\" separators, so the same cell resolves the
# file on any operating system and also when run as plain Python.
pwd_ = os.getcwd()
fileName_ = os.path.join(pwd_, "gnucash_files", "finanse_sql.gnucash")

with piecash.open_book(fileName_) as book:

    transaction_list = []
    for tr in book.transactions:
        split = tr.splits
        for single_row in split:
            # only splits that land on an expense account are of interest
            if single_row.account.type == "EXPENSE":
                # an empty (or whitespace-only) memo is treated as missing
                memo = single_row.memo.strip()
                memo = memo if len(memo) > 0 else np.nan

                # every field is stringified, so a missing memo is stored
                # as the text 'nan'
                tempList = list(map(str, [tr.description,
                                          tr.post_date,
                                          memo,
                                          single_row.account.fullname,
                                          single_row.value,
                                          tr.currency.mnemonic]))
                transaction_list.append(tempList)

In [None]:
# Turn the collected rows into a DataFrame and derive the analysis columns.

sourceData = transaction_list

financeFrame = pd.DataFrame(
    sourceData,
    columns=['Name', 'Date', 'Split Description', 'Account', 'Price', 'Currency'],
).sort_values(['Date', 'Name'])
# the text 'nan' in the memo column stands for "no memo": make it a real NaN
financeFrame['Split Description'] = financeFrame['Split Description'].replace('nan', np.nan)

finalFrame = financeFrame.copy()

# Product: the split memo when there is one, otherwise the transaction name.
# Shop: the transaction name, but only for rows that do have a memo.
cond = finalFrame['Split Description'].isnull()
finalFrame['Product'] = finalFrame['Split Description'].fillna(finalFrame['Name'])
finalFrame['Shop'] = np.where(cond, np.nan, finalFrame['Name'])

# Break the ":"-separated account path into its components, e.g.
#   'Wydatki:Wspólne:Zakupy:Chemia:Osobiste - Justyna:Artykuły Do Makijażu'
#     2nd component                -> Type
#     3rd .. one-before-last       -> SubCategory (re-joined with ":")
#     last component               -> Category
finalFrame['ALL_CATEGORIES'] = finalFrame['Account'].map(lambda account: account.split(":"))
account_parts = finalFrame['ALL_CATEGORIES']

finalFrame['Type'] = account_parts.map(lambda parts: parts[1])
finalFrame['Category'] = account_parts.map(lambda parts: parts[-1])
finalFrame['SubCategory'] = account_parts.map(
    lambda parts: ":".join(parts[2:-1]).strip() if parts[2:-1] else np.nan
)
# person's name found among the middle components, NaN when there is none
finalFrame['SubType'] = account_parts.map(checkNameForSubCategory)

# Price and Date arrive as text: convert them to float / datetime
finalFrame['Price'] = finalFrame['Price'].map(float)
finalFrame['Date'] = finalFrame['Date'].map(lambda text: datetime.strptime(text, "%Y-%m-%d"))

# the source columns have been fully unpacked above and are no longer needed
finalFrame = finalFrame.drop(columns=['Name', 'Split Description', 'Account'])

# Example Bokeh Plots

In [None]:
# output to notebook
output_notebook()

# daily spending for one example category
cat = 'Nabiał'
# Sum only 'Price': finalFrame also carries text and list columns, and since
# pandas 2.0 groupby().sum() no longer drops non-numeric columns silently.
df = finalFrame[finalFrame['Category'] == cat].groupby('Date')[['Price']].sum()
x = df.index
y = df['Price']

# create a plot with title and axis labels
p = figure(title=cat, x_axis_label="Dates", y_axis_label="Price", x_axis_type='datetime')

# draw the same data twice: as a thin step plot (with legend) and a thick line
p.step(x, y, legend_label=cat, line_width=1)
p.line(x, y, line_width=3)

# show the results
show(p)

In [None]:
from bokeh.layouts import gridplot

# output to notebook
output_notebook()

# Daily totals per (Date, Category). Only 'Price' is summed: finalFrame also
# carries text and list columns, and since pandas 2.0 groupby().sum() no longer
# drops non-numeric columns silently.
agg = finalFrame.groupby(['Date', 'Category'])['Price'].sum().reset_index()

# common x axis: every calendar day between the first and the last expense
x = pd.date_range(start=agg['Date'].min(), end=agg['Date'].max(), freq="D")


def _daily_prices(category):
    '''Return the daily 'Price' totals of `category`, aligned to `x`.

    A category only has rows for the days on which something was bought, so
    its totals are re-indexed onto the full daily range; days without an
    expense in that category get 0. The result therefore has exactly one
    value per date in `x`, each attached to its own date.
    '''
    per_day = agg.loc[agg['Category'] == category].set_index('Date')['Price']
    return per_day.reindex(x, fill_value=0)


y1 = _daily_prices("Nabiał")
y2 = _daily_prices("Pieczywo")

# create first figure and plot
p1 = figure(width=480, height=480, title="Nabiał", x_axis_type="datetime")
p1.line(x, y1, color='navy', alpha=0.5)

# create second figure and share both ranges with the first figure,
# so panning/zooming one plot moves the other as well
p2 = figure(width=480, height=480, title="Pieczywo", x_range=p1.x_range, y_range=p1.y_range, x_axis_type="datetime")
p2.line(x, y2, color='firebrick', alpha=0.5)

# create gridplot and show
p_main = gridplot([[p1, p2]], toolbar_location="left")
show(p_main)

In [None]:
# output to notebook
output_notebook()

# Total spending per category for July 2019, largest first.
# The rows are filtered by month first and only 'Price' is summed: finalFrame
# also carries text and list columns, and since pandas 2.0 groupby().sum() no
# longer drops non-numeric columns silently.
month_key = finalFrame['Date'].dt.strftime("%m-%Y")
agg = (
    finalFrame[month_key == "07-2019"]
    .groupby('Category')['Price'].sum()
    .reset_index()
    .sort_values(by='Price', ascending=False)
)

# numeric x positions for the categories
# NOTE(review): x_values is not used by the plot below (the x axis is
# categorical); kept in case other cells rely on it - confirm and remove.
bar_width = 0.9
category_count = agg['Category'].unique().shape[0]
x_values = np.linspace(start=0.25, stop=category_count*(bar_width+0.25), num=category_count)

# the chart shows the ten most expensive categories only
top_categories = agg['Category'][:10].tolist()
top_prices = agg['Price'][:10].tolist()

# create bokeh figure with a categorical x axis
p = figure(width=500, height=500, title="Categories in July2019", x_range=top_categories)

# add categorical bar plots
p.vbar(x=top_categories, top=top_prices, width=bar_width)

# tilt the category labels so they do not overlap
p.xaxis.major_label_orientation = 0.5

# show figure
show(p)

In [11]:
from bokeh.models import Range1d

# render inline in the notebook
output_notebook()

# square canvas with the y axis pinned to the interval [0, 7]
p = figure(width=400, height=400)
p.y_range = Range1d(0, 7)

# NOTE: despite its name, `nan` is an ordinary number (3.5), not float('nan'),
# so every vertex handed to patch() below is finite.
nan = 3.5
patch_x = [1, 2, 3, nan, 4, 5, 6]
patch_y = [6, 7, 5, nan, 7, 3, 6]

# add the filled polygon to the figure
p.patch(patch_x, patch_y)

show(p)
