In [1]:
# Reload imported modules automatically before each cell is executed, so that edits
# to the local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import openpyxl as pyxl
import pandas as pd
import numpy as np

from bidict import bidict

from excel_helpers import *
from db_entries import *
from unified_excel_sheet import *

# json is needed to parse the sheet definition file. It is imported explicitly here;
# before, it was presumably only available through one of the star imports above.
# The stdlib imports stay below the star imports on purpose, so that the star imports
# cannot shadow them.
import json
import re
from itertools import chain
# we only need the function datetime.datetime.now; we can now reference it as dt.now
from datetime import datetime as dt

import sqlite3 as sql

import logging
logger = logging.getLogger()

In [3]:
%%capture
# %%capture (must stay the first line of the cell) swallows the output of this cell,
# including whatever the two scripts below print.

db_filename = 'my-budget-dev-v3.sqlite'

# NOTE(review): judging by the script names, nuclear_option.py presumably wipes the
# database file and database_setup.py recreates the schema -- confirm before pointing
# db_filename at a database that holds real data.
%run nuclear_option.py $db_filename
%run database_setup.py $db_filename

db = sql.connect(db_filename)

# Snapshot of the event_groups table, indexed by group_id. It is read once here; none
# of the cells shown in this notebook refreshes it after inserting new groups.
event_groups_df = pd.read_sql('SELECT * FROM event_groups', db, index_col='group_id')

In [4]:
excel_filename = 'GemeinsameBilanzierung_16_17__dev.xlsx'

# Name of the month sheet that is imported and title of its hidden working copy.
current_sheet = 'August'
comment_sheet = 'august_'

# data_only=True: we want the cached cell values, for now we are not interested
# in the formula expressions.
wb = pyxl.load_workbook(excel_filename, data_only=True)

# Index the workbook by sheet name; Workbook.get_sheet_by_name is deprecated in openpyxl.
august = wb[current_sheet]
# Hidden copy of the month sheet; the later cells read the data from it and attach
# their notes to it.
august_ = wb.copy_worksheet(august)
august_.title = comment_sheet
august_.sheet_state = 'hidden'

In [5]:
def autogenerate_database_comment_multiple_excel_ranges(filename, ranges,
                                                        comment_sheet) :
    """Build the comment stored with a database entry generated from Excel cells.

    Keyword Arguments:
        filename      -- name of the Excel file the entry was read from
        ranges        -- non-empty sequence of range strings (e.g. 'August!A6:D6')
        comment_sheet -- the sheet that received the note; either a worksheet-like
                         object with a ``title`` attribute or the sheet title itself

    Returns the comment text. Raises ValueError if ``ranges`` is empty.
    """
    if len(ranges) == 0 :
        raise ValueError('You should provide at least one Excel range')

    # A plain string also has a ``title`` attribute (the bound method str.title), so a
    # title string has to be recognised explicitly instead of via attribute access.
    if isinstance(comment_sheet, str) :
        sheet_title = comment_sheet
    else :
        sheet_title = comment_sheet.title

    # 'A', 'A and B', 'A, B and C', ...
    ranges_string = ', '.join(ranges[:-1]) + \
                    (' and ' if len(ranges) > 1 else '') + ranges[-1]

    return ('This entry was automatically generated from the excel file {}. '
            'It is based on the cells {}. A note has been '
            'added to the respective cells in the sheet {}.').format(
                filename, ranges_string, sheet_title)

def autogenerate_database_comment(filename, range_, comment_sheet) :
    """Build the database comment for an entry that is based on one single Excel range.

    Convenience wrapper: puts ``range_`` into a one-element list and delegates to
    autogenerate_database_comment_multiple_excel_ranges.
    """
    single_range = [range_]
    return autogenerate_database_comment_multiple_excel_ranges(
        filename, single_range, comment_sheet)

def autogenerate_excel_comment(date, db_filename, the_id) :
    """Build the note that is attached to Excel cells which were read into the database.

    Keyword Arguments:
        date        -- the date of the import, already formatted as text
        db_filename -- name of the database the cells were inserted into
        the_id      -- id of the resulting database entry

    Returns the note text.
    """
    # All three values go through one single format call, so every placeholder
    # receives the value it is meant for.
    return ('On {} this cell was automatically read and '
            'inserted into the database {}. The id of the entry is {}.').format(
                date, db_filename, the_id)

In [6]:
def automatically_enter_payment_row(date, payment_type, description, amount, money_pot, budget_pot,
                                    ranges, excel_filename, current_sheet, comment_sheet, db_filename) :
    """Enter one payment into the database and annotate the Excel cells it came from.

    Keyword Arguments:
        date, payment_type, description, amount, money_pot, budget_pot
                       -- passed on to put_payment_into_database_autogenerated
        ranges         -- list of range strings (without sheet name) the payment is based on
        excel_filename -- name of the Excel file, used for the database comment
        current_sheet  -- the worksheet the ranges refer to (its title prefixes the ranges)
        comment_sheet  -- the worksheet whose cells receive the note
        db_filename    -- name of the database, used for the Excel note

    The database connection is the notebook-level ``db``.
    Returns the id reported by put_payment_into_database_autogenerated.
    """
    list_of_ranges = [current_sheet.title + '!' + a_range for a_range in ranges]

    # Hand over the sheet itself, exactly as automatically_enter_transfer_row does: the
    # comment generator looks up the title on its own.
    database_comment = autogenerate_database_comment_multiple_excel_ranges(
        excel_filename, list_of_ranges, comment_sheet)

    the_id = put_payment_into_database_autogenerated(db, date, payment_type, description, amount,
                                                     money_pot, budget_pot, database_comment)

    excel_comment = autogenerate_excel_comment(
        dt.now().strftime('%Y-%m-%d'), db_filename, the_id)

    # Get a flat list of all referenced cells in the ranges
    cells = list(chain.from_iterable(list_from_range_string(a_range) for a_range in ranges))

    put_comment_into_excel(comment_sheet, cells, excel_comment)

    return the_id

def automatically_enter_transfer_row(date, transfer_type, description, amount, money_pot_source, 
                                     money_pot_sink, ranges, excel_filename, current_sheet, 
                                     comment_sheet, db_filename, effect_date = None) :
    """Enter one transfer into the database and annotate the Excel cells it came from.

    Keyword Arguments:
        date, transfer_type, description, amount, money_pot_source, money_pot_sink
                       -- passed on to put_transfer_into_database_autogenerated
        ranges         -- list of range strings (without sheet name) the transfer is based on
        excel_filename -- name of the Excel file, used for the database comment
        current_sheet  -- the worksheet the ranges refer to (its title prefixes the ranges)
        comment_sheet  -- the worksheet whose cells receive the note
        db_filename    -- name of the database, used for the Excel note
        effect_date    -- currently not used by this function

    The database connection is the notebook-level ``db``.
    Returns the id reported by put_transfer_into_database_autogenerated.
    """
    # NOTE(review): effect_date is accepted but never forwarded to the database helper --
    # confirm whether put_transfer_into_database_autogenerated should receive it.

    # Prefix every range with the title of the sheet it lives in
    qualified_ranges = [current_sheet.title + '!' + cell_range for cell_range in ranges]

    db_note = autogenerate_database_comment_multiple_excel_ranges(
        excel_filename, qualified_ranges, comment_sheet)

    the_id = put_transfer_into_database_autogenerated(
        db, date, transfer_type, description, amount,
        money_pot_source, money_pot_sink, comment=db_note)

    today = dt.now().strftime('%Y-%m-%d')
    excel_note = autogenerate_excel_comment(today, db_filename, the_id)

    # Flatten the cells of all ranges into one list
    cells_per_range = [list_from_range_string(cell_range) for cell_range in ranges]
    touched_cells = []
    for range_cells in cells_per_range :
        touched_cells.extend(range_cells)

    put_comment_into_excel(comment_sheet, touched_cells, excel_note)

    return the_id

In [7]:
def subset_data_frame(data_frame, rules) :
    """Subset a pandas.DataFrame by a dictionary of rules.

    The rules should have the following form: {<column name> : <restriction>} where
    <restriction> is either a number, a string starting with '<' or '>' and followed by
    a number, or a general string. If it is a simple string or number, the subsetting
    expression
        data_frame[data_frame[<column name>] == <restriction>]
    will be used. The other case will be interpreted as
        data_frame[data_frame[<column name>] <|> <number>].

    The rules are applied one after the other (logical AND). As soon as a rule leaves
    no row, the empty result is returned without evaluating the remaining rules.

    Raises ValueError if a rule starts with '<' or '>' but is not followed by a number.
    """

    result = data_frame
    for column, restriction in rules.items() :
        # startswith is also safe for the empty string, which is an ordinary equality rule
        if isinstance(restriction, str) and restriction.startswith(('<', '>')) :
            try :
                number = float(restriction[1:])
            except ValueError as err :
                raise ValueError(
                    'After a rule starting with < or > you need to specify a number') from err
            if restriction[0] == '<' :
                result = result.loc[result[column] < number]
            else :
                result = result.loc[result[column] > number]
        else :
            result = result.loc[result[column] == restriction]

        # Same early exit for comparison rules and equality rules
        if len(result) == 0 :
            logger.info('After the rule ' + str((column, restriction)) + ' no item was left')
            return result

    logger.info(str(len(result)) + ' items were selected.')
    return result

def multirule_subset(data_frame, rules) :
    """Collect multiple subsets (union of multiple subsets) of a pandas.DataFrame into a single DataFrame
    by using subset_data_frame.

    Keyword Arguments:
        data_frame -- the DataFrame to be subsetted
        rules      -- a list of dictionaries, where the dicts fulfill the requirements specified for 
                      subset_data_frame

    The subsets are stacked in the order of the rules; a row that matches several rule
    dictionaries appears once per match. Without any rule an empty DataFrame with the
    columns of data_frame is returned.
    """
    subsets = [subset_data_frame(data_frame, rule) for rule in rules]
    if not subsets :
        return pd.DataFrame(columns=data_frame.columns)
    # One concat instead of repeated DataFrame.append, which pandas 2.0 removed
    return pd.concat(subsets)

In [8]:
# Column titles for the budgeting block and for the three payment blocks of the sheet.
# NOTE(review): both lists hold one title more than the Excel range has columns, so
# get_df_by_range presumably adds an 'excel_range' column itself -- confirm in excel_helpers.
col_titles_budget = ['budget_pot', 'description', 'date', 'amount', 'excel_range']
col_titles_payments = ['description', 'date', 'amount', 'excel_range']


def _read_payment_block(first_cell, last_cell, pot_code) :
    """Read one payment block of the hidden month sheet and tag it with its money pot."""
    block = get_df_by_range(august_, first_cell, last_cell, 1)
    block.columns = col_titles_payments
    block['money_pot'] = pot_code
    return block


budgeting = get_df_by_range(august_, 'A6', 'D130', 2)
budgeting.columns = col_titles_budget

# One block per money pot
max_bargeld = _read_payment_block('H7', 'J130', 'BM')
paul_bargeld = _read_payment_block('K7', 'M130', 'BP')
konto = _read_payment_block('N7', 'P130', 'KG')

all_payments = pd.concat([max_bargeld, paul_bargeld, konto])

# Outer merge of budget entries and payments; the indicator column '_merge' records
# whether a row was found on both sides. Afterwards every row is flagged as not treated
# yet, the timestamps are reduced to plain dates and the amounts rounded to cents.
all_info = (
    pd.merge(budgeting, all_payments, how='outer',
             on=['description', 'date', 'amount'], indicator=True)
    .assign(treated='No')
    .assign(date=lambda frame : frame['date'].dt.date,
            amount=lambda frame : frame['amount'].round(2))
)

In [10]:
## Build the data entry workbook for the cash ("Bargeld") payments

# One rule dictionary per cash pot; multirule_subset stacks the matches of both.
rules = [dict(_merge='both', money_pot=pot, treated='No') for pot in ('BM', 'BP')]
full_match_bargeld = multirule_subset(all_info, rules)

bargeld_wb_filename = 'bargeld_payments.xlsx'

definition_data = 'base_excel.json'

with open(definition_data) as f :
    json_data = f.read()
sheet_def = json.loads(json_data)

main_sheet = sheet_def['main_sheet']

# Values pre-filled in the workbook
for column_key, default_value in [('type_description', 'Barzahlung'),
                                  ('is_budget_event', 'X'),
                                  ('is_payment', 'X'),
                                  ('ignore', '')] :
    main_sheet[column_key]['default'] = default_value

# Columns that get the 'hidden' flag in the sheet definition
for column_key in ['is_complex', 'temporary_id', 'is_transfer',
                   'is_recieving', 'money_pot_sink', 'money_sink_name'] :
    main_sheet[column_key]['hidden'] = True

create_excel_table_from_data(full_match_bargeld, bargeld_wb_filename, db, sheet_def)
checked_full_match_bargeld = use_excel_for_data_entry(bargeld_wb_filename, copy_mode=False)

The file bargeld_payments.xlsx will be opened in Excel for data entry. Enter Q to abort, enter anything else to continue: 
Make any input to continue. Abort with Q: 
The read data is:


Unnamed: 0,ignore,is_complex,temporary_id,event_type,type_description,description,date,amount,money_pot,money_pot_name,...,budget_type,is_recieving,is_payment,is_transfer,in_group,group_name,excel_range_x,excel_range_y,_merge,treated
68,,,,B,Barzahlung,Eisessen,2017-08-13,-4.8,BM,Bargeld Max,...,Ausgehen Restaurant,,X,,,,A74:D74,H13:J13,both,No
69,X,,,B,Barzahlung,Wasser,2017-08-13,-1.3,BM,Bargeld Max,...,Lebensmittel,,X,,,,A75:D75,H14:J14,both,No
79,X,,,B,Barzahlung,Eisessen,2017-08-17,-1.2,BM,Bargeld Max,...,Ausgehen Restaurant,,X,,,,A85:D85,H16:J16,both,No
84,,,,B,Barzahlung,Toilette,2017-08-18,-0.25,BM,Bargeld Max,...,Miscellaneous,,X,,,,A90:D90,H17:J17,both,No
85,,,,B,Barzahlung,Spitzer,2017-08-19,-2.0,BM,Bargeld Max,...,Langlebige Produkte,,X,,,,A91:D91,H18:J18,both,No
98,,,,B,Barzahlung,Wein,2017-08-22,-10.0,BM,Bargeld Max,...,Ausgehen Restaurant,,X,,,,A103:D103,H23:J23,both,No
101,,,,B,Barzahlung,Tapas,2017-08-23,-14.0,BM,Bargeld Max,...,Ausgehen Restaurant,,X,,,,A106:D106,H24:J24,both,No
105,,,,B,Barzahlung,Eis essen,2017-08-25,-1.2,BM,Bargeld Max,...,Ausgehen Restaurant,,X,,,,A110:D110,H26:J26,both,No
111,,,,B,Barzahlung,Eis essen,2017-08-28,-1.2,BM,Bargeld Max,...,Ausgehen Restaurant,,X,,,,A116:D116,H27:J27,both,No
30,,,,B,Barzahlung,Brot,2017-08-01,-0.95,BP,Bargeld Paul,...,Lebensmittel,,X,,,,A36:D36,K8:M8,both,No



If you are not satisfied with the read data, enter any number. If you do this, the workbook will be opened again. Continue with any other input; abort with Q: 
OK.


In [11]:
## Enter the confirmed cash payments into the database.
# Rows flagged as ignored or complex are not entered; they are kept in untreated_data.
skip_mask = (checked_full_match_bargeld['ignore'] == 'X') | \
            (checked_full_match_bargeld['is_complex'] == 'X')
untreated_data = checked_full_match_bargeld.loc[skip_mask].copy()

for index, row in checked_full_match_bargeld.loc[~skip_mask].iterrows() :
    # pd.notna also copes with non-numeric cell content, where np.isnan raises a TypeError
    if pd.notna(row['temporary_id']) :
        raise NotImplementedError('The grouping of events by ID is not implemented yet.')

    the_id = automatically_enter_payment_row(row['date'], 'Barzahlung',
        row['description'], row['amount'], row['money_pot'],
        row['budget_pot'], [row['excel_range_x'], row['excel_range_y']],
        excel_filename, august, august_, db_filename)

    if pd.notna(row['in_group']) :
        # sqlite3 binds plain Python numbers; the spreadsheet value may be a numpy scalar
        group_id = float(row['in_group'])
        crsr = db.cursor()
        # Ask the database itself whether the group exists: event_groups_df was read once
        # when the database was opened, so a group created for an earlier row would be
        # missing there and would be inserted a second time.
        crsr.execute('SELECT 1 FROM event_groups WHERE group_id = ?', (group_id,))
        if crsr.fetchone() is None :
            group_name = row['group_name']
            if not isinstance(group_name, str) or group_name == '' :
                raise ValueError(
                    'The new group {} needs a group name.'.format(row['in_group']))
            if group_name[0] == '=' :
                raise ValueError(
                    "You should not use a formula as group name! The given group name was {}.".format(
                        group_name))
            # Bound parameters instead of string formatting: the name comes from a spreadsheet
            crsr.execute('INSERT INTO event_groups VALUES (?, ?)', (group_id, group_name))
        crsr.execute('INSERT INTO event_in_group VALUES (?, ?)', (group_id, int(the_id)))
        db.commit()

    all_info.loc[index, 'treated'] = 'Yes'

In [13]:
## Build the data entry workbook for the payments from the joint account
## (money pot 'KG', "gemeinsames Konto"); the payment type can be changed in that workbook
subsetting_rules = dict(_merge='both', money_pot='KG', treated='No')
full_match_konto = subset_data_frame(all_info, subsetting_rules)

konto_wb_filename = 'konto_payments.xlsx'

definition_data = 'base_excel.json'

with open(definition_data) as f :
    json_data = f.read()
sheet_def = json.loads(json_data)

main_sheet = sheet_def['main_sheet']

# Values pre-filled in the workbook
for column_key, default_value in [('type_description', 'Kartenzahlung'),
                                  ('is_budget_event', 'X'),
                                  ('is_payment', 'X'),
                                  ('ignore', '')] :
    main_sheet[column_key]['default'] = default_value

# Columns that get the 'hidden' flag in the sheet definition
for column_key in ['is_complex', 'temporary_id', 'is_transfer',
                   'is_recieving', 'money_pot_sink', 'money_sink_name'] :
    main_sheet[column_key]['hidden'] = True

create_excel_table_from_data(full_match_konto, konto_wb_filename, db, sheet_def)
checked_full_match_konto = use_excel_for_data_entry(konto_wb_filename, copy_mode=False)

The file konto_payments.xlsx will be opened in Excel for data entry. Enter Q to abort, enter anything else to continue: 
Make any input to continue. Abort with Q: 
The read data is:


Unnamed: 0,ignore,is_complex,temporary_id,event_type,type_description,description,date,amount,money_pot,money_pot_name,...,budget_type,is_recieving,is_payment,is_transfer,in_group,group_name,excel_range_x,excel_range_y,_merge,treated
0,,,,K,Kartenzahlung,Miete,2017-08-01,-568.0,KG,gemeinsames Konto,...,regelmäßige und budgetierte Ausgaben,,X,,,,A6:D6,N13:P13,both,No
1,,,,K,Kartenzahlung,Miete FFM,2017-08-02,-450.0,KG,gemeinsames Konto,...,regelmäßige und budgetierte Ausgaben,,X,,,,A7:D7,N20:P20,both,No
2,,,,K,Kartenzahlung,Berufsunfähigkeitsversicherung,2017-08-01,-49.05,KG,gemeinsames Konto,...,regelmäßige und budgetierte Ausgaben,,X,,,,A8:D8,N12:P12,both,No
3,,,,K,Kartenzahlung,Strom EnviaM,2017-08-30,-51.0,KG,gemeinsames Konto,...,regelmäßige und budgetierte Ausgaben,,X,,,,A9:D9,N89:P89,both,No
7,,,,K,Kartenzahlung,Haftpflichtversicherung,2017-08-01,-7.5,KG,gemeinsames Konto,...,regelmäßige und budgetierte Ausgaben,,X,,,,A13:D13,N11:P11,both,No
12,,,,K,Kartenzahlung,Handy Max,2017-08-10,-7.99,KG,gemeinsames Konto,...,regelmäßige und budgetierte Ausgaben,,X,,,,A18:D18,N41:P41,both,No
14,X,,,K,Kartenzahlung,Spotify Max,2017-08-15,-4.99,KG,gemeinsames Konto,...,Arzeneimittel,,X,,,,A20:D20,N52:P52,both,No
15,,,,K,Kartenzahlung,Apple Music Paul,2017-08-25,-4.99,KG,gemeinsames Konto,...,Arzeneimittel,,X,,,,A21:D21,N76:P76,both,No
19,,,,K,Kartenzahlung,Monatskarte FFM,2017-07-31,-87.4,KG,gemeinsames Konto,...,regelmäßige und budgetierte Ausgaben,,X,,,,A25:D25,N9:P9,both,No
21,,,,K,Kartenzahlung,Fitnessstudio,2017-08-03,-39.8,KG,gemeinsames Konto,...,Arzeneimittel,,X,,,,A27:D27,N23:P23,both,No



If you are not satisfied with the read data, enter any number. If you do this, the workbook will be opened again. Continue with any other input; abort with Q: 
OK.


In [14]:
## Enter the confirmed account payments into the database.
# Rows flagged as ignored or complex are not entered; they are added to untreated_data.
skip_mask = (checked_full_match_konto['ignore'] == 'X') | \
            (checked_full_match_konto['is_complex'] == 'X')
untreated_data = pd.concat([untreated_data, checked_full_match_konto.loc[skip_mask]])

for index, row in checked_full_match_konto.loc[~skip_mask].iterrows() :
    # pd.notna also copes with non-numeric cell content, where np.isnan raises a TypeError
    if pd.notna(row['temporary_id']) :
        raise NotImplementedError('The grouping of events by ID is not implemented yet.')

    # Use the payment type chosen in the workbook (pre-filled with 'Kartenzahlung')
    # instead of booking every account payment as the cash type 'Barzahlung'.
    payment_type = row['type_description'] if pd.notna(row['type_description']) \
                   else 'Kartenzahlung'

    the_id = automatically_enter_payment_row(row['date'], payment_type,
        row['description'], row['amount'], row['money_pot'],
        row['budget_pot'], [row['excel_range_x'], row['excel_range_y']],
        excel_filename, august, august_, db_filename)

    if pd.notna(row['in_group']) :
        # sqlite3 binds plain Python numbers; the spreadsheet value may be a numpy scalar
        group_id = float(row['in_group'])
        crsr = db.cursor()
        # Ask the database itself whether the group exists: event_groups_df was read once
        # when the database was opened, so a group created for an earlier row would be
        # missing there and would be inserted a second time.
        crsr.execute('SELECT 1 FROM event_groups WHERE group_id = ?', (group_id,))
        if crsr.fetchone() is None :
            group_name = row['group_name']
            if not isinstance(group_name, str) or group_name == '' :
                raise ValueError(
                    'The new group {} needs a group name.'.format(row['in_group']))
            if group_name[0] == '=' :
                raise ValueError(
                    "You should not use a formula as group name! The given group name was {}.".format(
                        group_name))
            # Bound parameters instead of string formatting: the name comes from a spreadsheet
            crsr.execute('INSERT INTO event_groups VALUES (?, ?)', (group_id, group_name))
        crsr.execute('INSERT INTO event_in_group VALUES (?, ?)', (group_id, int(the_id)))
        db.commit()

    all_info.loc[index, 'treated'] = 'Yes'

In [16]:
## Fully matched rows that have not been entered into the database yet,
## followed by the rows that were skipped on purpose so far
subsetting_rules = dict(_merge='both', treated='No')
remaining_matched = subset_data_frame(all_info, subsetting_rules)

display(remaining_matched, untreated_data)

Unnamed: 0,budget_pot,description,date,amount,excel_range_x,excel_range_y,money_pot,_merge,treated
14,G,Spotify Max,2017-08-15,-4.99,A20:D20,N52:P52,KG,both,No
46,L,Edeka,2017-08-05,-5.43,A52:D52,N31:P31,KG,both,No
69,L,Wasser,2017-08-13,-1.3,A75:D75,H14:J14,BM,both,No
79,A,Eisessen,2017-08-17,-1.2,A85:D85,H16:J16,BM,both,No


Unnamed: 0,ignore,is_complex,temporary_id,event_type,type_description,description,date,amount,money_pot,money_pot_name,...,budget_type,is_recieving,is_payment,is_transfer,in_group,group_name,excel_range_x,excel_range_y,_merge,treated
69,X,,,B,Barzahlung,Wasser,2017-08-13,-1.3,BM,Bargeld Max,...,Lebensmittel,,X,,,,A75:D75,H14:J14,both,No
79,X,,,B,Barzahlung,Eisessen,2017-08-17,-1.2,BM,Bargeld Max,...,Ausgehen Restaurant,,X,,,,A85:D85,H16:J16,both,No
14,X,,,K,Kartenzahlung,Spotify Max,2017-08-15,-4.99,KG,gemeinsames Konto,...,Arzeneimittel,,X,,,,A20:D20,N52:P52,both,No
46,X,,,K,Kartenzahlung,Edeka,2017-08-05,-5.43,KG,gemeinsames Konto,...,Lebensmittel,,X,,,,A52:D52,N31:P31,both,No


In [17]:
## Everything that is neither treated nor already collected in untreated_data goes
## into one more workbook for manual data entry
subsetting_rules = {'treated' : 'No'}
not_yet_treated = subset_data_frame(all_info, subsetting_rules)
labels_to_review = not_yet_treated.index.difference(untreated_data.index)
remaining = not_yet_treated.loc[labels_to_review]

wb_filename = 'remaining.xlsx'
definition_data = 'base_excel.json'

# NOTE(review): here the name of the definition file is handed over, whereas the cash
# and account cells pass the parsed definition dictionary -- presumably
# create_excel_table_from_data accepts both; confirm in unified_excel_sheet.
create_excel_table_from_data(remaining, wb_filename, db, definition_data)
checked_remaining = use_excel_for_data_entry(wb_filename, copy_mode=False)

The file remaining.xlsx will be opened in Excel for data entry. Enter Q to abort, enter anything else to continue: 
Make any input to continue. Abort with Q: Q


RuntimeError: The Excel data entry procedure was aborted.

In [None]:
## Enter the confirmed remaining rows into the database.
# Ignored rows are added to untreated_data; they are taken from checked_remaining,
# the frame that is actually processed in this cell.
skip_mask = checked_remaining['ignore'] == 'X'
untreated_data = pd.concat([untreated_data, checked_remaining.loc[skip_mask]])

for index, row in checked_remaining.loc[~skip_mask].iterrows() :
    # pd.notna also copes with non-numeric cell content, where np.isnan raises a TypeError
    if row['is_complex'] == 'X' or pd.notna(row['temporary_id']) :
        raise NotImplementedError('Use the temporary id to group things here')

    if row['is_payment'] != 'X' :
        # Receiving and transfer rows cannot be entered yet. They are left untouched
        # ('treated' stays 'No'), so that nothing is reported as treated without having
        # been written to the database and no stale id ends up in a group.
        logger.warning('Row {} is not a payment and was not entered into the database.'.format(index))
        continue

    # Rows found on one side of the merge only have no Excel range for the other side
    ranges = [a_range for a_range in (row['excel_range_x'], row['excel_range_y'])
              if pd.notna(a_range)]

    # Prefer the payment type entered in the workbook; fall back to 'Barzahlung'
    payment_type = row.get('type_description')
    if pd.isna(payment_type) :
        payment_type = 'Barzahlung'

    the_id = automatically_enter_payment_row(row['date'], payment_type,
        row['description'], row['amount'], row['money_pot'],
        row['budget_pot'], ranges,
        excel_filename, august, august_, db_filename)

    if pd.notna(row['in_group']) :
        # sqlite3 binds plain Python numbers; the spreadsheet value may be a numpy scalar
        group_id = float(row['in_group'])
        crsr = db.cursor()
        # Ask the database itself whether the group exists: event_groups_df was read once
        # when the database was opened, so a group created for an earlier row would be
        # missing there and would be inserted a second time.
        crsr.execute('SELECT 1 FROM event_groups WHERE group_id = ?', (group_id,))
        if crsr.fetchone() is None :
            group_name = row['group_name']
            if not isinstance(group_name, str) or group_name == '' :
                raise ValueError(
                    'The new group {} needs a group name.'.format(row['in_group']))
            if group_name[0] == '=' :
                raise ValueError(
                    "You should not use a formula as group name! The given group name was {}.".format(
                        group_name))
            # Bound parameters instead of string formatting: the name comes from a spreadsheet
            crsr.execute('INSERT INTO event_groups VALUES (?, ?)', (group_id, group_name))
        crsr.execute('INSERT INTO event_in_group VALUES (?, ?)', (group_id, int(the_id)))
        db.commit()

    all_info.loc[index, 'treated'] = 'Yes'

In [None]:
# Save the workbook under a new name, so the source Excel file is not overwritten.
# NOTE(review): the workbook was loaded with data_only=True; according to the openpyxl
# documentation a workbook saved from that state contains values only, no formulas.
wb.save('GemBil.xlsx')

In [None]:
# Close the sqlite connection; cells that use `db` cannot be run after this one.
db.close()

In [None]:
# Shows the event groups snapshot that was read when the database was opened.
display(event_groups_df)

# NOTE(review): looks like a leftover debugging check for group id 13 -- presumably
# this can be removed; confirm with the author.
if 13 in event_groups_df.index :
    print('hesy')