In [79]:
import openpyxl as pyxl
import pandas as pd
import numpy as np
from gspread.utils import a1_to_rowcol, rowcol_to_a1

import shutil, os

import re
from itertools import product
# we only need the function datetime.datetime.now; we can now reference it as dt.now
from datetime import datetime as dt

import sqlite3 as sql

In [2]:
# File name of the SQLite database this notebook works on.
db_filename = 'my-budget-dev-v0.sqlite'

# Run the two helper scripts with the database file name as their argument.
# NOTE(review): judging by the recorded output they recreate an empty database and
# set up the table scheme - existing content of the file is presumably lost; confirm
# before pointing db_filename at real data.
%run nuclear_option.py $db_filename
%run database_setup.py $db_filename

# Connection shared by all following cells (used as the global `db`).
db = sql.connect(db_filename)

Creating an an empty database as "my-budget-dev-v0.sqlite"
Completed creating the database scheme. It looks as follows:


Unnamed: 0,name
0,money_pots
1,event_types
2,budget_pots
3,money_events
4,payments
5,budget_events


The table money_pots: 


Unnamed: 0,key,description,liquid
0,KG,gemeinsames Konto,Yes
1,KE,Extrakonto zum gemeinsamen Konto,Yes
2,KM,Konto Max,Yes
3,KP,Konto Paul,Yes
4,KB,Konto Bundesbank,Yes
5,KC,Consorsbankkonto,No
6,BM,Bargeld Max,Yes
7,BP,Bargeld Paul,Yes
8,CB,Chipkarte Bundesbank,Semi
9,CT,Chipkarte Trianon,Semi


The table event_types: 


Unnamed: 0,type,description,abbreviation
0,Festsetzung,Festsetzen des zu einem Zeitpunkt vorhanden Be...,Fe
1,Kontrolle,positive Überprüfung des theoretischen Geldbet...,Ko
2,Differenz,Korrektur des theoretischen Geldbetrag in eine...,Di
3,Transfer,Übertragung von Geld zwischen zwei Geldtöpfen,T
4,Barzahlung,bares Bezahlen,B
5,Kartenzahlung,Zahlen mit Visa oder Girokarte,K
6,Bankeinzug,Rechnungsbegleichung durch direkten Bankeinzug,BE
7,Überweisung,Rechnungsbegleichung durch Überweisen,U
8,SEPA-Mandat,automatische (regelmäßige) Rechnungsbegleichun...,S
9,Dauerauftrag,automatische (regelmäßige) Rechnungsbegleichun...,D


The table budget_pots: 


Unnamed: 0,key,description,type
0,L,Lebensmittel,
1,A,Ausgehen Restaurant,
2,AE,"Ausgehen Eis, Cafe",A
3,AB,"Ausgehen Döner, Bistro, ...",A
4,AM,"Mensa, Kantine, Kasino",
5,S,Langlebige Produkte,
6,SK,Klamotten,S
7,R,regelmäßige und budgetierte Ausgaben,
8,RM,Mietzahlungen,R
9,RV,Versicherungs- und Vertragsbeträge,R


The table money_events: 


Unnamed: 0,id,type,description,creation_date,modification_dates,comments,complete


The table payments: 


Unnamed: 0,id,money_pot,amount,additional_description,modification_dates,comments,complete


The table budget_events: 


Unnamed: 0,id,budget_pot,amount,additional_description,budget_effect_date,modification_dates,comments,complete


In [3]:
# The Excel workbook to import and the names of the sheets involved.
excel_filename = 'GemeinsameBilanzierung_16_17__dev.xlsx'

current_sheet = 'August'
comment_sheet = 'august_'

# We take here the option data_only since for now we are not interested in the
# formula expressions, only in the values stored for the cells.
wb = pyxl.load_workbook(excel_filename, data_only=True)

# Index the workbook by sheet name; Workbook.get_sheet_by_name() is deprecated.
august = wb[current_sheet]

# Work on a hidden copy of the sheet: the import notes are attached to the copy,
# so the original sheet stays untouched.
august_ = wb.copy_worksheet(august)
august_.title = comment_sheet
august_.sheet_state = 'hidden'

In [4]:
def list_from_range_string(range_string) :
    '''Extract all individual cell names from an excel range.

    Keyword arguments:
        range_string - the excel expression for the range, e.g. 'A1:B3'

    Returns:
        A list with the names of all cells in the range, ordered row by row.

    Exceptions:
        ValueError - if range_string does not contain the ':' separator.

    Example:
        If range_string == 'A1:B3' then the list ['A1', 'B1', 'A2', 'B2', 'A3', 'B3'] is returned
    '''
    # Split at the first colon into the top left and the bottom right cell.
    first_cell, separator, last_cell = range_string.partition(':')
    if not separator :
        # A bare `raise` outside of an except block only yields an unrelated
        # RuntimeError, so state explicitly what is wrong with the input.
        raise ValueError('Expected an excel range of the form "A1:B3", got {!r}'.format(range_string))

    first_row, first_col = a1_to_rowcol(first_cell)
    last_row, last_col = a1_to_rowcol(last_cell)

    # product() varies the column fastest, which gives the row by row order.
    return [rowcol_to_a1(i, j)
            for i, j in product(range(first_row, last_row + 1), range(first_col, last_col + 1))]

In [5]:
def generate_id(date, connection=None) :
    '''Generate a new unique ID in the budgeter on the database. An ID is an integer with 10 digits, where
    the first eight digits are the date (YYYYMMDD) and the last two digits are a serial number.

    Keyword arguments:
        date       - the date on which the ID should be based; any object with a strftime method
        connection - the sqlite3 connection to look up the used IDs in; defaults to the notebook
                     wide connection `db`

    Returns:
        The smallest ID for the given date which is not yet used in the table money_events.

    Example:
        If date is the 17th of August 2017 and the database contains the IDs
        2017081701, 2017081702, 2017081703 and 2017081705, then the id 2017081704 is returned.

    Exceptions:
        IndexError - if all 99 possible serial numbers (01-99) have already been distributed.

        N.B. Could also Except, if the Database Call raises an exception.
    '''
    if connection is None :
        connection = db

    date_int = int(date.strftime('%Y%m%d00'))
    crsr = connection.cursor()
    # Bind the bounds as parameters instead of formatting them into the statement.
    crsr.execute('SELECT id FROM money_events WHERE id BETWEEN ? AND ?', (date_int, date_int + 99))
    used_ids = {row[0] for row in crsr.fetchall()}

    # Serial number 00 is never handed out, so start the search at 01.
    for candidate in range(date_int + 1, date_int + 100) :
        if candidate not in used_ids :
            return candidate

    raise IndexError('Encountered to many ids for the date {}'.format(date))

In [6]:
def date_convert(item) :
    '''Try to get a date from the excel cell, independent if it is represented by a number, i.e. in
    the native excel date format, or a string representation.

    Keyword arguments:
        item - the value of the cell

    Returns:
        Whatever openpyxl's from_excel yields for the item if that conversion succeeds; otherwise
        the result of pd.to_datetime for strings and of pd.Timestamp for everything else.
    '''
    try :
        return pyxl.utils.datetime.from_excel(item)
    except Exception :
        # Not convertible as a native excel date - fall back to pandas. Only ordinary errors
        # are caught here, so that KeyboardInterrupt and SystemExit are no longer swallowed.
        if isinstance(item, str) :
            return pd.to_datetime(item)
        return pd.Timestamp(item)
            

def get_df_by_range(sheet, first_cell, last_cell, date_cols=None) :
    '''Read a given range on the given sheet and return a DataFrame containing the data.

    Keyword arguments:
        sheet      - a openpyxl sheet object which is to be read
        first_cell - the top left cell of the range to be read; in excel cell notation
        last_cell  - the bottom right cell of the range to be read; in excel cell notation
        date_cols  - a column (or list of columns) which are assumed to contain dates and are passed
                     through date_convert; can be given either as number (position inside the range,
                     starting in zero) or as excel column name (e.g. 'C')

    Returns:
        A DataFrame with one row per sheet row which has at least one non-empty cell in the range.
        The columns are numbered from zero; one additional last column holds the excel range
        of the row (e.g. 'A6:D6').

    Exceptions:
        ValueError - if a column given by its excel name lies outside of the range.
    '''
    # Read from the sheet handed in by the caller (not from a notebook global) and remember
    # for every row which cells it came from.
    data_rows = [[cell.value for cell in row] + ['{0}:{1}'.format(row[0].coordinate, row[-1].coordinate)]
                 for row in sheet[first_cell:last_cell]]

    df = pd.DataFrame(data_rows)
    # The appended range column is never empty, so leave it out when looking for empty rows.
    new_index = df.iloc[:, :-1].dropna(how='all').index

    # Normalise date_cols to a list so that a single conversion loop is enough.
    if date_cols is None :
        date_cols = []
    elif isinstance(date_cols, (int, str)) :
        date_cols = [date_cols]

    for col in date_cols :
        if isinstance(col, str) :
            # Translate the excel column name into the position inside the range read.
            position = a1_to_rowcol(col + '1')[1] - a1_to_rowcol(first_cell)[1]
            if not 0 <= position < len(df.columns) - 1 :
                raise ValueError('The column {} is not part of the range {}:{}'.format(col, first_cell, last_cell))
        else :
            position = col
        df.iloc[:, position] = df.iloc[:, position].apply(date_convert)

    return df.loc[new_index]

In [93]:
def _read_money_pot(first_cell, last_cell, pot_key) :
    '''Read one payment list from the comment sheet and label every row with its money pot key.'''
    pot_frame = get_df_by_range(august_, first_cell, last_cell, 1)
    pot_frame.columns = col_titles
    pot_frame['money_pot'] = pot_key
    return pot_frame


# Left hand side of the sheet: the budget bookings (the date is the third column).
budgeting = get_df_by_range(august_, 'A6', 'D130', 2)
budgeting.columns = ['budget_type', 'description', 'date', 'amount', 'excel_range']

# Right hand side: three payment lists with the same layout, one per money pot.
col_titles = ['description', 'date', 'amount', 'excel_range']

max_bargeld = _read_money_pot('H7', 'J130', 'BM')
paul_bargeld = _read_money_pot('K7', 'M130', 'BP')
konto = _read_money_pot('N7', 'P130', 'KG')

# Stack the three payment lists into one frame.
conjoined = pd.concat([max_bargeld, paul_bargeld, konto])

In [94]:
# Pair the budget bookings with the payments which agree in description, date and amount.
# The outer join keeps the unmatched rows of both sides; the indicator column `_merge`
# records where each row came from.
merge_keys = ['description', 'date', 'amount']
all_info = budgeting.merge(conjoined, how='outer', on=merge_keys, indicator=True)

# Flag which is switched to 'Yes' once a row has been handled.
all_info = all_info.assign(prepared='No')

In [None]:
## Putting all the Info which had matching info on the left and the right hand of the budget sheet into
## the database

full_match = all_info[all_info['_merge'] == 'both']


for index, row in full_match.iterrows() :
    database_comment = '''This entry was automatically generated on {} from the excel file {}. It is based 
                          on the cells {} and {}. A note has been added to the respective cells in the sheet {}.
                       '''.format(dt.now().strftime('%Y-%m-%d'), excel_filename, 
                               current_sheet + '!' + row['excel_range_x'],
                               current_sheet + '!' + row['excel_range_y'], comment_sheet)

    the_id = generate_id(row['date'])

    # ToDo : Here we make the simplifying assumption that all account events are card payments
    event_type = 'Kartenzahlung' if row['money_pot'] == 'KG' else 'Barzahlung'

    # The values come straight from the spreadsheet, so they are bound as parameters instead
    # of being formatted into the statement: a quote inside a description would otherwise
    # break (or alter) the SQL. Text values are passed through str() and the date as its
    # string form, which is exactly what the formatted statements used to contain.
    # `with db` commits the three inserts of one event together and rolls all of them back
    # if one fails, so no half-written event is left in the database.
    with db :
        crsr = db.cursor()
        crsr.execute('INSERT INTO money_events VALUES (?, ?, ?, date(?), NULL, ?, NULL);',
                     (the_id, event_type, str(row['description']), str(row['date']), database_comment))
        crsr.execute('INSERT INTO payments VALUES (?, ?, ?, NULL, NULL, NULL, NULL);',
                     (the_id, str(row['money_pot']), float(row['amount'])))
        crsr.execute('INSERT INTO budget_events VALUES (?, ?, ?, NULL, NULL, NULL, NULL, NULL);',
                     (the_id, str(row['budget_type']), float(row['amount'])))

    # Leave a note on every cell of the comment sheet which was used for this entry.
    comment_text = '''On {} this cell was automatically read and inserted into the database {}. The id of 
                      the entry is {}.'''.format(dt.now().strftime('%Y-%m-%d'), db_filename, the_id)
    comment = pyxl.comments.Comment(comment_text, 'budgeter')
    for cell in list_from_range_string(row['excel_range_x']) + \
                list_from_range_string(row['excel_range_y']) :
        august_[cell].comment = comment

# Show the resulting content of the three event tables.
for table in ["money_events", "payments", "budget_events"] :
    print('The table {}: '.format(table))
    display(pd.read_sql_query('SELECT * FROM {};'.format(table), db))

all_info.loc[all_info._merge == 'both', 'prepared'] = 'Yes'

In [96]:
all_info.loc[all_info._merge == 'both', 'prepared'] = 'Yes'

# All entries which mention the 'Schatulle'. na=False treats rows without a description
# as "no match" instead of producing a mask that cannot be used for selecting rows.
schatulle_rows = all_info[all_info['description'].str.contains('Schatulle', na=False)]
display(schatulle_rows)

for index, row in schatulle_rows.iterrows() :
    database_comment = '''This entry was automatically generated on {} from the excel file {}. It is based 
                          on the cells {}. A note has been added to the respective cells in the sheet {}.
                       '''.format(dt.now().strftime('%Y-%m-%d'), excel_filename, 
                               current_sheet + '!' + row['excel_range_y'], comment_sheet)

    # Some entries carry no date of their own; take the date of the first row at or after
    # this one which has a date. Searching only the existing rows avoids running past the
    # end of the frame.
    later_dates = all_info.loc[index:, 'date'].dropna()
    if later_dates.empty :
        raise ValueError('No dated entry found at or after row {} ({})'.format(index, row['excel_range_y']))
    the_date = later_dates.iloc[0]

    the_id = generate_id(the_date)

    crsr = db.cursor()
    # Bind the values as parameters (the description is free text from the spreadsheet) and
    # store the date the ID is based on, so that an undated row does not end up with a
    # NULL date in the database.
    crsr.execute('INSERT INTO money_events VALUES (?, ?, ?, date(?), NULL, ?, NULL);',
                 (the_id, 'Transfer', str(row['description']), str(the_date), database_comment))
    # NOTE(review): nothing is committed here - the insert stays pending on the connection
    # until a later commit; confirm whether that is intended while this cell is unfinished.

    ## No structure to enter the data

Unnamed: 0,budget_type,description,date,amount,excel_range_x,excel_range_y,money_pot,_merge,prepared
122,,Geld aus Schatulle,2017-08-05,1.3,,H10:J10,BM,right_only,No
126,,Geld aus Schatulle,2017-08-20,0.5,,H19:J19,BM,right_only,No
127,,Schatullenkorrektur,2017-08-20,1.05,,H20:J20,BM,right_only,No
128,,Schatullenkorrektur,2017-08-20,-1.05,,H21:J21,BM,right_only,No
131,,Geld aus Schatulle,NaT,1.0,,K7:M7,BP,right_only,No
132,,Geld aus Schatulle,2017-08-08,1.35,,K10:M10,BP,right_only,No


2017-08-05 00:00:00
2017-08-20 00:00:00
2017-08-20 00:00:00
2017-08-20 00:00:00
2017-08-08 00:00:00
2017-08-08 00:00:00


In [87]:
# Paths used to present entries in a separate workbook: the template and the
# temporary copy are given relative to the project directory.
# NOTE(review): project_dir is an absolute path of one machine - the notebook only
# runs elsewhere after adjusting it.
project_dir = '/Users/max/budgeter/'
template_file = './templates/simple_payment_list.xlsx'
temp_name = './present_left_side.xlsx'

# Change into the project directory first, so that the relative paths resolve, then
# copy the template to the temporary file. Note that the changed working directory
# stays in effect for all cells run afterwards.
os.chdir(project_dir)
shutil.copy(template_file, temp_name)

#print('Presenting the unmatched entries on the left side of the balance sheet')
#os.system('open ' + temp_name)

'./present_left_side.xlsx'

In [61]:
# Prepare the temporary workbook (make column G visible) and open it.
#left_side_info = all_info[all_info['_merge'] == 'left_only']

# For a reason I cannot figure out, the 'unhiding' of column G is only stored after saving twice

# First pass: print the content of A1 and the hidden state of column G, then
# un-hide the column and save.
wb_temp = pyxl.load_workbook(temp_name)
active_sheet = wb_temp.active
print(active_sheet['A1'].value)
print(active_sheet.column_dimensions['G'].hidden)
active_sheet.column_dimensions['G'].hidden = False
wb_temp.save(temp_name)

# Second pass: load and save again without changes (the workaround described above).
wb_temp = pyxl.load_workbook(temp_name)
active_sheet = wb_temp.active




wb_temp.save(temp_name)

# Hand the file to the shell command `open`; the return value of os.system is the
# cell result.
os.system('open ' + temp_name)

#for index, row in left_side_info.iterrows() :
    

You can enter simple transaction here.  
Some data restrictions are enforced
Please remember to save before exiting.
True


  warn(msg)


0

In [None]:
# Write the workbook - including the hidden sheet copy and the notes attached to its
# cells - to a new file, leaving the file it was loaded from untouched.
# NOTE(review): the workbook was loaded with data_only=True, so formulas are presumably
# stored as plain values in this file - confirm that this is acceptable.
wb.save('GemBil.xlsx')

In [None]:
# Close the database connection.
# NOTE(review): close() does not commit - changes which were not committed before are
# presumably lost; verify that every cell which writes also commits.
db.close()

In [19]:
# Show the first 20 entries which only appear on the left hand side of the sheet.
# NOTE(review): the only assignment of left_side_info visible in this notebook is commented
# out, so on a fresh kernel this line raises a NameError.
display(left_side_info[0:20])

Unnamed: 0,budget_type,description,date,amount,excel_range_x,excel_range_y,money_pot,_merge
4,R,Vodafone,2017-08-28,-19.99,A10:D10,,,left_only
5,M,Telefonie,2017-08-28,-6.2,A11:D11,,,left_only
6,R,Rechtsschutzversicherung,2017-01-30,-13.9,A12:D12,,,left_only
8,G,GEW,2017-07-05,-2.5,A14:D14,,,left_only
9,R,Semestergebühr Paul,2016-06-21,-46.151667,A15:D15,,,left_only
10,R,Semestergebühr Max,2016-06-21,-50.565,A16:D16,,,left_only
11,R,GEZ,2017-06-29,-17.5,A17:D17,,,left_only
13,R,Handy Paul,2017-08-10,-7.99,A19:D19,,,left_only
16,G,Backblaze Max,2017-04-18,-3.770833,A22:D22,,,left_only
17,R,Sommertickets,2017-07-20,-48.0,A23:D23,,,left_only
