In [None]:
import openpyxl as pyxl
import pandas as pd
from gspread.utils import a1_to_rowcol, rowcol_to_a1

import re
from itertools import product
# we only need the function datetime.datetime.now; we can now reference it as dt.now
from datetime import datetime as dt

import sqlite3 as sql

In [None]:
# Path of the SQLite database file used by every database call in this notebook.
db_filename = 'my-budget-dev-v0.sqlite'

# Run the two helper scripts, passing the database path as their command-line argument.
# NOTE(review): judging by their names, nuclear_option.py wipes the database and
# database_setup.py (re)creates the schema -- confirm against those scripts.
%run nuclear_option.py $db_filename
%run database_setup.py $db_filename

# Module-level connection shared by the cells below.
db = sql.connect(db_filename)

In [None]:
# Workbook to read, the sheet holding the data, and the title given to the working copy.
excel_filename = 'GemeinsameBilanzierung_16_17__dev.xlsx'

current_sheet = 'August'
comment_sheet = 'august_'

wb = pyxl.load_workbook(excel_filename)


# Index the workbook by sheet name; Workbook.get_sheet_by_name is deprecated in openpyxl.
august = wb[current_sheet]
# Work on a hidden copy of the sheet, so the cell comments added further down
# do not touch the sheet the user edits.
august_ = wb.copy_worksheet(august)
august_.title = comment_sheet
august_.sheet_state = 'hidden'

In [None]:
def list_from_range_string(range_string) :
    '''Extract all individual cell names from an excel range.

    Keyword arguments:
        range_string - The excel expression for the range, e.g. 'A1:B3'

    Returns:
        A list with the A1-style name of every cell in the range, ordered row by row.

    Raises:
        ValueError - if range_string does not contain the ':' separator.

    Example:
        If range_string == 'A1:B3' then the list ['A1', 'B1', 'A2', 'B2', 'A3', 'B3'] is returned
    '''
    first_cell, separator, last_cell = range_string.partition(':')
    if not separator :
        # A bare `raise` without an active exception would only produce an unrelated
        # RuntimeError, so report the actual problem instead.
        raise ValueError('Expected a range like "A1:B3" but got {!r}'.format(range_string))

    first_row, first_col = a1_to_rowcol(first_cell)
    last_row, last_col = a1_to_rowcol(last_cell)

    return [rowcol_to_a1(row, col)
            for row, col in product(range(first_row, last_row + 1), range(first_col, last_col + 1))]

In [None]:
def generate_id(date, connection=None) :
    '''Generate a new unique ID for the table money_events. An ID is an integer with 10 digits, where
    the first eight digits are the date (YYYYMMDD) and the last two digits are a serial number.

    Keyword arguments:
        date       - the date on which the ID should be based; any object with a strftime method
        connection - the sqlite3 connection to query; defaults to the notebook-wide connection db

    Returns:
        The smallest ID for that date with a serial number between 01 and 99 that is not yet
        present in money_events.

    Example:
        If date==DateTime('2017-08-17') and the database contains the IDs
        2017081701, 2017081702, 2017081703 and 2017081705, then the id 2017081704 is returned.

    Exceptions:
        IndexError - if all 99 possible serial numbers (01-99) have already been distributed.

        N.B. Exceptions raised by the database call are passed on unchanged.
    '''
    conn = db if connection is None else connection
    date_int = int(date.strftime('%Y%m%d00'))

    crsr = conn.cursor()
    # Bind the bounds as parameters instead of formatting them into the SQL text.
    crsr.execute('SELECT id FROM money_events WHERE id BETWEEN ? AND ?', (date_int, date_int + 99))
    taken = {row[0] for row in crsr.fetchall()}
    crsr.close()

    for candidate in range(date_int + 1, date_int + 100) :
        if candidate not in taken :
            return candidate
    raise IndexError('Encountered to many ids for the date {}'.format(date))

In [None]:
def date_convert(item) :
    '''Try to get a date/time value from an excel cell value, independent of whether it is represented
    by a number, i.e. in the native excel date format, or by a string representation.

    Keyword arguments:
        item - the raw value of the cell

    Returns:
        The result of openpyxl's from_excel for the value. If that conversion raises, strings are
        parsed with pd.to_datetime and everything else is passed to pd.Timestamp.
    '''
    try :
        return pyxl.utils.datetime.from_excel(item)
    except Exception :
        # Catch Exception rather than everything, so that KeyboardInterrupt and SystemExit
        # still stop the notebook instead of being turned into a fallback conversion.
        if isinstance(item, str) :
            return pd.to_datetime(item)
        return pd.Timestamp(item)
            

def get_df_by_range(sheet, first_cell, last_cell, date_cols=None) :
    '''Read a given range on the given sheet and return a DataFrame containing the data.

    Every row of the result holds the cell values of one row of the range, followed by one
    additional last column with the excel range of that row (e.g. 'A6:D6'). Rows in which all
    data cells are empty are dropped; the remaining rows keep their original positional index.

    Keyword arguments:
        sheet      - a openpyxl sheet object which is to be read
        first_cell - the top left cell of the range to be read; in excel cell notation
        last_cell  - the bottom right cell of the range to be read; in excel cell notation
        date_cols  - a column (or list of columns) which are assumed to contain dates and are
                     converted with date_convert; each one is given either as a position inside the
                     range (starting at zero for the column of first_cell) or as an excel column name

    Returns:
        A pandas DataFrame with integer column labels.
    '''
    # Read from the sheet that was passed in, not from a notebook-level global.
    data_rows = [[cell.value for cell in row] + ['{0}:{1}'.format(row[0].coordinate, row[-1].coordinate)]
        for row in sheet[first_cell:last_cell]]

    df = pd.DataFrame(data_rows)
    # Decide which rows carry data before converting anything; the trailing range column
    # is never empty and is therefore left out of the check.
    new_index = df.iloc[:, :-1].dropna(how='all').index

    if date_cols is None :
        requested = []
    elif isinstance(date_cols, list) :
        requested = date_cols
    else :
        requested = [date_cols]

    for col in requested :
        if isinstance(col, str) :
            # The frame has integer column labels, so translate the excel column name
            # into its offset from the first column of the range.
            first_col = pyxl.utils.coordinate_to_tuple(first_cell)[1]
            position = pyxl.utils.column_index_from_string(col) - first_col
        elif isinstance(col, int) :
            position = col
        else :
            # Other types were silently ignored before; keep that behaviour.
            continue
        df.iloc[:, position] = df.iloc[:, position].apply(date_convert)

    return df.loc[new_index]

In [None]:
# Budget side of the sheet: four data columns, the date sits at position 2 of the range.
budgeting = get_df_by_range(august_, 'A6', 'D130', 2)
budgeting.columns = ['budget_type', 'description', 'date', 'amount', 'excel_range']

# Column titles shared by the three money-pot tables (three data columns plus the range column).
col_titles = ['description', 'date', 'amount', 'excel_range']


def _read_money_pot(first_cell, last_cell, pot_code) :
    '''Read one money-pot table from the sheet copy and tag every row with its pot code.'''
    frame = get_df_by_range(august_, first_cell, last_cell, 1)
    frame.columns = col_titles
    frame['money_pot'] = pot_code
    return frame


max_bargeld = _read_money_pot('H7', 'J130', 'BM')
# NOTE(review): this table gets the same pot code 'BM' as max_bargeld -- confirm that is intended.
paul_bargeld = _read_money_pot('K7', 'M130', 'BM')
konto = _read_money_pot('N7', 'P130', 'KG')

# All money-side entries stacked into one frame.
conjoined = pd.concat([max_bargeld, paul_bargeld, konto])

In [None]:
# Full outer join of the budget-side and the money-side entries on the columns they share;
# the indicator column `_merge` records on which side(s) each row was found.
merge_keys = ['description', 'date', 'amount']
all_info = budgeting.merge(conjoined, how='outer', on=merge_keys, indicator=True)

# Keep only the entries that were found on both sides.
found_on_both_sides = all_info['_merge'] == 'both'
perfect_result = all_info[found_on_both_sides]

In [None]:
# Write every entry that matched on both sides into the database and leave a note on the
# cells of the sheet copy it was read from.
for index, row in perfect_result.iterrows() :
    database_comment = '''This entry was automatically generated on {} from the excel file {}. It is based 
                          on the cells {} and {}. A note has been added to the respective cells in the sheet {}.
                       '''.format(dt.now().strftime('%Y-%m-%d'), excel_filename, 
                               current_sheet + '!' + row['excel_range_x'],
                               current_sheet + '!' + row['excel_range_y'], comment_sheet)
    
    the_id = generate_id(row['date'])

    # sqlite3 cannot bind numpy scalars of every kind, so unwrap them to plain Python numbers.
    amount = row['amount']
    if hasattr(amount, 'item') :
        amount = amount.item()

    # Used as a context manager the connection commits the three inserts together, or rolls
    # all of them back if one fails, so no half-written entry stays in the database.
    with db :
        crsr = db.cursor()
        # TODO: this makes the simplifying assumption that all account ('KG') events are card payments
        # Values are bound as parameters; text from the sheet may contain quotes.
        crsr.execute('INSERT INTO money_events VALUES (?, ?, ?, datetime(?), NULL, ?, NULL);',
                     (the_id, 'Kartenzahlung' if row['money_pot'] == 'KG' else 'Barzahlung',
                      row['description'], str(row['date']), database_comment))
        crsr.execute('INSERT INTO payments VALUES (?, ?, ?, NULL, NULL, NULL, NULL);',
                     (the_id, row['money_pot'], amount))
        crsr.execute('INSERT INTO budget_events VALUES (?, ?, ?, NULL, NULL, NULL, NULL);',
                     (the_id, row['budget_type'], amount))
   
    comment_text = '''On {} this cell was automatically read and inserted into the database {}. The id of 
                      the entry is {}.'''.format(dt.now().strftime('%Y-%m-%d'), db_filename, the_id)
    comment = pyxl.comments.Comment(comment_text, 'budgeter')
    for cell in list_from_range_string(row['excel_range_x']) + \
                list_from_range_string(row['excel_range_y']) :
        august_[cell].comment = comment
        
# Show the resulting content of the three tables. The names come from this fixed list, so
# formatting them into the statement is safe; use the whole name, not its first character.
for table in ['money_events', 'payments', 'budget_events'] :
    print('The table {}: '.format(table))
    display(pd.read_sql_query('SELECT * FROM {};'.format(table), db))

In [None]:
# NOTE(review): this looks like a leftover experiment. It replaces the comment of every matched
# cell with a test text, and the id shown is a freshly generated, unused one -- not the id under
# which the row was inserted. Consider removing this cell before saving the workbook.
for index, row in perfect_result.iterrows() :
    # The id helper defined in this notebook is called generate_id; id_generation does not exist.
    comment_text = 'This comment template is just a test.' + \
                   'The id is {} from the df row {}.'.format(generate_id(row['date']), index)
    comment = pyxl.comments.Comment(comment_text, 'budgeter')
    for cell in list_from_range_string(row['excel_range_x']) + \
                list_from_range_string(row['excel_range_y']) :
        august_[cell].comment = comment    

In [None]:
# Write the workbook, including the hidden sheet copy and its cell comments, to a separate
# file; the file name differs from excel_filename, so the source workbook is not overwritten.
wb.save('GemBil.xlsx')

In [None]:
# Release the SQLite connection once all database work is done.
db.close()