## All (shared setup: input file name, imports, BC sheet)

In [None]:
# Base name of the input workbook (read as '<plik>.xlsx'); also reused in the output file name.
plik = '2248'

In [None]:
import pandas as pd
from decimal import Decimal, ROUND_HALF_UP

In [None]:
def format_to_two_decimals(value):
    """Round ``value`` to two decimal places (half-up) and return a ``Decimal``.

    Args:
        value: Anything ``Decimal`` accepts (str, int, Decimal) or a float.

    Returns:
        Decimal: ``value`` quantized to ``0.01`` with ``ROUND_HALF_UP``.

    Raises:
        decimal.InvalidOperation: If ``value`` is text that is not a valid
            number, or an infinity (which cannot be quantized).
    """
    if isinstance(value, float):
        # Decimal(float) copies the exact binary expansion (2.675 becomes
        # 2.67499999...), which makes half-up rounding go the wrong way.
        # repr() of a plain float yields its shortest decimal text instead.
        value = repr(float(value))
    return Decimal(value).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)

In [None]:
# Load the 'BC' sheet; the first row holds the column names.
bc = pd.read_excel(f'{plik}.xlsx', sheet_name='BC', header=0)

# 'data' is the date column.
bc['data'] = pd.to_datetime(bc['data'], format='%Y-%m-%d')

# 'debet' and 'kredyt' hold decimal amounts, but some cells contain spaces or
# other whitespace characters and use a decimal comma. Strip the whitespace,
# switch the comma to a dot, then convert to two-decimal Decimals.
for amount_col in ('debet', 'kredyt'):
    is_missing = bc[amount_col].isna()
    amount_text = (
        bc[amount_col]
        .astype(str)
        .str.replace(r'\s', '', regex=True)
        .str.replace(',', '.', regex=False)
    )
    # astype(str) turns an empty cell into the text 'nan', which is truthy and
    # would become Decimal('NaN'); blank it so the zero fallback below applies.
    amount_text = amount_text.mask(is_missing, '')
    bc[amount_col] = amount_text.apply(
        lambda x: format_to_two_decimals(x) if x else Decimal('0')
    )

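## Santander (alternative to the BNP section: both build `merged` from the same `bank` sheet, so run only one of them)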

In [None]:
# Read the bank statement rows from the 'bank' sheet of the same workbook;
# the first row supplies the column names.
bank = pd.read_excel(
    f'{plik}.xlsx',
    sheet_name='bank',
    header=0,
)

In [None]:
# Preview the first rows of the loaded bank sheet.
bank.head()

In [None]:
# Label every row by the sign of 'Kwota' in a new 'co' column: strictly
# positive amounts are 'uznanie', everything else is 'obciążenie'.
is_credit = bank['Kwota'].gt(0)
bank['co'] = is_credit.map({True: 'uznanie', False: 'obciążenie'})

In [None]:
# Build one row per 'Data księgowania' with two totals of 'Kwota': 'Uznania'
# sums the rows marked 'uznanie' and 'Obciążenia' the rows marked 'obciążenie'.
# Each amount is routed to its own column (0 in the other one) so a built-in
# grouped sum can be used; this avoids a per-group Python lambda through
# groupby.apply, which newer pandas warns about when it touches the grouping
# column.
sant = (
    bank.assign(**{
        'Uznania': bank['Kwota'].where(bank['co'] == 'uznanie', 0),
        'Obciążenia': bank['Kwota'].where(bank['co'] == 'obciążenie', 0),
    })
    .groupby('Data księgowania')[['Uznania', 'Obciążenia']]
    .sum()
    .reset_index()
)

In [None]:
# 'Obciążenia' sums the non-positive amounts, so keep only its magnitude.
sant['Obciążenia'] = sant['Obciążenia'].abs()

# Turn both totals into two-decimal Decimals; a total of exactly 0 becomes
# Decimal('0').
for total_col in ('Uznania', 'Obciążenia'):
    sant[total_col] = sant[total_col].apply(
        lambda x: format_to_two_decimals(x) if x else Decimal('0')
    )

In [None]:
# Parse the booking dates into datetimes so the column can be matched against
# bc['data'] in the merge.
booking_dates = sant['Data księgowania']
sant['Data księgowania'] = pd.to_datetime(booking_dates, format='%Y-%m-%d')

In [None]:
# Outer-join the BC rows with the per-day bank totals on the date, so dates
# present on only one side are kept. The result still carries both date
# columns and the source column names.
# NOTE(review): `sant` has one row per date; if `bc` can hold several rows for
# the same date, each of them receives that day's full bank totals - confirm.
merged = bc.merge(
    sant,
    how='outer',
    left_on='data',
    right_on='Data księgowania',
)


In [None]:
# Collapse the two date columns into 'data': rows that exist only on the bank
# side have no 'data' value, so take it from 'Data księgowania'.
merged['data'] = merged['data'].combine_first(merged['Data księgowania'])

# Keep the date plus the four amount columns and give them their final names.
# NOTE(review): an earlier note asked for the order 'debet_bc',
# 'obciazenia_bank', 'kredyt_bc', 'uznania_bank'; the order below is what the
# code has always produced (and matches the BNP section) - confirm which is wanted.
merged = merged[['data', 'debet', 'kredyt', 'Obciążenia', 'Uznania']].rename(
    columns={
        'debet': 'debet_bc',
        'kredyt': 'kredyt_bc',
        'Obciążenia': 'obciazenia_bank',
        'Uznania': 'uznania_bank',
    }
)

# Fill missing values in the four amount columns only. Using Decimal('0')
# keeps those columns purely Decimal, and the date column is left untouched
# (a blanket fillna(0) would also write 0 into a missing date).
amount_cols = ['debet_bc', 'kredyt_bc', 'obciazenia_bank', 'uznania_bank']
merged = merged.fillna({col: Decimal('0') for col in amount_cols})

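## BNP (alternative to the Santander section: both build `merged` from the same `bank` sheet, so run only one of them)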

In [None]:
# Read the bank statement rows from the 'bank' sheet of the same workbook;
# the first row supplies the column names.
bnp = pd.read_excel(
    f'{plik}.xlsx',
    sheet_name='bank',
    header=0,
)

In [None]:
# Replace the sheet's own headers, by position, with the names used below:
# date, debits, credits and currency. Fails if the sheet does not have
# exactly four columns.
bnp_column_names = ['data', 'Obciążenia', 'Uznania', 'waluta']
bnp.columns = bnp_column_names

In [None]:
# Convert the BNP amounts to two-decimal Decimals. pandas reads an empty cell
# as a float NaN, which is truthy, so it has to be tested explicitly or it
# would end up as Decimal('NaN') instead of zero.
for bank_col in ('Obciążenia', 'Uznania'):
    bnp[bank_col] = bnp[bank_col].apply(
        lambda x: format_to_two_decimals(x) if pd.notna(x) and x else Decimal('0')
    )

In [None]:
# Outer-join the BC rows with the BNP rows on their shared 'data' column, so
# dates present on only one side are kept.
# NOTE(review): unlike the Santander path, bnp['data'] is not passed through
# pd.to_datetime first - confirm it is already read as a datetime column.
merged = bc.merge(
    bnp,
    how='outer',
    left_on='data',
    right_on='data',
)

In [None]:
# Keep the date plus the four amount columns and give them their final names.
merged = merged[['data', 'debet', 'kredyt', 'Obciążenia', 'Uznania']].rename(
    columns={
        'debet': 'debet_bc',
        'kredyt': 'kredyt_bc',
        'Obciążenia': 'obciazenia_bank',
        'Uznania': 'uznania_bank',
    }
)

# Fill missing values in the four amount columns only. Using Decimal('0')
# keeps those columns purely Decimal, and the date column is left untouched
# (a blanket fillna(0) would also write 0 into a missing date).
amount_cols = ['debet_bc', 'kredyt_bc', 'obciazenia_bank', 'uznania_bank']
merged = merged.fillna({col: Decimal('0') for col in amount_cols})

## All (shared output: differences and Excel export)

In [None]:
# Render the dates as 'YYYY-MM-DD' text. Parsing first keeps the cell safe to
# re-run: after one execution 'data' already holds text, and calling `.dt` on
# it directly would raise.
merged['data'] = pd.to_datetime(merged['data']).dt.strftime('%Y-%m-%d')

In [None]:
# Add the per-row differences between the BC side and the bank side:
# 'debet-obciazenia' and 'kredyt-uznania'.
bc_minus_bank = {
    'debet-obciazenia': ('debet_bc', 'obciazenia_bank'),
    'kredyt-uznania': ('kredyt_bc', 'uznania_bank'),
}
for diff_col, (bc_col, bank_col) in bc_minus_bank.items():
    merged[diff_col] = merged[bc_col] - merged[bank_col]

In [None]:
# Write the comparison table to 'merged_<plik>.xlsx' without the row index.
output_path = f'merged_{plik}.xlsx'
merged.to_excel(output_path, index=False)