In [None]:
import pandas as pd
from collections import Counter

# Load both workbooks: bank.xlsx is read without a header row,
# bc.xlsx with pandas' default (first row becomes the header)
bank = pd.read_excel('bank.xlsx', header=None)
bc = pd.read_excel('bc.xlsx')

# Only the first column of each sheet takes part in the comparison
bank_col, bc_col = bank.columns[0], bc.columns[0]

# Tally how often every value appears on each side
bank_counts = Counter(bank[bank_col])
bc_counts = Counter(bc[bc_col])

# A value present on both sides is matched as many times as the scarcer
# side contains it; each match contributes one entry to the list
shared_values = set(bank_counts.keys()) & set(bc_counts.keys())
matches_list = [
    value
    for value in shared_values
    for _ in range(min(bank_counts[value], bc_counts[value]))
]

matches = pd.DataFrame(matches_list, columns=[bank_col])
# create remaining series for bank and bc by subtracting the matched occurrences
def get_remaining(df, col, counts, matches_counter):
    """
    Build a one-column frame of the occurrences left after removing matches.

    Args:
        df: not used by the function; kept so existing calls keep working.
        col: label given to the single column of the result.
        counts: mapping value -> number of occurrences on this side.
            It is copied, so the caller's mapping is left untouched.
        matches_counter: mapping value -> number of occurrences already matched.

    Returns:
        DataFrame with one row per leftover occurrence, i.e. each value
        repeated (count - matched) times whenever that difference is positive.
    """
    leftover = counts.copy()
    for matched_value in matches_counter:
        leftover[matched_value] -= matches_counter[matched_value]

    # Expand the positive surpluses back into individual rows
    leftover_rows = [
        value
        for value, surplus in leftover.items()
        if surplus > 0
        for _ in range(surplus)
    ]
    return pd.DataFrame(leftover_rows, columns=[col])

matches_counter = Counter(matches_list)

# Rebuild the unmatched leftovers of each side with the same helper
remaining_bank, remaining_bc = (
    get_remaining(frame, column, tally, matches_counter)
    for frame, column, tally in (
        (bank, bank_col, bank_counts),
        (bc, bc_col, bc_counts),
    )
)

# Stack both leftovers into a single frame with a fresh index
remaining = pd.concat([remaining_bank, remaining_bc], ignore_index=True)

# Write matches and leftovers as two sheets of one workbook
sheets = {'matches': matches, 'remaining': remaining}
with pd.ExcelWriter('matches_and_remaining.xlsx') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

In [5]:
import pandas as pd
import re
from collections import Counter

def parse_split_amounts(title: str) -> list[float]:
    """
    Extract every amount mentioned in a transfer title as a float.

    Recognised forms:
    - decimal comma or decimal point, e.g. ``1234,56`` or ``1234.56``
    - thousands separators in either convention, e.g. ``1.234,56`` or
      ``1,234.56``; when both separators occur, the right-most one is taken
      as the decimal mark and the other one is removed
    - plain integers of up to 6 digits

    Skipped tokens:
    - bare runs of 7 or more digits (treated as reservation numbers)
    - anything that is still not a valid number after normalisation,
      e.g. dates such as ``12.05.2023``

    A '.' or ',' at the end of a token is treated as sentence punctuation
    and dropped before the token is interpreted.

    Args:
        title: free-text transfer title. A non-string value (for example the
            NaN pandas produces for an empty cell) yields an empty list.

    Returns:
        The parsed amounts in order of appearance.
    """
    if not isinstance(title, str):
        return []

    amounts = []
    for token in re.findall(r'[\d\.,]+', title):
        # "123,45." at the end of a sentence must not be read as 12345
        raw = token.rstrip('.,')
        if re.fullmatch(r'\d+', raw) and len(raw) >= 7:
            continue
        if ',' in raw and '.' in raw:
            if raw.rfind(',') > raw.rfind('.'):
                # 1.234,56 -> dots group thousands, comma is the decimal mark
                norm = raw.replace('.', '').replace(',', '.')
            else:
                # 1,234.56 -> commas group thousands, dot is the decimal mark
                norm = raw.replace(',', '')
        elif ',' in raw:
            # Comma only: decimal comma (several commas fail float() below)
            norm = raw.replace(',', '.')
        else:
            norm = raw
        try:
            amounts.append(float(norm))
        except ValueError:
            # Not a number after all (date, stray punctuation, ...)
            continue
    return amounts

# 1) Load the input workbooks
df_bank = pd.read_excel('bank.xlsx', header=None, names=['amount', 'title'])
df_bc = pd.read_excel('bc.xlsx', header=0, names=['amount'])

# 2) Cast the amount columns to float
for frame in (df_bank, df_bc):
    frame['amount'] = frame['amount'].astype(float)

# 3) IDENTICAL MATCHES
bank_ct = Counter(df_bank['amount'])
bc_ct = Counter(df_bc['amount'])
common = set(bank_ct) & set(bc_ct)

# For every amount present on both sides pair up as many rows as the
# scarcer side has, remembering which row labels get consumed
identical_list = []
i_bank_idx = []
i_bc_idx = []
for amount in common:
    pair_count = min(bank_ct[amount], bc_ct[amount])
    identical_list.extend([amount] * pair_count)
    i_bank_idx.extend(df_bank.loc[df_bank['amount'] == amount].index[:pair_count])
    i_bc_idx.extend(df_bc.loc[df_bc['amount'] == amount].index[:pair_count])
identical_df = pd.DataFrame({'amount': identical_list})

# Drop the paired rows from both frames
df_rem_bank = df_bank.drop(index=i_bank_idx).reset_index(drop=True)
df_rem_bc = df_bc.drop(index=i_bc_idx).reset_index(drop=True)

# 4) SPLIT MATCHES
# A bank row counts as a split match when the amounts parsed from its title
# add up to the booked amount (within `tol`) and every one of those amounts
# is still available among the unmatched bc rows.
split_matches = []
used_bc_idx = []
used_bank_idx = []  # labels of the bank rows consumed by a split match
tol = 1e-2
bc_counts = Counter(df_rem_bc['amount'])

# Labels of the still-unused bc rows, grouped by amount. Taking labels from
# here (and removing them) guarantees that one bc row is never handed to two
# different bank rows.
free_bc_idx = {}
for bc_label, bc_amount in df_rem_bc['amount'].items():
    free_bc_idx.setdefault(bc_amount, []).append(bc_label)

for i, row in df_rem_bank.iterrows():
    title = row['title']
    if not isinstance(title, str):
        # Empty title cells are read from Excel as NaN: nothing to parse
        continue
    parts = parse_split_amounts(title)
    if not parts:
        # No amounts in the title: an empty sum must not "match" a zero booking
        continue
    total = sum(parts)
    # Written as `not (... <= tol)` so that a NaN amount is rejected as well
    if not abs(total - row['amount']) <= tol:
        continue
    parts_ct = Counter(parts)
    if not all(bc_counts[val] >= cnt for val, cnt in parts_ct.items()):
        continue
    matched_idxs = []
    for val, cnt in parts_ct.items():
        # Availability was verified above, so the amount has >= cnt free labels
        available = free_bc_idx[val]
        matched_idxs += available[:cnt]
        del available[:cnt]
        bc_counts[val] -= cnt
    split_matches.append({
        'bank_amount': row['amount'],
        'bank_title':  row['title'],
        'bc_amounts':  "; ".join(f"{val:.2f}" for val in parts)
    })
    used_bc_idx += matched_idxs
    used_bank_idx.append(i)

split_df = pd.DataFrame(split_matches, columns=['bank_amount', 'bank_title', 'bc_amounts'])

# 5) Remove the matched rows
# Bank rows are dropped by label, not by amount: filtering on the amount
# would also discard unmatched bank rows that merely share a matched value.
matched_bank = split_df['bank_amount'].tolist() if not split_df.empty else []
rem_bank2    = df_rem_bank.drop(index=used_bank_idx).reset_index(drop=True)
rem_bc2      = df_rem_bc.drop(index=used_bc_idx).reset_index(drop=True)

# 6) REMAINING - pad both leftovers to a common length and put them side by side
max_len = max(map(len, (rem_bank2, rem_bc2)))
padded_index = range(max_len)
rb = rem_bank2.reindex(padded_index)
rc = rem_bc2.reindex(padded_index)
bank_side = rb.rename(columns={'amount':'bank_amount','title':'bank_title'})
bc_side = rc.rename(columns={'amount':'bc_amount'})
remaining_df = pd.concat([bank_side, bc_side], axis=1)

# 7) Write all three result tables to one Excel workbook
sheets = {
    'identical_matches': identical_df,
    'split_matches': split_df,
    'remaining': remaining_df,
}
with pd.ExcelWriter('matches_and_remaining.xlsx') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

print('Gotowe! Plik matches_and_remaining.xlsx wygenerowany.')

Gotowe! Plik matches_and_remaining.xlsx wygenerowany.
