In [1]:
import dataclasses
import pathlib
import json
import pandas as pd
import os


# Work from the parent of the kernel's starting directory so that the relative
# "data/..." paths used in this notebook resolve. The target is remembered in
# PROJECT_ROOT: re-running this cell in a live kernel then returns to the same
# directory instead of climbing one more level on every execution.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = pathlib.Path.cwd().parents[0]
os.chdir(PROJECT_ROOT)


# Input/output locations. "~" paths are user-home relative; "data/..." paths are
# relative to PROJECT_ROOT.
LEDGER_PATH = "~/Nextcloud/Note/Finanze/ledger/2022.csv"
MMEX_PATH = "~/Nextcloud/Note/Finanze/ledger/2021_mmex.csv"
EXAMPLE_MMEX_PATH = "data/test.csv"
OUTPUT_PATH = "data/test_import.csv"

@dataclasses.dataclass(frozen=True)
class LedgerCols:
    """Column names assigned to the header-less ledger CSV.

    Field declaration order is significant: the field names, in this order, are
    passed as ``names=`` when the ledger file is read, so they label the CSV
    columns from left to right.

    The class is frozen so these constants cannot be reassigned by accident in a
    live kernel.
    """

    DATE: str = "DATE"
    # BOH, BOH2 and BOH3 are placeholder columns; they are dropped right after loading.
    BOH: str = "BOH"
    DESCRIPTION: str = "DESCRIPTION"
    # Colon-separated account name, e.g. "Assets:Banca:Contanti" or "Spese:Regali".
    CATEGORY: str = "CATEGORY"
    CURRENCY: str = "CURRENCY"
    AMOUNT: str = "AMOUNT"
    BOH2: str = "BOH2"
    BOH3: str = "BOH3"
    
@dataclasses.dataclass(frozen=True)
class MMEXCols:
    """Column names used on the MMEX side (Italian headers).

    Each attribute's value is the header text. ``SottoCategoria`` is the only
    attribute whose name differs from its value, because the header
    "Sotto-Categoria" contains a hyphen and is not a valid identifier.

    The class is frozen so these constants cannot be reassigned by accident in a
    live kernel.
    """

    Data: str = "Data"
    Conto: str = "Conto"
    Beneficiario: str = "Beneficiario"
    Importo: str = "Importo"
    Valuta: str = "Valuta"
    Categoria: str = "Categoria"
    SottoCategoria: str = "Sotto-Categoria"
    Etichette: str = "Etichette"
    Note: str = "Note"
    Prelievo: str = "Prelievo"
    Deposito: str = "Deposito"
    
# Shared instances of the column-name holders used throughout the notebook.
ledger_cols = LedgerCols()
mmex_cols = MMEXCols()

# NOTE(review): machine-specific absolute path. The other inputs of this notebook
# are resolved relative to the working directory ("data/..."); consider moving this
# file next to them so the notebook runs on another machine.
MAPPED_CATEGORIES_PATH = pathlib.Path("/media/paolo/Kingston SSD/ledger-to-mmex/data/mapped_categories.json")

# Ledger account name -> output category path ("Category" or "Category:Sub-category").
# JSON is UTF-8 by specification; the encoding is pinned so that reading does not
# depend on the platform's default locale.
with MAPPED_CATEGORIES_PATH.open("r", encoding="utf-8") as f:
    mapped_categories: dict[str, str] = json.load(f)

In [2]:
# Account-name substitutions applied by convert_conto_name:
# text found in the processed data -> text written in its place.
conto_map: dict[str, str] = dict(
    [
        ("Intesa XME", "Intesa"),
        ("Contanti Sant'Arcangelo", "Casa"),
    ]
)

In [3]:
# Load the header-less ledger CSV, label its columns from the LedgerCols field
# names (in declaration order), discard the placeholder columns and add the
# absolute amount used later to pair the rows of one transaction.
ledger = (
    pd.read_csv(
        LEDGER_PATH,
        header=None,
        names=[field.name for field in dataclasses.fields(LedgerCols)],
    )
    .drop(columns=[ledger_cols.BOH3, ledger_cols.BOH2, ledger_cols.BOH])
    .assign(AMOUNT_NORM=lambda frame: frame[ledger_cols.AMOUNT].abs())
)
ledger

Unnamed: 0,DATE,DESCRIPTION,CATEGORY,CURRENCY,AMOUNT,AMOUNT_NORM
0,2022/01/01,Starting balances,Assets:Banca:Contanti,€,255.08,255.08
1,2022/01/01,Starting balances,Assets:Banca:Contanti Sant'Arcangelo,€,2080.00,2080.00
2,2022/01/01,Starting balances,Assets:Banca:Hype,€,253.58,253.58
3,2022/01/01,Starting balances,Assets:Banca:N26,€,0.26,0.26
4,2022/01/01,Starting balances,Assets:Banca:Postepay,€,7.20,7.20
...,...,...,...,...,...,...
1076,2022/12/29,Soldi che erano nel portafogli che mi ha regal...,Guadagni:Altro,€,-51.00,51.00
1077,2022/12/30,libro claudia mafia,Spese:Regali,€,1.50,1.50
1078,2022/12/30,libro claudia mafia,Assets:Banca:Contanti,€,-1.50,1.50
1079,2022/12/31,Imposta di bollo,Spese:Spese:Tasse,€,2.90,2.90


In [4]:
# Rows sharing date, description and absolute amount are treated as the postings
# of one transaction; each element of `indices` is the array of row positions of
# one such group.
indices = (
    ledger
    .groupby([ledger_cols.DATE, ledger_cols.DESCRIPTION, "AMOUNT_NORM"])
    .indices
    .values()
)

def detect_transaction_type(accounts: list[str]):
    """Classify a transaction from the root segment of its account names.

    The root is the text before the first ":" compared case-insensitively.
    Accounts are examined in order and the first one whose root is "guadagni"
    or "spese" decides the result.

    Args:
        accounts: Colon-separated account names of the transaction's postings.

    Returns:
        "Deposit" for a "guadagni" root, "Withdrawal" for a "spese" root,
        otherwise "Transfer".
    """
    type_by_root = {"guadagni": "Deposit", "spese": "Withdrawal"}
    roots = [name.split(":")[0].lower() for name in accounts]
    return next((type_by_root[root] for root in roots if root in type_by_root), "Transfer")

def extract_category(accounts: list[str], transaction_type: str, categories: "dict[str, str] | None" = None) -> tuple[str, str]:
    """Return the (category, sub-category) pair for a transaction.

    Transfers always map to ("Trasferimento", "Trasferimento"). Otherwise the
    first account whose name does not contain "Assets" is looked up in
    ``categories`` and the mapped value is split at its last ":".

    Args:
        accounts: Colon-separated account names of the transaction's postings.
        transaction_type: "Transfer", "Deposit" or "Withdrawal".
        categories: Account name -> "Category" or "Category:Sub-category".
            Defaults to the notebook-level ``mapped_categories``.

    Returns:
        ``(category, sub_category)``; ``sub_category`` is "" when the mapped
        value contains no ":".

    Raises:
        KeyError: if the selected account has no entry in ``categories``.
        ValueError: if every account contains "Assets", so there is nothing to
            categorise (previously this fell through and returned ``None``).
    """
    if transaction_type == "Transfer":
        return "Trasferimento", "Trasferimento"

    if categories is None:
        categories = mapped_categories

    for account in accounts:
        if "Assets" in account:
            continue

        mapped = categories[account]
        # Split on the LAST ":" only: "A:B:C" -> ("A:B", "C").
        parent, separator, leaf = mapped.rpartition(":")
        return (parent, leaf) if separator else (mapped, "")

    raise ValueError(f"no non-Assets account to categorise in {accounts!r}")
        
def extract_account(accounts: list[str]) -> str:
    """Return the last ":"-separated segment of the first "Assets" account.

    Args:
        accounts: Colon-separated account names of the transaction's postings.

    Returns:
        The final segment of the first name containing "Assets", or "" when no
        name contains it.
    """
    asset_leaves = (name.rsplit(":", 1)[-1] for name in accounts if "Assets" in name)
    return next(asset_leaves, "")

def extract_transaction_account_payee(transaction: pd.DataFrame) -> tuple[str, str]:
    """Derive the source account and the payee label of a transfer.

    Rows are scanned in order. A row with a negative amount sets the account to
    the last ":"-separated segment of its category; a row with a positive amount
    sets the payee to "> " followed by that segment. Later rows overwrite
    earlier ones.

    Args:
        transaction: Frame holding the ledger CATEGORY and AMOUNT columns.

    Returns:
        ``(account, payee)``. Either element stays ``None`` when no row has the
        corresponding sign.
    """
    account = None
    payee = None
    postings = zip(transaction[ledger_cols.CATEGORY], transaction[ledger_cols.AMOUNT])
    for category, amount in postings:
        if amount < 0:
            account = category.split(":")[-1]
        if amount > 0:
            payee = f"> {category.split(':')[-1]}"

    return account, payee

def process_transaction(dataframe: pd.DataFrame):
    """Convert the ledger rows of one transaction into a single output record.

    Args:
        dataframe: The rows of one transaction, with the ledger DATE,
            DESCRIPTION, CATEGORY and AMOUNT columns plus "AMOUNT_NORM". Date,
            note and amount are taken from the first row.

    Returns:
        A list ``[date, "R", type, account, payee, amount, currency, category,
        sub_category, note]``. The amount is negative for withdrawals and
        positive otherwise.
    """
    accounts = dataframe[ledger_cols.CATEGORY].tolist()

    # .iloc[0] reads the first row by position. A plain [0] is a label lookup and
    # only works when the frame's index happens to contain the label 0.
    date: str = dataframe[ledger_cols.DATE].iloc[0].replace("/", "-")
    transaction_type: str = detect_transaction_type(accounts)
    # The currency is hard-coded rather than read from the ledger CURRENCY column.
    currency: str = "EUR"
    category, sub_category = extract_category(accounts, transaction_type)
    magnitude = dataframe["AMOUNT_NORM"].iloc[0]
    amount: float = -magnitude if transaction_type == "Withdrawal" else magnitude
    note: str = dataframe[ledger_cols.DESCRIPTION].iloc[0]

    if transaction_type == "Transfer":
        account, payee = extract_transaction_account_payee(dataframe[[ledger_cols.CATEGORY, ledger_cols.AMOUNT]])
    else:
        account = extract_account(accounts)
        payee = "None"

    return [date, "R", transaction_type, account, payee, amount, currency, category, sub_category, note]

# Only groups made of exactly two rows are converted. Every other group is kept
# in `skipped_groups` and reported, so that dropped ledger rows are visible
# instead of disappearing silently.
processed_transactions = []
skipped_groups = []
for idx in indices:
    # GroupBy.indices holds row positions, hence .iloc rather than .loc.
    group = ledger.iloc[idx]
    if len(idx) != 2:
        skipped_groups.append(group)
        continue
    processed_transactions.append(process_transaction(group.reset_index(drop=True)))

if skipped_groups:
    skipped_rows = sum(len(group) for group in skipped_groups)
    print(
        f"Skipped {len(skipped_groups)} group(s) ({skipped_rows} ledger row(s)) "
        "that do not consist of exactly two rows; see `skipped_groups`."
    )


In [5]:
# Read the example MMEX CSV only for its column layout, and add the "Stato"
# column (constant "R") as the second column.
mmex_test = pd.read_csv(EXAMPLE_MMEX_PATH)
mmex_test.insert(loc=1, column="Stato", value="R")
mmex_test.columns

Index(['Data', 'Stato', 'Tipo', 'Conto', 'Beneficiario', 'Importo', 'Valuta',
       'Categoria', 'Sotto-Categoria', 'Note'],
      dtype='object')

In [6]:
# Assemble the converted records into a frame that reuses the example file's
# column layout. Nothing is written to OUTPUT_PATH here.
processed_dataframe = pd.DataFrame(
    data=processed_transactions,
    columns=mmex_test.columns,
)

In [7]:
# Preview the assembled records (shown before the account names are converted).
processed_dataframe

Unnamed: 0,Data,Stato,Tipo,Conto,Beneficiario,Importo,Valuta,Categoria,Sotto-Categoria,Note
0,2022-01-01,R,Deposit,Contanti,,50.00,EUR,Entrate,Regali,Regalo Zia Anna
1,2022-01-04,R,Withdrawal,Revolut,,-25.40,EUR,Informatica,Altro,Smartwatch nuovo
2,2022-01-04,R,Withdrawal,Contanti,,-10.00,EUR,Cibo,Mangiare fuori,StikHouse Anna Marù
3,2022-01-05,R,Withdrawal,Contanti,,-4.00,EUR,Cibo:Mangiare fuori,Colazione,Colazione Marù
4,2022-01-07,R,Transfer,Contanti Sant'Arcangelo,> Contanti,400.00,EUR,Trasferimento,Trasferimento,Trasferimento da Contanti Sant'Arcangelo a Con...
...,...,...,...,...,...,...,...,...,...,...
532,2022-12-26,R,Transfer,Intesa XME,> Contanti,26.98,EUR,Trasferimento,Trasferimento,Trasferimento da Intesa XME a Contanti
533,2022-12-27,R,Deposit,Contanti Sant'Arcangelo,,100.00,EUR,Entrate,Regali,Regalo Mamma
534,2022-12-29,R,Deposit,Contanti,,51.00,EUR,Altre Entrate,,Soldi che erano nel portafogli che mi ha regal...
535,2022-12-30,R,Withdrawal,Contanti,,-1.50,EUR,Regali,,libro claudia mafia


In [8]:
import re

def convert_conto_name(old_conto: str, mapping: "dict[str, str] | None" = None) -> str:
    """Replace every known account name found in ``old_conto``.

    Keys of ``mapping`` are matched as literal text, never as regular
    expressions. All keys are substituted in a single pass, so a text that
    mentions several accounts (e.g. a transfer note) is converted completely and
    a replacement value is never re-scanned for further keys.

    Args:
        old_conto: Text to convert. Non-string values (``None``, NaN) are
            returned unchanged.
        mapping: Text to find -> replacement text. Defaults to the
            notebook-level ``conto_map``.

    Returns:
        The converted text, or ``old_conto`` itself when nothing applies.
    """
    if mapping is None:
        mapping = conto_map
    if not isinstance(old_conto, str) or not mapping:
        return old_conto

    # Longest keys first so that a key which is a prefix of another cannot shadow it.
    keys = sorted(mapping, key=len, reverse=True)
    pattern = "|".join(re.escape(key) for key in keys)
    # A callable replacement inserts the value verbatim (no backslash processing).
    return re.sub(pattern, lambda match: mapping[match.group(0)], old_conto)

# Convert account names in every column that can mention an account.
for column in ("Beneficiario", "Conto", "Note"):
    processed_dataframe[column] = processed_dataframe[column].apply(convert_conto_name)

In [9]:
# Write two header-less CSV files per account: "<conto>.csv" with the
# non-transfer rows and "<conto>_transfer.csv" with the transfers.
export_dir = pathlib.Path("data/export")
export_dir.mkdir(parents=True, exist_ok=True)  # to_csv does not create directories

for conto in processed_dataframe.Conto.unique():
    # File names are built with f-strings rather than with_suffix(): with_suffix()
    # would treat a dot inside the account name as the start of an extension,
    # truncate the name, and could map both files to the same path.
    conto_output_path = export_dir / f"{conto}.csv"
    conto_transfer_path = export_dir / f"{conto}_transfer.csv"

    # A row belongs to the account when it is booked on it or when its note
    # mentions it. The name is matched literally (regex=False) and rows with a
    # missing note count as "no match" (na=False).
    mentioned_in_note = processed_dataframe.Note.str.contains(conto, regex=False, na=False)
    conto_df = processed_dataframe[mentioned_in_note | (processed_dataframe.Conto == conto)]

    is_transfer = conto_df.Tipo == "Transfer"
    conto_df[~is_transfer].to_csv(conto_output_path, index=False, header=False)
    conto_df[is_transfer].to_csv(conto_transfer_path, index=False, header=False)
    
#     break
# tmp