In [186]:
import re
import csv


# Statement period configuration (day/month kept as zero-padded strings).
closing_day = "03"
current_month = "02"


def next_month(month):
    """Return the month following `month` as a zero-padded two-digit string.

    Wraps around the end of the year, so "12" is followed by "01"
    (a plain `int(month) + 1` would produce the invalid month "13").
    """
    return str(int(month) % 12 + 1).zfill(2)


payment_month = next_month(current_month)

# Input statement file and the card label written to the exported rows.
filename = 'Fatura-C6.csv'
card = 'C6'

# Maps the internal field names to the column headers used in the C6 CSV file;
# 'singleInstallment' is the value of the installments column for one-off purchases.
c6 = {
    'date': "Data de Compra",
    'buyer': "Nome no Cartão",
    'description': "Descrição",
    'value': "Valor (em R$)",
    'installments': "Parcela",
    'singleInstallment': 'Única',
}

# '5.661,03' -> 5661.03
def localeStringToFloat(value):
    """Parse a number written with '.' as thousands separator and ',' as decimal mark.

    Example: '5.661,03' -> 5661.03
    """
    without_thousands = value.replace('.', '')
    with_decimal_point = without_thousands.replace(',', '.')
    return float(with_decimal_point)

def formatInstallments(installments):
    """Zero-pad every '/'-separated part to two characters, e.g. '1/10' -> '01/10'."""
    padded_parts = []
    for part in installments.split("/"):
        padded_parts.append(part.zfill(2))
    return "/".join(padded_parts)

def formatExpenseDict(date, description, buyer, value, installments):
    """Build the in-memory expense record for one CSV row.

    The installment marker is appended to the description (zero-padded, e.g.
    'AMAZON BR 01/10') unless the row is a single-installment purchase.
    `value` is converted from its locale-formatted string to a float and
    'category' starts out empty.
    """
    if installments == c6["singleInstallment"]:
        fullDescription = description
    else:
        fullDescription = description + " " + formatInstallments(installments)

    return {
        'date': date,
        'category': "",
        'buyer': buyer,
        'description': fullDescription,
        'value': localeStringToFloat(value),
    }

expenses = []
# Read expenses into memory from file
try:
    # newline="" is what the csv module asks for, so that it can handle line
    # endings (including ones embedded in quoted fields) on its own.
    with open(filename, "r", newline="") as file:
        reader = csv.DictReader(file, delimiter=";")
        for row in reader:
            expenses.append(
                formatExpenseDict(row[c6["date"]],
                                  row[c6["description"]],
                                  row[c6["buyer"]],
                                  row[c6["value"]],
                                  row[c6["installments"]]
                                 )
            )
except FileNotFoundError:
    # `sys` is not imported in the visible cells, so `sys.exit(...)` would die
    # with a NameError instead of showing this message. Raising SystemExit
    # directly is exactly what sys.exit() does and needs no import.
    raise SystemExit(f"File {filename} not found, please use an existing file.")

print("{0} expenses between {1}-{2}".format(len(expenses), closing_day+'/'+current_month, closing_day+'/'+payment_month))

45 expenses between 03/02-03/03


In [187]:
from datetime import date

# REMOVING EXPENSES BEFORE SELECTED DATE
# Used for mid-month expense tracking
# Cut-off written as DD/MM/YYYY (the format transform_date parses); expenses
# dated strictly before this day are the ones filtered out.
IGNORE_EXPENSES_BEFORE_DATE = '12/02/2023'

def transform_date(str_date = '31/12/2021'):
    """Convert a 'DD/MM/YYYY' string into a `datetime.date`.

    Raises ValueError when the string does not have exactly three
    '/'-separated parts or when a part is not a valid integer/date component.
    """
    day, month, year = str_date.split('/')
    return date(year=int(year), month=int(month), day=int(day))

last_date = transform_date(IGNORE_EXPENSES_BEFORE_DATE)

# Keep only the expenses dated on or after the cut-off.
# The list is rebuilt in a single pass instead of popping items while
# iterating over it: popping shifts the following element into the current
# index, so the iterator skips it and consecutive old expenses survive.
# Slice assignment updates the existing list object in place.
expenses[:] = [
    expense
    for expense in expenses
    if transform_date(expense['date']) >= last_date
]

In [189]:
# CLEAR REFUNDS DISGUISED AS EXPENSES
# Positions (in the untouched list) of every refund and of the expense it cancels.
# Nothing is removed while iterating: removing items mid-loop makes the iterator
# skip elements, which is why this cell previously had to be run several times.
matched_indexes = set()
for refund_idx, refund in enumerate(expenses):
    if refund['value'] >= 0:
        continue
    # searches refunded expense retroactively (from the end of the list),
    # ignoring expenses already cancelled by an earlier refund
    for jdx in reversed(range(len(expenses))):
        if jdx in matched_indexes:
            continue
        if abs(refund['value']) == expenses[jdx]['value']:
            refundedExpense = expenses[jdx]
            print('REFUNDED', refund['description'], refundedExpense['value'])
            matched_indexes.update((refund_idx, jdx))
            break

# Drop matched pairs by position, so an equal-looking duplicate row is never
# removed by mistake (list.remove deletes the first *equal* item).
expenses[:] = [expense for idx, expense in enumerate(expenses) if idx not in matched_indexes]

# Refunds that found no matching expense are left in the list and reported here.
# A single run of this cell is enough.
for expense in expenses:
    if expense['value'] < 0:
        print('AWATING REFUND', expense['description'], expense['value'])

REFUNDED PAYPAL       DELL 274.87
REFUNDED Estorno Tarifa 85.0


[]

In [190]:
# Here we want to see only expenses (not refunds) and after the ignored dates

from prettytable import PrettyTable
# Render the remaining expenses as a text table, one row per expense.
t = PrettyTable(['Date', 'Buyer', 'Value', 'Description'])
row_keys = ('date', 'buyer', 'value', 'description')
for expense in expenses:
    t.add_row([expense[key] for key in row_keys])
print(t)

+------------+-------+--------+------------------------------+
|    Date    | Buyer | Value  |         Description          |
+------------+-------+--------+------------------------------+
| 14/02/2023 | Vitor | 566.01 |       AMAZON BR 01/10        |
| 14/02/2023 | Vitor | 245.82 |   AMAZON MARKETPLACE 01/03   |
| 15/02/2023 | Vitor | 330.0  |      REDE ORTOESTETICA       |
| 15/02/2023 | Vitor |  90.0  | FLORICULTURA ANGELS FL 01/02 |
| 16/02/2023 | Vitor | 147.25 |   AMAZON MARKETPLACE 01/03   |
| 16/02/2023 | Vitor | 84.24  |  PP      GORILSHIELD 01/02   |
| 16/02/2023 | Vitor | 196.08 |       CARONE GAIVOTAS        |
| 17/02/2023 | Vitor | 44.41  |       PADARIA DA PRAIA       |
| 17/02/2023 | Vitor |  23.5  |      BLU FOOD PARK LTDA      |
| 17/02/2023 | Vitor | 191.9  |         LA CUCHILLA          |
| 17/02/2023 | Vitor |  12.0  |    COOKIE IT CAFE VILA VE    |
| 18/02/2023 | Vitor | 79.91  |    PAG SHOPPINGVILAVELHA     |
| 18/02/2023 | Vitor |  4.0   |    PG COMERCIO DE ALIME

## Convert to CSV, for Notion

Now that expenses are parsed, the last step involves:
- Translating texts for a more semantic representation
- Assigning the remaining CSV fields, such as categories and transfers
- Then joining the expenses to generate the csv file

In [191]:
# Short ASCII keys used in the code -> category labels written to the exported CSV.
categories = dict(
    alimentacao='Alimentação',
    bar='Bar/Bebida',
    casa='Casa',
    delivery='Delivery',
    juju='Juju',
    lanches='Lanches',
    lazer='Lazer',
    pessoal='Pessoal',
    restaurante='Restaurante',
    saude='Saúde',
    streaming='Streaming',
    transporte='Transporte',
    viagens='Viagens',
)

# (lowercase substring to look for, friendly description, key into `categories`)
# Order matters: get_translate_item returns the first entry whose substring
# occurs in the description, so 'rnfastfood' must stay ahead of 'rnfastfo'.
translation_rules = [
    ('99*99', '99 Pay', 'casa'),
    ('vivo', 'Vivo', 'pessoal'),
    ('tbra', 'Vivo', 'pessoal'),
    ('uber', 'Uber', 'transporte'),
    ('posto ilha', 'Gasolina', 'transporte'),
    ('shellbox', 'Gasolina ShellBox', 'transporte'),
    ('esfiha', 'Esfiha', 'lanches'),
    ('bakery', 'Padaria', 'alimentacao'),
    ('cr comercial', 'Padaria Manos', 'alimentacao'),
    ('pastel', 'Pastel', 'lanches'),
    ('ifood', 'Ifood', 'delivery'),
    ('carone', 'Supermercado Carone', 'alimentacao'),
    ('extrabom', 'Supermercado Extrabom', 'alimentacao'),
    ('supermercado-ct im', 'Supermercado', 'alimentacao'),
    ('embutidos lo', 'Queijo Feira', 'alimentacao'),
    ('40 sab', 'Sorvete 40 Sab', 'lanches'),
    ('clubew', 'Wine', 'bar'),
    ('spotify', 'Spotify', 'streaming'),
    ('farma', 'Farmácia', 'saude'),
    ('drogaria', 'Farmácia', 'saude'),
    ('petz', 'Juju', 'juju'),
    ('wendel fialh', 'Material Construção', 'casa'),
    ('rnfastfood', 'Ifood', 'lanches'),
    ('rnfastfo', 'Churrasquinho Praça', 'lanches'),
    ('tribbu', 'Açaí Tribbu', 'lanches'),
    ('beertr', 'Rota Beer', 'bar'),
    ('gussani', 'Gussani', 'restaurante'),
]

# Substring -> {'description', 'category'} lookup, in the order listed above.
translate_dict = {
    pattern: {'description': label, 'category': categories[category_key]}
    for pattern, label, category_key in translation_rules
}

In [192]:
def get_translate_item(item_text):
    """Look up the friendly description/category for a raw statement description.

    Returns the value of the first `translate_dict` entry whose key is a
    substring of the lower-cased `item_text`. When nothing matches, returns a
    record with the original text as description and an empty category.
    """
    fallback = {'description': item_text, 'category': ''}
    matches = (
        translation
        for pattern, translation in translate_dict.items()
        if pattern in item_text.lower()
    )
    return next(matches, fallback)

def get_pay_date(date):
    """Re-format a 'DD/MM/YYYY' date string as 'MM/DD/YYYY'."""
    parsed = transform_date(date)
    return parsed.strftime("%m/%d/%Y")

In [193]:
header = ['Mes', 'Pagamento', 'Item', 'Categoria', 'Quem pagou', 'Valor', 'Quem transfere?', 'Cartão']

# One output row per expense, with values in the same order as `header`.
data = []
for expense in expenses:  # the enumerate() index was never used
    pay_date = get_pay_date(expense['date'])
    translated_item = get_translate_item(expense['description'])

    data.append([
        current_month,                   # Mes
        pay_date,                        # Pagamento
        translated_item['description'],  # Item
        translated_item['category'],     # Categoria
        expense['buyer'],                # Quem pagou
        expense['value'],                # Valor
        '',                              # Quem transfere? (always written empty here)
        card,                            # Cartão
    ])

In [194]:
import csv

# Export the header line followed by every expense row to the output CSV.
with open('expenses-c6.csv', 'w', encoding='UTF8', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(header)  # column titles first
    csv_writer.writerows(data)   # then one line per expense