# Data preprocessing

In [None]:
from expenses_preprocess import *

# Directory searched for the input CSV exports; it is also handed to the
# export helpers at the end of this cell.
DATA_PATH = "./data"

# Name handed to `splitwise_focused_on` when processing the Splitwise export.
PERSON_WHO_PAYS = "Lucas Alencar"

# Translation table: category label as found in the Nubank/Splitwise exports
# (key) -> official category used in the consolidated expenses (value).
CATEGORY_CONVERSION_TABLE = {
    "Eletrônicos": "Compras",
    "Supermercado": "Mercado",
    "Vestuário": "Compras",
    "Jantar fora": "Restaurante",
    "Ônibus/trem": "Transporte",
    "Filmes": "Lazer",
    "Despesas médicas": "Saúde",
    "Aluguel": "Casa",
    "Táxi": "Transporte",
}

# Splitwise column header (key) -> column name used in the consolidated
# expenses frame (value).
SPLITWISE_COLUMN_NAMES = {
    "Data": "date",
    "Descrição": "title",
    "Categoria": "category",
}

# NOTE(review): `pd` and `glob` are not imported explicitly in this cell;
# presumably they arrive through `from expenses_preprocess import *` — confirm.


def _first_match(pattern):
    """Return the first file (in sorted order) matching DATA_PATH + *pattern*.

    `glob.glob` returns matches in an unspecified order, so the matches are
    sorted to make the chosen file deterministic when several exports exist.

    Raises:
        FileNotFoundError: if no file matches, instead of the opaque
            IndexError that indexing an empty match list would produce.
    """
    full_pattern = DATA_PATH + pattern
    matches = sorted(glob.glob(full_pattern))
    if not matches:
        raise FileNotFoundError(f"No file matching {full_pattern!r}")
    return matches[0]


# Nubank: every matching CSV is read and preprocessed together.
nubank_expenses = nubank_preprocess(
    read_all_csvs(DATA_PATH, '/nubank*.csv'),
    CATEGORY_CONVERSION_TABLE,
)

# Splitwise: a single export is read, preprocessed, then narrowed down via
# `splitwise_focused_on` using PERSON_WHO_PAYS and the column-name mapping.
splitwise = pd.read_csv(_first_match("/Splitwise*.csv"))
splitwise = splitwise_preprocess(splitwise, CATEGORY_CONVERSION_TABLE)
splitwise = splitwise_focused_on(PERSON_WHO_PAYS, splitwise, SPLITWISE_COLUMN_NAMES)

# Other accounts: a single "Despesas*.csv" export.
other_accounts = pd.read_csv(_first_match("/Despesas*.csv"))
other_accounts = other_accounts_preprocess(other_accounts)

# Consolidate the three sources, keeping only the shared columns, then export.
expenses = pd.concat([nubank_expenses, splitwise, other_accounts])[['date', 'title', 'category', 'amount']]
export_to_sheets(expenses, DATA_PATH)
export_raw_expenses(expenses, DATA_PATH)

print("Ok")

# Expenses analysis

In [None]:
import pandas as pd

# Load the raw expenses CSV (presumably the file written by
# `export_raw_expenses` in the preprocessing cell — confirm), parsing the
# 'date' column as datetimes so the `.dt` accessor works below.
expenses = pd.read_csv("./data/raw_expenses.csv", parse_dates=['date'])

# 'date' is intentionally kept as a regular column: the grouping below reads
# `expenses['date']`, and `set_index` without assignment would have no effect
# anyway because it returns a new frame.

# Expenses for each month: group rows by their "YYYY-MM" key and sum `amount`.
expenses.groupby(expenses['date'].dt.strftime("%Y-%m")).amount.sum()