# Data preprocessing

In [None]:
import os

# Imported explicitly: this cell calls pd.concat and must not depend on the
# wildcard import below happening to re-export `pd`.
import pandas as pd

from expenses_preprocess import *

# Directory searched for the input CSV files; also passed to the export helpers.
DATA_PATH = "./data"
# Name handed to splitwise_preprocess to identify the paying person.
PERSON_WHO_PAYS = "Lucas Alencar"

# Source category (as named by Nubank/Splitwise) -> official category.
CATEGORY_CONVERSION_TABLE = {
    "Eletrônicos": "Compras",
    "Supermercado": "Mercado",
    "Vestuário": "Compras",
    "Jantar fora": "Restaurante",
    "Ônibus/trem": "Transporte",
    "Filmes": "Lazer",
    "Despesas médicas": "Saúde",
    "Aluguel": "Casa",
    "Táxi": "Transporte",
}

# Splitwise column header -> column name used by the rest of the notebook.
SPLITWISE_COLUMN_NAMES = {
    "Data": "date",
    "Descrição": "title",
    "Categoria": "category",
}

# Nubank: every file matching 'nubank*.csv' under DATA_PATH is read and then
# passed through nubank_preprocess.
nubank_expenses = nubank_preprocess(read_all_csvs(DATA_PATH, 'nubank*.csv'))

# Splitwise: the first file matching the pattern is read, then preprocessed
# with the payer's name and the column-rename table.
splitwise = read_first_file_found(DATA_PATH, "Splitwise*.csv")
splitwise = splitwise_preprocess(splitwise, PERSON_WHO_PAYS, SPLITWISE_COLUMN_NAMES)

# Other expenses: used exactly as read from disk.
# NOTE(review): unlike the other sources there is no preprocessing step here;
# presumably the file already has the layout the consolidation expects — confirm.
other_expenses = read_first_file_found(DATA_PATH, "Outras*contas*Despesas*.csv")

# Incomes: read and preprocessed separately; they are not part of `expenses`.
incomes = read_first_file_found(DATA_PATH, "Outras*contas*Rendimentos*.csv")
incomes = incomes_preprocess(incomes)

# Consolidation: stack the three expense sources, run the shared expense
# preprocessing with the category conversion table, then export the result.
# `pd` must be imported explicitly at the top of the notebook rather than
# leaking in through `from expenses_preprocess import *`.
combined_expenses = pd.concat([nubank_expenses, splitwise, other_expenses])
expenses = expenses_preprocess(combined_expenses, CATEGORY_CONVERSION_TABLE)
export_to_sheets(expenses, DATA_PATH)
export_raw_expenses(expenses, DATA_PATH)

print("Ok")

# Expenses analysis

In [None]:
import pandas as pd

def amount_by_month(data):
    """Return the sum of ``amount`` per calendar month as a one-column frame.

    ``data`` must have a datetime-like ``date`` column and an ``amount``
    column. The result is indexed by the month formatted as ``"YYYY-MM"``.
    """
    month_label = data['date'].dt.strftime("%Y-%m")
    monthly_total = data.groupby(month_label)['amount'].sum()
    return monthly_total.to_frame()

# Expenses and incomes aggregated by month.
# `date` is deliberately left as a regular column: amount_by_month reads
# data['date'], so moving it into the index would break the aggregation.
exp_by_month = amount_by_month(expenses)
inc_by_month = amount_by_month(incomes)

print(inc_by_month)
print(exp_by_month)

# Combine both frames month by month. A month that appears in only one of
# them is treated as 0 on the other side; plain `+` would produce NaN there.
# NOTE(review): this is a net balance only if expense amounts are stored as
# negative values — confirm against the preprocessing helpers.
inc_by_month.add(exp_by_month, fill_value=0)