# Data preprocessing

In [None]:
from expenses_preprocess import *
import os

# Directory searched for the input CSV exports; also handed to the export
# helpers at the end of this cell.
DATA_PATH = "./data"
# Name passed to splitwise_preprocess.
# NOTE(review): presumably selects this person's share of each Splitwise
# entry -- confirm against splitwise_preprocess.
PERSON_WHO_PAYS = "Lucas Alencar"

# Official categories, each paired with the Nubank/Splitwise category labels
# that fold into it. Labels are kept exactly as they appear in the exports.
_OFFICIAL_CATEGORY_SOURCES = (
    ('Casa', ['Eletricidade', 'Aluguel', 'Móveis']),
    ('Restaurante', ['Jantar fora']),
    ('Mercado', ['Supermercado']),
    ('Compras', ['Eletrônicos', 'Vestuário', 'Produtos de limpeza']),
    ('Lazer', ['Filmes', 'Música']),
    ('Serviços', ['TV/Telefone/Internet']),
    ('Saúde', ['Despesas médicas']),
    ('Transporte', ['Táxi', 'Ônibus/trem']),
    ('Viagem', ['Avião', 'Hotel']),
    ('Outros', ['Presentes', 'Geral', 'Ajuste']),
)

# Flat lookup: source category label -> official category.
# Passed to expenses_preprocess when the expenses are consolidated.
CATEGORY_CONVERSION_TABLE = {
    source_label: official_category
    for official_category, source_labels in _OFFICIAL_CATEGORY_SOURCES
    for source_label in source_labels
}

# Splitwise export column headers (Portuguese) mapped to the column names
# used in this notebook; passed to splitwise_preprocess.
# NOTE(review): presumably applied as a column rename -- confirm against
# splitwise_preprocess.
SPLITWISE_COLUMN_NAMES = {
    'Data': 'date',
    'Descrição': 'title',
    'Categoria': 'category',
}

# NUBANK PREPROCESS
# Every file matching 'nubank*.csv' under DATA_PATH is read and preprocessed.
nubank_expenses = nubank_preprocess(read_all_csvs(DATA_PATH, 'nubank*.csv'))

# SPLITWISE PREPROCESS
# Only the first file matching the pattern is used.
splitwise = read_first_file_found(DATA_PATH, "Splitwise*.csv")
splitwise = splitwise_preprocess(splitwise, PERSON_WHO_PAYS, SPLITWISE_COLUMN_NAMES)

# OTHER EXPENSES PREPROCESS
# Read as-is: unlike the other sources, no dedicated preprocess step is
# applied before consolidation.
other_expenses = read_first_file_found(DATA_PATH, "Outras*contas*Despesas*.csv")

# INCOMES PREPROCESS
# `incomes` is kept separate from the expenses; it is consumed by the
# monthly-expenses cell.
incomes = read_first_file_found(DATA_PATH, "Outras*contas*Rendimentos*.csv")
incomes = incomes_preprocess(incomes)

# Data consolidation and exports
# NOTE(review): `pd` is not imported in this cell (pandas is imported in a
# later cell); presumably it is re-exported by the star import from
# expenses_preprocess -- confirm, or import pandas explicitly here.
expenses = expenses_preprocess(pd.concat([nubank_expenses, splitwise, other_expenses]), CATEGORY_CONVERSION_TABLE)
export_to_sheets(expenses, DATA_PATH)
export_raw_expenses(expenses, DATA_PATH)

# Simple completion marker for the cell.
print("Ok")

# Monthly Expenses

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

from IPython.display import display
from monthly_expenses import *

# Target monthly balance expressed as a fraction (printed below as a
# percentage) and passed to style_summary_expenses.
MONTHLY_BALANCE_GOAL = 0.25

# Expenses and incomes by month
# `expenses` and `incomes` come from the data preprocess cell, which must
# have been run first.
exp_by_month = amount_by_month(expenses)
inc_by_month = amount_by_month(incomes)
monthly_exp = summary_expenses(exp_by_month, inc_by_month)

# Monthly averages
# The same summary is built from the means, then given a single descriptive
# row label.
avg_monthly_exp = summary_expenses(exp_by_month.mean(), inc_by_month.mean())
avg_monthly_exp.index = ['Average monthly expenses']

print(" >>> Monthly balance goal:", MONTHLY_BALANCE_GOAL * 100, "%")
display(style_summary_expenses(monthly_exp, MONTHLY_BALANCE_GOAL))
display(style_summary_expenses(avg_monthly_exp, MONTHLY_BALANCE_GOAL))

# Plot expenses summary
plt.figure(figsize=(20,10))
# Expenses are sign-flipped for plotting.
# NOTE(review): presumably they are stored as negative amounts, so this puts
# them on the same positive scale as incomes -- confirm.
plt.plot(monthly_exp['Expenses'] * -1, 'r', label='Expenses')
plt.plot(monthly_exp['Incomes'], 'g', label='Incomes')
plt.plot(monthly_exp['Balance'], 'b', label='Balance')
plt.legend(fontsize=20)
plt.grid()
plt.show()

# Category analysis

In [None]:
from datetime import datetime, date, time
import calendar

def month_day_range(date):
    """
    Return the first and last day of the month that contains ``date``.

    ``date`` may be any object offering ``year``, ``month`` and
    ``replace(day=...)`` (e.g. ``datetime.date`` or ``datetime.datetime``).
    Only the day is replaced, so every other field of the input -- including
    a time of day, if present -- is carried over to both results.

    Note that the parameter name shadows the imported ``date`` class inside
    this function.

    Month with 31 days:
    >>> month_day_range(date(2011, 7, 27))
    (datetime.date(2011, 7, 1), datetime.date(2011, 7, 31))

    Month with 28 days:
    >>> month_day_range(date(2011, 2, 15))
    (datetime.date(2011, 2, 1), datetime.date(2011, 2, 28))

    Adapted from https://gist.github.com/waynemoore/1109153
    """
    # monthrange returns (weekday of the 1st, number of days in the month);
    # only the day count is needed here.
    _, days_in_month = calendar.monthrange(date.year, date.month)
    return date.replace(day=1), date.replace(day=days_in_month)

def records_for_month(records, base_date):
    """
    Select the rows of ``records`` whose 'date' falls in ``base_date``'s month.

    The month bounds come from ``month_day_range(base_date)`` and both ends
    are inclusive.

    NOTE(review): assumes ``records['date']`` holds values that compare
    cleanly with the type of ``base_date``. If the column carries
    time-of-day information, rows later on the last day than the upper bound
    would be left out -- confirm the column's dtype.
    """
    first_day, last_day = month_day_range(base_date)
    on_or_after_start = records['date'] >= first_day
    on_or_before_end = records['date'] <= last_day
    return records[on_or_after_start & on_or_before_end]

def total_amount_by_category(records):
    """
    Sum the numeric columns of ``records`` per 'category'.

    Returns a DataFrame indexed by category, sorted by 'amount' in ascending
    order (most negative total first).

    ``numeric_only=True`` is passed explicitly: since pandas 2.0 a plain
    ``groupby(...).sum()`` no longer drops non-numeric columns, so text
    columns would be concatenated and datetime columns would raise. Keeping
    only numeric columns matches the result older pandas versions produced.
    """
    totals = records.groupby('category').sum(numeric_only=True)
    return totals.sort_values('amount')
    
def expenses_distribution(expenses):
    """
    Return each spending category's total as a fraction of the overall total.

    Categories are totalled with ``total_amount_by_category``; only those
    whose total 'amount' is negative are kept. Each kept total is divided by
    the sum of every 'amount' in ``expenses``.

    NOTE(review): the divisor also includes rows from categories that were
    filtered out (non-negative totals), so the returned fractions do not
    necessarily add up to 1 -- confirm this is intended.
    """
    overall_total = expenses.amount.sum()
    totals_by_category = total_amount_by_category(expenses)
    net_spending = totals_by_category[totals_by_category.amount < 0]
    return net_spending / overall_total
    
# Month under analysis. Any day of the month works, because
# month_day_range expands it to the month's first and last day.
BASE_DATE = date(2018, 4, 1)

# `expenses` comes from the data preprocess cell; `display` is imported in
# the monthly expenses cell. Both cells must have been run first.
expenses_for_month = records_for_month(expenses, BASE_DATE)
expenses_for_category_by_total = expenses_distribution(expenses_for_month)
display(expenses_for_category_by_total)

# Bar chart of the per-category fractions, largest first.
expenses_for_category_by_total.sort_values('amount', ascending=False).plot.bar(figsize=(20,10), fontsize=18, grid=True)