In [None]:
# This notebook takes an export of raw Mint transaction data and extracts
# just the spending data found therein, adding a Spending Group to each transaction.
#
# From this data it then generates detailed spending by category for each year of data found
# and uses the averages to predict future-year and retirement spending needs.
#
# Finally, a detailed report of year-over-year spending by category is generated for each
# Spending Group.

# This first cell sets the configuration variables that are used by the cells below

#################
# Input Files
#################
# File with raw Mint transaction data
PATH_TO_YOUR_TRANSACTIONS = "transactions.csv"

# Input file describing which Mint categories are grouped into which larger Spending Groups.
# This is a CSV where the first row in each column is the name of a "Spending Group".
# The Mint categories that belong to each Spending Group are listed below it in that column.
# Categories not assigned to a group are assigned a Spending Group that matches the Category.
# See mint-spending-groups-template.csv for an example
PATH_TO_SPENDING_GROUPS = "./mint-spending-groups.csv"

# Input file describing what to exclude from the spending analysis.
# This is a CSV file that lists the Spending Groups whose transactions will be removed
# from the spending data.  This is typically Income and Credit Card Payments, along with any
# other non-spending data (e.g. reimbursed expenses, transactions related to a rental property, etc.)
# The format of the file is a Spending Group in the first column.  A second column can be set to False
# to hide a short printed analysis of the income and expense associated with that group.
# See exclude-spending-group-template.csv for an example
PATH_TO_GROUPS_TO_EXCLUDE = "./exclude-from-spending-groups.csv"


#################
# Configuration
#################
# Future spending is predicted from the averages of previous years' spending.
# Years with only partial transaction data should be left out of that average
# so they do not skew the numbers.

# Years earlier than this are skipped (old or incomplete data)
IGNORE_YEARS_BEFORE = 2014

# The current year is excluded from the averages because it is not complete yet.
# Set CURRENT_YEAR to a future year (see the commented-out line below) if you
# would rather include this year in the average.
import datetime
currentDateTime = datetime.datetime.now()
date = currentDateTime.date()
CURRENT_YEAR = date.year
# CURRENT_YEAR = 2999

# Spending Groups to leave out of the Projected Retirement Spending.
# Each group is a quoted string and groups are separated by commas.
EXCLUDE_FROM_RETIREMENT = (
    "Kids",
    "Retirement Saving",
    "State & Federal Taxes",
)

#################
# Output Files
#################
# Output file of just the spending transactions, each tagged with its Spending Group.
# Written from the combined per-year dataframe (all_df) further down in this notebook.
PATH_TO_SPENDING_DATA = 'spending.csv'

# Output file of the spending summarized (summed) by Spending Group.
# It is read back in by the visualization cell of this notebook and is also
# used as input to subsequent scripts.
PATH_TO_SPENDING_BY_GROUP = 'group_spending.csv'



In [None]:
# Read in the dependencies
import pandas as pd
import sys
import webbrowser
import os

In [None]:
import extract_spending_data_methods as esd

# Read the raw Mint transaction data into a dataframe indexed by transaction date,
# with the Amount column converted to float.
# Only ordinary errors (missing file, missing column, unparsable amount, ...) are
# handled here; KeyboardInterrupt and SystemExit are left to propagate so that
# interrupting the kernel is not reported as a read failure.
parse_dates = ['Date']
try:
    df = pd.read_csv(PATH_TO_YOUR_TRANSACTIONS, parse_dates=parse_dates)
    df.set_index(['Date'], inplace=True)
    df['Amount'] = df['Amount'].astype(float)
except Exception as e:
    print('Failed to read mint transaction data: {}'.format(e))
    sys.exit(-1)


## Run through the transaction list from Mint and add a Spending Group column
# Set show_group_details to True to get some output about which categories are
# being assigned to which group
try:
    df = esd.group_categories(df, PATH_TO_SPENDING_GROUPS, show_group_details=False)
except Exception as e:
    # Report the reason before stopping instead of exiting silently
    print('Failed to assign spending groups using {}: {}'.format(PATH_TO_SPENDING_GROUPS, e))
    sys.exit(-1)


In [None]:
## Build one dataframe holding the spending transactions of every year found in the data.
# Each year's 'Amount' column is renamed to '<YEAR> Amount' before the year is added,
# so the combined frame carries one amount column per year.
all_df = pd.DataFrame()
for year in df.index.year.unique():
    # Extract a year's worth of spending data from the transaction data.
    # This removes all transactions in Spending Groups defined in PATH_TO_GROUPS_TO_EXCLUDE;
    # generally those groups represent income or transactions like Credit Card Payments.
    # NOTE(review): the window runs from Dec 31 of the previous year to Jan 1 of the
    # next year -- confirm how extract_spending treats the two boundary dates.
    window_start = '{}-12-31'.format(year - 1)
    window_end = '{}-01-01'.format(year + 1)
    year_df = esd.extract_spending(df, PATH_TO_GROUPS_TO_EXCLUDE, window_start, window_end)

    # Give this year's amounts their own column, then put the year in front of
    # whatever has been collected so far (an empty accumulator is simply replaced).
    amount_column = '{} Amount'.format(year)
    year_df = year_df.rename(columns={'Amount': amount_column})
    all_df = year_df if all_df.empty else pd.concat([year_df, all_df])
    



In [None]:
# Write the raw spending transaction data to disk as a csv
all_df.to_csv(PATH_TO_SPENDING_DATA)
# Summarize expenses by Spending Group, by year.
# numeric_only=True is stated explicitly: pandas 2.x no longer drops non-numeric
# columns by default, and summing text columns would concatenate their strings
# into the summary file instead of leaving only the amount totals.
expenses = all_df.groupby(['Spending Group']).sum(numeric_only=True)
expenses.to_csv(PATH_TO_SPENDING_BY_GROUP)

In [None]:
# Visualize the spending for each year of data found
import visualization_methods as vms

# Create a dataframe from the annual spending by group data file.
# NOTE: `df` is rebound here -- from this cell on it holds the per-group summary
# read from PATH_TO_SPENDING_BY_GROUP, not the raw transactions loaded earlier.
df = vms.read_structured_transactions(
    PATH_TO_SPENDING_BY_GROUP,
    PATH_TO_YOUR_TRANSACTIONS,
    'Spending Group',
    'summarized spending group data',
)

# In year-over-year visualizations we may have different categories each year.
# Create an assigned color for each category so the colors are consistent.
colors = vms.assign_colors_to_groups(df)

# Iterate through the columns, which are formatted "YEAR Amount"
for col in df.columns:
    year = col.split(' ', 1)[0]
    # Ignore years with dirty or incomplete data
    if int(year) < IGNORE_YEARS_BEFORE:
        continue

    year_df = df[year + ' Amount']
    if year_df.empty:
        print('No data found for ' + year)
        continue
    vms.visualize_expenses_by_group(year, year_df, colors)


In [None]:
# Generate an average annual spending to predict future needs

# Remove old and current-year columns so the average covers complete years only,
# and remember which years were kept so the chart title reports the range that
# was actually averaged (dropped years must not widen it).
averaged_years = []
for col in list(df.columns):
    if col == 'Average':
        # Left over from a previous run of this cell; it is recomputed below
        continue
    year = int(col.split(' ', 1)[0])
    if year < IGNORE_YEARS_BEFORE or year == CURRENT_YEAR:
        del df[col]
        continue
    averaged_years.append(year)

# The defaults are the same sentinels the title fell back to before when no
# year column was found
minyr = min(averaged_years, default=3000)
maxyr = max(averaged_years, default=1900)

# Create a new column with the average annual spending by group.
# A stale 'Average' column is left out of its own recomputation.
df['Average'] = df.drop(columns='Average', errors='ignore').mean(numeric_only=True, axis=1)
# Drop spending groups that have an average of zero spending
df = df[df['Average'] != 0].copy()

title = 'Average Annual Spending '+str(minyr)+" - "+str(maxyr)
vms.visualize_average_spending_by_group(df, title, colors)


In [None]:
# Remove the Spending Groups that should not apply in retirement, to predict
# spending needs in retirement.
# EXCLUDE_FROM_RETIREMENT is normally a tuple of group names, but a single name
# written without a trailing comma is a plain string; wrap it so that it is
# treated as one group rather than iterated character by character.
if isinstance(EXCLUDE_FROM_RETIREMENT, str):
    excluded_groups = [EXCLUDE_FROM_RETIREMENT]
else:
    excluded_groups = list(EXCLUDE_FROM_RETIREMENT)

# Groups are matched against the dataframe index, so filter on the index in one pass
ret_df = df[~df.index.isin(excluded_groups)]

vms.visualize_average_spending_by_group(ret_df, 'Projected Retirement Spending', colors)



In [None]:
# Build a "summary" dataframe from the average-spending frame (df) and the
# retirement-spending frame (ret_df) so that it can be visualized as a table
sum_df = vms.build_summary_table(df, ret_df)
display(sum_df)

In [None]:
# Show the year-over-year detail table for every Spending Group, in sorted order
import pandas as pd

# Render floats with thousands separators and two decimals in the tables below
pd.set_option('display.float_format', '{:,.2f}'.format)

spending_groups = sorted(all_df['Spending Group'].unique())
for group in spending_groups:
    group_df = vms.build_category_details(all_df, group)
    print('Details for ' + group + ' Spending')
    display(group_df)
    