In [None]:
# This notebook will take an export of raw mint transaction data and extract
# just the income data found therein, adding a spending group to each transaction.
#
# From this data it will then generate detailed spending by category for each year of data found
# and then use the averages to predict future year and retirement spending needs.
#
# Finally a detailed report of year over year spending by category will be generated for each
# spending group

# This first cell sets the configuration variables that will be used herein


#################
# Input Files - where to read source data and configuration files
#################
# File with raw mint transaction data
PATH_TO_YOUR_TRANSACTIONS = "transactions.csv"

# Input file describing which categories to group into which larger spending categories
# This is a CSV where the first row in each column is the name of a "Spending Group"
# The mint categories that belong to each Spending Group are listed in each column
# Categories not assigned to a group will be assigned a Spending Group that matches the Category
# See mint-spending-groups-template.csv for an example
PATH_TO_SPENDING_GROUPS = "./mint-spending-groups.csv"

# Input file describing Spending Groups to exclude from the income analysis
# This is a CSV file that describes the Spending Groups whose transactions will be removed
# from the income data.  These are typically Spending Groups that have some credits
# but shouldn't be considered as income, such as Credit Card Payments, Transfers,
# or potentially categories that are used to track reimbursable business expenses
PATH_TO_GROUPS_TO_EXCLUDE_FROM_INCOME = "./exclude-from-income-groups.csv"

#################
# Configuration  - define which data to ignore in the per-year visualizations
#################

# Set this to skip data from old or incomplete years
# (columns for years before this value are not charted)
IGNORE_YEARS_BEFORE = 2014


#################
# Output Files  - define where generated files should be written to
#################
# Output file of just the transactions that generated net income
OUTPUT_INCOME_DATA = 'income.csv'

# Output file of the summarized income by Spending Group
OUTPUT_INCOME_BY_SPENDING_BY_GROUP = 'group_income.csv'




In [None]:
# Read in the dependencies
import pandas as pd
import sys
import webbrowser
import os

In [None]:
import extract_spending_data_methods as esd

# Read the raw mint transaction data into a dataframe indexed by the 'Date' column,
# with 'Amount' coerced to float.
parse_dates = ['Date']
try:
    df = pd.read_csv(PATH_TO_YOUR_TRANSACTIONS, parse_dates=parse_dates)
    df = df.set_index(['Date'])
    df['Amount'] = df['Amount'].astype(float)
except Exception as e:
    # Catch Exception rather than BaseException so that KeyboardInterrupt and
    # SystemExit are not reported as read failures.
    print('Failed to read mint transaction data: {}'.format(e))
    sys.exit(-1)


## Run through the transaction list from mint and add a Spending Group column
# Set show_group_details to True to get some output about which categories
# are being assigned to which group
try:
    df = esd.group_categories(df, PATH_TO_SPENDING_GROUPS, show_group_details=False)
except Exception as e:
    # Report why grouping failed before stopping; exiting silently made this
    # failure impossible to diagnose.
    print('Failed to assign spending groups: {}'.format(e))
    sys.exit(-1)


In [None]:
## Build a dataframe of income transactions for each year of transaction data
# and stack them into all_df, with each year's amounts in its own
# "<YEAR> Amount" column.
yearly_frames = []
found_rows = False
for year in df.index.year.unique():
    # Extract a year's worth of income data from the transaction data.
    # PATH_TO_GROUPS_TO_EXCLUDE_FROM_INCOME names the Spending Groups that
    # extract_income is asked to leave out (e.g. Credit Card Payments).
    # NOTE(review): the window runs from Dec 31 of the prior year to Jan 1 of
    # the next year; presumably extract_income treats both bounds as exclusive.
    # Confirm, otherwise boundary-day transactions land in two years.
    from_date = str(year-1) + '-12-31'
    to_date = str(year+1) + '-01-01'
    year_df = esd.extract_income(df, PATH_TO_GROUPS_TO_EXCLUDE_FROM_INCOME, from_date, to_date)

    # Rename without inplace so the frame returned by the helper is not mutated
    column_title = str(year)+' Amount'
    year_df = year_df.rename(columns={'Amount': column_title})

    if found_rows:
        yearly_frames.append(year_df)
    else:
        # Nothing collected so far contains any rows: restart from this year,
        # discarding earlier empty years.
        yearly_frames = [year_df]
        found_rows = not year_df.empty

# Concatenate once instead of re-copying the running total on every iteration.
# The list is reversed so the most recently processed year comes first.
all_df = pd.concat(yearly_frames[::-1]) if yearly_frames else pd.DataFrame()
    

In [None]:

# Write the raw income transaction data to disk as a csv
all_df.to_csv(OUTPUT_INCOME_DATA)

# Summarize income by Spending Group, by year.
# numeric_only=True keeps text columns out of the aggregation: on pandas >= 2.0
# a plain sum() would concatenate their strings into the summary. The cell that
# later reads this file parses each column name as "<YEAR> Amount".
income = all_df.groupby(['Spending Group']).sum(numeric_only=True)
income.to_csv(OUTPUT_INCOME_BY_SPENDING_BY_GROUP)

In [None]:
# Chart the income for every year present in the summarized data
import visualization_methods as vms

# Load the annual income-by-group summary file into a dataframe
df = vms.read_structured_transactions(
    OUTPUT_INCOME_BY_SPENDING_BY_GROUP,
    PATH_TO_YOUR_TRANSACTIONS,
    'Spending Group',
    'summarized income group data',
)

# Different years may contain different categories, so assign each group
# a color once and reuse it in every chart
colors = vms.assign_colors_to_groups(df)

# Column names are formatted "YEAR Amount"; the text before the first space is the year
for col in df.columns:
    year = col.partition(' ')[0]
    # Only chart years at or after the cutoff (older data may be dirty or incomplete)
    if int(year) >= IGNORE_YEARS_BEFORE:
        year_df = df[year+' Amount']
        if len(year_df) == 0:
            print('No data found for '+year)
        else:
            vms.visualize_expenses_by_group(year, year_df, colors, spending=False)


In [None]:
# Build a "summary" dataframe from the per-group annual income data
# and render it as a table in the notebook output
sum_df = vms.build_summary_table(df)
display(sum_df)

In [None]:
# Show the year over year details for every spending group, in sorted order
import pandas as pd

# Display floats with thousands separators and two decimal places
pd.set_option('display.float_format', '{:,.2f}'.format)

sorted_groups = sorted(all_df['Spending Group'].unique())
for group in sorted_groups:
    group_df = vms.build_category_details(all_df, group)
    print('Details for ' + group + ' Income')
    display(group_df)
    
