# My Dining Spending Habits
The purpose of this notebook is to examine my dining spending habits during my years in college. I will be using data I received from RIT Dining covering 08/2021 through 01/2022.

## Imports

In [None]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

## Step One: Gathering the Data

In [None]:
# Load one statement CSV per variable, in chronological order.
# Note from the file names: the December file stops at 2021-12-14 and the file
# bound to january_df covers 2021-12-16 through 2022-01-31.
august_df, september_df, october_df, november_df, december_df, january_df = (
    pd.read_csv(statement_path)
    for statement_path in (
        'sample-data/Dining Dollars (Meal Plan)_statement_2021-08-01_to_2021-08-31.csv',
        'sample-data/Dining Dollars (Meal Plan)_statement_2021-09-01_to_2021-09-30.csv',
        'sample-data/Dining Dollars (Meal Plan)_statement_2021-10-01_to_2021-10-31.csv',
        'sample-data/Dining Dollars (Meal Plan)_statement_2021-11-01_to_2021-11-30.csv',
        'sample-data/Dining Dollars (Meal Plan)_statement_2021-12-01_to_2021-12-14.csv',
        'sample-data/Dining Dollars (Meal Plan)_statement_2021-12-16_to_2022-01-31.csv',
    )
)


In [None]:
# Show the first three rows of the August statement.
august_df.head(3)

### Storing the DataFrames in lists categorized by semester

In [None]:
# Monthly statements for the fall semester, August through December.
fall_semester = [august_df, september_df, october_df, november_df, december_df]
# The spring semester currently holds only the statement bound to january_df.
spring_semester = [january_df]
# Both semester lists together, fall first.
all_semesters = [fall_semester, spring_semester]

## Step Two: Cleaning the Data

### Helper Functions to Clean Up Unneeded Info in the Description Column

In [None]:
def remove_brackets(row):
    """Return the part of ``row`` that comes before the first ``' ['``.

    A string that does not contain ``' ['`` is returned unchanged.
    """
    head, _, _ = row.partition(' [')
    return head

def remove_num_at_end(row):
    """Strip a trailing number, and any whitespace before it, from ``row``.

    The whole run of trailing ASCII digits is removed, so ``'Cafe 12'`` becomes
    ``'Cafe'`` rather than ``'Cafe '``, and ``'Cafe1'`` becomes ``'Cafe'`` rather
    than losing the letter in front of the digit.

    Parameters
    ----------
    row : str
        A description string.

    Returns
    -------
    str
        ``row`` without its trailing number, or ``row`` unchanged when it does
        not end in a digit (this includes the empty string).
    """
    without_digits = row.rstrip('0123456789')
    if without_digits == row:
        # Nothing was stripped: no trailing digit, or an empty string.
        return row
    # Also drop the separator that preceded the number.
    return without_digits.rstrip()

def remove_bad_suffixes(row):
    """Drop a trailing ``' OnDemand'`` and then a trailing ``' - Tablet'`` from ``row``.

    The suffixes are checked once each, in that order, so a string ending in
    ``' - Tablet OnDemand'`` loses both while one ending in
    ``' OnDemand - Tablet'`` only loses ``' - Tablet'``.
    """
    for unwanted in (' OnDemand', ' - Tablet'):
        row = row.removesuffix(unwanted)
    return row

def clean_up_description(row):
    """Normalize one description string.

    Applies, in order: ``remove_brackets``, ``remove_num_at_end`` and
    ``remove_bad_suffixes``, and returns the result.
    """
    return remove_bad_suffixes(remove_num_at_end(remove_brackets(row)))

### Helper Function for Creating New Time Columns

In [None]:
# Three-letter weekday labels keyed by weekday number, starting with 0 for 'Mon'.
day_of_week_map = dict(enumerate(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']))

def create_time_columns(t):
    """Split a timestamp into the parts used as extra DataFrame columns.

    Parameters
    ----------
    t : timestamp-like
        Must expose ``year``, ``month``, ``day_of_week`` and ``hour`` attributes.

    Returns
    -------
    pandas.Series
        Entries ``'Year'``, ``'Month'``, ``'Day Of Week'`` (the numeric weekday,
        not yet a label) and ``'Hour'``, in that order.
    """
    parts = {
        'Year': t.year,
        'Month': t.month,
        'Day Of Week': t.day_of_week,
        'Hour': t.hour,
    }
    return pd.Series(parts)

### Cleaning Up DataFrames and Combining Them Based on Semester

In [None]:
def clean_semester_data(semester):
    """Return cleaned copies of a semester's monthly statement DataFrames.

    For each frame in ``semester`` this:

    * normalizes ``Description`` with ``clean_up_description``,
    * parses ``Date`` into datetimes and sorts the rows by it,
    * adds ``Year``, ``Month``, ``Day Of Week`` (three-letter label from
      ``day_of_week_map``) and ``Hour`` columns derived from ``Date``,
    * keeps only the columns used by the analysis, in a fixed order.

    Neither the input list nor its DataFrames are modified, so the calling cell
    can be re-run without adding a second set of time columns to frames that
    were already cleaned.

    Parameters
    ----------
    semester : list of pandas.DataFrame
        Monthly statements. Each needs ``Description``, ``Amount``, ``Balance``
        and ``Date`` columns.

    Returns
    -------
    list of pandas.DataFrame
        One cleaned frame per input frame, in the same order, each with a fresh
        0..n-1 index.
    """
    column_order = ['Description', 'Amount', 'Balance', 'Date',
                    'Year', 'Month', 'Day Of Week', 'Hour']

    cleaned = []
    for statement in semester:
        # Work on a copy so the caller's DataFrame keeps its raw contents.
        monthly = statement.copy()
        monthly['Description'] = monthly['Description'].apply(clean_up_description)
        monthly['Date'] = pd.to_datetime(monthly['Date'])
        monthly = monthly.sort_values(by='Date').reset_index(drop=True)

        # Vectorized datetime accessors; unlike a row-wise apply these still
        # produce the columns when a statement has no rows.
        when = monthly['Date'].dt
        monthly['Year'] = when.year
        monthly['Month'] = when.month
        monthly['Day Of Week'] = when.day_of_week.map(day_of_week_map)
        monthly['Hour'] = when.hour

        cleaned.append(monthly[column_order])
    return cleaned

def aggregate_semester_data(semester):
    """Clean a semester's monthly statements and stack them into one DataFrame.

    Parameters
    ----------
    semester : list of pandas.DataFrame
        Monthly statements, passed through ``clean_semester_data`` first.

    Returns
    -------
    pandas.DataFrame
        All cleaned rows in list order with a fresh 0..n-1 index. ``Date`` holds
        plain ``datetime.date`` values; the time of day stays available through
        the ``Hour`` column.
    """
    monthly_frames = clean_semester_data(semester)

    # A single concat with ignore_index replaces pairwise concatenation followed
    # by reset_index and dropping the old index column.
    semester_df = pd.concat(monthly_frames, ignore_index=True)
    semester_df['Date'] = semester_df['Date'].dt.date

    return semester_df

In [None]:
# Clean the monthly statements and combine them into one DataFrame per semester.
fall_semester_df = aggregate_semester_data(fall_semester)
spring_semester_df = aggregate_semester_data(spring_semester)

## Step Three: Modeling and Analysing the Data

### The Fall Semester

#### Previewing the Fall Semester Data Frame

In [None]:
# Display the combined fall-semester DataFrame.
fall_semester_df

In [None]:
# Print the column dtypes and non-null counts of the fall data.
fall_semester_df.info()

In [None]:
# Line chart of the Balance column against Date for the fall semester.
px.line(fall_semester_df, x='Date', y='Balance', title='Balance Over Semester')

#### How Much Did I Spend This Semester

In [None]:
# Total spent = Balance on the first row minus Balance on the last row.
# NOTE(review): assumes the first row's Balance is the starting balance for the
# semester — confirm against the statement data.
answer = fall_semester_df['Balance'].iloc[0] - fall_semester_df['Balance'].iloc[-1]
print('This semester I spent ${0:.2f} in total'.format(answer))

In [None]:
# Absolute value of the summed Amount column for each fall month.
# August leaves out its first row (presumably the opening deposit — confirm).
augTotal = abs(
    fall_semester_df.loc[fall_semester_df['Month'] == 8, 'Amount'].iloc[1:].sum())
septTotal = abs(
    fall_semester_df.loc[fall_semester_df['Month'] == 9, 'Amount'].sum())
octTotal = abs(
    fall_semester_df.loc[fall_semester_df['Month'] == 10, 'Amount'].sum())
novTotal = abs(
    fall_semester_df.loc[fall_semester_df['Month'] == 11, 'Amount'].sum())
decTotal = abs(
    fall_semester_df.loc[fall_semester_df['Month'] == 12, 'Amount'].sum())

In [None]:
# One row per fall month holding the total computed for that month.
monthSpendingsDf = pd.DataFrame({'Month': ['August', 'September', 'October', 'November', 'December'], 'Total Spent': [
                                augTotal, septTotal, octTotal, novTotal, decTotal]})

# Bar chart of the monthly totals.
fig = px.bar(monthSpendingsDf, x='Month', y='Total Spent',
       title='Money Spent Each Month')
fig.show()

# Print the same totals as text, each formatted to two decimal places.
print("""In august I spent ${0:.2f},
In september I spent ${1:.2f},
In october I spent ${2:.2f},
In november I spent ${3:.2f},
In december I spent ${4:.2f}.""".format(augTotal, septTotal, octTotal, novTotal, decTotal))


In [None]:
import math

# Fall-semester months keyed by calendar month number; each entry carries the
# display name and the same number again under 'num'.
monthsDict = {
    num: {'name': name, 'num': num}
    for num, name in enumerate(
        ['August', 'September', 'October', 'November', 'December'], start=8)
}


def getDataFrameSubset(month_dict):
    """Return the rows of ``fall_semester_df`` that belong to one month.

    Parameters
    ----------
    month_dict : dict
        An entry of ``monthsDict``; only its ``'num'`` key is read.

    Returns
    -------
    pandas.DataFrame
        Rows whose ``Month`` equals ``month_dict['num']``. For August (8) the
        first matching row is left out (presumably the opening deposit —
        confirm against the data).
    """
    month_num = month_dict['num']
    in_month = fall_semester_df['Month'] == month_num
    first_row = 1 if month_num == 8 else 0
    return fall_semester_df[in_month].iloc[first_row:]


def groupByDate(monthDf):
    """Sum the absolute ``Amount`` values for each ``Date``.

    The input frame is not modified. Overwriting its ``Amount`` column would
    write into a slice of the semester DataFrame, which mutates the caller's
    data and raises pandas' SettingWithCopyWarning.

    Parameters
    ----------
    monthDf : pandas.DataFrame
        Needs ``Date`` and ``Amount`` columns.

    Returns
    -------
    pandas.Series
        Named ``Amount`` and indexed by ``Date``: the total of ``abs(Amount)``
        for every distinct date, sorted by date.
    """
    spent = monthDf['Amount'].abs()
    return spent.groupby(monthDf['Date']).sum()


def createGraph(monthSpendingsPerDay, month_dict):
    """Show a bar chart of the per-day spending for one month.

    Parameters
    ----------
    monthSpendingsPerDay : pandas.Series
        Per-day totals; plotted with ``'Amount'`` on the y axis.
    month_dict : dict
        An entry of ``monthsDict``; its ``'name'`` goes into the chart title.
    """
    chart_title = 'Money Spent Per Day in {0}'.format(month_dict['name'])
    px.bar(monthSpendingsPerDay, y='Amount', title=chart_title).show()


def round_up(n, decimals=0):
    """Round ``n`` up (towards positive infinity) to ``decimals`` decimal places.

    Parameters
    ----------
    n : number
        Value to round.
    decimals : int, optional
        Number of decimal places to keep; defaults to 0.

    Returns
    -------
    float
        The smallest multiple of ``10 ** -decimals`` that is not below ``n``,
        subject to floating-point precision.
    """
    scale = 10 ** decimals
    scaled_ceiling = math.ceil(n * scale)
    return scaled_ceiling / scale


def getMeanSpending(month_num):
    """Return the mean per-day spending for a month, rounded up to cents.

    Parameters
    ----------
    month_num : int
        A key of ``monthsDict``.

    Returns
    -------
    float
        Mean of the per-day totals from ``groupByDate``, passed through
        ``round_up`` with two decimals.
    """
    per_day = groupByDate(getDataFrameSubset(monthsDict[month_num]))
    return round_up(per_day.mean(), 2)


def spendingInMonthPerDay(month_dict):
    """Chart one month's per-day spending and print its daily average.

    Parameters
    ----------
    month_dict : dict
        An entry of ``monthsDict`` (``'name'`` and ``'num'`` keys).

    The printed day count is the number of distinct dates that have at least
    one row, as produced by ``groupByDate``.
    """
    per_day = groupByDate(getDataFrameSubset(month_dict))

    createGraph(per_day, month_dict)

    summary = 'On average in {0} I spent ${1:.2f} per day over {2} days'
    print(summary.format(month_dict['name'], per_day.mean(), per_day.count()))

In [None]:
# Per-day spending chart and daily average for August.
spendingInMonthPerDay(monthsDict[8])

In [None]:
# Per-day spending chart and daily average for September.
spendingInMonthPerDay(monthsDict[9])

In [None]:
# Per-day spending chart and daily average for October.
spendingInMonthPerDay(monthsDict[10])

In [None]:
# Per-day spending chart and daily average for November.
spendingInMonthPerDay(monthsDict[11])

In [None]:
# Per-day spending chart and daily average for December.
spendingInMonthPerDay(monthsDict[12])

In [None]:
# Mean per-day spend for each month in monthsDict order (August..December),
# which is the same order as the rows of monthSpendingsDf.
avgSpendingEachMonth = [getMeanSpending(month_num) for month_num in monthsDict]

monthSpendingsDf['Average Spent Per Day'] = avgSpendingEachMonth
monthSpendingsDf

In [None]:
# Bar chart of the average spent per day for each month.
fig = px.bar(monthSpendingsDf, x='Month', y='Average Spent Per Day',
             title='Average Money Spent Per Day Each Month')
fig.show()

In [None]:
# Two stacked panels: monthly totals on top, per-day averages below.
fig = make_subplots(rows=2, cols=1, subplot_titles=(
    'Money Spent Each Month', 'Average Money Spent Per Day Each Month'))

# Row 1 plots 'Total Spent'; row 2 plots 'Average Spent Per Day'.
fig.add_trace(
    go.Bar(x=monthSpendingsDf['Month'],
           y=monthSpendingsDf['Total Spent'], name='Total Per Month'),
    row=1, col=1
)
fig.add_trace(
    go.Bar(x=monthSpendingsDf['Month'],
           y=monthSpendingsDf['Average Spent Per Day'], name='Average Per Day'),
    row=2, col=1
)

# The x values are month names and the second panel is a per-day average, so
# the axis titles are labelled accordingly.
fig.update_xaxes(title_text="Month", row=1, col=1)
fig.update_xaxes(title_text="Month", row=2, col=1)

fig.update_yaxes(title_text="Money Spent", row=1, col=1)
fig.update_yaxes(title_text="Average Spent Per Day", row=2, col=1)

# update_layout returns the figure, so leaving it as the cell's last expression
# also renders the chart.
fig.update_layout(
    title_text="Comparing Total Spent Per Month and Average Spent Per Day Each Month", height=700)

In [None]:
# Show the first row of the fall data; the day-of-week cells below skip this row
# with .iloc[1:] (presumably it is the opening deposit — confirm).
fall_semester_df.head(1)

In [None]:
# Absolute value of the summed Amount per weekday (first row of the semester
# excluded), ordered Sunday through Saturday.
spendingsPerDayOfWeek = (
    fall_semester_df.iloc[1:]
    .groupby(by="Day Of Week")["Amount"]
    .sum()
    .abs()
    .loc[['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']]
)
px.bar(spendingsPerDayOfWeek, y='Amount', title='Spendings Per Day Of Week')

In [None]:
# Number of Amount entries on each weekday (first row of the semester excluded),
# ordered Sunday through Saturday.
fall_semester_df.iloc[1:].groupby(
    by="Day Of Week")["Amount"].count()[[
        'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']]

In [None]:
# Absolute value of the mean Amount per weekday (first row of the semester
# excluded), ordered Sunday through Saturday.
meanSpendingsPerDayOfWeek = (
    fall_semester_df.iloc[1:]
    .groupby(by="Day Of Week")["Amount"]
    .mean()
    .abs()
    .loc[['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']]
)
px.bar(meanSpendingsPerDayOfWeek, y='Amount',
       title='Mean Spending Per Day Of Week')

##### Conclusion

After looking at the top level of my spending habits, I realized that I spent the most in the second month of the semester.

### The Spring Semester

In [None]:
# Show the first five rows of the spring-semester DataFrame.
spring_semester_df.head()

In [None]:
# Print the column dtypes and non-null counts of the spring data.
spring_semester_df.info()