## Chase Credit Card Transaction History Analysis


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the transaction export; the path is relative to the notebook's working directory.
df = pd.read_csv('data/chase.csv')
# Preview the first rows to check that the columns parsed as expected.
df.head()

In [None]:
# Summarise the freshly loaded frame (columns, dtypes, non-null counts) before any cleaning.
df.info()

In [None]:
# Chase-specific cleaning: discard rows whose Type is a payment, adjustment
# or fee, and remove two columns that the cells below never reference.
non_purchase_types = ['Payment', 'Adjustment', 'Fee']
df = (
    df.loc[~df['Type'].isin(non_purchase_types)]
    .drop(columns=['Post Date', 'Memo'])
    # Flip the sign of every amount (presumably the export stores purchases
    # as negative values, so this makes spend positive -- confirm against the CSV).
    .assign(Amount=lambda rows: rows['Amount'] * -1)
)
df.head()

In [None]:
from cleaning import clean_df

# Run the shared project cleaning helper over the Chase-specific frame.
# NOTE(review): clean_df lives in the local `cleaning` module and is not
# visible here -- check there for what it changes.
df = clean_df(df)

# Dump every description (without the index) to eyeball the cleaned values.
description_listing = df['Description'].to_string(index=False)
print(description_listing)

In [None]:
# Summarise the frame again, now that clean_df has been applied, to see the resulting columns and dtypes.
df.info()

### Start Analysis

1. Top Spend categories by percentage money spent
2. TODO: One place spent just like you (?)
3. Number of distinct places where money was spent
4. Top 5 Places visited by frequency
5. Total money spent and the day with the highest spend
6. Top 5 places visited by money spent


In [None]:
# Overall spend, used as the denominator for every category's share.
total_spent = df['Amount'].sum()

# Per-category spend, then the same figures as a percentage of the total.
category_spent = df.groupby('Category')['Amount'].sum()
category_percentage = category_spent.div(total_spent).mul(100)

# Largest share first so the bars read from biggest to smallest.
top_categories = category_percentage.sort_values(ascending=False)

# One bar per category; the labels are rotated so long names do not overlap.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=top_categories.index, y=top_categories.values, ax=ax)
ax.set_title('Top Spend Categories by Percentage of Total Money Spent')
ax.set_xlabel('Category')
ax.set_ylabel('Percentage of Total Money Spent')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Count the distinct Description values (analysis item 3: how many different places).
df["Description"].nunique()

In [None]:
# Ten most frequent descriptions; value_counts already sorts by count, descending.
df["Description"].value_counts().head(10)

In [None]:
# Total spend per transaction date.
date_spent = df.groupby('Transaction Date')['Amount'].sum()

# The date whose total is highest, and that highest total itself.
date_with_most_spent = date_spent.idxmax()
amount_spent = date_spent.max()

(date_with_most_spent, amount_spent)

In [None]:
# Sum spend per description, then keep the five largest totals.
spend_by_description = df.groupby('Description')['Amount'].sum()
top_description = spend_by_description.nlargest(5)
top_description

Some other ideas to explore:
- distribution of purchases
- average price of a meal
- weekend vs weekday spend
- total spend on each day of the week

In [None]:
# Distribution of individual transaction amounts across all categories.
sns.displot(data=df, x='Amount')

In [None]:
# Distribution of transaction amounts within the 'Food & Drink' category,
# in bins 2 units wide.
# Name the column explicitly: handing displot a whole DataFrame makes seaborn
# treat it as wide-form data and plot every numeric column it finds as a
# separate hue, which only looks right while 'Amount' is the sole numeric
# column, and leaves the x-axis unlabelled.
sns.displot(
    data=df[df['Category'] == 'Food & Drink'],
    x='Amount',
    binwidth=2,
)

In [None]:
# Average 'Food & Drink' transaction, restricted to amounts from 7 to 50
# inclusive (presumably to approximate a single meal by leaving out small
# purchases and large bills -- confirm the intended bounds).
is_food_drink = df['Category'] == 'Food & Drink'
is_meal_sized = df['Amount'].between(7, 50)
food_drink_transactions = df[is_food_drink & is_meal_sized]

average_amount = food_drink_transactions['Amount'].mean()
average_amount

In [None]:
# Parse 'Transaction Date' from its MM/DD/YYYY text form so the .dt accessor
# (used for weekday names below) is available.
df['Transaction Date'] = pd.to_datetime(df['Transaction Date'], format='%m/%d/%Y')

# Total spend per weekday. groupby returns the weekday names sorted
# alphabetically (Friday, Monday, Saturday, ...), which scrambles the x-axis,
# so reindex into calendar order; weekdays with no transactions show as 0.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
day_spent = (
    df.groupby(df['Transaction Date'].dt.day_name())['Amount']
    .sum()
    .reindex(weekday_order, fill_value=0)
)

# Plot the total money spent by day, Monday through Sunday.
plt.figure(figsize=(10, 6))
sns.barplot(x=day_spent.index, y=day_spent.values)
plt.title('Total Money Spent by Day')
plt.xlabel('Day')
plt.ylabel('Total Money Spent')
plt.xticks(rotation=45)
plt.show()

In [None]:
# All transactions that fall on a Tuesday, largest amount first.
# Relies on 'Transaction Date' already being a datetime column.
is_tuesday = df['Transaction Date'].dt.day_name() == 'Tuesday'
tuesday_transactions = df[is_tuesday].sort_values('Amount', ascending=False)
tuesday_transactions