In [26]:
import datetime as dt
import pandas as pd

from budget_analytics.utils.fileio import CardsFileIO, FileIO
from budget_analytics.utils.recurring import expand_recurring
from budget_analytics.constants import DataSource, User, CashflowDirection, BankAccount, ExpenditureCategory

In [27]:
expenditure_amex = CardsFileIO(user=User.MICHAEL).read_statement(BankAccount.AMEX_CASHBACK)

In [28]:
df1 = expenditure_amex[["Description", "Category"]]
df1["Description"] = df1["Description"].str.split(r"[\s*]+").map(lambda s: " ".join(s).strip().lower())
df1.index = df1["Description"]
amex_cat_map = df1.drop(columns="Description").to_dict()["Category"]

def categorize(value: str, mapping: dict = amex_cat_map) -> str | None:
    value = value.lower()
    if value in mapping.keys():
        return mapping[value]
    else:
        for val, cat in mapping.items():
            if value in val:
                return cat
        return None

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1["Description"] = df1["Description"].str.split(r"[\s*]+").map(lambda s: " ".join(s).strip().lower())


In [29]:
expenditure_chase = CardsFileIO(user=User.MICHAEL).read_statement(BankAccount.CHASE_BA)
expenditure_chase["Category"] = expenditure_chase["Description"].map(categorize)

In [30]:
expenditure_chase

Unnamed: 0,Date,Description,Amount,Time,Category
0,2025-07-01,Bread Ahead,-12.45,10:35,
1,2025-07-01,Bread Ahead,-3.94,10:35,
2,2025-07-01,Lidl,-21.63,11:15,groceries
3,2025-07-02,Eat Tokyo Tonkotsu Ya,-30.90,10:46,
4,2025-07-02,Sushidog Limit - Zettle,-8.45,16:12,
...,...,...,...,...,...
71,2025-08-30,American Express,-1182.62,13:44,
72,2025-08-31,Victoria Coffee House,-30.55,16:02,
73,2025-08-31,Greggs,-6.05,16:53,
74,2025-08-31,Eight Bells Inn,-9.40,16:55,


In [None]:
expenditure_amex.head(3)

In [None]:
expenditure_amex.groupby(by="Date")["Amount_GBP"].sum().plot()


In [None]:

expenditure_amex.loc[expenditure_amex.Category=="restaurant"].groupby(by="Town/City")["Amount_GBP"].sum().plot.pie()

In [None]:
def foo(x):
    return "weekend" if x.weekday()>=5 else "weekday"

In [None]:
df1 = expenditure_amex.groupby(by="Date")[["Amount_GBP"]].sum()

In [None]:
df1["Weekday"]=df1.index.map(lambda x: x.weekday())

In [None]:
df1.groupby(by="Weekday")["Amount_GBP"].mean().plot()

In [None]:
expenditure_amex.loc["Eat Tokyo" in expenditure_amex.Description]

In [None]:
expenditure_amex.loc[expenditure_amex.Description.str.contains("Lidl", case=False)]["Amount_GBP"].mean()

In [None]:
expenditure_amex.loc[expenditure_amex.Description.str.contains("asda", case=False)]["Amount_GBP"].mean()

In [None]:
df1=expenditure_amex["Description"].value_counts()

In [None]:
df2=expenditure_amex.groupby(by="Description")["Amount_GBP"].mean().sort_values(ascending=False)

In [None]:
pd.concat([df1,df2], axis=1).sort_values(by=["Amount_GBP"], ascending=False)

In [None]:
expenditure_amex.loc[expenditure_amex["Description"].str.contains("MYUNGGA",case=False)]

In [None]:
expenditure_amex["Weekday"]=expenditure_amex.Date.map(lambda x: x.weekday())
expenditure_amex.
expenditure_amex.groupby(by="Weekday")["Amount_GBP"].sum()

In [None]:
expenditure_amex.loc[expenditure_amex.Weekday=="weekend"].groupby(by="Date")["Amount_GBP"].sum().mean()


In [None]:
expenditure_amex.groupby("Category", sort=False).Amount_GBP.sum().plot.pie()

In [None]:
user = User.MICHAEL
start_date = dt.date(2025, 9, 1)
end_date = dt.date(2025, 9, 30)

expenditure_amex = CardsFileIO(user=user).read_statement(BankAccount.AMEX_CASHBACK)
expenditure_amex = expenditure_amex.loc[
    (expenditure_amex.Date >= start_date) & (expenditure_amex.Date <= end_date)
]
expenditure_amex["Direction"] = CashflowDirection.OUTFLOW.value
expenditure_amex["Data_Source"] = DataSource.AMEX.value
expenditure_amex["Bank_Account"] = BankAccount.AMEX_CASHBACK.value
recurring = expand_recurring(
    FileIO(user=user).read_recurring(), start_date, end_date
)
recurring["Data_Source"] = DataSource.RECURRING.value
recurring["Bank_Account"] = None
cashflows = pd.concat([expenditure_amex[recurring.columns], recurring], axis=0).sort_values(["Date", "Amount_GBP"]).reset_index(drop=True)

In [None]:
expenditure_amex

In [None]:
expenditure_amex.groupby("Category", sort=False).Amount_GBP.sum().sum()

In [None]:
from budget_analytics.constants import ExpenditureCategory
expenditure_amex.loc[expenditure_amex.Category==ExpenditureCategory.OTHER.value]

In [None]:
recurring = expand_recurring(
    FileIO(user=user).read_recurring(), start_date, dt.date(2026,3,1)
)

In [None]:
recurring["Cumulative_Amount"] = (recurring.Amount_GBP * recurring.Direction.map(lambda x: CashflowDirection(x).multiplier)).cumsum()

In [None]:
recurring_net =

In [None]:
recurring

In [None]:
cashflows.Amount_GBP * cashflows.Direction.map(lambda x: CashflowDirection(x).multiplier)

In [None]:
cashflows

In [None]:
(cashflows.Amount_GBP * cashflows.Direction.map(lambda x: CashflowDirection(x).multiplier)).sum()

In [None]:
list(FileIO(data_path="cards_michael/chase_ba").read_all_csv(skiprows=1).values())[0].reset_index()

In [None]:
from budget_analytics.constants import Calendar
from budget_analytics.utils.calendar import get_calendar_dates

dates = get_calendar_dates(Calendar.GB, dt.date(2010,1,1), dt.date(2010,1,31))

In [None]:
dates

In [None]:
import holidays as hol
dt.date(2020,1,1) in hol.country_holidays(country="GB", subdiv="ENG")

In [None]:
list(map(lambda d: d.date(), dates))

In [None]:
dfs = FileIO(data_path="cards/amex_michael").read_all_csv()
df = pd.concat(dfs.values(), axis=0)
df["Date"] = df["Date"].map(lambda s: dt.datetime.strptime(s, "%d/%m/%Y").date())
df = df.sort_values(by=["Date"]).reset_index(drop=True)
df = df[["Date","Category","Description", "Amount","Extended Details"]]

In [None]:
df

In [None]:
df.sort_values("Category")

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1,1,figsize=(10,5))
df1 = df.groupby(["Category","Date"])["Amount"].sum().reset_index("Category").pivot(columns="Category").fillna(0).cumsum()
df1.iloc[-1].sort_values(ascending=False)
plt.legend(fontsize=7)
plt.show()

In [None]:
df1 = df.groupby(["Category","Date"])["Amount"].sum().reset_index("Category").pivot(columns="Category").fillna(0).cumsum()

In [None]:
df1.iloc[-1].sort_values(ascending=False).index

In [None]:
df.loc[(df.Date >= dt.date(2025, 8, 1)) & (df.Date <= dt.date(2025, 8, 31)) & (df.Category == "Entertainment-Restaurants")].Amount.sum()

In [None]:
df.loc[(df.Date >= dt.date(2025, 8, 1)) & (df.Date <= dt.date(2025, 8, 31))].groupby("Category")["Amount"].sum()

In [None]:
df.loc[(df.Date >= dt.date(2025, 8, 1)) & (df.Date <= dt.date(2025, 8, 31))].sort_values("Category")

In [None]:
df.loc[(df.Date >= dt.date(2025, 8, 1)) & (df.Date <= dt.date(2025, 8, 31)) & ~df["Extended Details"].isna()]

In [None]:
df = dfs["activity_month_20250908"]

In [None]:
df