In [None]:
import re
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply matplotlib's built-in "default" style sheet before any figure is drawn.
plt.style.use("default")

In [None]:
# Location of the CSV export, resolved against the current user's home
# directory so the notebook is not tied to one machine's absolute path.
file_path = Path.home() / "Downloads" / "balance.csv"   # change path if needed

# "warn" loads exactly the rows that "skip" would, but reports every malformed
# line it drops, so missing transactions do not go unnoticed.
df = pd.read_csv(file_path, on_bad_lines="warn")
df.head()

In [None]:
# Parse the timestamp column into real datetimes.
df["date_time"] = pd.to_datetime(df["date_time"])

# Force the amount columns to numeric; anything unparseable becomes NaN.
num_cols = ["incoming amt", "outgoing amt", "closing balance"]
df[num_cols] = df[num_cols].apply(pd.to_numeric, errors="coerce")

# A missing incoming/outgoing amount is treated as zero.
# "closing balance" is deliberately left as NaN where it could not be parsed.
df[["incoming amt", "outgoing amt"]] = df[["incoming amt", "outgoing amt"]].fillna(0)

# Order chronologically. The sort must be stable: rows that share a timestamp
# keep their original file order, otherwise the default (unstable) sort may
# shuffle them and distort the closing-balance line plotted later.
df = df.sort_values("date_time", kind="stable")

# Derived columns used by the charts below.
df["date"] = df["date_time"].dt.date                      # calendar day
df["month"] = df["date_time"].dt.to_period("M")           # monthly period for grouping
df["net_flow"] = df["incoming amt"] - df["outgoing amt"]  # positive = money gained

df.head()

In [None]:
# Line chart of the closing balance against the transaction timestamp.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df["date_time"], df["closing balance"])
ax.set(
    title="Account Balance Over Time",
    xlabel="Date",
    ylabel="Closing Balance",
)
# Tilt the x tick labels by 45 degrees.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Bar chart of money in (positive bars) and money out (negative bars) per row.
fig, ax = plt.subplots(figsize=(10, 5))
timestamps = df["date_time"]
flows = (
    ("Incoming", df["incoming amt"]),
    ("Outgoing", -df["outgoing amt"]),  # negated so the bars point downwards
)
for series_label, amounts in flows:
    ax.bar(timestamps, amounts, label=series_label)
ax.set_title("Cash Flow Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Amount")
ax.legend()
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Total incoming and outgoing amounts for each month.
monthly = df.groupby("month")[["incoming amt", "outgoing amt"]].sum()

# Grouped bar chart: one pair of bars per month.
ax = monthly.plot.bar(figsize=(10, 5))
ax.set_title("Monthly Income vs Expenses")
ax.set_xlabel("Month")
ax.set_ylabel("Amount")
plt.setp(ax.get_xticklabels(), rotation=45)
ax.figure.tight_layout()
plt.show()

In [None]:
# Net flow (incoming minus outgoing) of every transaction over time.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df["date_time"], df["net_flow"])
ax.axhline(0)  # reference line separating net gains from net losses
ax.set(
    title="Net Savings Per Transaction",
    xlabel="Date",
    ylabel="Net Flow",
)
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# The ten rows with the largest positive outgoing amount.
top_expenses = df[df["outgoing amt"] > 0].nlargest(10, "outgoing amt")

# Bars are placed by position rather than by description text. Using the
# description as a categorical axis would draw rows that share a description
# on top of each other, and a missing description could make matplotlib fail.
expense_labels = [
    "(no description)" if pd.isna(text) else str(text)
    for text in top_expenses["desc"]
]
bar_positions = list(range(len(top_expenses)))

fig, ax = plt.subplots(figsize=(10, 5))
ax.barh(bar_positions, top_expenses["outgoing amt"].to_numpy())
ax.set_yticks(bar_positions)
ax.set_yticklabels(expense_labels)
ax.invert_yaxis()  # largest expense at the top of the chart
ax.set_title("Top 10 Largest Expenses")
ax.set_xlabel("Amount")
ax.set_ylabel("Transaction")
fig.tight_layout()
plt.show()

In [None]:
# Ordered (category, pattern) rules applied to the lowercased description.
# The first rule that matches wins, so this order is the matching priority.
_CATEGORY_RULES = (
    ("Shopping", re.compile(r"amazon|flipkart")),
    ("Food", re.compile(r"zomato|swiggy")),
    # "rent" and "ola" are short enough to occur inside unrelated words
    # ("current", "chocolate"), so they must not be flanked by other letters.
    ("Rent", re.compile(r"(?<![a-z])rent(?![a-z])")),
    ("Travel", re.compile(r"uber|(?<![a-z])ola(?:cabs|money)?(?![a-z])")),
)


def categorize(desc):
    """Map a transaction description to a coarse spending category.

    Matching is case-insensitive. Distinctive merchant names (amazon,
    flipkart, zomato, swiggy, uber) match anywhere in the text. The short
    keywords "rent" and "ola" only match when no letter sits directly before
    or after them, so "Current account" is not "Rent" and "Chocolate" is not
    "Travel". Digits and punctuation may still touch them ("OLA-123").

    Parameters
    ----------
    desc : str
        Free-text transaction description.

    Returns
    -------
    str
        One of "Shopping", "Food", "Rent", "Travel" or "Other". Anything that
        is not a string (for example None or NaN) yields "Other".
    """
    if not isinstance(desc, str):
        return "Other"

    text = desc.lower()
    for category, pattern in _CATEGORY_RULES:
        if pattern.search(text):
            return category
    return "Other"

# Tag every row with a category; descriptions are cast to str first so that
# missing values reach categorize() as text.
df["category"] = df["desc"].astype(str).map(categorize)

In [None]:
# Total outgoing amount per category.
category_spend = df.groupby("category")["outgoing amt"].sum()

# Plot magnitudes only, and drop categories with no spend: zero-width wedges
# would only add overlapping "0.0%" labels to the chart.
spend_to_plot = category_spend.abs()
spend_to_plot = spend_to_plot[spend_to_plot > 0]

if spend_to_plot.empty:
    # A pie chart of an all-zero total is undefined, so skip drawing it.
    print("No outgoing transactions to chart.")
else:
    fig, ax = plt.subplots(figsize=(6, 6))
    spend_to_plot.plot(kind="pie", autopct="%1.1f%%", ax=ax)
    ax.set_title("Spending by Category")
    ax.set_ylabel("")  # hide the column name pandas puts on the y axis
    plt.show()