<a href="https://colab.research.google.com/github/priyu9-star/BudgetWise-AI-based-Expense-Forecasting-Tool-Batch-6-Team-C-/blob/main/AIExpenseTracker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
#Run
!pip install -q flask pyngrok plotly pandas matplotlib pyarrow

In [34]:
# Ask for a file through the Colab upload helper (Colab-only API).
# A later cell reads the uploaded file name from `uploaded.keys()`.
from google.colab import files
uploaded = files.upload()

Saving Personal_Finance_Dataset.csv to Personal_Finance_Dataset (2).csv


In [35]:
import re
import json
from pathlib import Path
from typing import Optional
import pandas as pd
import numpy as np

In [36]:
# The upload result is keyed by file name; the first key is the file to analyse.
uploaded_names = list(uploaded)
filename = uploaded_names[0]
print("‚úÖ Uploaded file:", filename)

# Quick look at the raw CSV: its dimensions and the first rows.
df_preview = pd.read_csv(filename)
print("Shape:", df_preview.shape)
df_preview.head()

‚úÖ Uploaded file: Personal_Finance_Dataset (2).csv
Shape: (1500, 5)


Unnamed: 0,Date,Transaction Description,Category,Amount,Type
0,2020-01-02,Score each.,Food & Drink,1485.69,Expense
1,2020-01-02,Quality throughout.,Utilities,1475.58,Expense
2,2020-01-04,Instead ahead despite measure ago.,Rent,1185.08,Expense
3,2020-01-05,Information last everything thank serve.,Investment,2291.0,Income
4,2020-01-13,Future choice whatever from.,Food & Drink,1126.88,Expense


In [37]:
from datetime import datetime

# Path of the uploaded CSV and the directory that receives analysis outputs.
CSV_PATH = Path(filename)
OUTPUT_DIR = Path("/content/expense_analysis_outputs")
# parents=True also creates missing parent directories, so this does not raise
# FileNotFoundError when /content is absent (e.g. outside Colab).
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

In [38]:
# Candidate header names for each logical column, in priority order.
# guess_col() compares them case-insensitively against the CSV's headers.
DATE_COLS = ["date", "transaction_date", "txn_date", "posted_date"]
AMOUNT_COLS = ["amount", "amt", "value", "transaction_amount", "debit", "credit"]
CATEGORY_COLS = ["category", "cat", "expense_category", "merchant_category"]
# The "transaction description" spellings cover the uploaded dataset's own
# "Transaction Description" header, which the original list did not match.
DESC_COLS = [
    "description", "memo", "narration", "details",
    "transaction description", "transaction_description",
]
TYPE_COLS = ["type", "transaction_type", "kind"]

In [39]:
# Matches any character that is not a digit, '.', '-' or '+'.
# clean_amount() removes these matches before calling float().
_amount_re = re.compile(r"[^\d\.\-\+]")

In [40]:
def clean_amount(x):
    """Convert one raw amount cell to a float.

    Parameters
    ----------
    x : Any
        A scalar cell value: a number, a string such as ``"$1,234.50"`` or
        ``"(75.00)"``, or a missing value.

    Returns
    -------
    float or None
        The parsed amount, or ``None`` when the value is missing or what is
        left after cleaning is not a valid number.
    """
    if pd.isna(x):
        return None
    if isinstance(x, (int, float, np.number)):
        return float(x)
    s = str(x).strip()
    # Accounting notation: a value wrapped in parentheses is negative.
    if s.startswith("(") and s.endswith(")"):
        s = "-" + s[1:-1]
    # Drop currency symbols, thousands separators and any other non-numeric text.
    s = _amount_re.sub("", s)
    try:
        return float(s)
    except ValueError:
        # Only a failed parse means "no amount"; a bare `except` would also
        # swallow KeyboardInterrupt and SystemExit.
        return None

In [41]:
def guess_col(candidates, columns):
    """Return the actual column name matching the first candidate, ignoring case.

    Candidates are tried in order. The result is the column's original
    spelling from ``columns``, or ``None`` when no candidate matches.
    """
    by_lower = {}
    for col in columns:
        # A later column overwrites an earlier one with the same lowercase form.
        by_lower[col.lower()] = col
    hits = (by_lower[cand.lower()] for cand in candidates if cand.lower() in by_lower)
    return next(hits, None)

# Load the CSV and work out which of its columns play each role.
df = pd.read_csv(CSV_PATH, low_memory=False)
cols = df.columns

DATE_COL = guess_col(DATE_COLS, cols)
AMOUNT_COL = guess_col(AMOUNT_COLS, cols)
CATEGORY_COL = guess_col(CATEGORY_COLS, cols)
DESC_COL = guess_col(DESC_COLS, cols)
TYPE_COL = guess_col(TYPE_COLS, cols)

print(f"Detected ‚Üí Date:{DATE_COL}  Amount:{AMOUNT_COL}  Category:{CATEGORY_COL}")

# Date and amount are mandatory. Without this check the code below would fail
# with an opaque KeyError on df[None].
missing = [label for label, col in (("date", DATE_COL), ("amount", AMOUNT_COL)) if col is None]
if missing:
    raise ValueError(
        f"Could not detect required column(s): {', '.join(missing)}. "
        f"Available columns: {list(cols)}"
    )

# Unparseable dates become NaT and their rows are dropped. The .copy() gives an
# independent frame so the column assignments below do not target a slice.
df[DATE_COL] = pd.to_datetime(df[DATE_COL], errors="coerce")
df = df.dropna(subset=[DATE_COL]).copy()

# Rows whose amount cannot be parsed are dropped as well.
df["Amount"] = df[AMOUNT_COL].map(clean_amount)
df = df.dropna(subset=["Amount"]).copy()

# Optional columns fall back to a constant when they were not detected.
df["Category"] = df[CATEGORY_COL].fillna("Uncategorized") if CATEGORY_COL else "Uncategorized"
df["Description"] = df[DESC_COL] if DESC_COL else ""
df["Type"] = df[TYPE_COL].astype(str).str.lower() if TYPE_COL else ""

# Grouping keys used by the summaries in later cells.
df["YearMonth"] = df[DATE_COL].dt.to_period("M").astype(str)
df["Day"] = df[DATE_COL].dt.date

Detected ‚Üí Date:Date  Amount:Amount  Category:Category


In [42]:
# Decide which sign convention the amounts follow, then derive non-negative
# "Expense" and "Income" columns from it.
def _negated_if_negative(value):
    """Return ``-value`` for a negative value, otherwise 0."""
    return -value if value < 0 else 0


def _kept_if_positive(value):
    """Return ``value`` for a positive value, otherwise 0."""
    return value if value > 0 else 0


amounts = df["Amount"]
nonzero = amounts[amounts != 0]
neg_frac = (nonzero < 0).mean() if len(nonzero) > 0 else 0

if neg_frac >= 0.5:
    # At least half of the non-zero amounts are negative: negatives are spending.
    df["Expense"] = amounts.apply(_negated_if_negative)
    df["Income"] = amounts.apply(_kept_if_positive)
    sign_rule = "expenses_are_negative"
elif TYPE_COL:
    # Amounts are mostly positive: use the transaction type to tell spending apart.
    t = df["Type"]
    exp_mask = t.isin(["expense", "debit", "withdrawal", "payment", "spent"])
    df["Expense"] = amounts.where(exp_mask, 0).abs()
    df["Income"] = amounts.where(~exp_mask, 0).abs()
    sign_rule = "expenses_are_positive_or_type_based"
else:
    # No type column: positive amounts are spending, negative ones are income.
    df["Expense"] = amounts.apply(_kept_if_positive)
    df["Income"] = amounts.apply(_negated_if_negative)
    sign_rule = "expenses_are_positive_or_type_based"

print("Sign inference:", sign_rule)

Sign inference: expenses_are_positive_or_type_based


In [43]:
# Rows that carry a positive expense, copied so later changes do not touch df.
has_expense = df["Expense"] > 0
expenses = df.loc[has_expense].copy()

# Per-month totals over all rows, plus the number of transactions.
monthly_grouped = df.groupby("YearMonth")
monthly_totals = monthly_grouped.agg(
    Total_Expense=("Expense", "sum"),
    Total_Income=("Income", "sum"),
    Transactions=("Amount", "count"),
).reset_index()

# Per-category expense statistics, largest total first.
category_stats = expenses.groupby("Category").agg(
    Total=("Expense", "sum"),
    Count=("Expense", "count"),
    Avg=("Expense", "mean"),
    Median=("Expense", "median"),
)
cat_summary = category_stats.reset_index().sort_values("Total", ascending=False)
overall_expense = cat_summary["Total"].sum()
cat_summary["Percent_of_Total"] = (cat_summary["Total"] / overall_expense * 100).round(2)

# Total spent per calendar day, highest first; the top five are the peak days.
daily_expense = expenses.groupby("Day")["Expense"].sum().reset_index()
peak_days = daily_expense.sort_values("Expense", ascending=False)
peak_days_top = peak_days.head(5)

# For every month, keep the single day with the highest total expense.
daily_by_month = expenses.groupby(["YearMonth", "Day"])["Expense"].sum().reset_index()
busiest_rows = daily_by_month.groupby("YearMonth")["Expense"].idxmax()
peak_per_month = daily_by_month.loc[busiest_rows].rename(
    columns={"Day": "Peak_Spending_Day", "Expense": "Peak_Total"}
)

# Show each summary under its heading.
for heading, table in (
    ("‚úÖ Monthly Totals", monthly_totals.head()),
    ("\n‚úÖ Category Summary", cat_summary.head()),
    ("\n‚úÖ Peak Days", peak_days_top),
):
    print(heading)
    display(table)

‚úÖ Monthly Totals


Unnamed: 0,YearMonth,Total_Expense,Total_Income,Transactions
0,2020-01,17138.25,5578.0,17
1,2020-02,17108.41,20070.0,22
2,2020-03,13581.81,3465.0,17
3,2020-04,16233.05,7370.0,17
4,2020-05,16846.13,6008.0,24



‚úÖ Category Summary


Unnamed: 0,Category,Total,Count,Avg,Median,Percent_of_Total
6,Travel,169497.79,160,1059.361188,1132.88,13.81
3,Rent,162075.39,165,982.275091,918.36,13.21
1,Food & Drink,159493.39,149,1070.425436,1162.57,13.0
4,Salary,149053.55,146,1020.914726,1082.96,12.15
0,Entertainment,148165.47,143,1036.122168,1005.52,12.07



‚úÖ Peak Days


Unnamed: 0,Day,Expense
729,2024-01-19,5775.57
801,2024-06-23,5634.35
550,2023-01-19,5400.24
629,2023-06-25,4764.49
740,2024-02-10,4728.86


In [48]:
import os
from pyngrok import ngrok

# Shut down the ngrok process and its tunnels; a failure is reported, not raised.
try:
    ngrok.kill()
except Exception as e:
    print(f"‚ö†Ô∏è Error killing ngrok tunnels: {e}")
else:
    print("‚úÖ All ngrok tunnels killed.")

# Release TCP port 5000 in case a previous Flask run is still bound to it.
os.system("fuser -k 5000/tcp")
print("‚úÖ Attempted to kill Flask process on port 5000.")


‚úÖ All ngrok tunnels killed.
‚úÖ Attempted to kill Flask process on port 5000.


In [49]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never store the ngrok authtoken in the notebook. It is read from the
# NGROK_AUTHTOKEN environment variable, or typed into a hidden prompt.
# A token that was previously committed should be revoked in the ngrok dashboard.
ngrok_token = (os.environ.get("NGROK_AUTHTOKEN") or getpass("Enter your ngrok authtoken: ")).strip()
if not ngrok_token:
    raise ValueError("An ngrok authtoken is required to open the public tunnel.")
ngrok.set_auth_token(ngrok_token)
print("‚úÖ Ngrok authtoken configured successfully.")

‚úÖ Ngrok authtoken configured successfully.


In [50]:
import pandas as pd
from flask import Flask, jsonify, render_template_string, request
from pyngrok import ngrok
import numpy as np

# ========== Data Prep ==========
# Normalise headers to snake_case so the columns can be found by substring.
# NOTE: this renames the columns of the shared `df` in place.
df.columns = [c.strip().lower().replace(" ", "_") for c in df.columns]
date_col = next((c for c in df.columns if "date" in c), None)
# Prefer the derived "expense" column when the sign-inference cell created it.
# The raw amount column mixes income and spending, which would inflate every
# "spend" figure shown on the dashboard. Fall back to the original substring
# search when no "expense" column exists.
if "expense" in df.columns:
    amt_col = "expense"
else:
    amt_col = next((c for c in df.columns if "amount" in c or "expense" in c), None)
cat_col = next((c for c in df.columns if "cat" in c or "type" in c), None)
if not (date_col and amt_col and cat_col):
    raise ValueError("Dataset must include date, amount, and category columns.")

# Rows whose date cannot be parsed are dropped before grouping.
df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
df = df.dropna(subset=[date_col])
df["month"] = df[date_col].dt.to_period("M").astype(str)

# Aggregates served by the /analysis endpoint.
monthly = df.groupby("month")[amt_col].sum().reset_index()
category = df.groupby(cat_col)[amt_col].sum().reset_index().rename(columns={cat_col:"category"})
daily = df.groupby(df[date_col].dt.date)[amt_col].sum().reset_index().rename(columns={date_col:"date"})

# Headline numbers: the five highest-spend days, overall total, and top category.
peak_days = daily.sort_values(amt_col, ascending=False).head(5)
total_spend = df[amt_col].sum()
top_cat = category.loc[category[amt_col].idxmax()]
peak_day = peak_days.iloc[0]

# ========== Flask App ==========
# Application object on which the @app.route handlers below are registered.
app = Flask(__name__)

# HTML for the landing page: a title, a one-line pitch, and a button that
# navigates the browser to /dashboard.
landing_page = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>üí∞ Smart Expense Insights</title>
<style>
body {
  font-family: 'Poppins', sans-serif;
  background: radial-gradient(circle at top left, #2563eb, #60a5fa);
  color: white;
  text-align: center;
  margin: 0;
  overflow: hidden;
}
h1 {
  margin-top: 20vh;
  font-size: 3em;
  animation: fadeIn 2s ease;
}
p {font-size: 1.2em; opacity: 0.8;}
button {
  background: white;
  color: #2563eb;
  border: none;
  padding: 15px 35px;
  border-radius: 30px;
  font-size: 1.1em;
  margin-top: 30px;
  cursor: pointer;
  transition: 0.3s ease;
}
button:hover {
  transform: scale(1.1);
  background: #e0e7ff;
}
@keyframes fadeIn {
  from {opacity: 0; transform: translateY(30px);}
  to {opacity: 1; transform: translateY(0);}
}
</style>
</head>
<body>
<h1>üí∏ Welcome to Smart Expense Insights</h1>
<p>Visualize your spending patterns, discover trends, and gain control of your finances.</p>
<button onclick="window.location.href='/dashboard'">üöÄ Launch Dashboard</button>
</body>
</html>
"""

# HTML for the dashboard page. Its script fetches /analysis once, fills the
# month dropdown, and draws three Plotly charts (monthly bar, category pie,
# peak-day line). The month filter narrows only the monthly series; the
# category and peak charts always show the full data.
# plotly.js is pinned to an explicit release because the "plotly-latest" CDN
# alias is frozen at v1.58.5 and no longer receives updates.
dashboard_html = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Expense Dashboard</title>
<script src="https://cdn.plot.ly/plotly-2.35.2.min.js"></script>
<style>
body {
  font-family: 'Poppins', sans-serif;
  background: linear-gradient(180deg,#f9fafb,#eff6ff);
  margin: 0;
  color: #1e293b;
}
header {
  background: #2563eb;
  color: white;
  text-align: center;
  padding: 20px;
  font-size: 1.8em;
}
.grid {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(500px, 1fr));
  gap: 25px;
  margin: 30px;
}
.card {
  background: white;
  border-radius: 16px;
  box-shadow: 0 6px 12px rgba(0,0,0,0.1);
  padding: 20px;
}
select {
  padding: 10px;
  border-radius: 8px;
  border: 1px solid #ccc;
  margin-bottom: 15px;
}
</style>
</head>
<body>
<header>üíπ Interactive Expense Dashboard</header>

<div style="text-align:center;margin:20px;">
  <label>Filter by Month:</label>
  <select id="monthFilter" onchange="updateCharts()"></select>
</div>

<div class="grid">
  <div class="card"><div id="monthly"></div></div>
  <div class="card"><div id="category"></div></div>
  <div class="card"><div id="peak"></div></div>
</div>

<script>
async function loadData(){
  const res = await fetch('/analysis');
  const data = await res.json();
  window.data = data;

  const months = [...new Set(data.monthly.map(x=>x.month))];
  const monthFilter = document.getElementById('monthFilter');
  monthFilter.innerHTML = '<option value="all">All Months</option>' +
      months.map(m=>'<option value="'+m+'">'+m+'</option>').join('');

  renderCharts(data);
}

function renderCharts(data){
  Plotly.newPlot('monthly',[{
    x: data.monthly.map(d=>d.month),
    y: data.monthly.map(d=>d.amount),
    type:'bar',
    marker:{color:'#3b82f6'}
  }],{title:'Monthly Totals',margin:{t:40}});

  Plotly.newPlot('category',[{
    labels: data.category.map(d=>d.category),
    values: data.category.map(d=>d.amount),
    type:'pie'
  }],{title:'Category Breakdown'});

  Plotly.newPlot('peak',[{
    x: data.peak.map(d=>d.date),
    y: data.peak.map(d=>d.amount),
    mode:'lines+markers',
    marker:{color:'#ef4444'}
  }],{title:'Peak Spending Days'});
}

function updateCharts(){
  const val = document.getElementById('monthFilter').value;
  const filtered = val==='all' ? window.data : {
    ...window.data,
    monthly: window.data.monthly.filter(x=>x.month===val),
    category: window.data.category,
    peak: window.data.peak
  };
  renderCharts(filtered);
}

loadData();
</script>
</body>
</html>
"""

@app.route('/')
def landing():
    """Serve the landing page template at the site root."""
    return render_template_string(landing_page)

@app.route('/dashboard')
def dashboard():
    """Serve the dashboard template; its script loads the data from /analysis."""
    return render_template_string(dashboard_html)

@app.route('/analysis')
def analysis():
    """Return the precomputed aggregates as JSON for the dashboard charts.

    The payload has the keys ``monthly``, ``category`` and ``peak`` (lists of
    records whose value column is exposed as ``amount``) and ``summary``
    (overall total, top category and peak day).
    """
    def _as_records(frame):
        # Expose the value column under the fixed key the front end reads.
        return frame.rename(columns={amt_col:'amount'}).to_dict(orient='records')

    payload = {
        'monthly': _as_records(monthly),
        'category': _as_records(category),
        'peak': _as_records(peak_days),
    }
    payload['summary'] = {
        'total': round(float(total_spend),2),
        'top_category': top_cat['category'],
        'peak_day': str(peak_day['date']),
    }
    return jsonify(payload)

# Open an ngrok tunnel to local port 5000 (no fixed subdomain requested) and
# print the public address it was given.
tunnel = ngrok.connect(5000, subdomain=None)
public_url = tunnel.public_url
print("üåç Public URL:", public_url)
print("‚û°Ô∏è  Visit / to see landing page, /dashboard for insights.")

# Start the Flask server on the port the tunnel forwards to.
app.run(port=5000)

üåç Public URL: https://waggly-bristol-swirly.ngrok-free.dev
‚û°Ô∏è  Visit / to see landing page, /dashboard for insights.
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [12/Nov/2025 16:41:04] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [12/Nov/2025 16:41:07] "GET /dashboard HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [12/Nov/2025 16:41:07] "GET /analysis HTTP/1.1" 200 -
