7. Creating a Streamlit App

In [None]:
!pip install streamlit squarify

In [143]:
%%writefile StreamlitApp.py
import streamlit as st
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Function to run SQL queries
def get_data(query, params=None):
    """Run a SQL query against the local expenses database.

    Args:
        query: SQL text to execute.
        params: Optional bind parameters for the query. A falsy value
            (``None``, empty sequence/dict) means "no parameters".

    Returns:
        A pandas DataFrame holding the query result.

    Raises:
        Whatever ``pandas.read_sql_query`` raises for an invalid query; the
        connection is closed before the exception propagates.
    """
    conn = sqlite3.connect("expenses_database.sqlite")
    try:
        # `params or None` keeps the original "only pass params when truthy"
        # behaviour without duplicating the call.
        return pd.read_sql_query(query, conn, params=params or None)
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

# App Setup
# Streamlit App Title
# The browser-tab title is plain text, so the emoji shortcode is supplied via
# page_icon instead of being embedded in page_title.
st.set_page_config(
    page_title="Expense Tracker",
    page_icon=":money_with_wings:",
    layout="wide",
)

# Sidebar for navigation: the selected label drives the page dispatch below.
st.sidebar.title("Navigation")
page = st.sidebar.radio(
    "Go to",
    [
        "Project Introduction",
        "Expense Visualizations",
        "SQL Queries-1",
        "SQL Queries-2",
        "Query Visualizations",
        "Creator Info",
    ],
)

# PAGE 1: Introduction
if page == "Project Introduction":
    st.title(":money_with_wings: Expense Tracker Analysis")
    st.image("expenses.jpg")  # Banner image, path relative to the working directory
    st.subheader("Track and analyze your personal expenses with visual insights")
    # Static landing-page copy. The database file name mentioned under
    # "Data Used" matches the file opened by get_data().
    st.markdown("""
    Welcome to the Expense Tracker, a streamlined application built to help you monitor, analyze, and visualize your personal expenses with clarity and ease.


    :pushpin: What This Project Does:

    This app loads your expense data stored in an SQLite database and provides powerful insights through:

    - Dynamic visualizations of spending trends

    - SQL-powered queries to dive deep into your transactions

    - Easy filtering by category, month, and payment method

    - Cashback analysis and high-spending category breakdowns


    :brain: Why Use It?

    Tracking your expenses isn't just about saving receipts — it's about understanding where your money goes. This app empowers you to:

    - Identify spending patterns

    - Discover potential savings

    - Make informed budgeting decisions


    :open_file_folder: Data Used:

    Stored in a local SQLite database (expenses_database.sqlite)

    Columns include: date, category, amount, payment_mode, cashback, description, and more


    :hammer_and_wrench: Technologies:

    - Python

    - SQL

    - Streamlit
    """)

# PAGE 2: Expense Visualizations
elif page == "Expense Visualizations":
    st.title(":bar_chart: Expense Data Visualizer (2024)")

    # Load full data
    df = get_data("SELECT * FROM expenses_data")

    if not df.empty:
        # Convert and filter only 2024 data
        df["Date"] = pd.to_datetime(df["Date"])
        df = df[df["Date"].dt.year == 2024]
        df["month"] = df["Date"].dt.month

        # Multiselect filters
        all_categories = df["Category"].unique().tolist()
        all_dates = df["Date"].dt.date.unique()
        all_months = sorted(df["month"].unique())
        all_payment_modes = df["Payment_Mode"].unique().tolist()

        selected_categories = st.multiselect("Select Categories", all_categories, default=all_categories)
        selected_dates = st.multiselect("Select Specific Dates", all_dates)
        selected_months = st.multiselect("Select Months", all_months, default=all_months)
        selected_modes = st.multiselect("Select Payment Modes", all_payment_modes, default=all_payment_modes)

        # Filter logic
        filtered_df = df[
            (df["Category"].isin(selected_categories)) &
            ((df["Date"].dt.date.isin(selected_dates)) if selected_dates else True) &
            (df["month"].isin(selected_months)) &
            (df["Payment_Mode"].isin(selected_modes))
        ]

        if not filtered_df.empty:
            st.write("### Filtered Expense Data", filtered_df)

            # Select metric to visualize
            feature = st.selectbox("Select Feature to Visualize", ["Amount", "Cashback"])

            # Line Chart of selected feature
            st.write(f"### {feature.capitalize()} Over Time")
            fig1, ax1 = plt.subplots(figsize=(10, 4))
            sns.lineplot(data=filtered_df, x="Date", y=feature, marker="o", ax=ax1)
            plt.xticks(rotation=45)
            st.pyplot(fig1)

            # Bar Chart: Category Spend
            st.write("### Total Spend by Category")
            fig2, ax2 = plt.subplots()
            cat_data = filtered_df.groupby("Category")[feature].sum().reset_index()
            sns.barplot(data=cat_data, x=feature, y="Category", palette="viridis", ax=ax2)
            st.pyplot(fig2)

            # Pie Chart: Payment Mode
            st.write("### Payment Mode Distribution")
            fig3, ax3 = plt.subplots()
            filtered_df["Payment_Mode"].value_counts().plot.pie(autopct='%1.1f%%', ax=ax3)
            ax3.set_ylabel('')
            st.pyplot(fig3)

        else:
            st.warning("No data available for the selected filters.")
    else:
        st.error("No data found in the database.")

# PAGE 3: SQL Queries Table View Part-1
elif page == "SQL Queries-1":
    st.title(":clipboard: SQL Query Results")

    queries = {
        "Total amount spent in each category" : "SELECT category, SUM(amount) AS total_spent FROM expenses_data GROUP BY category ORDER BY total_spent DESC;",
        "Total amount spent using each payment mode" :"SELECT payment_mode, SUM(amount) AS total_spent FROM expenses_data GROUP BY payment_mode ORDER BY total_spent DESC;",
        "Total cashback received across all transactions" : "SELECT SUM(cashback) AS total_cashback FROM expenses_data;",
        "Top 5 most expensive categories in terms of spending" : "SELECT category, SUM(amount) AS total_spent FROM expenses_data GROUP BY category ORDER BY total_spent DESC LIMIT 5;",
        "Amount spent on transportation using different payment modes" : "SELECT payment_mode, SUM(amount) AS transport_spent FROM expenses_data WHERE category = 'Transportation' GROUP BY payment_mode;",
        "Transactions resulted in cashback" : "SELECT * FROM expenses_data WHERE cashback > 0 ORDER BY cashback DESC;",
        "Total spending in each month of the year" : "SELECT strftime('%m', date) AS month, SUM(amount) AS total_spent FROM expenses_data GROUP BY month ORDER BY month;",
        "Months that have the highest spending in categories like (Travel, Entertainment, Gifts)" : "SELECT strftime('%m', date) AS month, category, SUM(amount) AS total_spent FROM expenses_data WHERE category IN ('Transportation', 'Shopping') GROUP BY category",
        "Recurring expenses during specific months (e.g., insurance premiums, property taxes)" : "SELECT strftime('%m', date) AS month, Description, COUNT(*) AS num_expenses, SUM(amount) AS total_spent FROM expenses_data WHERE category IN ('Bills', 'Insurance') GROUP BY month, description ORDER BY month;",
        "Cashback or rewards earned in each month" : "SELECT strftime('%m', date) AS month, SUM(cashback) AS total_cashback FROM expenses_data GROUP BY month ORDER BY month;",
        "Typical costs associated with different types of travel" : "SELECT description, AVG(amount) AS avg_cost, SUM(amount) AS total_cost FROM expenses_data WHERE category = 'Transportation' GROUP BY description;",
        "High and Low Priority Categories" :"""SELECT 
                                                    category,
                                                    SUM(amount) AS total_spent,
                                                    CASE 
                                                        WHEN SUM(amount) >1966031 THEN 'High Priority'
                                                        WHEN SUM(amount) <1926086  THEN 'Low Priority'
                                                        ELSE 'Medium Priority'
                                                    END AS priority
                                                FROM expenses_data
                                                GROUP BY category
                                                ORDER BY total_spent DESC;""",
        "Category that contributes the highest percentage of the total spending" : """SELECT 
                                                                                        category,
                                                                                        SUM(amount) AS total_spent,
                                                                                        ROUND(SUM(amount) * 100.0 / (SELECT SUM(amount) FROM expenses_data), 2) AS percentage_of_total
                                                                                       FROM expenses_data
                                                                                       GROUP BY category
                                                                                       ORDER BY percentage_of_total DESC
                                                                                       LIMIT 1;"""


    }
    selected_query = st.selectbox("Choose a Query", list(queries.keys()))
    result = get_data(queries[selected_query])
    st.dataframe(result)

    viz_queries={
        "How has your overall spending changed over time (e.g., increasing, decreasing, remaining stable)" : "SELECT strftime('%m', date) AS Month, SUM(amount) AS Total_Spent FROM expenses_data GROUP BY Month ORDER BY Month;",
        "Patterns in grocery spending during specific seasons" : """SELECT 
                                                                        CASE 
                                                                            WHEN strftime('%m', date) IN ('12', '01', '02') THEN 'Winter'
                                                                            WHEN strftime('%m', date) IN ('03', '04', '05') THEN 'Spring'
                                                                            WHEN strftime('%m', date) IN ('06', '07', '08') THEN 'Summer'
                                                                            WHEN strftime('%m', date) IN ('09', '10', '11') THEN 'Fall'
                                                                        END AS season,
                                                                        SUM(amount) AS total_spent
                                                                    FROM expenses_data
                                                                    WHERE category = 'Groceries'
                                                                    GROUP BY season
                                                                    ORDER BY total_spent DESC;"""
    }
    selected_viz = st.selectbox("Choose a Visualization", list(viz_queries.keys()))
    df = get_data(viz_queries[selected_viz])

    #visualizations
    if selected_viz == "How has your overall spending changed over time (e.g., increasing, decreasing, remaining stable)":
        st.subheader(":chart_with_upwards_trend: Overall spending changed over time")
        fig, ax = plt.subplots()
        sns.lineplot(data=df, x="Month", y="Total_Spent", marker='o', ax=ax)
        ax.set_xlabel("Month")
        ax.set_ylabel("Total Spending")
        ax.set_title("Monthly Spending Trend")
        plt.xticks(rotation=45)
        st.pyplot(fig)
    elif selected_viz=="Patterns in grocery spending during specific seasons":
        st.subheader(":bar_chart: Patterns in grocery spending during seasons")
        fig, ax = plt.subplots()
        sns.barplot(data=df, x="season", y="total_spent",palette="crest" , ax=ax)
        ax.set_xlabel("Season")
        ax.set_ylabel("Total Spending")
        ax.set_title("Patterns in grocery spending")
        plt.xticks(rotation=45)
        st.pyplot(fig)

# PAGE 4: SQL Queries Table View Part-2 
elif page == "SQL Queries-2":
    st.title(":clipboard: SQL Query Results")

    queries = {
        "Most Frequently Used Payment Mode" : "SELECT payment_mode, COUNT(*) AS count FROM expenses_data GROUP BY payment_mode ORDER BY count DESC LIMIT 1;",
        "Total Spend Per Description" : "SELECT description, SUM(amount) AS total FROM expenses_data GROUP BY description ORDER BY total DESC;",
        "Transactions Where Cashback > 25% of Amount Paid" : "SELECT date, category, description, amount, cashback FROM expenses_data WHERE cashback > (amount * 0.25);",
        "Highest Spending Day" : "SELECT date, SUM(amount) AS total_spent FROM expenses_data GROUP BY date ORDER BY total_spent DESC LIMIT 1;",
        "Day with Maximum Number of Transactions" : "SELECT date, COUNT(*) AS transaction_count FROM expenses_data GROUP BY date ORDER BY transaction_count DESC LIMIT 1;",
        "Count of Transactions where amount spent is greater than 1800": "SELECT COUNT(*) AS transactions_over_1800 FROM expenses_data WHERE amount > 1800;",
        "Min and Max Spend Per Category": "SELECT category, MIN(amount) AS min_spent, MAX(amount) AS max_spent FROM expenses_data GROUP BY category ORDER BY category;",
        "Transactions Where Description is 'Netflix'" : "SELECT date, category, payment_mode, description, amount, cashback FROM expenses_data WHERE LOWER(description) = 'netflix';",
        "Transactions Where Cashback is Between 1 and 4" : "SELECT date, category, payment_mode, description, amount, cashback FROM expenses_data WHERE cashback BETWEEN 1 AND 4;",
        "Total Amount Spent on Medical Expenses" : "SELECT SUM(amount) AS total_medical_spending FROM expenses_data WHERE LOWER(category) = 'medical expense';"  
    }

    selected_query = st.selectbox("Choose a Query", list(queries.keys()))
    result = get_data(queries[selected_query])
    st.dataframe(result)

# PAGE 5: Query Visualizations
elif page == "Query Visualizations":
    st.title(":bar_chart: Query-Based Visual Insights")

    viz_queries = {
        "Top 10 Highest Transactions by Amount":
            "SELECT DISTINCT date, category, description, payment_mode, amount FROM expenses_data ORDER BY amount DESC LIMIT 10",
        
        "Average Monthly Spending":
            "SELECT strftime('%m', date) AS month, ROUND(AVG(amount), 2) AS avg_monthly_spent FROM expenses_data GROUP BY month ORDER BY month",
        
        "Categories with Most Cashback Earned":
            "SELECT category, SUM(cashback) AS total_cashback FROM expenses_data GROUP BY category ORDER BY total_cashback DESC",
        
        "Spending Breakdown by Payment Mode per Month":
            "SELECT strftime('%m', date) AS month, payment_mode, SUM(amount) AS total FROM expenses_data GROUP BY month, payment_mode ORDER BY month",
        
        "Average Spending per Transaction in Each Category":
            "SELECT category, ROUND(AVG(amount), 2) AS avg_spent FROM expenses_data GROUP BY category ORDER BY avg_spent DESC",
        
        "Cashback % of Spend by Category":
            "SELECT category, ROUND(SUM(cashback) * 100.0 / SUM(amount), 2) AS cashback_percentage FROM expenses_data GROUP BY category ORDER BY cashback_percentage DESC",
        
        "Top 5 Categories by Number of Transactions":
            "SELECT category, COUNT(*) AS transaction_count FROM expenses_data GROUP BY category ORDER BY transaction_count DESC LIMIT 5",

        "Number of Unique Descriptions per Category":
            "SELECT category, COUNT(DISTINCT description) AS unique_descriptions FROM expenses_data GROUP BY category ORDER BY unique_descriptions DESC"
    }

    selected_viz = st.selectbox("Choose a Visualization", list(viz_queries.keys()))
    df = get_data(viz_queries[selected_viz])

    # Visualizations
    if selected_viz == "Top 10 Highest Transactions by Amount":
        st.subheader(":moneybag: Top 10 Highest Transactions")
        st.dataframe(df)
        fig,ax=plt.subplots()
        sns.lineplot(data=df, x="Amount",y="Description",marker='o',ax=ax)
        ax.set_xlabel("Amount")
        ax.set_ylabel("Description")
        st.pyplot(fig)

    elif selected_viz == "Average Monthly Spending":
        st.subheader(":date: Average Monthly Spending")
        fig, ax = plt.subplots()
        sns.lineplot(data=df, x="month", y="avg_monthly_spent", marker="o", ax=ax)
        ax.set_xlabel("Month")
        ax.set_ylabel("Average Spend")
        st.pyplot(fig)

    elif selected_viz == "Categories with Most Cashback Earned":
        st.subheader(":trophy: Categories with Most Cashback Earned")
        fig, ax = plt.subplots()
        ax.pie(df["total_cashback"], labels=df["Category"], autopct='%1.1f%%', startangle=90)
        ax.axis('equal')
        st.pyplot(fig)

    elif selected_viz == "Spending Breakdown by Payment Mode per Month":
        st.subheader(":bar_chart: Monthly Spend by Payment Mode (Stacked Bar)")
        pivot_df = df.pivot(index="month", columns="Payment_Mode", values="total").fillna(0)
        fig, ax = plt.subplots()
        pivot_df.plot(kind="bar", stacked=True, ax=ax)
        ax.set_xlabel("Month")
        ax.set_ylabel("Total Spend")
        st.pyplot(fig)

    elif selected_viz == "Average Spending per Transaction in Each Category":
        st.subheader(":straight_ruler: Average Spend per Category")
        fig, ax = plt.subplots()
        sns.barplot(data=df, x="avg_spent", y="Category", palette="crest", ax=ax)
        ax.set_xlabel("Avg Spent")
        ax.set_ylabel("Category")
        st.pyplot(fig)

    elif selected_viz == "Cashback % of Spend by Category":
        st.subheader(":dart: Cashback as % of Spend")
        fig, ax = plt.subplots()
        sns.scatterplot(data=df, x="cashback_percentage", y="Category", size="cashback_percentage", hue="cashback_percentage", ax=ax, sizes=(50, 300))
        ax.set_xlabel("Cashback %")
        ax.set_ylabel("Category")
        st.pyplot(fig)

    elif selected_viz == "Top 5 Categories by Number of Transactions":
        st.subheader(":1234: Top 5 Categories by Transactions (Treemap)")
        import squarify
        fig, ax = plt.subplots()
        squarify.plot(sizes=df["transaction_count"], label=df["Category"], alpha=0.8)
        plt.axis('off')
        st.pyplot(fig)

    elif selected_viz == "Number of Unique Descriptions per Category":
        st.subheader(":fire: Heatmap: Unique Descriptions per Category")
        df = df.set_index("Category")
        fig, ax = plt.subplots()
        sns.heatmap(df, annot=True, cmap="YlGnBu", linewidths=0.5, ax=ax)
        st.pyplot(fig)

# PAGE 6: Creator Info
elif page == "Creator Info":
    st.title(":female-technologist: Creator Info")
    st.write("""
    **Developed by:** [Karuna]  
    **Skills:** Python, SQL, Streamlit, Data Analysis, Visualization  
    **GitHub:** [https://github.com/karuna-2828/Expense_Tracker--1-]
    """)


Overwriting StreamlitApp.py


In [None]:
!streamlit run StreamlitApp.py