<a href="https://colab.research.google.com/github/s4ngi/ISYS2001/blob/main/Welcome_To_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [61]:
# Installing required packages
!pip install pandas gradio hands_on_ai python-dateutil langchain openai faiss-cpu
# Gradio for UI
!pip install --quiet gradio

# Sentence embeddings and vector search for RAG
!pip install --quiet sentence-transformers faiss-cpu

# Your chatbot module (if pip-installable)
!pip install --quiet hands_on_ai

# Import core libraries
import pandas as pd
import gradio as gr
import os
import hands_on_ai as ha
from datetime import datetime
from hands_on_ai.chat import get_response
from dateutil import parser
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter

import warnings
warnings.filterwarnings('ignore')

# Report the versions of the key packages so the notebook output records the environment.
print("Core libraries loaded successfully!")
print(f"Pandas version: {pd.__version__}")
print(f"Gradio version: {gr.__version__}")
# os.name is the OS family identifier (e.g. 'posix'), not a version number.
print(f"OS version: {os.name}")
print(f"Hands-on-AI version: {ha.__version__}")
print("All libraries loaded successfully!")

from getpass import getpass

# Configure hands-on-ai server connection
# NOTE(review): presumably hands_on_ai reads these environment variables when a
# chat request is made — confirm against the package documentation.
# NOTE(review): the API key is stored in plain text here; getpass is imported
# above but not used in the visible code — consider prompting for the key.
os.environ['HANDS_ON_AI_SERVER'] = 'https://ollama.serveur.au'
os.environ['HANDS_ON_AI_MODEL'] = 'granite3.2'
os.environ['HANDS_ON_AI_API_KEY'] = 'isys2001-assignment-key'

print("Hands-on-AI configured successfully!")


Core libraries loaded successfully!
Pandas version: 2.2.2
Gradio version: 5.49.1
OS version: posix
Hands-on-AI version: 0.1.10
All libraries loaded successfully!
Hands-on-AI configured successfully!


In [65]:
import pandas as pd
import gradio as gr
from hands_on_ai.chat import get_response
import warnings
import re
from datetime import datetime
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

warnings.filterwarnings("ignore")

# ============================================
# ✅ CSV Handling and Analysis
# ============================================

def load_and_clean_csv(file):
    """Load an uploaded transactions CSV and normalise its columns.

    Args:
        file: Either a filesystem path given as a string, or an upload object
            that exposes the path as ``.name``. ``None`` (no file selected)
            is treated as an invalid file.

    Returns:
        ``(df, transactions)`` where ``df`` is the cleaned DataFrame and
        ``transactions`` is the same data as a list of row dicts. Returns
        ``(None, None)`` when the file is missing or unreadable, when its
        columns are not exactly Date, Amount, Category, Description (in that
        order), or when no usable rows remain after cleaning.
    """
    if file is None:
        return None, None
    # Uploads may arrive as a plain path string or as an object carrying `.name`.
    path = file if isinstance(file, str) else file.name
    expected_columns = ["Date", "Amount", "Category", "Description"]
    try:
        df = pd.read_csv(path)
        if list(df.columns) != expected_columns:
            return None, None
        # Dates are read day-first; unparseable values become NaT and are dropped below.
        df["Date"] = pd.to_datetime(df["Date"].astype(str).str.strip(), dayfirst=True, errors="coerce")
        # Strip currency symbols and thousands separators before converting to float.
        df["Amount"] = df["Amount"].replace(r"[\$,]", "", regex=True).astype(float)
        df = df.dropna(subset=["Date", "Amount"])
        if df.empty:
            return None, None
        return df, df.to_dict(orient="records")
    except Exception:
        # Any read/parse failure means "invalid file" to the caller. Unlike a bare
        # `except:`, this still lets KeyboardInterrupt/SystemExit propagate.
        return None, None

def summarize_expenses(df):
    """Aggregate spending overall and per category.

    Args:
        df: DataFrame with ``Category`` and ``Amount`` columns.

    Returns:
        Dict with the overall ``Amount`` sum under "Total Spending", the
        per-category sums under "Category Totals" and the per-category means
        under "Category Averages"; every figure is rounded to two decimals.
    """
    amounts_by_category = df.groupby("Category")["Amount"]
    report = {"Total Spending": round(df["Amount"].sum(), 2)}
    report["Category Totals"] = amounts_by_category.sum().round(2).to_dict()
    report["Category Averages"] = amounts_by_category.mean().round(2).to_dict()
    return report

def monthly_summary(df):
    """Return the sum and the mean of ``Amount`` across every row of ``df``.

    No grouping by calendar month is performed here: "Monthly Total" is the
    sum of all amounts and "Monthly Average" is the mean amount per row, both
    rounded to two decimals.
    """
    amounts = df["Amount"]
    total = round(amounts.sum(), 2)
    average = round(amounts.mean(), 2)
    return {"Monthly Total": total, "Monthly Average": average}

def generate_spending_advice(summary_dict):
    """Produce one advice sentence per spending category.

    Args:
        summary_dict: Mapping that may contain "Total Spending" (a number) and
            "Category Totals" (category name -> amount).

    Returns:
        A list of strings. Categories at or above 20% of total spending get a
        "set a limit" message, those at or above 10% a "keep an eye" message,
        and the rest a "no action" message. When the total is zero or there
        are no categories, a single "no data" message is returned.
    """
    overall = summary_dict.get("Total Spending", 0)
    per_category = summary_dict.get("Category Totals", {})
    if overall == 0 or not per_category:
        return ["No spending data available."]

    def _advice_line(name, spent):
        # Share of the overall total, expressed as a percentage.
        share = (spent / overall) * 100
        if share >= 20:
            return f"{name} is {share:.1f}% of your spending. Consider setting a limit."
        if share >= 10:
            return f"{name} makes up {share:.1f}%. Keep an eye on it."
        return f"{name} is only {share:.1f}%. No action needed."

    return [_advice_line(name, spent) for name, spent in per_category.items()]

def format_summary_table(df):
    """Build the rows for the per-category summary table.

    Each row is ``[category, total, average]`` with the money values rendered
    as ``$x.xx`` strings. The total cell is followed by a space and, when the
    category accounts for at least 20% of overall spending, a warning emoji.
    """
    report = summarize_expenses(df)
    overall = report["Total Spending"]
    averages = report["Category Averages"]

    def _row(name, spent):
        marker = "⚠️" if (spent / overall) * 100 >= 20 else ""
        return [name, f"${spent:.2f} {marker}", f"${averages[name]:.2f}"]

    return [_row(name, spent) for name, spent in report["Category Totals"].items()]

def format_monthly_table(df):
    """Return a single-row table with the total and average from ``monthly_summary``.

    Both figures are formatted as ``$x.xx`` strings.
    """
    figures = monthly_summary(df)
    total_cell = f"${figures['Monthly Total']:.2f}"
    average_cell = f"${figures['Monthly Average']:.2f}"
    return [[total_cell, average_cell]]

# ============================================
# 💬 Structured Chatbot Logic with Range-Aware Dates
# ============================================

def extract_dates_from_question(question):
    """Extract a date range from ``dd/mm/yyyy`` dates mentioned in a question.

    Args:
        question: Free-text user question.

    Returns:
        ``(start, end)`` as ``datetime.date`` objects. Two valid dates give the
        range in chronological order; one valid date gives a single-day range.
        ``(None, None)`` is returned when the question contains no valid date
        or more than two.
    """
    dates = []
    for candidate in re.findall(r"(\d{1,2}/\d{1,2}/\d{4})", question):
        try:
            dates.append(datetime.strptime(candidate, "%d/%m/%Y").date())
        except ValueError:
            # Date-shaped text that is not a real calendar date (e.g. 31/02/2024):
            # ignore it instead of letting the chat handler crash.
            continue

    if len(dates) == 2:
        start, end = sorted(dates)
        return start, end
    if len(dates) == 1:
        return dates[0], dates[0]
    return None, None

def _lowered_text(value):
    """Return ``value`` lower-cased as text, or "" for missing (None/NaN) values."""
    if isinstance(value, str):
        return value.lower()
    if value is None or pd.isna(value):
        return ""
    return str(value).lower()


def filter_transactions(transactions, question):
    """Narrow ``transactions`` down to the ones relevant to ``question``.

    Three filters are applied in order:

    1. Date range, when the question mentions one or two dd/mm/yyyy dates.
    2. Category, when a known category name occurs in the question.
    3. Description, keeping rows that share at least one word with the
       question; this step is skipped when it would leave nothing.

    Args:
        transactions: List of row dicts with "Date", "Category" and
            "Description" keys; "Date" values must provide ``.date()``.
        question: Free-text user question.

    Returns:
        The filtered list of transaction dicts (possibly empty).
    """
    start_date, end_date = extract_dates_from_question(question)
    filtered = transactions

    if start_date and end_date:
        filtered = [t for t in filtered if start_date <= t["Date"].date() <= end_date]

    question_lower = question.lower()
    # Blank CSV cells arrive as NaN floats, so normalise before comparing text.
    known_categories = {_lowered_text(t["Category"]) for t in transactions}
    # "" is a substring of every question and must never count as a match.
    known_categories.discard("")
    matched_categories = {c for c in known_categories if c in question_lower}
    if matched_categories:
        filtered = [t for t in filtered if _lowered_text(t["Category"]) in matched_categories]

    words_in_question = set(re.findall(r"\b\w+\b", question_lower))
    filtered_desc = [
        t for t in filtered
        if words_in_question & set(re.findall(r"\b\w+\b", _lowered_text(t["Description"])))
    ]
    # Only narrow by description when at least one row actually matches.
    if filtered_desc:
        filtered = filtered_desc

    return filtered

def format_transactions_for_prompt(transactions):
    """Render transactions as one text line each for inclusion in the prompt.

    Args:
        transactions: Sequence of dicts with "Date" (supports ``strftime``),
            "Amount", "Category" and "Description".

    Returns:
        Newline-joined lines, or a fixed "no matches" sentence when
        ``transactions`` is empty.
    """
    if not transactions:
        return "No matching transactions found."

    line_template = "Date: {day}, Amount: ${amount:.2f}, Category: {category}, Description: {description}"
    rendered = (
        line_template.format(
            day=entry["Date"].strftime("%d/%m/%Y"),
            amount=entry["Amount"],
            category=entry["Category"],
            description=entry["Description"],
        )
        for entry in transactions
    )
    return "\n".join(rendered)

# ============================================
# 🧠 Local RAG Setup (FAISS + sentence-transformers)
# ============================================

class LocalRAG:
    """Small in-memory retriever: sentence-transformer embeddings in a FAISS L2 index."""

    def __init__(self):
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.texts = []      # indexed texts; position i corresponds to index row i
        self.vectors = None  # float32 embedding matrix for self.texts, None while empty
        self.index = None    # faiss.IndexFlatL2 over the embeddings, None while empty

    def add_texts(self, new_texts):
        """Embed ``new_texts`` and append them to the index.

        Only the new texts are encoded, so the cost of a call depends on the
        size of the addition rather than on everything indexed so far. An
        empty input is a no-op.
        """
        new_texts = list(new_texts)
        if not new_texts:
            return
        embeddings = self.model.encode(new_texts, convert_to_numpy=True).astype('float32')
        if self.index is None:
            self.index = faiss.IndexFlatL2(embeddings.shape[1])
        if self.vectors is None:
            self.vectors = embeddings
        else:
            self.vectors = np.vstack([self.vectors, embeddings])
        self.index.add(embeddings)
        self.texts.extend(new_texts)

    def query(self, question, top_k=3):
        """Return up to ``top_k`` indexed texts nearest to ``question``.

        Returns an empty list when nothing has been indexed or ``top_k`` is
        not positive.
        """
        if self.index is None or not self.texts:
            return []
        # Never ask for more neighbours than there are texts.
        k = min(top_k, len(self.texts))
        if k <= 0:
            return []
        q_vec = self.model.encode([question]).astype('float32')
        _, indices = self.index.search(q_vec, k)
        # FAISS marks missing neighbours with -1; without the lower bound that
        # would silently resolve to the last text via negative indexing.
        return [self.texts[i] for i in indices[0] if 0 <= i < len(self.texts)]

# Module-level retriever used by handle_csv and financial_sage_rag below.
# Constructing it loads the 'all-MiniLM-L6-v2' SentenceTransformer model.
rag = LocalRAG()

# ============================================
# 💬 Financial Sage with RAG + Insights
# ============================================

def summarize_habits(transactions):
    """Describe a set of transactions in three short sentences.

    Args:
        transactions: List of row dicts with "Category" and "Amount" keys.

    Returns:
        A sentence string covering the transaction count and total, the mean
        amount per transaction, and the category with the largest total.
        A fixed message is returned when ``transactions`` is empty.
    """
    if not transactions:
        return "No transactions to analyze."

    frame = pd.DataFrame(transactions)
    spend_by_category = frame.groupby("Category")["Amount"].sum()
    biggest = spend_by_category.idxmax()
    amounts = frame["Amount"]
    sentences = [
        f"You have {len(frame)} transactions totaling ${amounts.sum():.2f}.",
        f"On average, you spend ${amounts.mean():.2f} per transaction.",
        f"You spend the most on {biggest} (${spend_by_category[biggest]:.2f}).",
    ]
    return " ".join(sentences)

def financial_sage_rag(question, transactions):
    """Answer ``question`` using the matching transactions plus retrieved text.

    The prompt sent to ``get_response`` contains, in order: the persona line,
    the transactions selected by ``filter_transactions``, the habits summary
    of those transactions, the texts returned by ``rag.query`` and finally
    the user's question.
    """
    relevant = filter_transactions(transactions, question)
    habits = summarize_habits(relevant)

    references = rag.query(question)
    if references:
        reference_block = "\n".join(references)
    else:
        reference_block = "No additional reference documents."

    transaction_block = format_transactions_for_prompt(relevant)
    sections = [
        "You are the Financial Sage, a friendly guide who explains spending habits clearly.\n",
        f"Matching transactions:\n{transaction_block}\n\n",
        f"Summary of habits:\n{habits}\n\n",
        f"Additional references:\n{reference_block}\n\n",
        f"Answer the user's question: {question}",
    ]
    return get_response("".join(sections))

def get_sage_response(question, transactions_state):
    """Route a question to the plain chat model or the transaction-aware path.

    Without loaded transactions (``None`` or empty) the question goes to
    ``get_response`` unchanged; otherwise ``financial_sage_rag`` is used.
    """
    if not transactions_state:
        return get_response(question)
    return financial_sage_rag(question, transactions_state)

# ============================================
# 💰 Budget Feature - Single Total Budget
# ============================================

def calculate_savings(income, budget, total_expense):
    """Compute the amount saved and a 0-1 progress ratio.

    Savings are ``income - total_expense``. The ratio is savings divided by
    ``income - budget`` and clamped to the range 0..1; when ``income - budget``
    is not positive the divisor falls back to 1 so the division stays defined.

    Returns:
        ``(savings, ratio)``.
    """
    savings = income - total_expense

    target = income - budget
    if not target > 0:
        target = 1  # avoid division by zero

    ratio = savings / target
    if ratio < 0:
        ratio = 0
    elif ratio > 1:
        ratio = 1
    return savings, ratio

def check_budget(transactions, income, budget):
    """Summarise income, budget, expenses and savings.

    Args:
        transactions: Row dicts (or anything ``pd.DataFrame`` accepts) with an
            "Amount" field. ``None`` or an empty collection counts as zero
            expenses.
        income: Income for the period.
        budget: Spending budget for the period.

    Returns:
        ``(advice, percent)`` where ``advice`` is a four-line text summary and
        ``percent`` is the 0-1 ratio produced by ``calculate_savings``.
    """
    if transactions is None or len(transactions) == 0:
        # An empty DataFrame has no "Amount" column, so handle "no data" directly.
        total_expense = 0.0
    else:
        total_expense = pd.DataFrame(transactions)["Amount"].sum()
    savings, percent = calculate_savings(income, budget, total_expense)

    advice = (
        f"Income: ${income:.2f}\n"
        f"Budget: ${budget:.2f}\n"
        f"Total Expenses: ${total_expense:.2f}\n"
        f"Savings: ${savings:.2f}"
    )

    return advice, percent

# ============================================
# 🧩 Gradio App Layout
# ============================================

with gr.Blocks(title="💰 Financial Sage Dashboard") as app:

    gr.Markdown("<h1 style='text-align:center'>💰 Financial Sage Dashboard</h1>")

    # State passed between callbacks: the parsed transactions (None until a
    # valid CSV is loaded) and the list of (question, answer) chat turns.
    transactions_state = gr.State(value=None)
    chat_history = gr.State(value=[])

    with gr.Row():
        # CSV Dashboard
        with gr.Column(scale=1, min_width=450):
            gr.Markdown("### 📁 Upload Your CSV")
            file_input = gr.File(file_types=[".csv"])
            summary_output = gr.Dataframe(headers=["Category","Total Spending","Average Spending"], interactive=False)
            monthly_output = gr.Dataframe(headers=["Monthly Total","Monthly Average"], interactive=False)
            advice_output = gr.Textbox(lines=6, interactive=False)

            def handle_csv(file):
                """Parse the uploaded CSV and refresh every dashboard output.

                Returns the summary table rows, the monthly table rows, the
                advice text and the transaction list to store in state.
                """
                # Start from an empty retriever on every upload so rows from a
                # previously loaded file cannot leak into later answers.
                rag.texts = []
                rag.vectors = None
                rag.index = None

                df, transactions = load_and_clean_csv(file)
                if df is None:
                    return [], [], "Invalid CSV file.", None
                # Add CSV as text to RAG
                rag.add_texts(df.astype(str).apply(lambda row: ' | '.join(row), axis=1).tolist())
                return (
                    format_summary_table(df),
                    format_monthly_table(df),
                    "\n".join(generate_spending_advice(summarize_expenses(df))),
                    transactions
                )

            file_input.change(
                fn=handle_csv,
                inputs=file_input,
                outputs=[summary_output, monthly_output, advice_output, transactions_state]
            )

        # Chatbot
        with gr.Column(scale=1, min_width=400):
            gr.Markdown("### 💬 Financial Sage Chat")
            chat_output = gr.Chatbot()
            question_input = gr.Textbox(placeholder="Ask anything about your spending or habits...", lines=2)
            chat_button = gr.Button("Ask the Sage 💭")

            def chat_with_history(question, transactions_state, history):
                """Answer a question and append the turn to the chat history.

                Returns the history twice (for the chatbot display and for the
                state) plus an empty string that clears the input box.
                """
                if not question or not question.strip():
                    # Nothing was asked: keep the conversation as it is rather
                    # than sending an empty prompt to the model.
                    return history, history, ""
                answer = get_sage_response(question, transactions_state)
                history = history + [(question, answer)]
                return history, history, ""

            # The button and the Enter key trigger the same handler.
            chat_button.click(
                fn=chat_with_history,
                inputs=[question_input, transactions_state, chat_history],
                outputs=[chat_output, chat_history, question_input]
            )

            question_input.submit(
                fn=chat_with_history,
                inputs=[question_input, transactions_state, chat_history],
                outputs=[chat_output, chat_history, question_input]
            )

app.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://48d9da691612266be4.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [68]:
import pandas as pd
import gradio as gr
from hands_on_ai.chat import get_response
import warnings
import re
from datetime import datetime
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

warnings.filterwarnings("ignore")

# ============================================
# ✅ CSV Handling and Analysis
# ============================================

def load_and_clean_csv(file):
    """Load an uploaded transactions CSV and normalise its columns.

    Args:
        file: Either a filesystem path given as a string, or an upload object
            that exposes the path as ``.name``. ``None`` (no file selected)
            is treated as an invalid file.

    Returns:
        ``(df, transactions)`` where ``df`` is the cleaned DataFrame and
        ``transactions`` is the same data as a list of row dicts. Returns
        ``(None, None)`` when the file is missing or unreadable, when its
        columns are not exactly Date, Amount, Category, Description (in that
        order), or when no usable rows remain after cleaning.
    """
    if file is None:
        return None, None
    # Uploads may arrive as a plain path string or as an object carrying `.name`.
    path = file if isinstance(file, str) else file.name
    expected_columns = ["Date", "Amount", "Category", "Description"]
    try:
        df = pd.read_csv(path)
        if list(df.columns) != expected_columns:
            return None, None
        # Dates are read day-first; unparseable values become NaT and are dropped below.
        df["Date"] = pd.to_datetime(df["Date"].astype(str).str.strip(), dayfirst=True, errors="coerce")
        # Strip currency symbols and thousands separators before converting to float.
        df["Amount"] = df["Amount"].replace(r"[\$,]", "", regex=True).astype(float)
        df = df.dropna(subset=["Date", "Amount"])
        if df.empty:
            return None, None
        return df, df.to_dict(orient="records")
    except Exception:
        # Any read/parse failure means "invalid file" to the caller. Unlike a bare
        # `except:`, this still lets KeyboardInterrupt/SystemExit propagate.
        return None, None

def summarize_expenses(df):
    """Aggregate spending overall and per category.

    Args:
        df: DataFrame with ``Category`` and ``Amount`` columns.

    Returns:
        Dict with the overall ``Amount`` sum under "Total Spending", the
        per-category sums under "Category Totals" and the per-category means
        under "Category Averages"; every figure is rounded to two decimals.
    """
    amounts_by_category = df.groupby("Category")["Amount"]
    report = {"Total Spending": round(df["Amount"].sum(), 2)}
    report["Category Totals"] = amounts_by_category.sum().round(2).to_dict()
    report["Category Averages"] = amounts_by_category.mean().round(2).to_dict()
    return report

def monthly_summary(df):
    """Return the sum and the mean of ``Amount`` across every row of ``df``.

    No grouping by calendar month is performed here: "Monthly Total" is the
    sum of all amounts and "Monthly Average" is the mean amount per row, both
    rounded to two decimals.
    """
    amounts = df["Amount"]
    total = round(amounts.sum(), 2)
    average = round(amounts.mean(), 2)
    return {"Monthly Total": total, "Monthly Average": average}

def generate_spending_advice(summary_dict):
    """Produce one advice sentence per spending category.

    Args:
        summary_dict: Mapping that may contain "Total Spending" (a number) and
            "Category Totals" (category name -> amount).

    Returns:
        A list of strings. Categories at or above 20% of total spending get a
        "set a limit" message, those at or above 10% a "keep an eye" message,
        and the rest a "no action" message. When the total is zero or there
        are no categories, a single "no data" message is returned.
    """
    overall = summary_dict.get("Total Spending", 0)
    per_category = summary_dict.get("Category Totals", {})
    if overall == 0 or not per_category:
        return ["No spending data available."]

    def _advice_line(name, spent):
        # Share of the overall total, expressed as a percentage.
        share = (spent / overall) * 100
        if share >= 20:
            return f"{name} is {share:.1f}% of your spending. Consider setting a limit."
        if share >= 10:
            return f"{name} makes up {share:.1f}%. Keep an eye on it."
        return f"{name} is only {share:.1f}%. No action needed."

    return [_advice_line(name, spent) for name, spent in per_category.items()]

def format_summary_table(df):
    """Build the rows for the per-category summary table.

    Each row is ``[category, total, average]`` with the money values rendered
    as ``$x.xx`` strings. The total cell is followed by a space and, when the
    category accounts for at least 20% of overall spending, a warning emoji.
    """
    report = summarize_expenses(df)
    overall = report["Total Spending"]
    averages = report["Category Averages"]

    def _row(name, spent):
        marker = "⚠️" if (spent / overall) * 100 >= 20 else ""
        return [name, f"${spent:.2f} {marker}", f"${averages[name]:.2f}"]

    return [_row(name, spent) for name, spent in report["Category Totals"].items()]

def format_monthly_table(df):
    """Return a single-row table with the total and average from ``monthly_summary``.

    Both figures are formatted as ``$x.xx`` strings.
    """
    figures = monthly_summary(df)
    total_cell = f"${figures['Monthly Total']:.2f}"
    average_cell = f"${figures['Monthly Average']:.2f}"
    return [[total_cell, average_cell]]

# ============================================
# 💬 Structured Chatbot Logic with Range-Aware Dates
# ============================================

def extract_dates_from_question(question):
    """Extract a date range from ``dd/mm/yyyy`` dates mentioned in a question.

    Args:
        question: Free-text user question.

    Returns:
        ``(start, end)`` as ``datetime.date`` objects. Two valid dates give the
        range in chronological order; one valid date gives a single-day range.
        ``(None, None)`` is returned when the question contains no valid date
        or more than two.
    """
    dates = []
    for candidate in re.findall(r"(\d{1,2}/\d{1,2}/\d{4})", question):
        try:
            dates.append(datetime.strptime(candidate, "%d/%m/%Y").date())
        except ValueError:
            # Date-shaped text that is not a real calendar date (e.g. 31/02/2024):
            # ignore it instead of letting the chat handler crash.
            continue

    if len(dates) == 2:
        start, end = sorted(dates)
        return start, end
    if len(dates) == 1:
        return dates[0], dates[0]
    return None, None

def _lowered_text(value):
    """Return ``value`` lower-cased as text, or "" for missing (None/NaN) values."""
    if isinstance(value, str):
        return value.lower()
    if value is None or pd.isna(value):
        return ""
    return str(value).lower()


def filter_transactions(transactions, question):
    """Narrow ``transactions`` down to the ones relevant to ``question``.

    Three filters are applied in order:

    1. Date range, when the question mentions one or two dd/mm/yyyy dates.
    2. Category, when a known category name occurs in the question.
    3. Description, keeping rows that share at least one word with the
       question; this step is skipped when it would leave nothing.

    Args:
        transactions: List of row dicts with "Date", "Category" and
            "Description" keys; "Date" values must provide ``.date()``.
        question: Free-text user question.

    Returns:
        The filtered list of transaction dicts (possibly empty).
    """
    start_date, end_date = extract_dates_from_question(question)
    filtered = transactions

    if start_date and end_date:
        filtered = [t for t in filtered if start_date <= t["Date"].date() <= end_date]

    question_lower = question.lower()
    # Blank CSV cells arrive as NaN floats, so normalise before comparing text.
    known_categories = {_lowered_text(t["Category"]) for t in transactions}
    # "" is a substring of every question and must never count as a match.
    known_categories.discard("")
    matched_categories = {c for c in known_categories if c in question_lower}
    if matched_categories:
        filtered = [t for t in filtered if _lowered_text(t["Category"]) in matched_categories]

    words_in_question = set(re.findall(r"\b\w+\b", question_lower))
    filtered_desc = [
        t for t in filtered
        if words_in_question & set(re.findall(r"\b\w+\b", _lowered_text(t["Description"])))
    ]
    # Only narrow by description when at least one row actually matches.
    if filtered_desc:
        filtered = filtered_desc

    return filtered

def format_transactions_for_prompt(transactions):
    """Render transactions as one text line each for inclusion in the prompt.

    Args:
        transactions: Sequence of dicts with "Date" (supports ``strftime``),
            "Amount", "Category" and "Description".

    Returns:
        Newline-joined lines, or a fixed "no matches" sentence when
        ``transactions`` is empty.
    """
    if not transactions:
        return "No matching transactions found."

    line_template = "Date: {day}, Amount: ${amount:.2f}, Category: {category}, Description: {description}"
    rendered = (
        line_template.format(
            day=entry["Date"].strftime("%d/%m/%Y"),
            amount=entry["Amount"],
            category=entry["Category"],
            description=entry["Description"],
        )
        for entry in transactions
    )
    return "\n".join(rendered)

# ============================================
# 🧠 Local RAG Setup (FAISS + sentence-transformers)
# ============================================

class LocalRAG:
    """Small in-memory retriever: sentence-transformer embeddings in a FAISS L2 index."""

    def __init__(self):
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.texts = []      # indexed texts; position i corresponds to index row i
        self.vectors = None  # float32 embedding matrix for self.texts, None while empty
        self.index = None    # faiss.IndexFlatL2 over the embeddings, None while empty

    def add_texts(self, new_texts):
        """Embed ``new_texts`` and append them to the index.

        Only the new texts are encoded, so the cost of a call depends on the
        size of the addition rather than on everything indexed so far. An
        empty input is a no-op.
        """
        new_texts = list(new_texts)
        if not new_texts:
            return
        embeddings = self.model.encode(new_texts, convert_to_numpy=True).astype('float32')
        if self.index is None:
            self.index = faiss.IndexFlatL2(embeddings.shape[1])
        if self.vectors is None:
            self.vectors = embeddings
        else:
            self.vectors = np.vstack([self.vectors, embeddings])
        self.index.add(embeddings)
        self.texts.extend(new_texts)

    def query(self, question, top_k=3):
        """Return up to ``top_k`` indexed texts nearest to ``question``.

        Returns an empty list when nothing has been indexed or ``top_k`` is
        not positive.
        """
        if self.index is None or not self.texts:
            return []
        # Never ask for more neighbours than there are texts.
        k = min(top_k, len(self.texts))
        if k <= 0:
            return []
        q_vec = self.model.encode([question]).astype('float32')
        _, indices = self.index.search(q_vec, k)
        # FAISS marks missing neighbours with -1; without the lower bound that
        # would silently resolve to the last text via negative indexing.
        return [self.texts[i] for i in indices[0] if 0 <= i < len(self.texts)]

# Module-level retriever used by handle_csv and financial_sage_rag below.
# Constructing it loads the 'all-MiniLM-L6-v2' SentenceTransformer model.
rag = LocalRAG()

# ============================================
# 💬 Financial Sage with RAG + Insights
# ============================================

def summarize_habits(transactions):
    """Describe a set of transactions in three short sentences.

    Args:
        transactions: List of row dicts with "Category" and "Amount" keys.

    Returns:
        A sentence string covering the transaction count and total, the mean
        amount per transaction, and the category with the largest total.
        A fixed message is returned when ``transactions`` is empty.
    """
    if not transactions:
        return "No transactions to analyze."

    frame = pd.DataFrame(transactions)
    spend_by_category = frame.groupby("Category")["Amount"].sum()
    biggest = spend_by_category.idxmax()
    amounts = frame["Amount"]
    sentences = [
        f"You have {len(frame)} transactions totaling ${amounts.sum():.2f}.",
        f"On average, you spend ${amounts.mean():.2f} per transaction.",
        f"You spend the most on {biggest} (${spend_by_category[biggest]:.2f}).",
    ]
    return " ".join(sentences)

def financial_sage_rag(question, transactions):
    """Answer ``question`` using the matching transactions plus retrieved text.

    The prompt sent to ``get_response`` contains, in order: the persona line,
    the transactions selected by ``filter_transactions``, the habits summary
    of those transactions, the texts returned by ``rag.query`` and finally
    the user's question.
    """
    relevant = filter_transactions(transactions, question)
    habits = summarize_habits(relevant)

    references = rag.query(question)
    if references:
        reference_block = "\n".join(references)
    else:
        reference_block = "No additional reference documents."

    transaction_block = format_transactions_for_prompt(relevant)
    sections = [
        "You are the Financial Sage, a friendly guide who explains spending habits clearly.\n",
        f"Matching transactions:\n{transaction_block}\n\n",
        f"Summary of habits:\n{habits}\n\n",
        f"Additional references:\n{reference_block}\n\n",
        f"Answer the user's question: {question}",
    ]
    return get_response("".join(sections))

def get_sage_response(question, transactions_state):
    """Route a question to the plain chat model or the transaction-aware path.

    Without loaded transactions (``None`` or empty) the question goes to
    ``get_response`` unchanged; otherwise ``financial_sage_rag`` is used.
    """
    if not transactions_state:
        return get_response(question)
    return financial_sage_rag(question, transactions_state)

# ============================================
# 💰 Budget Feature - Single Total Budget
# ============================================

def calculate_savings(income, budget, total_expense):
    """Compute the amount saved and a 0-1 progress ratio.

    Savings are ``income - total_expense``. The ratio is savings divided by
    ``income - budget`` and clamped to the range 0..1; when ``income - budget``
    is not positive the divisor falls back to 1 so the division stays defined.

    Returns:
        ``(savings, ratio)``.
    """
    savings = income - total_expense

    target = income - budget
    if not target > 0:
        target = 1  # avoid division by zero

    ratio = savings / target
    if ratio < 0:
        ratio = 0
    elif ratio > 1:
        ratio = 1
    return savings, ratio

def check_budget(transactions, income, budget):
    """Summarise income, budget, expenses and savings.

    Args:
        transactions: Row dicts (or anything ``pd.DataFrame`` accepts) with an
            "Amount" field. ``None`` or an empty collection counts as zero
            expenses.
        income: Income for the period.
        budget: Spending budget for the period.

    Returns:
        ``(advice, percent)`` where ``advice`` is a four-line text summary and
        ``percent`` is the 0-1 ratio produced by ``calculate_savings``.
    """
    if transactions is None or len(transactions) == 0:
        # An empty DataFrame has no "Amount" column, so handle "no data" directly.
        total_expense = 0.0
    else:
        total_expense = pd.DataFrame(transactions)["Amount"].sum()
    savings, percent = calculate_savings(income, budget, total_expense)

    advice = (
        f"Income: ${income:.2f}\n"
        f"Budget: ${budget:.2f}\n"
        f"Total Expenses: ${total_expense:.2f}\n"
        f"Savings: ${savings:.2f}"
    )

    return advice, percent

# ============================================
# 🧩 Gradio App Layout
# ============================================

with gr.Blocks(title="💰 Financial Sage Dashboard") as app:

    gr.Markdown("<h1 style='text-align:center'>💰 Financial Sage Dashboard</h1>")

    # State passed between callbacks: the parsed transactions (None until a
    # valid CSV is loaded) and the list of (question, answer) chat turns.
    transactions_state = gr.State(value=None)
    chat_history = gr.State(value=[])

    with gr.Row():
        # CSV Dashboard
        with gr.Column(scale=1, min_width=450):
            gr.Markdown("### 📁 Upload Your CSV")
            file_input = gr.File(file_types=[".csv"])
            summary_output = gr.Dataframe(headers=["Category","Total Spending","Average Spending"], interactive=False)
            monthly_output = gr.Dataframe(headers=["Monthly Total","Monthly Average"], interactive=False)
            advice_output = gr.Textbox(lines=6, interactive=False)
            top_category_output = gr.Textbox(label="🏆 Top Spending Category", interactive=False)

            def handle_csv(file):
                """Parse the uploaded CSV and refresh every dashboard output.

                Returns the summary table rows, the monthly table rows, the
                advice text, the transaction list to store in state and the
                name of the category with the highest total.
                """
                # Start from an empty retriever on every upload so rows from a
                # previously loaded file cannot leak into later answers.
                rag.texts = []
                rag.vectors = None
                rag.index = None

                df, transactions = load_and_clean_csv(file)
                if df is None:
                    return [], [], "Invalid CSV file.", None, ""

                # Add CSV as text to RAG
                rag.add_texts(df.astype(str).apply(lambda row: ' | '.join(row), axis=1).tolist())

                # Compute the summary once and reuse it for both the top
                # category and the advice text.
                summary = summarize_expenses(df)

                # Determine Top Spending Category
                category_totals = summary["Category Totals"]
                top_category = max(category_totals, key=category_totals.get) if category_totals else "N/A"

                return (
                    format_summary_table(df),
                    format_monthly_table(df),
                    "\n".join(generate_spending_advice(summary)),
                    transactions,
                    top_category
                )

            file_input.change(
                fn=handle_csv,
                inputs=file_input,
                outputs=[summary_output, monthly_output, advice_output, transactions_state, top_category_output]
            )

        # Chatbot
        with gr.Column(scale=1, min_width=400):
            gr.Markdown("### 💬 Financial Sage Chat")
            chat_output = gr.Chatbot()
            question_input = gr.Textbox(placeholder="Ask anything about your spending or habits...", lines=2)
            chat_button = gr.Button("Ask the Sage 💭")

            def chat_with_history(question, transactions_state, history):
                """Answer a question and append the turn to the chat history.

                Returns the history twice (for the chatbot display and for the
                state) plus an empty string that clears the input box.
                """
                if not question or not question.strip():
                    # Nothing was asked: keep the conversation as it is rather
                    # than sending an empty prompt to the model.
                    return history, history, ""
                answer = get_sage_response(question, transactions_state)
                history = history + [(question, answer)]
                return history, history, ""

            # The button and the Enter key trigger the same handler.
            chat_button.click(
                fn=chat_with_history,
                inputs=[question_input, transactions_state, chat_history],
                outputs=[chat_output, chat_history, question_input]
            )

            question_input.submit(
                fn=chat_with_history,
                inputs=[question_input, transactions_state, chat_history],
                outputs=[chat_output, chat_history, question_input]
            )

app.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d1fff06d904be3880e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


