In [2]:
pip install gradio

Collecting gradio
  Downloading gradio-5.9.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.2 (from gradio)
  Downloading gradio_client-1.5.2-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import gradio as gr
import io
from PIL import Image

# Train the AI Model
def train_model(data):
    """Fit the expense categoriser on labelled expense records.

    Args:
        data: DataFrame with 'Description', 'Amount' and 'Category' columns.
            The frame is only read; it is not modified.

    Returns:
        Tuple ``(model, vectorizer, encoder)``: the fitted linear-kernel SVC,
        the fitted TF-IDF vectorizer and the fitted LabelEncoder used for the
        category labels.
    """
    # Encode labels into a local array instead of writing them back into
    # ``data``: overwriting the column would make a second call on the same
    # frame train on integer codes rather than on the category names.
    encoder = LabelEncoder()
    y = encoder.fit_transform(data['Category'])

    # TF-IDF rejects NaN documents, so treat a missing description as empty text.
    descriptions = data['Description'].fillna('').astype(str)
    vectorizer = TfidfVectorizer(stop_words='english')
    X_text = vectorizer.fit_transform(descriptions)

    # Feature matrix = dense TF-IDF columns followed by one Amount column.
    X_amount = data['Amount'].to_numpy().reshape(-1, 1)
    X = np.hstack([X_text.toarray(), X_amount])

    model = SVC(kernel='linear', random_state=42)
    model.fit(X, y)

    return model, vectorizer, encoder

# Generate Suggestions
def generate_suggestions(category_summary, total_spent, threshold_pct=30):
    """Build advice strings for categories that dominate total spending.

    Args:
        category_summary: Mapping-like object whose ``.items()`` yields
            ``(category, amount)`` pairs (a dict or a pandas Series).
        total_spent: Sum of all amounts; used as the percentage denominator.
        threshold_pct: A category is flagged when its share of ``total_spent``
            is strictly greater than this percentage. Defaults to 30.

    Returns:
        List of suggestion strings, one per flagged category, in iteration
        order. Empty when nothing is flagged or when ``total_spent`` is not
        positive.
    """
    # A share of the total is undefined for a zero, negative or NaN total;
    # bail out instead of dividing by zero.
    if not total_spent > 0:
        return []

    suggestions = []
    for category, amount in category_summary.items():
        percentage = (amount / total_spent) * 100
        if percentage > threshold_pct:
            suggestions.append(f"You might be spending too much on {category} ({percentage:.1f}%). Consider ways to reduce it.")
    return suggestions

# Process Expenses and Predict Categories
def process_expenses(file, model, vectorizer, encoder):
    """Summarise an expense CSV, predicting categories when they are absent.

    Args:
        file: Anything ``pd.read_csv`` accepts (path or file-like object).
        model: Fitted classifier whose ``predict`` takes the TF-IDF columns
            followed by one Amount column.
        vectorizer: Fitted text vectorizer used to build the TF-IDF columns.
        encoder: Fitted label encoder used to turn predictions back into
            category names.

    Returns:
        Tuple ``(text, bar_image, pie_image)``. On success ``text`` holds the
        per-category totals followed by suggestions and the images are PIL
        images of the two charts. On any failure ``text`` is an error message
        and both images are ``None``.
    """
    # Local import: build figures directly instead of through pyplot's global
    # "current figure" registry, so nothing needs closing on error paths.
    from matplotlib.figure import Figure

    try:
        # Read the uploaded CSV file
        data = pd.read_csv(file)

        if not all(col in data.columns for col in ['Description', 'Amount']):
            return "Error: CSV must contain 'Description' and 'Amount' columns.", None, None

        if data.empty:
            return "Error: CSV contains no expense rows.", None, None

        # Reject text that is not a number up front; blank cells stay NaN and
        # are simply ignored by the per-category sums below.
        amounts = pd.to_numeric(data['Amount'], errors='coerce')
        if (amounts.isna() & data['Amount'].notna()).any():
            return "Error: 'Amount' column must contain only numeric values.", None, None
        data['Amount'] = amounts

        # Predict categories if not provided
        if 'Category' not in data.columns:
            # The vectorizer rejects NaN documents and the classifier rejects
            # NaN features, so blank cells become '' and 0 for prediction only.
            descriptions = data['Description'].fillna('').astype(str)
            text_features = vectorizer.transform(descriptions)
            amount_features = amounts.fillna(0).to_numpy().reshape(-1, 1)
            combined_features = np.hstack([text_features.toarray(), amount_features])
            data['Category'] = encoder.inverse_transform(model.predict(combined_features))

        # Summarize expenses
        summary = data.groupby('Category')['Amount'].sum()
        total_expenses = summary.sum()
        suggestions = generate_suggestions(summary, total_expenses)

        # Bar chart of the per-category totals
        bar_fig = Figure(figsize=(10, 6))
        bar_ax = bar_fig.subplots()
        sns.barplot(x=summary.index, y=summary.values, ax=bar_ax)
        bar_ax.set_title('Monthly Expense Distribution')
        bar_ax.set_xlabel('Category')
        bar_ax.set_ylabel('Amount ($)')
        bar_graph_image = _figure_to_image(bar_fig)

        # Pie chart of each category's share
        pie_fig = Figure(figsize=(8, 8))
        pie_ax = pie_fig.subplots()
        pie_ax.pie(summary.values, labels=summary.index, autopct='%1.1f%%', startangle=90)
        pie_ax.set_title('Monthly Expense Breakdown')
        pie_chart_image = _figure_to_image(pie_fig)

        summary_text = "\n".join([f"{cat}: {amount}$" for cat, amount in summary.items()])
        suggestion_text = "Suggestions:\n" + "\n".join(suggestions)

        output = f"{summary_text}\n\n{suggestion_text}"

        return output, bar_graph_image, pie_chart_image

    except Exception as e:
        # Deliberately broad: any failure is reported in the text output
        # instead of propagating to the caller.
        return f"Error processing file: {e}", None, None


def _figure_to_image(fig):
    """Render a Matplotlib figure to an in-memory PNG and open it as a PIL image."""
    buffer = io.BytesIO()
    fig.savefig(buffer, format='png')
    buffer.seek(0)
    return Image.open(buffer)

# Interface
def setup_interface(training_csv='replace here expenses_.csv file in github'):
    """Train the categoriser and wrap ``process_expenses`` in a Gradio UI.

    Args:
        training_csv: Path or URL of the labelled training CSV passed to
            ``pd.read_csv``. The default is a placeholder string and must be
            replaced with (or overridden by) a real location before running.

    Returns:
        The configured ``gr.Interface``; the caller is responsible for
        launching it.
    """
    training_data = pd.read_csv(training_csv)
    model, vectorizer, encoder = train_model(training_data)

    # Gradio interface: the lambda binds the trained objects so the UI
    # callback only has to receive the uploaded file.
    interface = gr.Interface(
        fn=lambda file: process_expenses(file, model, vectorizer, encoder),
        inputs=gr.File(label="Upload CSV File"),
        outputs=[
            gr.Textbox(label="Expense Summary and Suggestions"),
            gr.Image(label="Expense Distribution Bar Chart"),
            gr.Image(label="Expense Breakdown Pie Chart")
        ],
        title="AI-Powered Expense Tracker",
        description="Upload a CSV file with 'Description' and 'Amount' columns. Optional: Include 'Category' for better insights."
    )
    return interface

# Build the app (setup_interface trains the model before creating the UI),
# then start serving it.
interface = setup_interface()
interface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d0e8afa05694633ed0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


