In [4]:
!sudo apt install tesseract-ocr
!pip install pyautogen pillow pytesseract

import os
import json
from PIL import Image
import pytesseract
import autogen
from autogen import GroupChat, GroupChatManager, UserProxyAgent, AssistantAgent


os.environ['OPENAI_API_KEY'] = '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'

config_list = [
    {
        'model': 'gpt-4',
        'api_key': os.environ['OPENAI_API_KEY']
    }
]

llm_config = {
    "config_list": config_list,
    "temperature": 0
}

def process_bill_images(image_paths):
    expenses = []
    for path in image_paths:
        try:
            img = Image.open(path)
            text = pytesseract.image_to_string(img)


            prompt = f"""Analyze this bill text and extract expenses:
            {text}

            Return JSON format with:
            - description (item/service name)
            - amount (numeric value)
            - category (groceries/dining/utilities/shopping/entertainment/other)"""


            from openai import OpenAI
            client = OpenAI()
            response = client.chat.completions.create(
                model="gpt-4",
                messages=[{"role": "user", "content": prompt}],
                temperature=0
            )

            items = json.loads(response.choices[0].message.content)
            expenses.extend(items)
        except Exception as e:
            print(f"Error processing {path}: {str(e)}")

    # Categorize expenses
    categories = {
        'groceries': 0.0,
        'dining': 0.0,
        'utilities': 0.0,
        'shopping': 0.0,
        'entertainment': 0.0,
        'other': 0.0
    }

    for item in expenses:
        cat = item['category'].lower()
        categories[cat] += float(item['amount']) if cat in categories else categories['other']

    return {
        'raw_expenses': expenses,
        'category_totals': categories
    }

def summarize_expenses(categorized_data):
    totals = categorized_data['category_totals']
    summary = "📊 Spending Summary:\n\n"

    # Total spending
    total = sum(totals.values())
    summary += f"💵 Total Expenditure: ${total:.2f}\n\n"

    # Breakdown by category
    summary += "📈 Category Breakdown:\n"
    for cat, amount in totals.items():
        if amount > 0:
            summary += f"- {cat.capitalize()}: ${amount:.2f}\n"

    # Highlight highest category
    max_cat = max(totals, key=totals.get)
    summary += f"\n🔥 Highest Spending: {max_cat.capitalize()} (${totals[max_cat]:.2f})\n"

    # Alert for unusual spending
    avg = total / len([v for v in totals.values() if v > 0])
    alerts = [cat for cat, amt in totals.items() if amt > 3 * avg]
    if alerts:
        summary += "\n🚨 Unusual Spending Alert:\n"
        summary += "\n".join(f"- {cat.capitalize()}" for cat in alerts)

    return summary

# Create Agents
user_proxy = UserProxyAgent(
    name="User_Proxy",
    human_input_mode="NEVER",
    system_message="A user proxy that initiates bill processing by providing image paths.",
    code_execution_config=False
)

bill_processor = AssistantAgent(
    name="Bill_Processor",
    system_message="You process bill images using OCR and LLM extraction. Use process_bill_images function for all image processing tasks.",
    llm_config=llm_config,
    function_map={"process_bill_images": process_bill_images}
)

expense_analyst = AssistantAgent(
    name="Expense_Analyst",
    system_message="You analyze categorized expenses and generate summaries. Use summarize_expenses function for all analysis tasks.",
    llm_config=llm_config,
    function_map={"summarize_expenses": summarize_expenses}
)

# Configure Group Chat
group_chat = GroupChat(
    agents=[user_proxy, bill_processor, expense_analyst],
    messages=[],
    max_round=6
)

manager = GroupChatManager(
    groupchat=group_chat,
    llm_config=llm_config
)

# Initiate chat with example image (upload your bills in Colab first)
user_proxy.initiate_chat(
    manager,
    message="Please process these bill images: ['/content/bill.jpg']"
)

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tesseract-ocr is already the newest version (4.1.1-2.1build1).
0 upgraded, 0 newly installed, 0 to remove and 30 not upgraded.
User_Proxy (to chat_manager):

Please process these bill images: ['/content/bill.jpg']

--------------------------------------------------------------------------------

Next speaker: Bill_Processor

Bill_Processor (to chat_manager):

Sure, to process the bill images, we will use the `process_bill_images` function. However, as an AI model, I don't have the ability to execute code. Here's how you can do it:

```python
# Assuming you have a function defined as process_bill_images
processed_bills = process_bill_images(['/content/bill.jpg'])
```

This function will process the image at the path '/content/bill.jpg' using OCR and LLM extraction. Please make sure that the function `process_bill_images` is properly defined and the path to the image is correct.

------------

ChatResult(chat_id=None, chat_history=[{'content': "Please process these bill images: ['/content/bill.jpg']", 'role': 'assistant', 'name': 'User_Proxy'}, {'content': "Sure, to process the bill images, we will use the `process_bill_images` function. However, as an AI model, I don't have the ability to execute code. Here's how you can do it:\n\n```python\n# Assuming you have a function defined as process_bill_images\nprocessed_bills = process_bill_images(['/content/bill.jpg'])\n```\n\nThis function will process the image at the path '/content/bill.jpg' using OCR and LLM extraction. Please make sure that the function `process_bill_images` is properly defined and the path to the image is correct.", 'name': 'Bill_Processor', 'role': 'user'}, {'content': '', 'role': 'assistant', 'name': 'User_Proxy'}, {'content': '', 'role': 'assistant', 'name': 'User_Proxy'}, {'content': '', 'role': 'assistant', 'name': 'User_Proxy'}, {'content': '', 'role': 'assistant', 'name': 'User_Proxy'}], summary='', 