# Processing Invoices Using Box's MCP Server

In [15]:
import os
import json
from dotenv import load_dotenv
from google import genai
from google.genai import types
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
from database import setup_database, update_invoice_in_database, generate_report
from model import generate

# Load settings from a local .env file before any os.getenv call below.
# override=True makes values from .env win over variables already present
# in the process environment.
load_dotenv(override=True)

True

Let's now configure a few settings that we'll use throughout the notebook.

In [16]:
# Credentials and identifiers are read from the environment so that no
# secret is stored in the notebook itself.
BOX_CLIENT_ID = os.getenv("BOX_CLIENT_ID")
BOX_CLIENT_SECRET = os.getenv("BOX_CLIENT_SECRET")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# ID of the Box folder whose content is listed and processed as invoices.
BOX_FOLDER_ID = os.getenv("BOX_FOLDER_ID")

# Directory containing the Box MCP server checkout. It can be overridden with
# the BOX_MCP_SERVER_PATH environment variable so the notebook is not tied to
# a single machine; the previous hardcoded location remains the fallback.
BOX_MCP_SERVER_PATH = os.getenv(
    "BOX_MCP_SERVER_PATH", "/Users/svpino/dev/mcp-server-box"
)

# Names of the MCP tools that are exposed to the model; any other tool the
# server offers is ignored.
BOX_MCP_TOOLS = [
    "box_who_am_i",
    "box_list_folder_content_by_folder_id",
    "box_ai_extract_tool",
]

## Configuring Box's MCP Server

First, let's configure Gemini and prepare the connection to Box's MCP Server.

In [17]:
# How to launch the Box MCP server as a subprocess: `uv` is pointed at the
# server directory and asked to run the server script from there.
server_params = StdioServerParameters(
    command="uv",
    args=["--directory", BOX_MCP_SERVER_PATH, "run", "src/mcp_server_box.py"],
)

# Gemini client authenticated with the API key from the configuration cell.
client = genai.Client(api_key=GEMINI_API_KEY)

Now, let's create a function to set up the list of MCP tools that will be used to interact with Box.

In [18]:
async def get_mcp_tools(session: ClientSession):
    """
    Build the Gemini tool declarations for the Box MCP tools in use.

    The session is asked for its tool list; only tools whose name appears in
    BOX_MCP_TOOLS are kept. Each kept tool is wrapped in its own `types.Tool`
    holding a single function declaration made of the tool's name, its
    description and a shallow copy of its input schema.

    Returns:
        list: One `types.Tool` per selected MCP tool, in the order the
        session reported them.
    """
    listing = await session.list_tools()

    selected = []
    for tool in listing.tools:
        # Skip everything the notebook did not explicitly opt in to.
        if tool.name not in BOX_MCP_TOOLS:
            continue

        declaration = {
            "name": tool.name,
            "description": tool.description,
            "parameters": dict(tool.inputSchema.items()),
        }
        selected.append(types.Tool(function_declarations=[declaration]))

    return selected

## Processing invoices

Let's start by defining the function that will extract the data from a given invoice.

In [19]:
async def extract_invoice_fields(
    invoice: dict, client: genai.Client, session: ClientSession, tools: list
):
    """
    Ask the model to pull the key fields out of one invoice.

    Prints a progress line, sends a prompt that requests client_name,
    invoice_amount and product_name for the file identified by
    `invoice["id"]`, and parses the "answer" entry of the first item
    returned by `generate` as JSON.

    Args:
        invoice: Mapping that provides at least the "id" and "name" keys.
        client: Gemini client forwarded to `generate`.
        session: MCP client session forwarded to `generate`.
        tools: Tool declarations forwarded to `generate`.

    Returns:
        The decoded JSON answer with an extra "file" key set to
        `invoice["name"]`.
    """
    print(f'Extracting data from invoice "{invoice["name"]}"...')

    instructions = "".join(
        [
            "Extract the following fields from the invoice ",
            f"with file_id {invoice['id']}: ",
            "client_name, invoice_amount, product_name. ",
            "Return the invoice_amount as a float. ",
        ]
    )
    replies = await generate(instructions, client, session, tools)

    # Only the first reply is used; its "answer" is expected to be JSON text.
    fields = json.loads(replies[0]["answer"])
    fields["file"] = invoice["name"]
    return fields


Let's set up the database where we'll store the information of every invoice.

In [20]:
# Keep the object returned by setup_database in `connection`: later cells pass
# it to update_invoice_in_database and generate_report, and the final cell
# calls connection.close() on it.
connection = setup_database()

Let's create a Gemini session with access to Box's MCP Server, list the invoices stored in Box and extract the data from each of them.

In [None]:
async with stdio_client(server_params) as (read, write):
    async with ClientSession(read, write) as session:
        await session.initialize()
        tools = await get_mcp_tools(session)

        # Get the list of invoices from the Box folder.
        invoices = await generate(
            f"List the content of the folder with id {BOX_FOLDER_ID}",
            client,
            session,
            tools,
        )
        print(f"Found {len(invoices)} invoices")

        # Process each invoice and extract the required fields.
        for invoice in invoices:
            invoice_data = await extract_invoice_fields(invoice, client, session, tools)
            print(f"Extracted data: {invoice_data}")
            update_invoice_in_database(connection, invoice_data)

[Tool(
  function_declarations=[
    FunctionDeclaration(
      description="""
Get the current user's information.
This is also useful to check the connection status.

return:
    dict: The current user's information.
""",
      name='box_who_am_i',
      parameters=Schema(
        properties={},
        title='box_who_am_iArguments',
        type=<Type.OBJECT: 'OBJECT'>
      )
    ),
  ]
), Tool(
  function_declarations=[
    FunctionDeclaration(
      description=""""
Extract data from a single file in Box using AI.

Args:
    file_id (str): The ID of the file to read.
    fields (str): The fields to extract from the file.
return:
    dict: The extracted data in a json string format.
""",
      name='box_ai_extract_tool',
      parameters=Schema(
        properties={
          'fields': Schema(
            title='Fields',
            type=<Type.STRING: 'STRING'>
          ),
          'file_id': Schema(
            title='File Id',
            type=<Type.STRING: 'STRING'>
         

  + Exception Group Traceback (most recent call last):
  |   File "/Users/svpino/dev/box-course/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3697, in run_code
  |     await eval(code_obj, self.user_global_ns, self.user_ns)
  |   File "/var/folders/d2/qgdrwwkd46190hx5_bvrqc380000gn/T/ipykernel_95830/1915772850.py", line 1, in <module>
  |     async with stdio_client(server_params) as (read, write):
  |                ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/Users/svpino/.local/share/uv/python/cpython-3.12.11-macos-aarch64-none/lib/python3.12/contextlib.py", line 231, in __aexit__
  |     await self.gen.athrow(value)
  |   File "/Users/svpino/dev/box-course/.venv/lib/python3.12/site-packages/mcp/client/stdio/__init__.py", line 173, in stdio_client
  |     anyio.create_task_group() as tg,
  |     ^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/Users/svpino/dev/box-course/.venv/lib/python3.12/site-packages/anyio/_backends/_asyncio.py", line 772, in __aexit__
  |     ra

## Generating final report

Finally, we want to generate a report with the data that we stored in the database.

In [14]:
# Print the report from the stored invoices. The connection is closed in a
# `finally` clause so it is released even when building the report raises.
try:
    generate_report(connection)
finally:
    connection.close()


Invoice Report
* Total invoices: 4
* Total amount: 908.5

Breakdown by client:
* Acme Inc: 2 invoices ($800.0)
* Santiago Valdarrama: 1 invoices ($15.0)
* Test Business: 1 invoices ($93.5)
