# GenAI Document Processing: Expense Processing

Based on the AWS Blog: https://aws.amazon.com/blogs/machine-learning/intelligent-document-processing-using-amazon-bedrock-and-anthropic-claude/

In [17]:
import boto3
import json
from PIL import Image
import base64

In [20]:
# Bedrock model identifier passed as modelId when invoking the model.
MODEL_ID = "anthropic.claude-3-sonnet-20240229-v1:0"

# S3 client; presumably for loading documents from a bucket -- it is only
# referenced by a commented-out download in the cells shown here.
s3 = boto3.client(service_name='s3')

# Bedrock runtime client, pinned to us-east-1, used for model invocation.
bedrock = boto3.client(
    service_name='bedrock-runtime',
    region_name='us-east-1',
)


In [26]:
# Sample invoice read from the local filesystem. The raw bytes could instead
# come from S3, e.g.
#   s3.get_object(Bucket=bucket_name, Key=object_key)['Body'].read()
# (bucket_name / object_key are not defined in the cells shown here).
image_file = "./docs/test_invoice.png"

# Read the whole file in binary mode; the handle is closed on leaving the block.
with open(image_file, mode="rb") as image:
    img = image.read()

# Mutable copy of the raw image bytes.
img_bytes = bytearray(img)

# Base64-encode the bytes and convert the result to text so it can be embedded
# in a JSON request body.
base64_image = str(base64.b64encode(img_bytes), 'utf-8')


In [27]:
# Extraction prompt sent to the model together with the invoice image.
# The JSON template at the end defines the keys the model is asked to fill;
# it is kept strictly valid JSON (no trailing commas) so the model is not
# shown a malformed example to imitate.

prompt = """
This image shows an invoice. 
Please precisely copy all the relevant information from the form.
Leave the field blank if there is no information in corresponding field.
If the image is not an invoice, simply return an empty JSON object. 
If the invoice is not filled, leave the fees attributes blank. 
Translate any non-English text to English. 
Organize and return the extracted data in a JSON format with the following keys:
{
    "InvoiceDetails":{
        "invoiceNum": "",
        "date": "",
        "billToAddress": "",
        "city": "",
        "state": "",
        "zipCode": "",
        "phone": "",
        "email":""
    },
    "items":[{
        "ItemDescription": "",
        "quantity": "",
        "unitPrice": "",
        "amount": ""
    }
    ],
    "total": "",
    "currency": ""
  }
"""



In [None]:
def invoke_claude_3_multimodal(
    prompt: str,
    base64_image_data: str,
    media_type: str = "image/png",
    max_tokens: int = 2048,
) -> dict:
    """Send a text prompt and one base64-encoded image to the Bedrock model.

    Builds a single-turn user message containing the prompt text followed by
    the image, invokes the model identified by ``MODEL_ID`` through the
    module-level ``bedrock`` client, and returns the decoded JSON response.

    Args:
        prompt: Instruction text placed in the user message.
        base64_image_data: Image contents as a base64-encoded string.
        media_type: MIME type declared for the image. Defaults to
            ``"image/png"``; pass another type (e.g. ``"image/jpeg"``) when
            the encoded image is not a PNG. The value must be one the model
            accepts -- see the Bedrock Anthropic Messages API documentation.
        max_tokens: Value sent as the request's ``max_tokens`` field.
            Defaults to 2048.

    Returns:
        The response body parsed from JSON.

    Raises:
        botocore ClientError (``bedrock.exceptions.ClientError``): if the
            invocation fails; the error code and message are printed before
            the exception is re-raised.
    """
    image_block = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": media_type,
            "data": base64_image_data,
        },
    }
    text_block = {
        "type": "text",
        "text": prompt,
    }
    request_body = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": max_tokens,
        "messages": [
            {
                "role": "user",
                # Text first, then the image, in the same order as before.
                "content": [text_block, image_block],
            }
        ],
    }

    # Only the service call is guarded: it is the statement that raises
    # ClientError. Decoding the body happens on the success path.
    try:
        response = bedrock.invoke_model(modelId=MODEL_ID, body=json.dumps(request_body))
    except bedrock.exceptions.ClientError as err:
        print(f"Couldn't invoke Claude 3 Sonnet. Here's why: {err.response['Error']['Code']}: {err.response['Error']['Message']}")
        raise
    else:
        return json.loads(response['body'].read())
