In [3]:
!pip install -q google-cloud-documentai

In [4]:
from google.cloud import documentai_v1 as documentai
import pandas as pd

In [5]:
# Identifiers of the Document AI processor this notebook calls. The two
# "YOUR_..._HERE" strings are placeholders and must be replaced before running.
PROJECT_ID = "YOUR_PROJECT_HERE"
# Location code; it is interpolated into the API endpoint host name and passed
# to processor_path() when the client is set up.
LOCATION = "us"
PROCESSOR_ID = "YOUR_PROCESSOR_HERE"

In [6]:
# Input document. The file at FILE_PATH is read as raw bytes and sent to the
# processor together with MIME_TYPE, so keep the two values in sync.
FILE_PATH = "intake-form.pdf"
MIME_TYPE = "application/pdf"

In [8]:
# Point the client at the Document AI host that matches LOCATION.
opts = dict(api_endpoint=f"{LOCATION}-documentai.googleapis.com")
documentai_client = documentai.DocumentProcessorServiceClient(client_options=opts)

# Resource name of the processor that the processing request is addressed to.
resource_name = documentai_client.processor_path(
    project=PROJECT_ID,
    location=LOCATION,
    processor=PROCESSOR_ID,
)

In [9]:
# Read the input file as raw bytes. The handle is needed only for the read, so
# the `with` block ends here and the file is closed before the API call below
# rather than being held open for the whole network round trip.
with open(FILE_PATH, "rb") as image:
    image_content = image.read()

# Pair the bytes with their MIME type.
raw_document = documentai.RawDocument(
    content=image_content,
    mime_type=MIME_TYPE,
)

# Address the request to the processor identified by resource_name.
request = documentai.ProcessRequest(
    name=resource_name,
    raw_document=raw_document,
)

# Send the document for processing; the response is kept in `result` for the
# cells that follow.
result = documentai_client.process_document(request=request)

In [11]:
# Keep the `document` attribute of the processing response; the next cell
# iterates over its pages.
document = result.document

In [12]:
# Flatten the form fields of every page into a single list, in page order.
form_fields = [field for page in document.pages for field in page.form_fields]

# Text extracted for each field's name, and the confidence attached to it
# (how "sure" the model is that the text is correct).
names = [field.field_name.text_anchor.content for field in form_fields]
name_confidence = [field.field_name.confidence for field in form_fields]

# Text extracted for each field's value, and its confidence.
values = [field.field_value.text_anchor.content for field in form_fields]
value_confidence = [field.field_value.confidence for field in form_fields]

# One row per form field; a DataFrame is used only to print a readable table.
df = pd.DataFrame(
    {
        "Field Name": names,
        "Field Name Confidence": name_confidence,
        "Field Value": values,
        "Field Value Confidence": value_confidence,
    }
)

print(df)

                                           Field Name  Field Name Confidence  \
0   Are you currently taking any medication? (If y...               0.943337   
1                                          _Phone #:                0.932336   
2                                               Zip:                0.914201   
3                                              City:                0.900499   
4                                             State:                0.893907   
5                                               DOB:                0.885175   
6                                            Gender:                0.882370   
7                                             Name:\n               0.872788   
8                                   Marital Status:\n               0.852380   
9   Describe your medical concerns (symptoms, diag...               0.843905   
10                                            Date:\n               0.829963   
11                                      