In [None]:
# Install the Document AI client library, pinned to version 2.31.0, into the notebook kernel.
%pip install --quiet google-cloud-documentai==2.31.0

import sys

# Sign the user in only when the notebook runs inside Google Colab, detected by
# the `google.colab` module already being loaded; elsewhere this is a no-op.
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    from google.colab import auth

    auth.authenticate_user()
    print("Authenticated")

# Notebook configuration: replace each placeholder before running the cells below.
# PROJECT_ID is interpolated into the bq / gcloud / gsutil commands and passed to Document AI.
PROJECT_ID = "<YOUR_GCP_PROJECT_NAME>"
# NOTE(review): PROJECT_NUMBER is not referenced in the visible cells — confirm where it is needed.
PROJECT_NUMBER = "<YOUR_GCP_PROJECT_NUMBER>"
# Presumably the BigQuery dataset in which the remote models are created — confirm.
DATASET_ID = "<YOUR_DATASET_ID>"

# Create a BigQuery CLOUD_RESOURCE connection named "docai_conn" in the "eu" location of the project.
!bq mk --connection --connection_type=CLOUD_RESOURCE --location=eu --project_id={PROJECT_ID} "docai_conn"
# Show the connection just created; its output is the source of the service account requested in the next cell.
!bq show --location=eu --connection --project_id={PROJECT_ID} "docai_conn"

# Service account of the BigQuery connection; paste the value reported by the previous step.
connection_service_account = "<SERVICE_ACCOUNT_FROM_PREVIOUS_STEP>"  # @param {type: "string"}
# IAM member string in the "serviceAccount:<account>" form passed to gcloud below.
connection_member = f"serviceAccount:{connection_service_account}"


# Grant the connection's service account three project-level roles, one command per role:
# Document AI viewer, Cloud Storage object viewer, and Vertex AI (aiplatform) user.
# `--condition=None` adds each binding without an IAM condition; `--quiet` suppresses prompts.
!gcloud projects add-iam-policy-binding {PROJECT_ID} --member={connection_member} --role='roles/documentai.viewer' --condition=None --quiet
!gcloud projects add-iam-policy-binding {PROJECT_ID} --member={connection_member} --role='roles/storage.objectViewer' --condition=None --quiet
!gcloud projects add-iam-policy-binding {PROJECT_ID} --member={connection_member} --role='roles/aiplatform.user' --condition=None --quiet

# Derive the Cloud Storage bucket name from the project ID. Bucket names share a
# global namespace, so the project prefix is what makes this name likely to be unique.
bucket_name = f"{PROJECT_ID}-eu-sow-uploads"

# Create the bucket in the "eu" location, owned by the configured project.
!gsutil mb -l eu -p {PROJECT_ID} gs://{bucket_name}

from google.api_core.client_options import ClientOptions
from google.cloud import documentai

# Settings for the Document AI processor created below.
# `location` also selects the regional API endpoint "<location>-documentai.googleapis.com".
location = "eu"
# Display name given to the new processor.
processor_display_name = "layout_eu_parser_processor"
# Processor type string passed to Document AI when creating the processor.
processor_type = "LAYOUT_PARSER_PROCESSOR"


def create_processor_sample(
    PROJECT_ID: str, location: str, processor_display_name: str, processor_type: str
) -> str:
    """Create a Document AI processor and return its ID.

    Args:
        PROJECT_ID: Google Cloud project in which the processor is created.
        location: Document AI location (for example ``"eu"``); it also selects
            the regional endpoint ``<location>-documentai.googleapis.com``.
        processor_display_name: Display name given to the new processor.
        processor_type: Processor type string, e.g. ``"LAYOUT_PARSER_PROCESSOR"``.

    Returns:
        The processor ID, i.e. the last path segment of the created
        processor's resource name.
    """
    # Document AI is served from regional endpoints, so the client must be
    # pointed at the endpoint that matches the requested location.
    opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")

    client = documentai.DocumentProcessorServiceClient(client_options=opts)

    # The full resource name of the location
    parent = client.common_location_path(PROJECT_ID, location)

    # Create a processor
    processor = client.create_processor(
        parent=parent,
        processor=documentai.Processor(
            display_name=processor_display_name, type_=processor_type
        ),
    )

    # Return the processor ID needed for creating a BigQuery connection
    return processor.name.split("/")[-1]


# Create the processor from the settings defined above and keep its ID for the
# remote-model definition that follows.
processor_id = create_processor_sample(
    PROJECT_ID=PROJECT_ID,
    location=location,
    processor_display_name=processor_display_name,
    processor_type=processor_type,
)

from google.cloud import bigquery

# BigQuery client used to run the DDL statement below. No client is created
# anywhere else in the visible notebook, so it is built here.
client = bigquery.Client(project=PROJECT_ID)

# Define a remote model in the configured dataset that is backed by the
# Document AI processor created above.
# Replace <CONNECTION_NAME_FROM_EARLIER_STEP> with the connection created
# earlier in this notebook ("docai_conn" in location "eu"); the fully
# qualified form is presumably `<project>.eu.docai_conn` — confirm.
# NOTE(review): `document_processor` receives the bare processor ID here;
# confirm whether BigQuery expects the full processor resource name instead.
query = f"""
CREATE OR REPLACE MODEL `{DATASET_ID}.layout_parser`
REMOTE WITH CONNECTION `<CONNECTION_NAME_FROM_EARLIER_STEP>`
OPTIONS(remote_service_type="CLOUD_AI_DOCUMENT_V1", document_processor="{processor_id}")
"""

query_job = client.query(query)  # API request
query_job.result()  # Waits for the query to complete

print(f"Remote model {DATASET_ID}.layout_parser created or replaced successfully.")

%%bigquery --project $PROJECT_ID

-- Remote model over the text-embedding-004 endpoint, created in the dataset configured as DATASET_ID.
-- Replace the connection placeholder with the connection created earlier in this notebook.
-- NOTE(review): confirm that this magic expands Python variables written in braces inside
-- the cell body; if it does not, write the dataset ID literally.
CREATE OR REPLACE MODEL `{DATASET_ID}.embedding_model`
REMOTE WITH CONNECTION `<CONNECTION_NAME_FROM_EARLIER_STEP>` OPTIONS(endpoint="text-embedding-004")

%%bigquery --project $PROJECT_ID

-- Remote model over the gemini-1.5-flash endpoint, created in the dataset configured as DATASET_ID.
-- Replace the connection placeholder with the connection created earlier in this notebook.
-- NOTE(review): confirm that this magic expands Python variables written in braces inside
-- the cell body; if it does not, write the dataset ID literally.
CREATE OR REPLACE MODEL `{DATASET_ID}.gemini_flash` REMOTE
WITH CONNECTION `<CONNECTION_NAME_FROM_EARLIER_STEP>` OPTIONS(endpoint="gemini-1.5-flash")