## Update your GroundX client

Current version is 3.2.8.

In [None]:
pip install -U "groundx[extract]" && pip install ipywidgets smolagents

In [None]:
pip show groundx

## Initialize Client and Prompt Manager

Update with your API key and GroundX route (URL). You can find the GroundX route in the OpenShift console. Append **/api** to the end of the route.

**Example:** `https://groundx-eyelevel.apps.your-ocp-cluster.com/api`

If you are working with an existing bucket and workflow, you must set the bucket_id and workflow_id.

In [None]:
import typing

### set this to the directory holding the local yaml prompt files and to the
### prompt file name (leave the .yaml extension off the file name)

cache_path = "./prompts"
file_name = "simple"

###

### set these if working with an existing bucket and workflow
### (the "Create a Bucket" and "Create a Workflow" cells overwrite them)
### defining them here keeps later `if not bucket_id:` / `if not workflow_id:`
### checks from failing with NameError when the optional create cells are skipped

bucket_id: typing.Optional[int] = None
workflow_id: typing.Optional[str] = None

###

### set these if working with specific files

document_id: typing.Optional[str] = None
process_id: typing.Optional[str] = None

###

from groundx import GroundX

# Connection settings for the GroundX deployment.
# API key: from values/values.groundx.secret.yaml, "GROUNDX_ADMIN_API_KEY"
groundx_api_key = "00000000-0000-0000-0000-000000000001"
# Route for the groundx pod, can be found by running "oc get routes groundx";
# don't forget the trailing /api
groundx_base_url = "https://groundx-eyelevel.apps.ai-dev02.kni.syseng.devcluster.openshift.com/api"

gx_client = GroundX(api_key=groundx_api_key, base_url=groundx_base_url)

from groundx.extract import Logger, Source
from manager import ExtractPromptManager

# One logger is shared by the prompt manager and both of its sources.
logger = Logger(name="manage-workflows", level="info")

# The cache and config sources are separate instances, both configured with
# the same `cache_path`.
prompt_cache_source = Source(logger=logger, cache_path=cache_path)
prompt_config_source = Source(logger=logger, cache_path=cache_path)

# `file_name` is used both as the default prompt file and as the default
# workflow ID.
prompt_manager = ExtractPromptManager(
    cache_source=prompt_cache_source,
    config_source=prompt_config_source,
    logger=logger,
    default_file_name=file_name,
    default_workflow_id=file_name,
    gx_client=gx_client,
)

## Create a Bucket (optional)

This is an optional step. Run it only if you do not already have a test bucket to work with.

In [None]:
# Create a bucket named "workflow-test".
res = gx_client.buckets.create(name="workflow-test")

if not res.bucket:
    # No bucket in the response: show the whole response instead.
    print(res)
else:
    # Remember the new bucket's ID for the cells that follow.
    bucket_id = res.bucket.bucket_id
    print(f"bucket_id=[{res.bucket.bucket_id}]")

## List Existing Buckets (optional)

An optional step to see the existing buckets.

In [None]:
res = gx_client.buckets.list()

# Print one "[id]  [name]" line per bucket; nothing is printed when the
# response carries no buckets.
for b in res.buckets or []:
    print(f"[{b.bucket_id}]\t\t[{b.name}]")

## Create a Workflow (optional)

If you have not done so, you should create a workflow and apply it to your account or a bucket (described in subsequent steps).

In [None]:
# Both pieces are produced by the prompt manager for `file_name`
# (the prompt is loaded from `{cache_path}/{file_name}.yaml`).
workflow_steps = prompt_manager.workflow_steps(file_name=file_name)
# configures workflow to be an `extract` workflow
workflow_extract = prompt_manager.workflow_extract_dict(file_name=file_name)

res = gx_client.workflows.create(
    chunk_strategy="element",
    name=file_name,
    steps=workflow_steps,
    extract=workflow_extract,
)

# Keep the new workflow's ID for the cells that follow.
workflow_id = res.workflow.workflow_id

print(f"[{res.workflow.workflow_id}]\t\t[{res.workflow.name}]")

## List Existing Workflows (optional)

An optional step to see the existing workflows.

In [None]:
res = gx_client.workflows.list()

# Print one "[id]  [name]" line per workflow, followed by its relationship
# details when the workflow has any.
for w in res.workflows:
    print(f"[{w.workflow_id}]\t\t[{w.name}]")
    if not w.relationships:
        continue
    print(f"\tis account default [{w.relationships.account}]\t\tattached to buckets [{w.relationships.ids}]")

## Get Workflow (optional)

An optional step to get workflow by account, bucket ID, or workflow ID.

Set bucket_id and workflow_id if not already set.

In [None]:
from groundx.core import ApiError

# 1) Workflow assigned to the account. API errors are printed, not raised,
#    so the lookups below still run.
try:
    res = gx_client.workflows.get_account()
except ApiError as err:
    print(f"get workflow for account\n\t[{err.status_code}]\t{err.body}\n")
else:
    print(f"get workflow for account\n\t[{res.workflow.workflow_id}]\t\t[{res.workflow.name}]\n")

# 2) Workflow looked up by bucket ID. A missing bucket_id stops the cell here.
if not bucket_id:
    raise Exception("set bucket_id in the Initialize client step")

try:
    res = gx_client.workflows.get(id=bucket_id)
except ApiError as err:
    print(f"get workflow by bucket_id [{bucket_id}]\n\t[{err.status_code}]\t{err.body}\n")
else:
    print(f"get workflow by bucket_id [{bucket_id}]\n\t[{res.workflow.workflow_id}]\t\t[{res.workflow.name}]\n")

# 3) Workflow looked up by its own ID, including relationship details if any.
if not workflow_id:
    raise Exception("set workflow_id in the Initialize client step")

try:
    res = gx_client.workflows.get(workflow_id)
except ApiError as err:
    print(f"get workflow by workflow_id [{workflow_id}]\n\t[{err.status_code}]\t{err.body}")
else:
    print(f"get workflow by workflow_id [{workflow_id}]\n\t[{res.workflow.workflow_id}]\t\t[{res.workflow.name}]")
    if res.workflow.relationships:
        print(f"\t\tis account default [{res.workflow.relationships.account}]\t\tattached to buckets [{res.workflow.relationships.ids}]")

## Update Workflow

Update an existing workflow. You must set the workflow_id to the GroundX ID.

In [None]:
# update_prompts is called with both the prompt file name and the workflow ID;
# only the file name is validated here.
if not file_name:
    raise Exception("set file_name in the Initialize client step")

res = prompt_manager.update_prompts(file_name=file_name, workflow_id=workflow_id)

print(f"[{res.workflow.workflow_id}] [{res.workflow.name}]")

## Assign to Account as the Default Prompt (optional)

An optional step to change the account default prompt.

**note: this will replace the current default account prompt**

In [None]:
# Refuse to call the API without a workflow ID.
if not workflow_id:
    raise Exception("set workflow_id in the Initialize client step")

# Attach the workflow to the account and show the raw response.
res = gx_client.workflows.add_to_account(workflow_id=workflow_id)
print(res)

## Assign to a Bucket (optional)

An optional step to change the prompt assigned to a bucket.

**note: this will replace the current prompt assigned to the bucket**

In [None]:
# Unset (falsy) IDs are replaced with placeholder values (0 / "") before the
# call rather than raising.
bucket_id = bucket_id or 0
workflow_id = workflow_id or ""

# Attach the workflow to the bucket and show the raw response.
res = gx_client.workflows.add_to_id(id=bucket_id, workflow_id=workflow_id)
print(res)

## Removing from Bucket (optional)

An optional step to remove the prompt assigned to a bucket.

**note: this will remove the current prompt assigned to the bucket**

In [None]:
# An unset (falsy) bucket ID is replaced with the placeholder value 0 before
# the call rather than raising.
bucket_id = bucket_id or 0

# Detach the workflow from the bucket and show the raw response.
res = gx_client.workflows.remove_from_id(id=bucket_id)
print(res)

## Removing from Account (optional)

An optional step to remove the prompt assigned to the account.

**note: this will remove the current prompt assigned to the account**

In [None]:
# Detach the workflow from the account; takes no arguments.
res = gx_client.workflows.remove_from_account()

# Show the raw response.
print(res)

## List Documents in a Bucket (optional)

An optional step for looking up documents that have already been uploaded to a bucket.

In [None]:
# This cell lists the documents in a bucket, so a bucket ID is required.
if not bucket_id:
    raise Exception("set bucket_id in the Initialize client step")

# Look the documents up by bucket ID (the original passed workflow_id here,
# which is not a bucket identifier).
res = gx_client.documents.lookup(id=bucket_id)

if res.documents:
    # One line per document: status, status message ("OK" when there is
    # none), process ID and document ID.
    for doc in res.documents:
        msg = doc.status_message or "OK"
        print(f"[{doc.status}]\t[{msg}]\t\t[{doc.process_id}]\t\t[{doc.document_id}]")
else:
    # No documents in the response: show the whole response instead.
    print(res)

## Extract Information from a File

Upload an invoice for information extraction.

In [None]:
from groundx import Document

# The local file to upload and the bucket it is uploaded to.
invoice = Document(
    bucket_id=bucket_id,
    file_path="./test-docs/t-mobile.pdf",
)

res = gx_client.ingest(documents=[invoice])

# Keep the process ID so the status cells below can check on this ingest.
process_id = res.ingest.process_id
print(f"process_id = [{process_id}]")

## Check Document Processing Status by `process_id`

Check the processing status of a file by `process_id`.

In [None]:
if not process_id:
    raise Exception("process_id is not set")

res = gx_client.documents.get_processing_status_by_id(process_id=process_id)

# Take the first document listed under "complete", falling back to the first
# one listed under "processing"; stays None when neither lists a document.
document_id: typing.Optional[str] = None
progress = res.ingest.progress
if progress:
    for stage_name in ("complete", "processing"):
        stage = getattr(progress, stage_name)
        if stage and stage.documents:
            document_id = stage.documents[0].document_id
            break

print(f"[{res.ingest.status}]\t[{res.ingest.process_id}]\t\t[{document_id}]")

## Check Document Processing Status by `document_id`

Check the processing status of a file by `document_id`.

In [None]:
# This cell checks status by document ID. The original was a copy of the
# process_id cell: it never read document_id and reset it to None.
# document_id is set in the Initialize client step or by the
# "Check Document Processing Status by `process_id`" cell.
if not document_id:
    raise Exception("document_id is not set")

# Look the document up directly by its ID.
res = gx_client.documents.get(document_id=document_id)

if res.document:
    print(f"[{res.document.status}]\t[{res.document.process_id}]\t\t[{res.document.document_id}]")
else:
    # No document in the response: show the whole response instead.
    print(res)

## Download Extractions

The extract data represents the final extractions from the GroundX pipeline.

In [None]:
# A document ID is required to fetch the extraction results.
if not document_id:
    raise Exception("set document_id")

# Echo the ID being used.
print(document_id)

# Left as the cell's final bare expression; in a notebook the returned value
# is rendered as the cell output.
gx_client.documents.get_extract(document_id=document_id)