In [None]:
import os

# Credentials for the Highlighter GraphQL API.
# Prefer exporting HL_WEB_GRAPHQL_API_TOKEN / HL_WEB_GRAPHQL_ENDPOINT in the
# environment so the secret is never saved into the notebook. The placeholders
# below are only used when the variables are not set, and must then be edited
# by hand before the client is created.
HL_WEB_GRAPHQL_API_TOKEN = os.environ.get("HL_WEB_GRAPHQL_API_TOKEN", "...")
HL_WEB_GRAPHQL_ENDPOINT = os.environ.get(
    "HL_WEB_GRAPHQL_ENDPOINT",
    "https://<account-name>.highlighter.ai/graphql",
)

In [None]:
%%writefile requirements.txt
# Package index that needs credentials: replace <PYPI_USERNAME> and
# <PYPI_PASSWORD> with your own before installing.
-i https://<PYPI_USERNAME>:<PYPI_PASSWORD>@pypi.silverpond.com.au/simple
# Pinned version of the Highlighter client used by this notebook.
highlighter-client-v2-alpha==0.2


In [None]:
# Print requirements.txt to confirm what was written.
# NOTE(review): once real credentials are filled into the index URL, this
# output will contain them - clear the cell output before sharing the notebook.
!cat requirements.txt

In [None]:
!!apt-get install libmagic-dev
!pip install -r requirements.txt


# House Keeping

In [None]:
from highlighter_client.gql_client import HLClient

# Needed when using HighlighterClient in a notebook environment.
# The flag is set on the HLClient class itself rather than on one instance.
# NOTE(review): presumably required because the notebook kernel already runs
# an event loop - confirm against the highlighter_client documentation.
HLClient._async = True

def display_ds(ds, count=10):
    """Preview the DataFrames held by a highlighter client dataset object.

    Shows the first ``count`` rows of ``ds.annotations_df`` and then of
    ``ds.images_df`` using the notebook's ``display``.

    Args:
        ds: Dataset object exposing ``annotations_df`` and ``images_df``.
        count: Number of leading rows to show from each frame.
    """
    for frame_name in ("annotations_df", "images_df"):
        display(getattr(ds, frame_name).head(count))



# Create a HLClient object from credentials

This client will be used when we need to communicate with Highlighter via GraphQL.

In [None]:
# Build the GraphQL client from the token and endpoint configured at the top
# of the notebook.
client = HLClient.from_credential(
    api_token=HL_WEB_GRAPHQL_API_TOKEN,
    endpoint_url=HL_WEB_GRAPHQL_ENDPOINT,
)

# Read Dataset from Highlighter

`HighlighterClient` represents datasets as two Pandas DataFrames, `annotations_df` and `images_df`. We can populate a `HighlighterClient.Dataset` in several ways using `Readers`. You can list the available `Readers` and load one by its name. In this case we'll be loading the `HighlighterSubmissionsReader` so we can pull submissions down from Highlighter.

In [None]:
from highlighter_client.datasets import get_reader, READERS

# List the names under which readers are registered.
reader_names = list(READERS.keys())
print(f"READERS: {reader_names}")

# Look up the reader class by name, then instantiate it.
reader_cls = get_reader("highlighter_submissions")
reader = reader_cls()

In [None]:
# View the doc string and function signature
# Note it expects a submissions generator
# We will create one in a moment.
?reader

Once we have a `Reader`, we can initialize a `highlighter_client.Dataset` object with that `Reader`.

In [None]:
from highlighter_client.datasets.dataset import Dataset
# Attach the reader to a new Dataset; its DataFrames are populated later by
# calling `ds.read(...)`.
ds = Dataset(reader=reader)

Now that we have a `highlighter_client.Dataset` with a `HighlighterSubmissionsReader`, we can populate our `DataFrames`.

To understand this we need to know two things.

1. `highlighter_client` uses Pydantic `BaseModel`s to tell GraphQL which values to return from a query. Some common `BaseModel`s are defined in `highlighter_client.base_models`, but if you want more fine-grained control you can define your own.

2. Some GraphQL queries may return many results. These types of queries are called `Connections` and are named accordingly in the code. There is a `paginate` function that takes a `Connection` query and returns a Python generator.

For more information on the BaseModels see `highlighter_client/base_models.py`

In [None]:
from highlighter_client.base_models import DatasetSubmissionTypeConnection
from highlighter_client.paginate import paginate

# ID of the Highlighter dataset whose submissions should be downloaded.
# Replace the `None` placeholder with your own dataset ID before running.
dataset_id = None

if dataset_id is None:
    # Fail with an actionable message instead of sending a query without an ID.
    raise ValueError(
        "Set `dataset_id` to the ID of the Highlighter dataset you want to read."
    )

# `paginate` turns the connection query into a generator of submissions.
submissions_gen = paginate(
    client.datasetSubmissionConnection,
    DatasetSubmissionTypeConnection,
    datasetId=dataset_id,
)

# Feed the generator to the dataset's reader, then preview the result.
ds.read(submissions_gen=submissions_gen)
display_ds(ds)