In [None]:
import json
import os
import tarfile
import tempfile
import urllib
import urllib.request
from itertools import chain, cycle
from pathlib import Path
from uuid import uuid4

from IPython.display import display_html

import highlighter_client_v2 as hl
from highlighter_client_v2.datasets import ImageRecord, AttributeRecord, Dataset

def show_dataset(ds):
    """Render a short HTML preview of a Dataset in the notebook.

    For the annotations table and then the images table, this emits a
    heading, the first five rows rendered as an HTML table, and the full
    shape of the underlying frame. The result is pushed to the notebook
    front end with ``display_html``; nothing is returned.

    Args:
        ds: Object exposing ``annotations_df`` and ``images_df`` attributes,
            each supporting ``.head()``, ``.to_html()`` and ``.shape``.
    """
    sections = (("Annotations", ds.annotations_df), ("Images", ds.images_df))

    parts = []
    for title, df in sections:
        # Style only the opening <table> tag. A blanket
        # str.replace('table', ...) would also rewrite the closing </table>
        # tag and any cell or column text containing the word "table".
        table_html = df.head(5).to_html().replace(
            '<table', '<table style="display:inline"', 1
        )
        parts.append('<th style="text-align:center"><td style="vertical-align:top">')
        parts.append(f'<h2 style="text-align: center;">{title}</h2>')
        parts.append(table_html)
        parts.append(f'<br> shape: {df.shape}</td></th>')

    display_html(''.join(parts), raw=True)
    

In [None]:

SAMPLE_DATASET_URL = "https://highlighter-public.s3.ap-southeast-2.amazonaws.com/simple-shapes-coco/simple_shapes_dataset.tar"

# Scratch directory for the download. It is deliberately left on disk because
# later cells read the extracted annotation file and images from it.
temp_dir = tempfile.mkdtemp()

# Download the tar file.
# Errors are allowed to propagate: swallowing them here would leave
# `dataset_path` undefined (or stale from a previous run) and the failure
# would only surface later as a confusing NameError or wrong data.
# Note: this relies on `import urllib.request`; a bare `import urllib` does
# not load the `request` submodule.
filename = SAMPLE_DATASET_URL.split('/')[-1]
filepath = Path(temp_dir) / filename
urllib.request.urlretrieve(SAMPLE_DATASET_URL, filepath)

# Extract the tar file.
with tarfile.open(filepath, 'r') as tar:
    if hasattr(tarfile, "data_filter"):
        # Extraction filters are available: refuse members that would be
        # written outside temp_dir (absolute paths, "..", unsafe links).
        tar.extractall(temp_dir, filter="data")
    else:
        # Older Python without extraction filters.
        tar.extractall(temp_dir)

# The archive is expected to unpack into a folder named after the tar file
# without its extension.
dataset_path = Path(temp_dir) / filepath.stem
print("File downloaded and extracted to:", dataset_path)

COCO_JSON = dataset_path / "data.json"
IMAGES_DIR = dataset_path / "images"

# Create a Dataset From A Supported Format

Some common dataset formats can be read out of the box, and we plan to add more over time.



In [None]:
# Load the COCO annotation file that was downloaded above into a Dataset.
ds = Dataset.read_coco(COCO_JSON)

In [None]:
# Preview the annotation and image tables of the freshly loaded Dataset.
show_dataset(ds)

# Initialize A Highlighter Client

In [None]:
# Read the credentials from environment variables so they are never
# hard-coded in the notebook. A missing variable raises KeyError here.
api_token = os.environ["HL_WEB_GRAPHQL_API_TOKEN"]
endpoint_url = os.environ["HL_WEB_GRAPHQL_ENDPOINT"]

# Build the client that the later cells pass to the upload, object class and
# writer helpers.
client = hl.HLClient.from_credential(api_token=api_token, endpoint_url=endpoint_url)
print(client)

# Upload The Images To A Data Source

**First create a Data Source in the Highlighter Web UI, note the id and come back**

You can find the ID in the URL

```
https://compuglobalhypermeganet.highlighter.ai/data_sources/#####
                                                            ^^^^^
                                                              |
                                Data Source ID -----------------

```


In [None]:
data_source_id = 2601  # TODO: replace with the ID of the Data Source you created in the Web UI

# Upload the images from IMAGES_DIR to that Data Source. The return value is
# not needed here, so it is assigned to `_` to keep the cell output quiet.
_ = ds.upload_images(client, data_source_id, image_dir=IMAGES_DIR)

In [None]:
# Display the Dataset again now that the upload has run.
show_dataset(ds)

# Create Object Classes

Here we map the class names in the source dataset to Highlighter ObjectClass uuids. For each name, we
create an ObjectClass in Highlighter if one with the same name does not already exist.

In [None]:

# Get the unique object class names: keep only the annotation rows whose
# attribute_id is the object-class attribute, then take the distinct values.
adf = ds.annotations_df
object_class_names = adf[adf.attribute_id == str(hl.OBJECT_CLASS_ATTRIBUTE_UUID)].value.unique()

# create_object_classes checks (case-insensitively) whether an object class
# with the same name already exists before creating it. It then returns a
# dict mapping each original name to the Highlighter ObjectClass.uuid.
object_class_name_to_highlighter_uuid = hl.object_classes.create_object_classes(client, object_class_names)
print(object_class_name_to_highlighter_uuid)

# Create An Assessment Process

The Assessment process is where we store the annotations for a set of images

In [None]:
# ID of an existing Assessment Process. Replace 987 with your own process ID,
# or set this to None to have the block below create a new process for you.
process_id = 987

if process_id is None:

    # Create an Assessment Process
    # Note: Assessment Process names must be unique, so change the name below
    # if a process called "My Toy Process 000" already exists.
    process_name = "My Toy Process 000"

    # The object class uuids created earlier are passed as plain strings.
    assessment_process = hl.create_assessment_process(client, name=process_name,
                             object_class_uuids=[str(i) for i in object_class_name_to_highlighter_uuid.values()])
    # Remember the new process ID so the following cells write to it.
    process_id = assessment_process.id
    print(assessment_process)
    

In [None]:
from highlighter_client_v2.datasets.formats.highlighter.writer import HighlighterSubmissionsWriter

# Define the Dataset Writer, bound to the client and to the Assessment
# Process selected (or created) in the previous cell.
writer = HighlighterSubmissionsWriter(client=client,
                                      assessment_process_id=process_id,
                                        )

# Write the Dataset's contents to that Assessment Process.
writer.write(dataset=ds)

In [None]:
# Show the writer's object class lookup as the cell output.
# NOTE(review): presumably this maps object classes to the ids the writer used - confirm.
writer.object_class_lookup

**Your data should now be visible in the Assessment Process you defined**

Below are some extra credit tutorials

---

---

# Create Dataset From A Custom Format

Many times you will be uploading data from a non-standard format. The dataset we're working with is in
the popular COCO format, which **is** supported by Highlighter. However, for the purpose of the exercise we'll
do this manually.

The code block below loops through each image and creates a list of `ImageRecord`s, then loops through each annotation and creates a list of `AttributeRecord`s. The `ImageRecord`s are pretty straightforward, so let us focus on the `AttributeRecord`s.

In its simplest form each `AttributeRecord` requires:
  - `image_id`: This indicates the image the attribute belongs to
  - `value`: This is the value of the attribute, and
  - `entity_id`: This uniquely identifies an individual object or "thing" in an image, or even across time or data sources. For example, in the block below we deliberately use the same `entity_id` for both the `PixelLocationAttributeValue` and the `ObjectClassAttributeValue`. This tells Highlighter that both attributes refer to the same "thing".

In [None]:
from highlighter_client_v2 import read_object_classes, LabeledUUID
from highlighter_client_v2.datasets.base_models import (
    ObjectClassAttributeValue,
    PixelLocationAttributeValue,
    AttributeRecord,
    ImageRecord
)

# The annotation file is JSON, so decode it explicitly as UTF-8 instead of
# relying on the platform's default text encoding.
with open(COCO_JSON, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Lookups: object class name -> Highlighter object class uuid, and
# COCO category id -> category name.
object_class_uuid_lookup = {o.name: o.uuid for o in read_object_classes(client, process_id=process_id)}
cat_id_to_name = {c["id"]: c["name"] for c in data["categories"]}

# We use the ImageRecord BaseModel to validate the fields
# before adding them to the Dataset.
image_records = [
    ImageRecord(
        image_id=image["id"],
        width=image["width"],
        height=image["height"],
        filename=image["file_name"],
    )
    for image in data["images"]
]

attribute_records = []
for annotation in data["annotations"]:
    # One fresh entity per COCO annotation. Both records created below share
    # this entity_id, which ties the class and the location to the same
    # "thing" in the image.
    entity_id = str(uuid4())
    image_id = annotation["image_id"]

    # Object class attribute:
    #   - look up the object class uuid by its name
    #   - wrap it in a LabeledUUID. LabeledUUID and UUID can be used
    #     interchangeably; the label only makes the value readable
    object_class_name = cat_id_to_name[annotation["category_id"]]
    object_class_uuid = object_class_uuid_lookup[object_class_name]
    object_class_value = LabeledUUID(object_class_uuid, label=object_class_name)
    object_class_attribute_value = ObjectClassAttributeValue(value=object_class_value)

    attribute_records.append(
        AttributeRecord.from_attribute_value(
            image_id,
            object_class_attribute_value,
            entity_id=entity_id,
        )
    )

    # Pixel location attribute, built from the annotation's "bbox" entry
    # with the left/top/width/height helper constructor.
    pixel_location_attribute_value = PixelLocationAttributeValue.from_left_top_width_height_coords(
        annotation["bbox"]
    )

    attribute_records.append(
        AttributeRecord.from_attribute_value(
            image_id,
            pixel_location_attribute_value,
            entity_id=entity_id,
        )
    )

# Populate the Dataset
ds = Dataset(attribute_records=attribute_records, image_records=image_records)

show_dataset(ds)

Once the `Dataset` object has been created you can use the same steps detailed at the beginning of the Notebook to upload the images and attributes.

---
---

# Create Submissions By Performing Inference On Images In Highlighter

Finally, if you have images already stored in Highlighter and you want to run predictions on those images and upload the results to Highlighter, you can follow a similar process, but without needing to create `ImageRecord`s because the images are already in Highlighter.



In [None]:
class MyCrappyShapePredictor():
    """Placeholder predictor for the inference walkthrough in this section.

    NOTE(review): as far as this cell shows, ``predict`` is not implemented
    yet and returns None.
    """
    
    def __init__(self, object_class_uuids):
        """Store the object class uuids on the instance.

        NOTE(review): presumably these are the classes a prediction may be
        labelled with - confirm once ``predict`` is implemented.
        """
        self.object_class_uuids = object_class_uuids
        
    def predict(self, image):
        """Stub: the bare ``return`` below yields None for any ``image``."""
        return 