In [None]:
%%writefile requirements.txt

# PyTorch / torchvision builds for CUDA 11.1, found via the PyTorch wheel listing.
-f https://download.pytorch.org/whl/torch_stable.html
torch==1.8.0+cu111
torchvision==0.9.0+cu111


# Highlighter client from a private index; replace the <...> placeholders.
# NOTE(review): pip treats -i / -f / --extra-index-url as options for the whole
# requirements file, not only for the lines that follow them - confirm this
# index (plus the extra index below) can resolve every package listed here.
-i https://<PYPI_USERNAME>:<PYPI_PASSWORD>@pypi.silverpond.com.au/simple
highlighter-client-v2-alpha==0.2

# mmcv-full build matching torch 1.8.0 + CUDA 11.1.
--extra-index-url https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html
mmcv-full==1.3.17+torch1.8.0+cu111

# NOTE(review): onnx is the only requirement without a version pin.
onnx
onnxruntime==1.8.1

In [None]:
# System package. -y answers apt's confirmation prompt, which cannot be answered
# from a notebook; a single `!` keeps the command output visible in the cell.
!apt-get install -y libmagic-dev
# Clone only when the checkout is missing so the cell can be re-run.
![ -d mmdetection ] || git clone --depth 1 --branch v2.18.0 https://github.com/open-mmlab/mmdetection.git
# %pip installs into the environment of the running kernel.
%pip install -r requirements.txt
%pip install ./mmdetection


# Housekeeping

In [None]:
# Sanity-check the environment: every import below fails loudly if the
# corresponding install step did not succeed.

# PyTorch (+ torchvision): version and whether CUDA is available.
import torch
import torchvision
print(torch.__version__, torch.cuda.is_available())

# MMDetection version.
import mmdet
print(mmdet.__version__)

# mmcv: compiling CUDA version first, then compiler version, as reported by mmcv.ops.
from mmcv.ops import get_compiler_version, get_compiling_cuda_version
for report in (get_compiling_cuda_version, get_compiler_version):
    print(report())

In [None]:
from highlighter_client.gql_client import HLClient

# Needed when using HighlighterClient in a notebook environment.
# NOTE(review): this is assigned on the HLClient class itself, so it applies to
# every client created afterwards; the image-download cell sets it back to
# False before downloading.
HLClient._async = True

def display_ds(ds, count=10):
    """Show the first ``count`` rows of a Highlighter client dataset.

    The annotations table is displayed first, followed by the images table.

    Args:
        ds: Dataset object exposing ``annotations_df`` and ``images_df``
            DataFrames.
        count: Number of leading rows to show from each table.
    """
    for table_name in ("annotations_df", "images_df"):
        display(getattr(ds, table_name).head(count))

# Download data using Highlighter Client.

For a more detailed run-through of how to use HighlighterClient, see the [export-submissions](https://github.com/tall-josh/highlighter-client-v2-notebooks/blob/main/export-submissions.ipynb) notebook.


In [None]:
import os

# Credentials are read from the environment when set, so a real token never has
# to be saved inside the notebook. The literals are only placeholders.
HL_WEB_GRAPHQL_API_TOKEN = os.environ.get("HL_WEB_GRAPHQL_API_TOKEN", "...")
HL_WEB_GRAPHQL_ENDPOINT = os.environ.get(
    "HL_WEB_GRAPHQL_ENDPOINT",
    "https://<account-name>.highlighter.ai/graphql",
)

# Id of the Highlighter dataset whose submissions are exported below.
dataset_id = 191

In [None]:
from highlighter_client.datasets import get_reader, get_writer
from highlighter_client.datasets.dataset import Dataset
from highlighter_client.base_models import DatasetSubmissionTypeConnection
from highlighter_client.paginate import paginate

# Reader/writer pair: Highlighter submissions in, COCO out.
submissions_reader = get_reader("highlighter_submissions")()
coco_writer = get_writer("coco")()
ds = Dataset(reader=submissions_reader, writer=coco_writer)

client = HLClient.from_credential(
    api_token=HL_WEB_GRAPHQL_API_TOKEN,
    endpoint_url=HL_WEB_GRAPHQL_ENDPOINT,
)

# Paginated query over the submissions of `dataset_id`.
submissions_gen = paginate(
    client.datasetSubmissionConnection,
    DatasetSubmissionTypeConnection,
    datasetId=dataset_id,
)

print("This could take a minute")
ds.read(submissions_gen=submissions_gen)


In [None]:
# Preview the first rows of the annotation and image tables.
display_ds(ds)

# Preprocessing

At this point you may wish to do some preprocessing, e.g.:

  - **remove unwanted classes**: You may wish to filter some annotations out of your dataset
  - **split the data**: notice that the `split` column contains only a single value, *data*. We can apply a random split before saving to `coco` format.

To keep things general we will simply split the data into **train** and **test** in this notebook




In [None]:
# Fraction of images kept for training; the remainder becomes the test split.
train_frac = 0.8

# Start with every image in "train" so re-running the cell gives the same split.
ds.images_df["split"] = "train"

# Draw the held-out rows with a fixed seed, then relabel them by image id.
held_out_rows = ds.images_df.sample(frac=1 - train_frac, random_state=42)
test_ids = held_out_rows.image_id
is_test = ds.images_df.image_id.isin(test_ids)
ds.images_df.loc[is_test, "split"] = "test"

ds.images_df

In [None]:
from pathlib import Path

# Output locations, relative to the notebook's working directory.
image_dir = Path("data/images")
annotations_dir = Path("data/annotations")  # directory name was misspelled "annotatoins"

for out_dir in (image_dir, annotations_dir):
    out_dir.mkdir(parents=True, exist_ok=True)

# Export the annotations through the dataset's writer; later cells expect
# train.json and test.json inside annotations_dir.
ds.write(annotations_dir=annotations_dir)

# Check the json files exported correctly

We'll also get the number of categories in the training data. We will need it
when we configure the mmdet model for training.


In [None]:
import json

with (annotations_dir / "train.json").open("r") as f:
    train_data = json.load(f)

# Categories are deliberately kept in the order they appear in train.json.
# A bare `sorted(categories, ...)` statement used to sit here, but its result
# was never assigned, so it had no effect and has been removed.
# NOTE(review): mmdet's CocoDataset numbers labels in the order categories occur
# in the annotation file, so CLASSES should follow file order rather than id
# order - confirm the exported file is ordered as intended.
categories = train_data["categories"]

# We'll use these later when configuring the mmdet frcnn model
num_classes = len(categories)
CLASSES = [category["name"] for category in categories]

for category in categories:
    print(category)

print(f"num_images: {len(train_data['images'])}")
print(f"num_annos: {len(train_data['annotations'])}")

In [None]:
from highlighter_client.io import multithread_graphql_image_download

# Turn the class-level `_async` flag back off before the multithreaded download.
HLClient._async = False

# Download every image referenced by the dataset into image_dir.
image_ids = list(ds.images_df.image_id.values)
result = multithread_graphql_image_download(client, image_ids, image_dir)

In [None]:
from mmcv import Config

# Overrides applied on top of the stock Faster R-CNN COCO config.
#
# `classes` must be set on every dataset section: without it CocoDataset falls
# back to the 80 COCO class names and ignores annotations whose category name
# is not among them, which leaves a custom dataset without usable ground truth.
mmdet_config = dict(
    work_dir="zzz_work_dir",
    gpu_ids=[0],
    seed=42,
    data=dict(
        train=dict(
            ann_file=str(annotations_dir / "train.json"),
            img_prefix=str(image_dir),
            classes=CLASSES,
        ),
        # Validation and test both use the held-out "test" split.
        val=dict(
            ann_file=str(annotations_dir / "test.json"),
            img_prefix=str(image_dir),
            classes=CLASSES,
        ),
        test=dict(
            ann_file=str(annotations_dir / "test.json"),
            img_prefix=str(image_dir),
            classes=CLASSES,
        ),
    ),
    model=dict(
        roi_head=dict(
            bbox_head=dict(
                # Size the classification head for this dataset's categories.
                num_classes=num_classes,
            ),
        ),
    ),
)
cfg = Config.fromfile("mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py")
cfg.merge_from_dict(mmdet_config)
cfg

In [None]:
# Inspect the merged training-dataset section of the config.
cfg.data.train

In [None]:
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector
import mmcv
import os.path as osp



# Build dataset
datasets = [build_dataset(cfg.data.train)]
datasets[0].CLASSES = CLASSES

# Build the detector
model = build_detector(
    cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
# Add an attribute for visualization convenience
model.CLASSES = CLASSES

# Create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
train_detector(model, datasets, cfg, distributed=False, validate=True)

In [None]:
# NOTE(review): looks like leftover API exploration - nothing in the visible
# notebook depends on its output; consider deleting this cell.
dir(Config)