In [None]:
# Enable the autoreload extension in mode 2, so edits to imported local modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Substep parameters used for an interactive run.
# During a job run this cell is replaced with the parameters from the JSON file in the params subfolder.
substep_params={
    # Location of the WIDER FACE tar archive. Despite the "_url" suffix, this notebook
    # copies it with a plain filesystem copy, so it must be a path readable from this host.
    "wider_dataset_url": "/raw/face_datasets/wider_face/wider-face.tar",
    # HTTP(S) URL of the pretrained YOLOX checkpoint (.pth) that this notebook downloads.
    "yolox_pth_pretrain_weights": "https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth"
}

In [None]:
# load pipeline and step parameters - do not edit
# Both results are passed to NotebookSubstep together with substep_params.
# NOTE(review): pprint=True presumably prints the loaded parameters to the cell output — confirm in the Sinara docs.
from sinara.substep import get_pipeline_params, get_step_params
pipeline_params = get_pipeline_params(pprint=True)
step_params = get_step_params(pprint=True)

In [None]:
# specify all notebook wide libraries imports here
# Sinara lib imports is left in the place of their usage
import os
import os.path as osp
import shutil
import subprocess

In [None]:
# Declare the substep interface: the temporary working entities and the
# outputs this notebook publishes for the following steps.
from sinara.substep import (
    NotebookSubstep,
    ENV_NAME,
    PIPELINE_NAME,
    ZONE_NAME,
    STEP_NAME,
    RUN_ID,
    ENTITY_NAME,
    ENTITY_PATH,
    SUBSTEP_NAME,
)

substep = NotebookSubstep(pipeline_params, step_params, substep_params)

# Scratch entities that only live for the duration of this notebook run.
interface_tmp_entities = [
    {ENTITY_NAME: "downloaded_archives"},  # temporarily copied dataset tar archive
    {ENTITY_NAME: "wider_dataset"},  # images and annotations extracted from the archive in downloaded_archives
    {ENTITY_NAME: "yolox_pth_pretrain_weights"},  # temporarily downloaded pretrain weights
]

# Entities stored for use in the next steps.
interface_outputs = [
    {ENTITY_NAME: "wider_dataset"},  # images and annotations
    {ENTITY_NAME: "yolox_pth_pretrain_weights"},  # pretrain weights
]

substep.interface(tmp_entities=interface_tmp_entities, outputs=interface_outputs)

substep.print_interface_info()

substep.exit_in_visualize_mode()

In [None]:
# run spark
from sinara.spark import SinaraSpark
from sinara.archive import SinaraArchive

# NOTE(review): the meaning of the 0 argument is not visible in this notebook — confirm against SinaraSpark.run_session.
spark = SinaraSpark.run_session(0)
# Archive helper bound to the Spark session; used at the end of the notebook to pack tmp entities into the outputs.
archive = SinaraArchive(spark)
# Last expression of the cell: any value it returns is displayed as the cell output.
SinaraSpark.ui_url()

### Loading the WIDER FACE dataset tar archive

In [None]:
# Accessor for the temporary entities declared in the substep interface;
# its attributes are used as directory paths in the following cells.
tmp_entities = substep.tmp_entities()

# Source location of the dataset archive, taken from the substep parameters.
wider_dataset_url = substep_params["wider_dataset_url"]

In [None]:
# Copy the dataset archive into the temporary downloaded_archives directory.
# shutil.copy replaces the `!cp` shell escape for two reasons:
#   * a failing `!` command does not raise, so a missing or unreadable source
#     would let the run continue without the archive; shutil.copy raises instead;
#   * no shell is involved, so paths with spaces or shell metacharacters are safe.
# shutil.copy returns the destination path, kept here as the archive location.
wider_archive_path = shutil.copy(
    wider_dataset_url,
    osp.join(tmp_entities.downloaded_archives, osp.basename(wider_dataset_url)),
)

In [None]:
# Unpack the copied archive into the temporary wider_dataset directory.
# subprocess.run with an argument list avoids shell word-splitting of the paths,
# and check=True raises CalledProcessError when tar exits with a non-zero status.
# A failing `!tar` would be ignored and the run would continue with a missing or
# partial dataset.
wider_archive_path = osp.join(tmp_entities.downloaded_archives, osp.basename(wider_dataset_url))
subprocess.run(
    ["tar", "-xf", wider_archive_path, "-C", str(tmp_entities.wider_dataset)],
    check=True,
);  # trailing ';' keeps the CompletedProcess repr out of the cell output

#### Download pretrained weights into the yolox_pth_pretrain_weights directory

In [None]:
# Download the pretrained YOLOX checkpoint into the temporary
# yolox_pth_pretrain_weights directory, keeping the file name from the URL.
yolox_pth_pretrain_weights_url = substep_params["yolox_pth_pretrain_weights"]
yolox_pth_pretrain_weights_path = osp.join(tmp_entities.yolox_pth_pretrain_weights, osp.basename(yolox_pth_pretrain_weights_url))

# Run wget without a shell and fail fast: a failing `!wget` does not raise, so
# the run would continue and archive whatever file was left behind.
try:
    subprocess.run(
        ["wget", yolox_pth_pretrain_weights_url, "-O", yolox_pth_pretrain_weights_path],
        check=True,
    )
except subprocess.CalledProcessError:
    # `wget -O` may leave an empty or partial output file after a failed download;
    # remove it so it can never be packed as valid weights, then re-raise.
    if osp.exists(yolox_pth_pretrain_weights_path):
        os.remove(yolox_pth_pretrain_weights_path)
    raise

### Archiving wider_dataset and yolox_pth_pretrain_weights for the next step

In [None]:
# Pack the temporary wider_dataset and yolox_pth_pretrain_weights directories
# into the output entities of the same name for use in the next steps.
tmp_entities = substep.tmp_entities()

outputs = substep.outputs()

for entity_name in ("wider_dataset", "yolox_pth_pretrain_weights"):
    archive.pack_files_from_tmp_to_store(
        tmp_entity_dir=getattr(tmp_entities, entity_name),
        store_path=getattr(outputs, entity_name),
    )

In [None]:
# Stop the Spark session started earlier in this notebook.
SinaraSpark.stop_session()