In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell execution, so edits to local .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Substep parameters used for an interactive run.
# During a job run this cell is replaced with the parameters from the json file
# in the params subfolder, so keep it limited to the substep_params definition.
#   facemask_datasets_url - location of the tar archive with the facemask dataset
#                           that the later cells copy and unpack
substep_params={
    "facemask_datasets_url": "/raw/facemask_datasets/facemask_detection/face-mask-detection.tar"
}

In [None]:
# load pipeline and step parameters - do not edit
# Both results are passed to NotebookSubstep in the interface cell below.
# NOTE(review): pprint=True presumably also prints the loaded parameters - confirm against the Sinara docs.
from sinara.substep import get_pipeline_params, get_step_params
pipeline_params = get_pipeline_params(pprint=True)
step_params = get_step_params(pprint=True)

In [None]:
# specify all notebook wide libraries imports here
# Sinara lib imports is left in the place of their usage
import json
import os
import os.path as osp
import shutil
import subprocess

In [None]:
# Declare the substep interface: the temporary entities used while the notebook
# runs and the outputs this step hands over to the following steps.
from sinara.substep import (
    NotebookSubstep,
    ENV_NAME,
    PIPELINE_NAME,
    ZONE_NAME,
    STEP_NAME,
    RUN_ID,
    ENTITY_NAME,
    ENTITY_PATH,
    SUBSTEP_NAME,
)

substep = NotebookSubstep(pipeline_params, step_params, substep_params)

substep.interface(
    tmp_entities=[
        # temporary location of the copied dataset archive
        {ENTITY_NAME: "downloaded_archives"},
        # temporary location of the files extracted from that archive
        {ENTITY_NAME: "facemask_datasets"},
    ],
    outputs=[
        # images and annotations stored for use in the next steps
        {ENTITY_NAME: "facemask_datasets"},
    ],
)

substep.print_interface_info()

substep.exit_in_visualize_mode()

In [None]:
# run spark
# Sinara imports are kept next to their usage, following this notebook's convention.
from sinara.spark import SinaraSpark
from sinara.archive import SinaraArchive

# NOTE(review): the meaning of the 0 argument is not visible in this notebook - confirm against SinaraSpark.run_session.
spark = SinaraSpark.run_session(0)
# Archive helper bound to the session; used later to pack the extracted files into the step output.
archive = SinaraArchive(spark)
# Last expression of the cell, so its value is rendered as the cell output.
SinaraSpark.ui_url()

### Loading and unpacking the facemask_datasets tar archive 

In [None]:
# Resolve the temporary entities declared in substep.interface() and read the
# archive location from the substep parameters; both names are used by the cells below.
tmp_entities = substep.tmp_entities()
facemask_datasets_url = substep_params["facemask_datasets_url"]

In [None]:
# Copy the facemask dataset archive into the temporary "downloaded_archives" entity.
# shutil.copy (imported in the notebook-wide imports cell) replaces `!cp` so that
# paths containing spaces or shell metacharacters are handled correctly and a
# failed copy raises an exception instead of letting the notebook continue.
# The trailing semicolon suppresses the returned destination path in the cell output.
shutil.copy(
    facemask_datasets_url,
    osp.join(tmp_entities.downloaded_archives, osp.basename(facemask_datasets_url)),
);

In [None]:
# Unpack the copied archive into the temporary "facemask_datasets" entity.
# tar is invoked through subprocess.run (imported in the notebook-wide imports cell)
# with an argument list, so paths are never re-parsed by a shell, and the exit code
# is checked: with `!tar` a failed extraction went unnoticed and the next cell
# would pack an empty or partially extracted directory.
archive_path = osp.join(tmp_entities.downloaded_archives, osp.basename(facemask_datasets_url))
tar_result = subprocess.run(
    ["tar", "-xf", archive_path, "--directory", tmp_entities.facemask_datasets],
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,  # merge stderr so warnings and errors stay visible in the notebook
    text=True,
)
if tar_result.stdout:
    print(tar_result.stdout)
if tar_result.returncode != 0:
    raise RuntimeError(
        f"tar exited with code {tar_result.returncode} while unpacking {archive_path}"
    )

### Archiving facemask_datasets for the next step

In [None]:
# Pack the extracted files from the temporary facemask_datasets directory
# into the "facemask_datasets" output of this step.
outputs = substep.outputs()

archive.pack_files_from_tmp_to_store(
    tmp_entity_dir=tmp_entities.facemask_datasets,
    store_path=outputs.facemask_datasets,
)

In [None]:
# Stop the Spark session started earlier in this notebook; run this last,
# after the outputs have been packed.
SinaraSpark.stop_session()