In [None]:
import expb
import expb.By as By

import numpy as np
import cupy as cp

In [None]:
# Notebook-wide configuration: edit these two values before running.

# Location handed to expb.download_dataset (dest_path) and expb.build_dataset.
DATA_PATH: str = r"path_to_data"

# Seed forwarded to the train/test split.
RANDOM_SEED: int = 477

In [None]:
# Download the dataset from the given URL into DATA_PATH.
# is_zip=True flags the download as a zip archive.
expb.download_dataset(
    url="your_url",
    dest_path=DATA_PATH,
    is_zip=True,
)

In [None]:
# Build the dataset object from DATA_PATH, declaring COCO-format annotations
# and the segmentation ("segm") task.
ds = expb.build_dataset(
    DATA_PATH,
    format="coco",
    task="segm",
)

In [None]:
# Category hierarchy: controls which category is written into the label
# wherever segmentations overlap. Background defaults to 0, so it is not
# listed here.
# NOTE(review): "debris" and "misc" share rank 4 — confirm the tie is intended.
cat_hierarchy = {
    "grass": 1,
    "road": 2,
    "stop-sign": 3,
    "debris": 4,
    "misc": 4,
}

ds.metadata.set_category_hierarchy(cat_hierarchy)

In [None]:
# Take a subset of the data using a By option (e.g. TAG, CATEGORY, IMGSHAPE).
# `By` is imported in the notebook's top import cell so that every dependency
# is declared in one place.
good_ds = ds.subset(by=By.TAG, value="Good")

In [None]:
# Split the "Good" subset 80/20 into training and testing sets, shuffling
# with RANDOM_SEED.
train_ds, test_ds = good_ds.split(
    [0.8, 0.2],
    shuffle=True,
    random_seed=RANDOM_SEED,
)

In [None]:
# Applying functions to a Dataset is easy. Here's an example of an RGB-to-grayscale function:


def rgb2gray(data, weights=(0.299, 0.587, 0.114)):
    """Collapse the colour channels of ``data`` into one weighted channel.

    Args:
        data: NumPy array whose last axis holds the colour channels. Only the
            first three channels are used, so a fourth (e.g. alpha) channel
            is ignored.
        weights: Three per-channel coefficients. The default is an immutable
            tuple so the shared default object can never be altered between
            calls; lists and arrays are accepted as well.

    Returns:
        The dot product of the first three channels with ``weights``, i.e.
        an array with the last axis of ``data`` reduced away.
    """
    return np.dot(data[..., :3], weights)


# Register rgb2gray on the training set using its default weights.
train_ds.apply(rgb2gray)

# Extra arguments can be passed positionally as a tuple or by name as a dict:

train_ds.apply(rgb2gray, params=([0.298, 0.591, 0.111],))
# OR
train_ds.apply(rgb2gray, kw_params={"weights": [0.298, 0.591, 0.111]})
# NOTE(review): the three apply() calls above are alternative spellings of the
# same step. If apply() accumulates actions on train_ds, all three would be
# queued — confirm against expb and keep only the variant you need.

# Importantly, data is not loaded into memory and the function does not run
# until .execute() is called. Pass return_dataset=True if you'd like the
# metadata attached to your output.

tr_gray_ds = train_ds.execute(return_dataset=True)


# Two small example actions so the chaining demo below runs as written.
# Both rely only on array methods/operators, so they take `data` and return
# an array in the same way rgb2gray does.
def func1(data):
    """Example action: shift ``data`` so that its mean is zero."""
    return data - data.mean()


def func2(data):
    """Example action: element-wise absolute value of ``data``."""
    return abs(data)


# Chaining actions and an execute call is permitted:

result = tr_gray_ds.apply(func1).apply(func2).execute(return_dataset=False)

In [None]:
# Performing operations on a GPU is easy with expb.

# Use CuPy's get_array_module to write functions that are agnostic to the array's location (host or GPU):

def rgb2gray(data, weights=(0.299, 0.587, 0.114)):
    """Collapse the colour channels of ``data`` into one weighted channel.

    Works for both host and GPU arrays: ``cp.get_array_module`` picks the
    array library that matches ``data``, and that library is used for both
    the weight conversion and the dot product.

    Args:
        data: Array whose last axis holds the colour channels. Only the first
            three channels are used.
        weights: Three per-channel coefficients. The default is an immutable
            tuple so the shared default object can never be altered between
            calls; lists and arrays are accepted as well.

    Returns:
        The dot product of the first three channels with ``weights``,
        computed with the array module selected for ``data``.
    """
    xp = cp.get_array_module(data)
    # Convert the weights with the same module as `data` so that both
    # operands of the dot product live on the same device.
    weights = xp.asarray(weights)
    return xp.dot(data[..., :3], weights)

# Add .to() to the action chain to choose the device before applying the
# function; execute() then runs the chain and returns the dataset.
tr_gray_ds = (
    train_ds
    .to('cuda')
    .apply(rgb2gray)
    .execute(return_dataset=True)
)