In [2]:
import datasets
import daft

# Use Daft's native executor with a morsel size of 256 for everything below.
daft.set_execution_config(
    enable_native_executor=True,
    default_morsel_size=256,
)

# Load the Fashionpedia detection dataset and read the number of object
# categories from the schema of its "train" split.
ds = datasets.load_dataset("detection-datasets/fashionpedia")
fashionpedia_num_classes = (
    ds["train"].features["objects"].feature["category"].num_classes
)

# Expression that decodes the `bytes` field of the `image` struct column
# into a Daft image.
hf_img_to_daft_img = daft.col("image").struct.get("bytes").image.decode()

# Wrap the first 1000 rows of the split's Arrow table in a Daft DataFrame and
# replace the raw `image` struct with the decoded image.
df_train = daft.from_arrow(ds["train"].data.table[:1000]).with_column(
    "image",
    hf_img_to_daft_img,
)
df_train.show(2)

image_id Int64,image Image[MIXED],width Int64,height Int64,"objects Struct[bbox_id: List[Int64], category: List[Int64], bbox: List[FixedSizeList[Float64; 4]], area: List[Int64]]"
23,,682,1024,"{bbox_id: [150311, 150312, 150313, 150314], category: [23, 23, 33, 10], bbox: [[445, 910, 505, 983], [239, 940, 284, 994], [298, 282, 386, 352], [210, 282, 448, 665]], area: [1422, 843, 373, 56375], }"
25,,683,1024,"{bbox_id: [158953, 158954, 158955, 158956, 158957, 158958, 158959, 158960, 158961, 158962], category: [2, 33, 31, 31, 13, 7, 22, 22, 23, 23], bbox: [[182, 220, 472, 647], [294, 221, 407, 257], [405, 297, 472, 647], [182, 264, 266, 621], [284, 135, 372, 169], [238, 537, 414, 606], [351, 732, 417, 922], [202, 749, 270, 930], [200, 921, 256, 979], [373, 903, 455, 966]], area: [87267, 1220, 16895, 18541, 1468, 9360, 8629, 8270, 2717, 3121], }"


In [4]:
import torch


def apply_torch_transform(bboxes):
    """Convert bounding boxes into a float32 torch tensor.

    Args:
        bboxes: Nested numeric data accepted by ``torch.tensor``. In this
            notebook it is the ``objects.bbox`` value of a single row.

    Returns:
        A new ``torch.Tensor`` with dtype ``torch.float32`` holding the same
        values and nesting as ``bboxes``.
    """
    return torch.tensor(bboxes, dtype=torch.float32)


# Per-row bounding boxes: run the Python helper on `objects.bbox` and store
# the result as a float32 tensor column.
bbox_tensor_expr = (
    daft.col("objects")
    .struct.get("bbox")
    .apply(
        apply_torch_transform,
        return_dtype=daft.DataType.tensor(daft.DataType.float32()),
    )
)

# Decoded image cast to a uint8 tensor column.
img_tensor_expr = daft.col("image").cast(
    daft.DataType.tensor(dtype=daft.DataType.uint8())
)

# Add both tensor columns in a single projection and preview two rows.
df_train = df_train.with_columns(
    {"bboxes": bbox_tensor_expr, "img_tensor": img_tensor_expr}
)
df_train.show(2)

image_id Int64,image Image[MIXED],width Int64,height Int64,"objects Struct[bbox_id: List[Int64], category: List[Int64], bbox: List[FixedSizeList[Float64; 4]], area: List[Int64]]",bboxes Tensor(Float32),img_tensor Tensor(UInt8)
23,,682,1024,"{bbox_id: [150311, 150312, 150313, 150314], category: [23, 23, 33, 10], bbox: [[445, 910, 505, 983], [239, 940, 284, 994], [298, 282, 386, 352], [210, 282, 448, 665]], area: [1422, 843, 373, 56375], }","<Tensor shape=(4, 4)>","<Tensor shape=(1024, 682, 3)>"
25,,683,1024,"{bbox_id: [158953, 158954, 158955, 158956, 158957, 158958, 158959, 158960, 158961, 158962], category: [2, 33, 31, 31, 13, 7, 22, 22, 23, 23], bbox: [[182, 220, 472, 647], [294, 221, 407, 257], [405, 297, 472, 647], [182, 264, 266, 621], [284, 135, 372, 169], [238, 537, 414, 606], [351, 732, 417, 922], [202, 749, 270, 930], [200, 921, 256, 979], [373, 903, 455, 966]], area: [87267, 1220, 16895, 18541, 1468, 9360, 8629, 8270, 2717, 3121], }","<Tensor shape=(10, 4)>","<Tensor shape=(1024, 683, 3)>"


In [None]:
sample = df_train[["bboxes", "img_tensor"]].limit(1).to_pydict()

In [None]:
import albumentations as A

transforms = A.Compose([
    A.RandomResizedCrop(size=(224, 224), antialias=True),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
],
bbox_params=A.BboxParams(format="pascal_voc", label_fields=["category_id"]))
transforms(**sample)

  A.RandomResizedCrop(size=(224, 224), antialias=True),


ValueError: Your 'label_fields' are not valid - them must have same names as params in dict

In [None]:

from daft import DataType as dt

@daft.udf(return_dtype=dt.tensor(dt.float32()))
def transform(image, bbox):
    """Augment a batch of images with the notebook-level ``transforms`` pipeline.

    A ``daft.udf`` function is called with whole column batches rather than
    single values, so each row is augmented on its own and the results are
    collected into a list.

    Args:
        image: Batch of image tensors (e.g. the ``img_tensor`` column).
        bbox: Batch of per-image bounding boxes (e.g. the ``bboxes`` column),
            in the format configured in ``transforms``' ``bbox_params``.

    Returns:
        A list with one augmented image per input row, in input order.
    """
    augmented_images = []
    for img, boxes in zip(image.to_pylist(), bbox.to_pylist()):
        out = transforms(
            image=img,
            # Albumentations' bounding-box target is named `bboxes`, not `bbox`.
            bboxes=boxes,
            # The pipeline declares label_fields=["category_id"], so a label
            # per box must be supplied. Only the image is returned here, so
            # positional indices are enough to satisfy that contract.
            category_id=list(range(len(boxes))),
        )
        augmented_images.append(out["image"])
    return augmented_images
# NOTE(review): scratch expression left over from exploration; presumably it
# references the first column of df_train — confirm the intent or remove it.
df_train[0]
# TODO: apply the UDF to the frame. `with_columns` is called with a
# {name: expression} mapping earlier in this notebook, so the call would be:
# df_train.with_columns({"augmented": transform(daft.col("img_tensor"), daft.col("bboxes"))})