In [1]:
from fastai.vision.all import *
import params

import wandb

In [2]:
# Location of the zipped dataset archive that the next cell passes to untar_data.
URL = "https://storage.googleapis.com/wandb_course/bdd_simple_1k.zip"

In [3]:
# Fetch and unpack the archive, keeping the resulting directory as a Path.
# NOTE(review): force_download=True is passed, which presumably re-fetches the
# archive on every run instead of reusing a cached copy — confirm this is wanted,
# since it slows down a full "Restart & Run All".
path = Path(untar_data(URL, force_download=True))

In [4]:
# Show the top-level entries of the dataset directory (images/, labels/, LICENSE.txt).
path.ls()

(#3) [Path('/home/reynald/.fastai/data/bdd_simple_1k/images'),Path('/home/reynald/.fastai/data/bdd_simple_1k/LICENSE.txt'),Path('/home/reynald/.fastai/data/bdd_simple_1k/labels')]

In [5]:
def label_func(fname):
    "Return the path of the mask file that belongs to image `fname`"
    # Masks live in a `labels` folder that sits next to the folder holding the image.
    dataset_root = fname.parent.parent
    mask_name = fname.stem + "_mask.png"
    return dataset_root.joinpath("labels", mask_name)


def get_classes_per_image(mask_data, class_labels):
    "Map every class name to 1 if its id occurs in `mask_data`, else 0"
    # Distinct pixel values found in the mask; each class id is checked against these.
    ids_in_mask = list(np.unique(mask_data))
    return {
        class_name: int(class_id in ids_in_mask)
        for class_id, class_name in class_labels.items()
    }


def _create_table(image_files, class_labels):
    """Create a table with the dataset.

    Every image contributes one row made of: its file name, the image with its
    segmentation mask attached, a placeholder split value, and one 0/1 column per
    class saying whether that class id occurs in the image's mask.

    Args:
        image_files: Sized collection of image paths. For each one, `label_func`
            must resolve to an existing mask file.
        class_labels: Mapping from class id (the pixel value used in the masks) to
            class name. The names become the per-class column headers.

    Returns:
        The populated `wandb.Table`.
    """
    # Keep the raw names for dictionary look-ups and stringify them only for the
    # column headers, so a non-string class name cannot cause a KeyError below.
    class_names = list(class_labels.values())
    table = wandb.Table(
        columns=["File_Name", "Images", "Split"] + [str(name) for name in class_names]
    )

    for image_file in progress_bar(image_files, total=len(image_files)):
        # The PIL object itself is handed to wandb.Image, so it is left open here.
        image = Image.open(image_file)
        # The mask is only needed as an array; copy it out and release the file.
        with Image.open(label_func(image_file)) as mask_image:
            mask_data = np.array(mask_image)
        class_in_image = get_classes_per_image(mask_data, class_labels)
        table.add_data(
            str(image_file.name),
            wandb.Image(
                image,
                masks={
                    "predictions": {
                        "mask_data": mask_data,
                        "class_labels": class_labels,
                    }
                },
            ),
            "None",  # we don't have a dataset split yet
            *[class_in_image[name] for name in class_names],
        )

    return table

In [6]:
# Open a W&B run tagged as an "upload" job and create the artifact that will hold
# the raw dataset. Project, entity and artifact name all come from the local
# `params` module.
run = wandb.init(project=params.WANDB_PROJECT, entity=params.ENTITY, job_type="upload")
raw_data_at = wandb.Artifact(params.RAW_DATA_AT, type="raw_data")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mreynald-havard[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
# Attach the dataset's licence file to the artifact under the name "LICENSE.txt".
raw_data_at.add_file(path / "LICENSE.txt", name="LICENSE.txt")

ArtifactManifestEntry(path='LICENSE.txt', digest='X+6ZFkDOlnKesJCNt20yRg==', size=1594, local_path='/home/reynald/.local/share/wandb/artifacts/staging/tmpmjqn9q9c', skip_cache=False)

In [8]:
# Add the images and their mask labels to the artifact, each under a folder name
# matching the one on disk.
raw_data_at.add_dir(path / "images", name="images")
raw_data_at.add_dir(path / "labels", name="labels")

[34m[1mwandb[0m: Adding directory to artifact (/home/reynald/.fastai/data/bdd_simple_1k/images)... Done. 3.4s
[34m[1mwandb[0m: Adding directory to artifact (/home/reynald/.fastai/data/bdd_simple_1k/labels)... Done. 3.3s


In [10]:
# Collect the image files in images/; recurse=False is passed, so presumably only
# files directly inside that folder are returned (no sub-folders).
image_files = get_image_files(path / "images", recurse=False)

In [11]:
# Build the W&B table (one row per image), using params.BDD_CLASSES as the
# class-id -> class-name mapping.
table = _create_table(image_files, params.BDD_CLASSES)

In [12]:
# Store the table inside the artifact under the name "eda_table".
raw_data_at.add(table, "eda_table")

ArtifactManifestEntry(path='eda_table.table.json', digest='FOGPII2U9IP1fgBUqs+ubw==', size=588824, local_path='/home/reynald/.local/share/wandb/artifacts/staging/tmpbnntktfm', skip_cache=False)

In [13]:
# Log the finished artifact through the run, then close the run.
run.log_artifact(raw_data_at)
run.finish()