# Dataset Analysis

Install Bisturi.

In [None]:
!pip -q install ..

In [None]:
# Root directory that contains the `datasets/` folder. Replace the None
# placeholder before running the loading cells: they build paths from this
# value and fail with a TypeError while it is still None.
basepath = None

Load the **Broden** dataset.

In [None]:
from bisturi.dataset.broden import BrodenDataset
from bisturi.dataset.broden import BrodenOntology
import os

# Build the Broden dataset together with its concept ontology.
# NOTE: the ImageNet loading cell assigns the same names (dset_path, ontology,
# dset, concepts), so whichever loading cell runs last is the one in effect.
dset_path = os.path.join(basepath, 'datasets/broden1_224/')
ontology = BrodenOntology(dset_path)
# NOTE(review): mean and std are all ones; how the dataset applies them is not
# visible here -- confirm these are the intended normalisation values.
dset = BrodenDataset(dset_path, mean=[1,1,1], std=[1,1,1], ontology=ontology)
# Concepts of the ontology, used by the statistics and concept-selection cells.
concepts = ontology.to_list()

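Load the **ImageNet** dataset. Running this cell replaces the Broden dataset loaded above, since both cells assign the same variables.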

In [None]:
import os

from bisturi.dataset.imagenet import ImageNetDataset
from bisturi.dataset.imagenet import ImageNetOntology

# Build the ImageNet dataset together with its concept ontology.
# The path is joined with os.path.join (as in the Broden cell) so that it is
# correct whether or not `basepath` ends with a path separator; `os` is
# imported here so the cell also works when the Broden cell was skipped.
dset_path = os.path.join(basepath, 'datasets/ilsvrc2011/')
ontology = ImageNetOntology(dset_path)
# NOTE(review): mean and std are all ones; how the dataset applies them is not
# visible here -- confirm these are the intended normalisation values.
dset = ImageNetDataset(dset_path, mean=[1,1,1], std=[1,1,1], ontology=ontology)
# Concepts of the ontology, used by the statistics and concept-selection cells.
concepts = ontology.to_list()

## Dataset stats

In [None]:
# Summarise the loaded data: total number of concepts, how many of them have a
# truthy `propagated` attribute, and the number of samples in the dataset.
n_propagated = sum(1 for concept in concepts if concept.propagated)
stats = (
    f'Unique concepts: {len(concepts)}',
    f'Propagated concepts: {n_propagated}',
    f'Dataset Size: {len(dset)}',
)
print('\n'.join(stats))

## Select image

In [None]:
from torchvision.transforms import ToPILImage
from torchvision.transforms import Resize
import numpy as np
import IPython

In [None]:
# Index into `dset` of the sample to inspect; change it to look at another one.
img_idx = 0

In [None]:
# A sample unpacks into three items: the first is ignored here, `img` is the
# image that gets displayed and `masks` is queried later for concept masks.
_, img, masks = dset[img_idx]

In [None]:
# Converter to a PIL image; kept in a variable because the masked image is
# displayed with it as well.
to_pil = ToPILImage()
# Render the selected image inline in the notebook output.
IPython.display.display(to_pil(img))

## Select concept

In [None]:
# Narrow the full concept list down to the ones `masks` selects (presumably
# the concepts annotated in this image -- confirm against select_concepts),
# then print each with its position so one can be chosen by index.
img_concepts = masks.select_concepts(concepts)
for i, c in enumerate(img_concepts):
    print(i, c)

In [None]:
# Position of the concept to visualise, taken from the listing of
# `img_concepts`; it must be a valid index into that sequence.
c_idx = 1

In [None]:
# Mask of the chosen concept for the selected image.
c_mask = masks.get_concept_mask(img_concepts[c_idx])

In [None]:
# Multiply the image by the concept mask. `img.shape[1:]` is compared with the
# mask's shape (assumes the image's first axis is the channel axis -- TODO
# confirm). When the two differ, the image is resized to the mask's size,
# masked there, and the result is resized back to the image's original size.
img_size = img.shape[1:]
mask_size = c_mask.shape
if img_size != mask_size:
    to_cmask_size = Resize(mask_size)
    to_img_size = Resize(img_size)
    masked = to_img_size(to_cmask_size(img) * c_mask)
    print('Resized')
else:
    masked = img * c_mask
    print('Same size')

In [None]:
# Render the image with the selected concept's mask applied.
IPython.display.display(to_pil(masked))