# RoboFlow 100 - Visualize images with PyTorch

Let's see how to parse and visualize the images inside RF100 with PyTorch. We start by importing everything we need.

In [8]:
from pathlib import Path
from ipywidgets import interact
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from typing import Callable, Tuple, Optional, List, Dict
import torch
from PIL import Image
from dataclasses import dataclass

## Downloading RF100

We will assume you have `RF100` saved on your disk; we provide a detailed guide in the official [README](https://github.com/roboflow-ai/roboflow-100-benchmark/blob/main/README.md) with instructions for downloading it.

**We assume the dataset was downloaded in yolov5/7 format** using the `-f yolov5` flag

## Dataset

We need to know where `RF100` is stored, feel free to change `ROOT`.


In [9]:
ROOT = Path('../rf100/') # <- change me :)
# Keep only real dataset folders: stray files or hidden directories inside ROOT
# (e.g. `.DS_Store`, `.ipynb_checkpoints`) would otherwise be listed as datasets.
datasets = sorted(
    (p for p in ROOT.iterdir() if p.is_dir() and not p.name.startswith(".")),
    key=lambda p: p.name,
)

Next, we need the classic [PyTorch `Dataset`](https://pytorch.org/tutorials/beginner/basics/data_tutorial.html).

In [10]:
class ImageDataset(Dataset):
    """
    One split of a dataset stored on disk in yolo format.

    Every sample pairs `<root>/<split>/images/<name>.jpg` with
    `<root>/<split>/labels/<name>.txt`; the sample names are discovered from the
    label files.
    """

    def __init__(self, root: Path, split: str = "train", transform: Optional[Callable] = None):
        """
        Args:
            root: folder of a single dataset (the one containing the split folders).
            split: which split folder to read.
            transform: optional callable `(image, bboxes) -> (image, bboxes)` applied
                once to every sample; when `None` the sample is left untouched.
        """
        super().__init__()
        # we have three splits, "train", "valid" and "test"
        self.src = root / split
        # images and labels are linked by the same name; sorted because glob order
        # depends on the filesystem and we want `idx` to always map to the same sample
        self.names = sorted(p.stem for p in (self.src / "labels").glob("*.txt"))
        self.transform = transform

    def get_image(self, image_path: Path) -> "Image.Image":
        """
        This function opens the image and returns it as an RGB PIL image
        """
        # `convert` returns a fully loaded copy, so the file can be closed right away
        with Image.open(image_path) as image:
            return image.convert("RGB")

    def get_labels(self, labels_path: Path) -> Optional[torch.Tensor]:
        """
        This function reads the label txt file in yolo format, each line of the file looks like

        <label_id> <x_center> <y_center> <width> <height>

        The coordinates are with respect to the image's width and height, so between 0 - 1

        We parse the labels with the following steps:
            1) read line by line
            2) split each line on whitespace, lines without exactly 5 fields
               (blank lines included) are skipped
            3) convert the 5 fields to floats
            4) build a single tensor from all the parsed lines

        The returned tensor has shape `num_boxes, 5`, `None` is returned when the file
        contains no usable line.
        """
        rows = []
        with labels_path.open("r") as f:
            for line in f:
                # `split()` without arguments copes with repeated spaces, tabs and
                # the trailing newline, and yields [] for a blank line
                fields = line.split()
                if len(fields) != 5:
                    continue
                rows.append([float(e) for e in fields])
        if not rows:
            return None
        return torch.as_tensor(rows)

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        """
        Parse the image and its associated labels and return them as tensors.

        The returned dict has the keys "image", "bboxes" (one `x_center, y_center,
        width, height` row per box) and "labels" (one class id per box).
        """
        name = self.names[idx]
        # NOTE: images are assumed to be stored as .jpg
        image = self.get_image(self.src / "images" / f"{name}.jpg")
        labels = self.get_labels(self.src / "labels" / f"{name}.txt")
        if labels is not None:
            bboxes = labels[..., 1:]
            labels = labels[..., 0].to(torch.int)
        else:
            # if we don't have labels, let's use these values
            labels = torch.tensor([-1.0])
            bboxes = torch.as_tensor([[0.0, 0.0, 0.0, 0.0]])
        # apply the transform exactly once, whichever branch produced the bboxes
        if self.transform is not None:
            image, bboxes = self.transform(image, bboxes)
        image = T.functional.to_tensor(image)
        return {"image": image, "bboxes": bboxes, "labels": labels}

    def __len__(self):
        """Number of samples (label files) found in the split."""
        return len(self.names)


### Transformations

Since we will probably need to resize the image, we can create a custom transformation that works on `image` and `bboxes` as well!

In [11]:
@dataclass
class Resize:
    """
    Resize a PIL image to `size` and carry the yolo bboxes along.

    `size` is forwarded as-is to `torchvision.transforms.functional.resize`.
    The bboxes are expected to be normalized (0 - 1) `x_center, y_center, width,
    height` rows; they are returned normalized with respect to the resized image.
    The input tensor is never modified, a new tensor is returned instead.
    """
    size: Tuple[int, int]

    def __call__(self, image: Image.Image, bboxes: Optional[torch.Tensor]) -> Tuple[Image.Image, Optional[torch.Tensor]]:
        # in PIL they are inverted LoL
        w, h = image.size
        image = T.functional.resize(image, self.size)
        if bboxes is None:
            return image, bboxes
        new_w, new_h = image.size
        # every step below is out-of-place so the caller's tensor is left untouched
        # 0 - 1 to pixels of the original image
        bboxes = bboxes * torch.as_tensor([w, h, w, h])
        # map to new sizes
        bboxes = bboxes / torch.as_tensor([w / new_w, h / new_h, w / new_w, h / new_h])
        # to 0 - 1
        bboxes = bboxes / torch.as_tensor([new_w, new_h, new_w, new_h])
        return image, bboxes

## Visualisation
Let's create `AnnotatedImage` to help us draw the bboxes on the image. Since `torchvision.utils.draw_bounding_boxes` expects boxes in `xyxy` format, the YOLO-style annotations (`x_center`, `y_center`, `width`, `height`) must first be converted to `xyxy`; we will do that with `torchvision.ops.boxes.box_convert` before building the `AnnotatedImage`.

In [12]:
from torchvision.utils import draw_bounding_boxes
from torchvision.ops.boxes import box_convert

class AnnotatedImage:
    """
    An image tensor together with the boxes and class ids to draw on top of it.
    """

    def __init__(self, image: torch.Tensor, bboxes: torch.Tensor, labels: torch.Tensor):
        self.image, self.bboxes, self.labels = image, bboxes, labels

    def draw(self):
        """
        Return the image as a uint8 tensor with every box drawn in yellow and
        captioned with its label value.
        """
        _, img_h, img_w = self.image.shape
        # boxes are stored relative to the image size, scale them to pixels
        to_pixels = torch.as_tensor([img_w, img_h, img_w, img_h])
        pixel_boxes = self.bboxes * to_pixels
        canvas = (self.image * 255).to(torch.uint8)
        captions = [str(label.item()) for label in self.labels]
        box_colors = ["yellow"] * len(self.labels)
        return draw_bounding_boxes(
            canvas,
            pixel_boxes,
            width=3,
            colors=box_colors,
            labels=captions,
        )
    
        

Finally, we can use Jupyter's `ipywidgets` module to visualize the images and the labels.

In [15]:
import matplotlib.pyplot as plt
from torchvision.ops.boxes import box_convert

from ipywidgets import interactive,Dropdown, IntSlider

# `.name` rather than `.stem`: a folder name containing a dot would be truncated by
# `.stem` and could no longer be found under ROOT. A list (not a lazy `map`) so the
# options can be read more than once.
dataset_path_dropdown = Dropdown(options=[path.name for path in datasets])
dataset_path_split = Dropdown(options=["train", "valid", "test"])
# placeholder range, the real upper bound is set in `visualize` once the dataset is loaded
image_idx_slider = IntSlider(min=0, max=1)

@interact(dataset_path=dataset_path_dropdown, split=dataset_path_split, image_idx=image_idx_slider)
def visualize(dataset_path: str, split: str = "train", image_idx: int = 0):
    """
    Draw the annotated image number `image_idx` of the chosen dataset split.

    Args:
        dataset_path: name of the dataset folder inside `ROOT`.
        split: one of "train", "valid" or "test".
        image_idx: index of the image, clamped to the last image of the split.

    Returns:
        A PIL image with the bboxes drawn on it, or `None` if the split has no images.
    """
    ds = ImageDataset(ROOT / dataset_path, split=split, transform=Resize((640, 640)))
    if len(ds) == 0:
        # nothing to show, and indexing would fail
        return None
    last_idx = len(ds) - 1
    # the slider is inclusive, so its maximum is the last valid index
    image_idx_slider.max = last_idx
    # let's be sure we are within range
    image_idx = min(image_idx, last_idx)
    data = ds[image_idx]
    # yolo boxes are (x_center, y_center, width, height), drawing needs corners
    bboxes = box_convert(data["bboxes"], in_fmt="cxcywh", out_fmt="xyxy")
    img = AnnotatedImage(data["image"], bboxes, data["labels"]).draw()
    # channels-first tensor -> channels-last array for PIL
    return Image.fromarray(img.permute(1, 2, 0).numpy())


interactive(children=(Dropdown(description='dataset_path', options=('4-fold-defect', 'abdomen-mri', 'acl-x-ray…

Et voilà 🥳