# Homework 6 - Datasets


## Exercise 1

Objective:
Implement a dataset class for the custom dataset: https://www.kaggle.com/datasets/mbkinaci/fruit-images-for-object-detection

Note, that this dataset contains object detection annotations in the form of xml files. Object detection annotations are a way to describe the location (bounding box) in addition to the class of objects in an image.

The task is to:

1. Download the dataset locally
2. Pass the path to the test folder to the `root_dir` parameter of the `CustomDataset` constructor method.
3. Complete the methods of the `CustomDataset` class, so that it can be used to load the data.
4. Introduce all the sensible transformations to the dataset, including four new transformations that were not used during the lecture. Feel free to use `albumentations` or `torchvision.transforms.v2`. Try to apply point transformations to images, such that the
labels remain valid after transformation has been applied.

Hint: To read the xml files, you can use the `xml.etree.ElementTree` module.

```

In [58]:
from torch.utils.data import Dataset, DataLoader
import torch
import cv2

from PIL import Image
from xml.etree import ElementTree as ET
from torchvision.transforms.v2 import ToTensor
import numpy as np
import matplotlib.pyplot as plt

#student imports
import os
import xml.etree.ElementTree as ET
import torchvision
import albumentations as A


class CustomDataset(Dataset):
    def __init__(self, root_dir: str, transform=None):
        """
        Properly initialize the dataset. This means filling the lists
        self.images and self.labels with paths to the images and labels, respectively.
        Note: the order of the paths in those lists is important, so use the same order
        (e.g. image orange_90.jpg and label orange_90.xml should be in the same index in the lists).
        """
        self.transform = transform
        # <your_code_here>
        self.root_dir = root_dir
        #we know that the images and labels files are in the same order so we can sort them to ensure that image and corresponding label are in the same index
        files_in_root_dir = os.listdir(self.root_dir)
        self.images = sorted([f for f in files_in_root_dir if f.endswith(".jpg")])
        self.labels = sorted([f for f in files_in_root_dir if f.endswith(".xml")])
        # </your_code_here>
    def __len__(self):
        """
        Return the number of samples in the dataset.
        """
        # <your_code_here>
        return len(self.images)
        # </your_code_here>

    def __getitem__(self, idx):
        """
        Make sure that this method returns a tuple of image and objects where:
        - image is a torch tensor of shape (C, H, W) - [channels, height, width]
        - objects is a list of tuples in the form (xmin, ymin, xmax, ymax, label).
            In other words, each image can contain multiple objects, and each object has
            bounding box data (xmin, ymin, xmax, ymax) and label.
        Note: reading xml files is a bit tricky, feel free to collaborate with your peers
        """
        # <your_code_here>
        # get rectangles of objects from xml file
        label_path = os.path.join(self.root_dir, self.labels[idx])
        tree = ET.parse(label_path)
        root = tree.getroot()
        objects: list[tuple[int, int, int, int, str]] = []
        for obj in root.findall('object'):
            label = obj.find('name').text
            box = obj.find('bndbox')
            xmin = int(box.find('xmin').text)
            ymin = int(box.find('ymin').text)
            xmax = int(box.find('xmax').text)
            ymax = int(box.find('ymax').text)
            objects.append((xmin, ymin, xmax, ymax, label))
        # get image and apply transformations
        image_path = os.path.join(self.root_dir, self.images[idx])
        image = Image.open(image_path)
        if self.transform:
            image = self.transform(image)
            objects = self.transform(objects)
        else:
            image = ToTensor()(image)
        print(type(image))
        print(image.shape)
        print(type(objects))
        print(objects)
        # </your_code_here>
        return image, objects


"""
Change the custom_transform variable to the transformation you want to apply to the dataset.
Hint: Use torchvision.transformers.v2.Compose to combine multiple transformations.
"""
# </your_code_here>
custom_transform = torchvision.transforms.v2.Compose([
    
    torchvision.transforms.v2.ToDtype(torch.uint8, scale=True),
    torchvision.transforms.v2.ToTensor(),
])
# </your_code_here>

"""
Change the root_dir variable to the path to the test folder.
"""
# <your_code_here>
root_dir = "data/test"
# </your_code_here>

dataset = CustomDataset(
    root_dir=root_dir,
    transform=ToTensor() if custom_transform is None else custom_transform,
)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

label_2_colour = {
    "orange": (255, 165, 0),
    "banana": (204, 153, 0),
    "apple": (255, 0, 0),
}

for i, (image, objects) in enumerate(dataloader):
    # image is a tensor of shape (1, 3, H, W)
    # we need to convert it to shape (H, W, 3), and then convert from torch to numpy
    img = image[0].permute(1, 2, 0).numpy()
    # change the img to that it can be displayed with opencv
    img = (img * 255).astype(np.uint8)
    for obj in objects:
        # iterate over the objects and draw bounding boxes around them
        xmin, ymin, xmax, ymax, label = obj
        color = label_2_colour[label[0]]
        img_copy = img.copy()  # Create a copy of the image
        cv2.rectangle(
            img_copy, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color, 5
        )
        img = img_copy  # Assign the modified copy back to img
    plt.imshow(img)
    plt.show()
    if i == 10:
        break


ModuleNotFoundError: No module named 'albumentations'

In [50]:
import torch

# Tworzenie przykładowego tensora
tensor = torch.tensor([[1, 2, 3], [1, 5, 6], [1, 8, 9]])
# Konwersja tensora do listy
list_from_tensor = tensor.tolist()

print("Tensor:\n", tensor)
print("Lista:", list_from_tensor)

custom_transform = torchvision.transforms.v2.Compose([
    torchvision.transforms.v2.Resize((224, 224)),
    torchvision.transforms.v2.ToDtype(torch.uint8, scale=True),
    torchvision.transforms.v2.ToTensor(),
])

tensor = custom_transform(tensor)
list_from_tensor = tensor.tolist()


print("Tensor after transform:\n", tensor)
print("Lista after transform:", list_from_tensor)

Tensor:
 tensor([[1, 2, 3],
        [1, 5, 6],
        [1, 8, 9]])
Lista: [[1, 2, 3], [1, 5, 6], [1, 8, 9]]


ValueError: not enough values to unpack (expected 3, got 2)

In [46]:
# Przykładowa wielowymiarowa lista
multi_dimensional_list = [[1, 2], [3, 4], [5, 6]]

# Lista z danymi do dodania
additional_data = [10, 20, 30]

# Dodawanie dodatkowej danej do każdego wymiaru
multi_dimensional_list = [sublist + [additional_data[i]] for i, sublist in enumerate(multi_dimensional_list)]

print("Zaktualizowana lista:", multi_dimensional_list)

Zaktualizowana lista: [[1, 2, 10], [3, 4, 20], [5, 6, 30]]
