In [1]:
import torch
from torch.utils.data.dataset import Dataset
from PIL import Image
from os import path
from torchvision import transforms
import numpy as np

# First test implementation

## Costruction of dataset

- `all_labels.txt`: All image's labels of all entire dataset
- `classes.txt`: classes
- `test.txt`: test dataset
- `test.txt`: train dataset 

In [8]:
class TrashbeanDataset(Dataset):
    def __init__(self, base_path, txt_list, transform=None):
        IMG_CHANNEL = 3

        self.txt_list = txt_list ## utile per la normalizzazione

        self.base_path = base_path
        self.images = np.loadtxt(txt_list, dtype=str, delimiter=',')
        self.mean = np.zeros(IMG_CHANNEL)
        self.dev_std = np.zeros(IMG_CHANNEL)

        self.transform = transform

    def __getitem__(self, index):
        f,c = self.images[index] # recuperiamo il path dell'immagine di indice index e la relativa etichetta
        im = Image.open(path.join(self.base_path, f))   ## load img with PIL

        if self.transform is not None:          ## toTensor, resize, normalize etc...
            im = self.transform(im)

        label = int(c) # converto l'etichetta in intero

        return {'image': im, 'label': label}    # return dictionary --> immagine : etichetta

    def _bef_normalize(self):
        for sample in self:
            self.mean += sample['image'].sum(1).sum(1).numpy() ## accumulo somma dei pixel canale per canale

        self.mean = self.mean / ( len(self)*256*256 )

        for sample in self:
            self.dev_std += ( ( sample['image'] - torch.Tensor(self.mean).view(3,1,1) )**2).sum(1).sum(1).numpy()

        self.dev_std = np.sqrt(self.dev_std/(len(self)*256*256))

    # restituisce numero di campioni: la lunghezza della lista images
    def __len__(self):
        return len(self.images)

In [9]:

# transforms.Resize the input image to the given size. If the image is torch Tensor, it is expected to have […, H, W] shape, where … means an arbitrary number of leading dimensions

dataset_train = TrashbeanDataset('static/datasets/img', 'static/datasets/train.txt', transform=transforms.ToTensor()) # can put (w,h) too
# dataset_test

# print(sample['image'].shape, sample['label'])
print("shape first img loaded:", dataset_train[0]['image'].shape)
# print(dataset__.__getitem__(0))
# print(dataset_train[0]['image'].shape)

print("before calculated", dataset_train.mean, dataset_train.dev_std)

## calcolo media e dev std con il dataset "puro" senza trasformazioni su immagini
dataset_train._bef_normalize()

print("after calculated", dataset_train.mean, dataset_train.dev_std)

m = dataset_train.mean
s = dataset_train.dev_std

img_dim = 100   ## can be (h,w), see documentation

transform_normalization = transforms.Compose([  transforms.Resize(img_dim),     ## resize image
                                                transforms.ToTensor(),
                                                ## without the apply of these 2 function u can see the difference between normalizated and not
                                                transforms.Normalize(m,s), ## normalize with mean and dev std
                                                torch.flatten   ## normalize with lambda function
                                            ])

dataset_train = TrashbeanDataset('static/datasets/img', 'static/datasets/train.txt', transform=transform_normalization) # can put (w,h) too

print("shape first img after normalization:", dataset_train[0]['image'].shape, dataset_train[0]['label'])



shape first img loaded: torch.Size([3, 1920, 1080])
before calculated [0. 0. 0.] [0. 0. 0.]
after calculated [16.55578594 15.55018628 14.82821083] [90.19166519 84.71409267 80.78255968]
shape first img after normalization: torch.Size([53100]) -1
