In [17]:
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch
import os
from skimage import io, transform
import numpy as np
from torchvision import transforms, utils
import matplotlib.pyplot as plt
import time

In [47]:
class ProjectDataset(Dataset):
    """Image dataset indexed by a CSV file.

    Column 0 of the CSV is read as the image path (relative to ``root_dir``)
    and column 1 as the label of that image. Each item is a dict with the
    keys ``'image'``, ``'label'`` and ``'name'``.
    """

    def __init__(self, csv_file, root_dir, transform=None, debug=False):
        """
        Args:
            csv_file: Path of the CSV index file, loaded with ``pd.read_csv``.
            root_dir: Directory that the image paths in the CSV are joined to.
            transform: Optional callable applied to the sample dict.
            debug: When truthy, ``__getitem__`` prints the image name it loaded.
        """
        self.label_info_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.debug = debug

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.label_info_frame)

    def __getitem__(self, idx):
        """Load the image and label of row ``idx`` and return them as a dict.

        Returns:
            ``{'image': <result of io.imread>, 'label': 1-element ndarray,
            'name': <column-0 value>}``, passed through ``self.transform``
            when one was given.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        label_img_name = self.label_info_frame.iloc[idx, 0]
        img_name = os.path.join(self.root_dir, label_img_name)
        image = io.imread(img_name)

        # Wrap the scalar label in a 1-element array so it can later be
        # converted with torch.from_numpy. No float cast is done here: the
        # previous (unused) cast crashed on non-numeric labels.
        label = np.array([self.label_info_frame.iloc[idx, 1]])

        sample = {'image': image,
                  'label': label,
                  'name': label_img_name}

        if self.transform:
            sample = self.transform(sample)

        if self.debug:
            print(label_img_name)

        return sample

In [48]:
# Build the untransformed dataset and report how many rows the CSV holds.
face_dataset = ProjectDataset(
    csv_file='D:/pravesh/Concordia/2021-Winter/COMP-6721-Intro_To_AI/project/comp-6721-project/label_info.csv',
    root_dir='D:/pravesh/Concordia/2021-Winter/COMP-6721-Intro_To_AI/project/1/dataset/images/',
)
print(len(face_dataset))

# Enable debug only for this single fetch so the loaded file name is echoed.
face_dataset.debug = True
sample = face_dataset[0]
face_dataset.debug = False

print(sample['name'], sample['image'].shape)

3091
mask\0003.jpg
mask\0003.jpg (2002, 3000, 3)


In [49]:
class Rescale(object):
    """Resize the image of a sample dict.

    Args:
        output_size (int or tuple): A tuple is used directly as the target
            ``(height, width)``. An int sets the target height; the width is
            scaled by ``w / h`` so the aspect ratio is kept.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        """Return a copy of ``sample`` whose ``'image'`` has been resized.

        All other keys of ``sample`` (e.g. ``'label'``, ``'name'``) are
        carried over unchanged.
        """
        image = sample['image']

        h, w = image.shape[:2]
        if isinstance(self.output_size, int):
            # Height is pinned to output_size; width follows the aspect ratio.
            new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size

        new_h, new_w = int(new_h), int(new_w)

        img = transform.resize(image, (new_h, new_w))

        # Build a new dict so the caller's sample is not mutated and keys this
        # transform does not know about are not silently dropped.
        return {**sample, 'image': img}


class RandomCrop(object):
    """Crop a random window out of the image of a sample dict.

    Args:
        output_size (int or tuple): Target ``(height, width)`` of the crop.
            An int produces a square crop.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, sample):
        """Return a copy of ``sample`` whose ``'image'`` is a random crop.

        All other keys of ``sample`` are carried over unchanged.

        Raises:
            ValueError: If the image is smaller than the crop in either
                dimension.
        """
        image = sample['image']

        h, w = image.shape[:2]
        new_h, new_w = self.output_size

        if new_h > h or new_w > w:
            raise ValueError(
                'crop size {} is larger than image size {}'.format(
                    (new_h, new_w), (h, w)))

        # randint's upper bound is exclusive, so "+ 1" is needed to make the
        # last valid offset reachable and to allow an image that is exactly
        # the crop size (offset 0 is then the only choice).
        top = np.random.randint(0, h - new_h + 1)
        left = np.random.randint(0, w - new_w + 1)

        image = image[top: top + new_h,
                      left: left + new_w]

        # New dict: the caller's sample is not mutated, extra keys survive.
        return {**sample, 'image': image}


class ToTensor(object):
    """Convert the ndarrays of a sample dict to torch Tensors."""

    def __call__(self, sample):
        """Return a copy of ``sample`` with ``'image'`` and ``'label'`` as Tensors.

        The image axes are reordered from H x W x C to C x H x W. All other
        keys of ``sample`` (e.g. ``'name'``) are carried over unchanged.
        """
        image, label = sample['image'], sample['label']

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C x H x W
        image = image.transpose((2, 0, 1))

        # New dict: the caller's sample is not mutated, extra keys survive.
        return {**sample,
                'image': torch.from_numpy(image),
                'label': torch.from_numpy(label)}

In [50]:
# Stand-alone transform instances.
crop = RandomCrop(128)
scale = Rescale(512)

# Two-step pipeline: rescale first, then take a random crop.
composed = transforms.Compose([
    Rescale(256),
    RandomCrop(224),
])

In [51]:
import gc
gc.collect()

# Histogram of image widths in 100-pixel bins: overall, and per label.
# NOTE(review): the per-label dict assumes the labels in the CSV are exactly
# 0, 1 and 2 - confirm against label_info.csv.
counts = np.zeros((100, ))
label_counts = {0: np.zeros((100, ))
                , 1: np.zeros((100, ))
                , 2: np.zeros((100, ))}

face_dataset.debug = False
t1 = time.time()
max_w, max_h = 0, 0
# Start the minima at +inf so both are sentinel-free (the previous literals
# 1000000 / 100000 were inconsistent with each other).
min_w, min_h = float('inf'), float('inf')
count = 0
for k in range(len(face_dataset)):
    sample = face_dataset[k]
    sample_image = sample['image']
    label = sample['label'][0]

    h, w, _ = sample_image.shape
    max_w, max_h = max(max_w, w), max(max_h, h)
    min_w, min_h = min(min_w, w), min(min_h, h)

    # Clamp to the last bin so an unusually wide image (>= 10000 px) is
    # counted there instead of raising IndexError late in a long run.
    index = min(w // 100, len(counts) - 1)
    counts[index] += 1
    label_counts[label][index] += 1

    # Run the rescale on every image; the elapsed time printed below
    # includes this step.
    transformed_sample = scale(sample)
    image = transformed_sample['image']

    count += 1
    if count % 100 == 0:
        # Progress report every 100 images: images done and seconds elapsed.
        gc.collect()
        print(count)
        print((time.time()-t1))

print((time.time()-t1))
print(max_w, max_h, min_w, min_h)
print(counts)
print(label_counts)

100
35.84386610984802
200
63.015703439712524
300
86.83172297477722
400
123.0325517654419
500
154.57428193092346
600
232.01140642166138
700
267.16561794281006
800
286.43241357803345
900
307.0557301044464
1000
322.20960879325867
1100
334.43564343452454
1200
351.1432144641876
1300
370.3830246925354
1400
387.9676263332367
1500
407.743775844574
1600
430.16234827041626
1700
533.25239777565
1800
770.5090777873993
1900
1055.697925567627
2000
1241.8436603546143
2100
1392.105262517929
2200
1562.8205502033234
2300
1577.3249225616455
2400
1599.721957206726
2500
1638.8086278438568
2600
1647.3492488861084
2700
1681.4737355709076
2800
1696.5142893791199
2900
1712.857860326767
3000
1737.6331858634949
1753.2574436664581
6720 8192 352 320
[  0.   0.   0.   8. 158. 338. 596. 463. 202. 162. 322.  41. 115.  10.
  30.  15.  18.   4.   9.  29.  21.   6.   2.   2.  11.   8.   1.   0.
   4.   4.  15.  31.  38.   2.   2.   2.   0.   2.   3.   1.  87.   1.
 300.   1.   0.   1.   1.   1.   0.   2.   1.   0.   1. 

In [None]:
# NOTE(review): the original cell was an unfinished `for` statement (a
# SyntaxError). Completed minimally to print each bin count; adjust if a
# different per-bin action was intended.
for i in counts:
    print(i)

In [None]:
# Dataset whose samples are rescaled and then converted to tensors.
transformed_dataset = ProjectDataset(
    csv_file='D:/pravesh/Concordia/2021-Winter/COMP-6721-Intro_To_AI/project/comp-6721-project/label_info.csv',
    root_dir='D:/pravesh/Concordia/2021-Winter/COMP-6721-Intro_To_AI/project/1/dataset/images/',
    transform=transforms.Compose([
        Rescale(512),
        ToTensor(),
    ]),
)

In [None]:
# Shuffled batches of 16 samples, loaded in the main process (no workers).
dataloader = DataLoader(
    transformed_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=0,
)