In [1]:
import os
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import zipfile
import io
import torch
from torchvision import transforms

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
def get_file_paths(zipArchive):
    file_paths = []
    image_zip = zipfile.ZipFile(zipArchive, 'r')
    namelist = image_zip.namelist()
    for li in namelist:
        if not image_zip.getinfo(li).is_dir():
            file_paths.append(li)
    image_zip.close()
    return file_paths

In [4]:
class MyDatasetZip(Dataset):
    def __init__(self, archive, transform=[]):
        self.archive = archive
        self.data_files = get_file_paths(self.archive)
        self.image_zip = zipfile.ZipFile(self.archive, 'r')
        self.transform = transform

    def __getitem__(self, idx):
        data = self.load_file(self.data_files[idx])
        if self.transform:
          data = self.transform(data)
        return data

    def __len__(self):
        return len(self.data_files)

    def load_file(self, img_path):
        data = self.image_zip.read(img_path)
        dataEnc = io.BytesIO(data)
        img = Image.open(dataEnc)
        img_data = np.asarray(img, dtype=float) / 255
        # image_zip.close()
        return img_data


In [5]:
def get_mean_std_dataset(imageArchive):
  dataset = MyDatasetZip(imageArchive, transform=transforms.Compose([transforms.ToTensor()]))
  BATCH_SIZE = 8
  loader = DataLoader(dataset, num_workers=1, batch_size=BATCH_SIZE)
  dataset_sample_size = next(iter(loader)).shape
  print("Dataset count : ", len(loader.dataset))
  print("Dataset shape : ", dataset_sample_size)

  # channels = 1
  # if(len(dataset_sample_size) == 4):
  channels = dataset_sample_size[1]

  print("Image channels : ", channels)

  mean = torch.zeros(channels)
  std = torch.zeros(channels)
  for i, images in enumerate(loader):
      images = images.view(images.size(0), images.size(1), -1)
      mean += images.mean(2).sum(0)
      std += images.std(2).sum(0)
      if (i+1) % 4000 == 0:
        print("Done for ", i+1)


  mean /= len(loader.dataset)
  std /= len(loader.dataset)
  return mean, std

In [6]:
fg_bg = '/content/gdrive/My Drive/EVA/15A/fg_bg.zip'
fg_bg_mask = '/content/gdrive/My Drive/EVA/15A/fg_bg_mask.zip'
dense_depth = '/content/gdrive/My Drive/EVA/15A/dense_depth_out/dense_depth_1.zip'

In [7]:
fg_bg_mean, fg_bg_std = get_mean_std_dataset(fg_bg)
print("FG BG Mean ", fg_bg_mean)
print("FG BG STD ", fg_bg_std)

Dataset count :  400000
Dataset shape :  torch.Size([8, 3, 224, 224])
Image channels :  3
Done for  4000
Done for  8000
Done for  12000
Done for  16000
Done for  20000
Done for  24000
Done for  28000
Done for  32000
Done for  36000
Done for  40000
Done for  44000
Done for  48000
FG BG Mean  tensor([0.5445, 0.5092, 0.4564])
FG BG STD  tensor([0.2265, 0.2253, 0.2360])


In [8]:
fg_bg_mask_mean, fg_bg_mask_std = get_mean_std_dataset(fg_bg_mask)
print("FG BG Mask Mean ", fg_bg_mask_mean)
print("FG BG Mask STD ", fg_bg_mask_std)

Dataset count :  400000
Dataset shape :  torch.Size([8, 1, 224, 224])
Image channels :  1
Done for  4000
Done for  8000
Done for  12000
Done for  16000
Done for  20000
Done for  24000
Done for  28000
Done for  32000
Done for  36000
Done for  40000
Done for  44000
Done for  48000
FG BG Mask Mean  tensor([0.0571])
FG BG Mask STD  tensor([0.2165])


In [9]:
depth_mean, depth_std = get_mean_std_dataset(dense_depth)
print("FG BG Mask Mean ", depth_mean)
print("FG BG Mask STD ", depth_std)

Dataset count :  400000
Dataset shape :  torch.Size([8, 1, 224, 224])
Image channels :  1
Done for  4000
Done for  8000
Done for  12000
Done for  16000
Done for  20000
Done for  24000
Done for  28000
Done for  32000
Done for  36000
Done for  40000
Done for  44000
Done for  48000
FG BG Mask Mean  tensor([0.4385])
FG BG Mask STD  tensor([0.2491])
