In [71]:
from torchvision.datasets import CIFAR10, CIFAR100, MNIST, KMNIST, FashionMNIST,SVHN
from torch.utils.data import DataLoader, SubsetRandomSampler
from data import GroceryStore,HistAerial,FractalDB60, TinyImageNet
import numpy as np
from torchvision import transforms
import matplotlib.pyplot as plt
import torch
import scipy

In [40]:
d = KMNIST(root="/data/datasets/kmnist", train=True).data
d = d / 255.
d.shape, d.mean(), d.std()

(torch.Size([60000, 28, 28]), tensor(0.1918), tensor(0.3483))

In [41]:
d = MNIST(root="/data/datasets/mnist", train=True).data
d = d / 255.
d.shape, d.mean(), d.std()

(torch.Size([60000, 28, 28]), tensor(0.1307), tensor(0.3081))

In [79]:
d = FashionMNIST(root="/data/datasets/fashionmnist", train=True).data
d = d / 255.
d.shape, d.mean(), d.std()

(torch.Size([60000, 28, 28]), tensor(0.2860), tensor(0.3530))

In [77]:
d = CIFAR10(root="/data/datasets/cifar10", train=True).data
d = d.reshape(-1, 3) / 255.
d.shape, d.mean(axis=0), d.std(axis=0)

((51200000, 3),
 array([0.49139968, 0.48215841, 0.44653091]),
 array([0.24703223, 0.24348513, 0.26158784]))

In [78]:
d = CIFAR100(root="/data/datasets/cifar100", train=True).data
d = d.reshape(-1, 3) / 255.
d.shape, d.mean(axis=0), d.std(axis=0)

((51200000, 3),
 array([0.50707516, 0.48654887, 0.44091784]),
 array([0.26733429, 0.25643846, 0.27615047]))

In [49]:
def get_mean_and_std(dataloader):
    channels_sum, channels_squared_sum, num_batches = 0, 0, 0
    for data, _ in dataloader:
        # Mean over batch, height and width, but not over the channels
        channels_sum += torch.mean(data, dim=[0,2,3])
        channels_squared_sum += torch.mean(data**2, dim=[0,2,3])
        num_batches += 1
    
    mean = channels_sum / num_batches

    # std = sqrt(E[X^2] - (E[X])^2)
    std = (channels_squared_sum / num_batches - mean ** 2) ** 0.5

    return mean, std

In [58]:
train_dataset = CIFAR10(root='C:/DataSets/cifar10', train=True, 
                                 transform=transforms.Compose(
            [
                transforms.Resize(32),                
                transforms.ToTensor()
            ]),download=True)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64)

get_mean_and_std(train_dataloader)

Files already downloaded and verified


(tensor([0.4915, 0.4822, 0.4466]), tensor([0.2470, 0.2435, 0.2616]))

In [59]:
train_dataset = GroceryStore(root="C:/DataSets/grocerystore",split="train", 
                                 transform=transforms.Compose(
            [
                transforms.Resize(48),
                transforms.CenterCrop(32),
                transforms.ToTensor()
            ]),download=True)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64)

get_mean_and_std(train_dataloader)

(tensor([0.5525, 0.4104, 0.2445]), tensor([0.2205, 0.1999, 0.1837]))

In [60]:
train_dataset = HistAerial(root="C:/DataSets/histaerial",dataset_type="25x25", 
                                 transform=transforms.Compose(
            [                
                transforms.ToTensor()
            ]),download=True)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64)

get_mean_and_std(train_dataloader)

(tensor([0.4621, 0.4621, 0.4621]), tensor([0.2766, 0.2766, 0.2766]))

In [61]:
train_dataset = HistAerial(root="C:/DataSets/histaerial",dataset_type="50x50", 
                                 transform=transforms.Compose(
            [                
                transforms.ToTensor()
            ]),download=True)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64)

get_mean_and_std(train_dataloader)

(tensor([0.4625, 0.4625, 0.4625]), tensor([0.2764, 0.2764, 0.2764]))

In [62]:
train_dataset = HistAerial(root="C:/DataSets/histaerial",dataset_type="100x100", 
                                 transform=transforms.Compose(
            [                
                transforms.ToTensor()
            ]),download=True)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64)

get_mean_and_std(train_dataloader)

(tensor([0.4616, 0.4616, 0.4616]), tensor([0.2759, 0.2759, 0.2759]))

In [64]:
train_dataset = FractalDB60(root="C:/DataSets/test/fractaldb60", 
                                 transform=transforms.Compose(
            [                
                transforms.ToTensor()
            ]),download=True)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64)

get_mean_and_std(train_dataloader)

(tensor([0.0622, 0.0622, 0.0622]), tensor([0.1646, 0.1646, 0.1646]))

In [67]:
train_dataset = TinyImageNet(root="C:/DataSets/test/tinyimagenet", 
                                 transform=transforms.Compose(
            [                
                transforms.ToTensor()
            ]),download=True)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64)

get_mean_and_std(train_dataloader)

(tensor([0.4802, 0.4481, 0.3975]), tensor([0.2764, 0.2689, 0.2816]))

In [None]:
train_dataset = SVHN(root="C:/DataSets/svhn", 
                                 transform=transforms.Compose(
            [                
                transforms.ToTensor()
            ]),download=True)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64)

get_mean_and_std(train_dataloader)

Using downloaded and verified file: C:/DataSets/svhn\train_32x32.mat


(tensor([0.4377, 0.4438, 0.4728]), tensor([0.1980, 0.2010, 0.1970]))

In [70]:
!pip install scipy

Collecting scipy
  Downloading scipy-1.8.0-cp39-cp39-win_amd64.whl (36.9 MB)
     ---------------------------------------- 36.9/36.9 MB 3.1 MB/s eta 0:00:00
Installing collected packages: scipy
Successfully installed scipy-1.8.0
