In [62]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms as T
from types import SimpleNamespace
from avalanche.benchmarks.datasets import CORe50Dataset


In [56]:
# Notebook-wide settings, collected in one namespace.
# NOTE: data_root is an absolute, machine-specific path; adjust it when
# running this notebook elsewhere.
cfg = SimpleNamespace(
    batch_size=64,
    data_root="/Scratch/al183/datasets",
)

In [57]:
def print_mean_std(dataset, label, batch_size=None, num_workers=4):
    """Print the per-channel mean and standard deviation of an image dataset.

    The statistics are accumulated over every pixel of every sample, so each
    sample carries the same weight even when the final batch is smaller than
    the others. Averaging per-batch means instead would over-weight the
    samples that land in a short last batch.

    Args:
        dataset: Dataset yielding ``(image, target)`` pairs. Batches are
            reduced over dims 0, 2 and 3, so images must collate to a 4-D
            tensor with the channel axis at dim 1.
        label: Text printed in front of the statistics.
        batch_size: Samples per batch; ``None`` falls back to
            ``cfg.batch_size``.
        num_workers: Number of worker processes used by the ``DataLoader``.

    Raises:
        ValueError: If the dataset yields no samples.
    """
    if batch_size is None:
        batch_size = cfg.batch_size

    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=False
    )

    channel_sum, channel_sq_sum, pixels_per_channel = 0.0, 0.0, 0
    for data, _ in loader:
        # Accumulate in float64 so summing many pixels does not lose precision.
        data = data.to(torch.float64)
        # Sum over batch, height and width, but not over the channels.
        channel_sum += data.sum(dim=[0, 2, 3])
        channel_sq_sum += (data ** 2).sum(dim=[0, 2, 3])
        # Number of values that contributed to each channel in this batch.
        pixels_per_channel += data.numel() // data.shape[1]

    if pixels_per_channel == 0:
        raise ValueError(f"Cannot compute statistics for '{label}': dataset is empty")

    mean = channel_sum / pixels_per_channel
    # var = E[X^2] - (E[X])^2; clamp guards against tiny negative values
    # produced by rounding, which would otherwise turn into NaN under sqrt.
    variance = (channel_sq_sum / pixels_per_channel - mean ** 2).clamp(min=0)
    std = variance.sqrt()

    def _to_str(tensor):
        return ", ".join([f"{x:.4f}" for x in tensor.tolist()])

    print(f"{label}: ({_to_str(mean)}), ({_to_str(std)})")
    


In [58]:
# Fashion-MNIST: load both splits as tensors and report their channel statistics.
fmnist_train = datasets.FashionMNIST(
    cfg.data_root,
    train=True,
    download=True,
    transform=T.ToTensor(),
)
fmnist_test = datasets.FashionMNIST(
    cfg.data_root,
    train=False,
    download=True,
    transform=T.ToTensor(),
)

print_mean_std(dataset=fmnist_train, label="FMNIST Train")
print_mean_std(dataset=fmnist_test, label="FMNIST Test ")


FMNIST Train: (0.2861), (0.3530)
FMNIST Test : (0.2866), (0.3523)


In [59]:
# CIFAR-10: load both splits as tensors and report their channel statistics.
cifar10_train = datasets.CIFAR10(
    cfg.data_root,
    train=True,
    download=True,
    transform=T.ToTensor(),
)
cifar10_test = datasets.CIFAR10(
    cfg.data_root,
    train=False,
    download=True,
    transform=T.ToTensor(),
)

print_mean_std(dataset=cifar10_train, label="CIFAR10 Train")
print_mean_std(dataset=cifar10_test, label="CIFAR10 Test ")

Files already downloaded and verified
Files already downloaded and verified
CIFAR10 Train: (0.4915, 0.4822, 0.4466), (0.2470, 0.2435, 0.2616)
CIFAR10 Test : (0.4942, 0.4851, 0.4504), (0.2467, 0.2430, 0.2616)


In [60]:
# CIFAR-100: load both splits as tensors and report their channel statistics.
cifar100_train = datasets.CIFAR100(
    cfg.data_root,
    train=True,
    download=True,
    transform=T.ToTensor(),
)
cifar100_test = datasets.CIFAR100(
    cfg.data_root,
    train=False,
    download=True,
    transform=T.ToTensor(),
)

print_mean_std(dataset=cifar100_train, label="CIFAR100 Train")
print_mean_std(dataset=cifar100_test, label="CIFAR100 Test ")

Files already downloaded and verified
Files already downloaded and verified
CIFAR100 Train: (0.5070, 0.4865, 0.4408), (0.2673, 0.2564, 0.2761)
CIFAR100 Test : (0.5084, 0.4871, 0.4416), (0.2681, 0.2572, 0.2769)


In [63]:
# CORe50: load both splits as tensors and report their channel statistics.
core50_train = CORe50Dataset(
    cfg.data_root,
    train=True,
    download=True,
    transform=T.ToTensor(),
)
core50_test = CORe50Dataset(
    cfg.data_root,
    train=False,
    download=True,
    transform=T.ToTensor(),
)

print_mean_std(dataset=core50_train, label="CORE50 Train")
print_mean_std(dataset=core50_test, label="CORE50 Test ")

Loading labels...
Loading LUP...
Loading labels names...
Files already downloaded and verified
Loading labels...
Loading LUP...
Loading labels names...
Files already downloaded and verified
CORE50 Train: (0.6001, 0.5721, 0.5417), (0.1965, 0.2066, 0.2183)
CORE50 Test : (0.5998, 0.5575, 0.5395), (0.2600, 0.2626, 0.2821)
