# README
We use the following script to compute the mean and standard deviation of each dataset.

In [1]:
from data import ETHCILDataset, ETHMultiCityDataset

In [2]:
from typing import List
# utilizes code from https://discuss.pytorch.org/t/about-normalization-using-pre-trained-vgg16-networks/23560/6?u=kuzand

from torch.utils.data import DataLoader

def normalize(dataset, batch_size=None, num_workers=1):
    """Print and return the per-channel mean and standard deviation of a dataset.

    Every batch is reshaped to ``(batch, channels, -1)``, where ``channels``
    is dimension 1 of the batch. Mean and std are computed per sample and
    channel and then averaged over all samples. The reported std is therefore
    the average of the per-sample stds, not the std of all values pooled over
    the whole dataset.

    Args:
        dataset: Dataset supporting ``len()`` whose batches are either a
            tensor or a list/tuple with the data tensor first
            (e.g. ``(image, label)`` samples).
        batch_size: Number of samples loaded per iteration. ``None``
            (the default) loads the whole dataset as a single batch; pass a
            smaller value to bound memory usage. The result does not depend
            on it, up to floating-point rounding.
        num_workers: Number of worker processes used by the ``DataLoader``.

    Returns:
        Tuple ``(mean, std)`` of tensors with one entry per channel.

    Raises:
        ValueError: If ``dataset`` is empty.
    """
    total = len(dataset)
    if total == 0:
        # Fail with a clear message instead of the DataLoader's complaint
        # about batch_size=0.
        raise ValueError('cannot compute statistics of an empty dataset')

    loader = DataLoader(
        dataset,
        batch_size=total if batch_size is None else batch_size,
        num_workers=num_workers,
        shuffle=False
    )

    mean = 0.
    std = 0.
    nb_samples = 0
    for data in loader:
        # Get rid of the label if we have one: batches of (data, label, ...)
        # samples arrive as a sequence with the data tensor first.
        if isinstance(data, (list, tuple)):
            data = data[0]

        batch_samples = data.size(0)
        # Flatten everything after the channel dimension; reshape (unlike
        # view) also accepts non-contiguous batches.
        data = data.reshape(batch_samples, data.size(1), -1)
        # Accumulate per-sample statistics summed over the batch so the
        # final average is independent of the batch size.
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples

    mean /= nb_samples
    std /= nb_samples
    print(f'Mean: {mean}')
    print(f'Std: {std}')
    return mean, std

In [5]:
# Dataset locations and the keyword arguments shared by every dataset below.
cil_data_dir = '/hdd/data/cil_data_root/cil_data'
cities_data_dir = '/hdd/data/cil_data_root/cities_data'
stats_kwargs = {'args': None, 'split': 'stats'}

# Mean/std of the ETH CIL dataset.
print('ETH CIL Dataset: ')
normalize(ETHCILDataset(data_dir=cil_data_dir, **stats_kwargs))

# Mean/std of each city on its own.
cities = ['paris', 'zurich', 'berlin', 'chicago']
for city in cities:
    print(f'ETH Large Satellite Dataset for {city} ')
    normalize(
        ETHMultiCityDataset(data_dir=cities_data_dir, cities=[city], **stats_kwargs)
    )

# Mean/std over all cities combined.
print(f'ETH Large Satellite Dataset for {cities} ')
normalize(
    ETHMultiCityDataset(data_dir=cities_data_dir, cities=cities, **stats_kwargs)
)


ETH CIL Dataset: 
Mean: tensor([0.5098, 0.5205, 0.5180])
Std: tensor([0.2109, 0.2011, 0.1962])
ETH Large Satellite Dataset for paris 
Mean: tensor([0.3224, 0.3629, 0.3438])
Std: tensor([0.1890, 0.2057, 0.1960])
ETH Large Satellite Dataset for zurich 
Mean: tensor([0.2436, 0.2583, 0.2360])
Std: tensor([0.1386, 0.1353, 0.1285])
ETH Large Satellite Dataset for berlin 
Mean: tensor([0.3153, 0.3344, 0.3120])
Std: tensor([0.1802, 0.1881, 0.1836])
ETH Large Satellite Dataset for chicago 
Mean: tensor([0.3372, 0.3348, 0.3009])
Std: tensor([0.1833, 0.1780, 0.1778])
ETH Large Satellite Dataset for ['paris', 'zurich', 'berlin', 'chicago'] 
Mean: tensor([0.3082, 0.3285, 0.3042])
Std: tensor([0.1752, 0.1803, 0.1745])
