In [24]:
import sys
sys.path.append('..')
from utils import load_datasets
from utils import make_loader, DataLoader
import torch
import pandas as pd

# Count pixels per class

In [27]:
# Dataset location and settings shared by the cells below.
data_path = '/palma/scratch/tmp/j_sten07/data/FloodNet/512px'

patch_size = 512
NUM_CLASSES = 10
classes = 'floodnet'

# Load the three splits with normalize=False, then wrap each one in a loader.
train_dataset, validation_dataset, test_dataset = load_datasets(
    data_path,
    random_split=True,
    classes=classes,
    patch_size=patch_size,
    normalize=False,
)  # , only_test = True)
train_loader, val_loader, test_loader = make_loader(
    train_dataset,
    validation_dataset,
    test_dataset,
)

# Test-only alternative, kept for reference:
# test_dataset = load_datasets(data_path, random_split = True, classes = 'floodnet', patch_size=patch_size, normalize=False, only_test = True)
# test_loader = DataLoader(test_dataset)
# # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 

In [29]:
# Class histogram of one training sample (index 1): the second element of the
# sample is flattened and its values are counted per class id.
sample_mask = train_dataset[1][1]
torch.bincount(torch.flatten(sample_mask), minlength=NUM_CLASSES)

tensor([     0, 123506,      0,      0,      0,      0,  64845,    197,      0,
         73596])

In [30]:
# Per-split pixel counts: row 0 = train, row 1 = validation, row 2 = test;
# column c holds the number of pixels whose value is class id c.
#
# The accumulator is float64 on purpose: the default float32 represents
# integers exactly only up to 2**24, so running totals in the 1e9 range would
# be rounded on every `+=`. float64 is exact up to 2**53 and keeps the tensor
# floating-point, so the later division into fractions works unchanged.
splits = (train_dataset, validation_dataset, test_dataset)
class_count = torch.zeros(len(splits), NUM_CLASSES, dtype=torch.float64)
for row, dataset in enumerate(splits):
    for i in range(len(dataset)):
        # dataset[i][1] is the second element of the sample (used here as the
        # label mask); flatten it and histogram its values over the class ids.
        class_count[row] += torch.flatten(dataset[i][1]).bincount(minlength=NUM_CLASSES)

print(class_count)

tensor([[2.5619e+08, 3.0361e+08, 4.6908e+08, 5.3199e+08, 7.6801e+08, 1.6679e+09,
         2.5296e+09, 2.7787e+07, 3.0649e+07, 7.9904e+09],
        [9.0506e+07, 1.0550e+08, 1.5345e+08, 1.8420e+08, 2.6361e+08, 5.5256e+08,
         8.7632e+08, 9.4109e+06, 1.0617e+07, 2.6124e+09],
        [1.4076e+08, 1.4228e+08, 1.6967e+08, 2.0183e+08, 3.4384e+08, 7.0855e+08,
         1.1369e+09, 1.1586e+07, 1.2741e+07, 3.1979e+09]])


In [31]:
# Write the per-split class counts (one row per split, one column per class)
# to a CSV whose name carries the class-scheme identifier.
count_table = pd.DataFrame(class_count.numpy())
count_table.to_csv('../results/class_count' + classes + '.csv')

In [32]:
# Share of each class within its split: every row is divided by its own total.
split_totals = class_count.sum(dim=1, keepdim=True)
class_count / split_totals

tensor([[0.0176, 0.0208, 0.0322, 0.0365, 0.0527, 0.1144, 0.1736, 0.0019, 0.0021,
         0.5482],
        [0.0186, 0.0217, 0.0316, 0.0379, 0.0543, 0.1137, 0.1804, 0.0019, 0.0022,
         0.5377],
        [0.0232, 0.0235, 0.0280, 0.0333, 0.0567, 0.1168, 0.1874, 0.0019, 0.0021,
         0.5272]])

# Calculate the per-channel mean and standard deviation of the dataset

In [9]:
# Channel statistics over all three splits (train + validation + test).
# Every image contributes its own per-channel mean and std; the sums are then
# divided by the total number of images. Note that the resulting "std" is the
# average of per-image stds, not the std of all pixels pooled together.
mean = 0.
std = 0.
all_loaders = (train_loader, val_loader, test_loader)
for loader in all_loaders:
    for images, _ in loader:
        # size(0) is the batch size (the last batch can be smaller); collapse
        # everything after dim 1 so W x H becomes a single dimension
        pixels = images.view(images.size(0), images.size(1), -1)
        mean += pixels.mean(2).sum(0)  # per-channel mean, summed over the batch
        std += pixels.std(2).sum(0)    # per-channel std, summed over the batch

n_images = sum(len(loader.dataset) for loader in all_loaders)
mean_all = mean / n_images
std_all = std / n_images


In [10]:
# Show the combined statistics, one tensor per line.
print(mean_all, std_all, sep='\n')

tensor([0.3823, 0.3625, 0.3364])
tensor([0.1172, 0.1167, 0.1203])


In [7]:
# Channel statistics over the train and validation splits combined
# (despite the `train_` prefix, the validation loader is included).
# As computed here, the std is the average of per-image stds.
train_mean = 0.
train_std = 0.

fit_loaders = (train_loader, val_loader)
for loader in fit_loaders:
    for images, _ in loader:
        # size(0) is the batch size (the last batch can be smaller); merge
        # all dims after dim 1 into one before reducing over it
        pixels = images.view(images.size(0), images.size(1), -1)
        train_mean += pixels.mean(2).sum(0)
        train_std += pixels.std(2).sum(0)

n_fit_images = sum(len(loader.dataset) for loader in fit_loaders)
train_mean = train_mean / n_fit_images
train_std = train_std / n_fit_images
print(train_mean)
print(train_std)

tensor([0.3791, 0.3599, 0.3333])
tensor([0.1176, 0.1166, 0.1203])


In [23]:
# Channel statistics over the test split only.
# As computed here, the std is the average of per-image stds.
test_mean = 0.
test_std = 0.
for images, _ in test_loader:
    # the batch size comes from the tensor itself because the last batch can
    # be smaller than the others
    n_batch, n_channels = images.size(0), images.size(1)
    pixels = images.view(n_batch, n_channels, -1)
    test_mean += pixels.mean(2).sum(0)
    test_std += pixels.std(2).sum(0)

n_test_images = len(test_loader.dataset)
test_mean = test_mean / n_test_images
test_std = test_std / n_test_images
print(test_mean)
print(test_std)

tensor([0.4731, 0.3206, 0.3182])
tensor([0.1970, 0.1306, 0.1276])


In [50]:
# Normalisation statistics keyed by dataset name; each entry holds a 3-tuple
# of means and a 3-tuple of standard deviations.
# NOTE(review): the 'potsdam_irrg' and 'vaihingen' entries equal values shown
# in this notebook's saved statistics outputs - confirm which run/dataset
# produced them before reusing.
norms = dict(
    imagenet=dict(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    potsdam=dict(mean=(0.349, 0.371, 0.347), std=(0.1196, 0.1164, 0.1197)),
    potsdam_irrg=dict(mean=(0.3823, 0.3625, 0.3364), std=(0.1172, 0.1167, 0.1203)),
    floodnet=dict(mean=(0.4159, 0.4499, 0.3466), std=(0.1297, 0.1197, 0.1304)),
    vaihingen=dict(mean=(0.4731, 0.3206, 0.3182), std=(0.1970, 0.1306, 0.1276)),
)