In [1]:
import numpy as np
import src.config as cfg
from src.data import get_loader
import torch
from tqdm import tqdm

In [2]:
loader = get_loader('mnist', 10, True)
for data, target in loader:
    break
target

[tensor([8, 3, 2, 8, 0, 4, 6, 8, 4, 6]),
 tensor([1, 0, 1, 1, 1, 1, 1, 1, 1, 1]),
 tensor([1, 0, 0, 1, 0, 0, 1, 1, 0, 1])]

In [3]:
loader = get_loader('cifar_100', 10, True)
for data, target in loader:
    break
target

[tensor([ 0, 81, 73, 88, 26, 53, 30, 53, 86, 55]),
 tensor([ 4, 19,  1,  8, 13,  4,  0,  4,  5,  0])]

In [2]:
loader = get_loader('celeba', 10, 'train')
for data, target in loader:
    break
target

[tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0]),
 tensor([0, 1, 0, 0, 0, 0, 0, 0, 1, 0]),
 tensor([1, 1, 0, 0, 0, 1, 0, 0, 1, 1]),
 tensor([0, 0, 0, 1, 0, 1, 0, 0, 0, 0]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 tensor([1, 0, 1, 0, 0, 0, 0, 0, 1, 0]),
 tensor([0, 0, 0, 1, 0, 1, 0, 0, 0, 0]),
 tensor([0, 0, 0, 0, 0, 0, 1, 0, 0, 0]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 tensor([0, 0, 0, 0, 1, 0, 0, 0, 0, 0]),
 tensor([0, 1, 0, 0, 1, 0, 0, 0, 1, 0]),
 tensor([0, 1, 0, 1, 0, 0, 0, 0, 0, 0]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 tensor([0, 0, 0, 1, 0, 0, 0, 0, 0, 0]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 tensor([0, 1, 1, 0, 0, 0, 0, 1, 0, 1]),
 tensor([0, 0, 1, 0, 1, 0, 1, 0, 0, 1]),
 tensor([0, 0, 0, 1, 0, 1, 1, 0, 0, 0]),
 tensor([1, 1, 1, 0, 1, 0, 1, 0, 0, 1]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 tensor([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 tensor([1, 1, 1

In [None]:
datasets = ['mnist', 'cifar_100', 'celeba']
for dataset in datasets:
    print(dataset)
    for train, tr in zip([True, False], ['train', 'valid']):
        loader = get_loader(
            dataset,
            1000,
            train if dataset in ['mnist', 'cifar_100'] else tr,
            False
        )
        num_samples = 0
        for data, label in loader:
            num_samples += data.shape[0]
        print('Num Samples: {}'.format(num_samples))
        print('\tTrain' if train else '\tValid', data.shape, label.shape, '\n')
            

mnist


Num Samples: 60000
	Train torch.Size([1000, 1, 28, 28]) torch.Size([1000]) 



Num Samples: 10000
	Valid torch.Size([1000, 1, 28, 28]) torch.Size([1000]) 

cifar_100


Num Samples: 50000
	Train torch.Size([1000, 3, 32, 32]) torch.Size([1000]) 



Num Samples: 10000
	Valid torch.Size([1000, 3, 32, 32]) torch.Size([1000]) 

celeba


Num Samples: 162770
	Train torch.Size([770, 3, 218, 178]) torch.Size([770, 40]) 



Num Samples: 19867
	Valid torch.Size([867, 3, 218, 178]) torch.Size([867, 40]) 



In [11]:
dataset = 'mnist'
loader = get_loader(dataset, cfg.num_trains[dataset], True)
for data, target in loader:
    continue
print(data.shape)
print(data.mean(), data.std())

torch.Size([60000, 1, 28, 28])
tensor(0.1307) tensor(0.3081)


In [6]:
dataset, num_channels = 'cifar_100', 3
loader = get_loader(dataset, cfg.num_trains[dataset], True)
for data, target in loader:
    continue
print(data.shape)
for nc in range(num_channels):
    print(data[:, nc, :, :].mean(), data[:, nc, :, :].std())

torch.Size([50000, 3, 32, 32])
tensor(0.5071) tensor(0.2673)
tensor(0.4865) tensor(0.2564)


tensor(0.4409) tensor(0.2762)


In [7]:
dataset, num_channels = 'cifar_100', 3
loader = get_loader(dataset, 10000, True)
sum_channels = np.zeros((num_channels,))
for data, target in tqdm(loader, total=len(loader)):
    for nc in range(num_channels):
        sum_channels[nc] += data[:, nc, :, :].sum()
mean_channels = sum_channels/(
    cfg.num_trains[dataset] * cfg.input_sizes[dataset][1] * cfg.input_sizes[dataset][2])
abs_diff_channeles = np.zeros((num_channels,))
for data, target in tqdm(loader, total=len(loader)):
    for nc in range(num_channels):
        abs_diff_channeles[nc] += np.power(data[:, nc, :, :] - mean_channels[nc], 2).sum()
std_channels = np.power(abs_diff_channeles/(
    cfg.num_trains[dataset] * cfg.input_sizes[dataset][1] * cfg.input_sizes[dataset][2]), 0.5)
print("\nchannel means: {}\nchannel stds: {}".format(mean_channels, std_channels))

  0%|          | 0/5 [00:00<?, ?it/s]

 20%|██        | 1/5 [00:01<00:04,  1.02s/it]

 40%|████      | 2/5 [00:02<00:03,  1.01s/it]

 60%|██████    | 3/5 [00:03<00:02,  1.01s/it]

 80%|████████  | 4/5 [00:04<00:01,  1.02s/it]

100%|██████████| 5/5 [00:05<00:00,  1.01s/it]

100%|██████████| 5/5 [00:05<00:00,  1.01s/it]


  0%|          | 0/5 [00:00<?, ?it/s]

 20%|██        | 1/5 [00:01<00:06,  1.57s/it]

 40%|████      | 2/5 [00:03<00:04,  1.62s/it]

 60%|██████    | 3/5 [00:05<00:03,  1.67s/it]

 80%|████████  | 4/5 [00:06<00:01,  1.67s/it]

100%|██████████| 5/5 [00:08<00:00,  1.69s/it]

100%|██████████| 5/5 [00:08<00:00,  1.70s/it]


channel means: [0.50707516 0.48654891 0.44091781]
channel stds: [0.26733406 0.25643733 0.27614959]





In [8]:
dataset, num_channels = 'celeba', 3
loader = get_loader(dataset, 10000, 'train')
sum_channels = np.zeros((num_channels,))
for data, target in tqdm(loader, total=len(loader)):
    for nc in range(num_channels):
        sum_channels[nc] += data[:, nc, :, :].sum()
mean_channels = sum_channels/(
    cfg.num_trains[dataset] * cfg.input_sizes[dataset][1] * cfg.input_sizes[dataset][2])
abs_diff_channeles = np.zeros((num_channels,))
for data, target in tqdm(loader, total=len(loader)):
    for nc in range(num_channels):
        abs_diff_channeles[nc] += np.power(data[:, nc, :, :] - mean_channels[nc], 2).sum()
std_channels = np.power(abs_diff_channeles/(
    cfg.num_trains[dataset] * cfg.input_sizes[dataset][1] * cfg.input_sizes[dataset][2]), 0.5)
print("\nchannel means: {}\nchannel stds: {}".format(mean_channels, std_channels))

  0%|          | 0/17 [00:00<?, ?it/s]

  6%|▌         | 1/17 [00:13<03:39, 13.70s/it]

 12%|█▏        | 2/17 [00:27<03:26, 13.80s/it]

 18%|█▊        | 3/17 [00:42<03:16, 14.04s/it]

 24%|██▎       | 4/17 [00:56<03:01, 13.98s/it]

 29%|██▉       | 5/17 [01:10<02:48, 14.00s/it]

 35%|███▌      | 6/17 [01:23<02:30, 13.66s/it]

 41%|████      | 7/17 [01:35<02:13, 13.34s/it]

 47%|████▋     | 8/17 [01:49<02:02, 13.57s/it]

 53%|█████▎    | 9/17 [02:03<01:48, 13.60s/it]

 59%|█████▉    | 10/17 [02:17<01:35, 13.61s/it]

 65%|██████▍   | 11/17 [02:29<01:20, 13.38s/it]

 71%|███████   | 12/17 [02:42<01:05, 13.11s/it]

 76%|███████▋  | 13/17 [02:55<00:51, 12.96s/it]

 82%|████████▏ | 14/17 [03:08<00:39, 13.21s/it]

 88%|████████▊ | 15/17 [03:22<00:26, 13.30s/it]

 94%|█████████▍| 16/17 [03:35<00:13, 13.25s/it]

100%|██████████| 17/17 [03:38<00:00, 10.30s/it]

100%|██████████| 17/17 [03:38<00:00, 12.87s/it]


  0%|          | 0/17 [00:00<?, ?it/s]

  6%|▌         | 1/17 [00:29<07:45, 29.10s/it]

 12%|█▏        | 2/17 [00:56<07:10, 28.68s/it]

 18%|█▊        | 3/17 [01:27<06:50, 29.31s/it]

 24%|██▎       | 4/17 [01:55<06:16, 28.97s/it]

 29%|██▉       | 5/17 [02:25<05:49, 29.09s/it]

 35%|███▌      | 6/17 [02:55<05:23, 29.37s/it]

 41%|████      | 7/17 [03:24<04:53, 29.36s/it]

 47%|████▋     | 8/17 [03:52<04:19, 28.89s/it]

 53%|█████▎    | 9/17 [04:20<03:49, 28.64s/it]

 59%|█████▉    | 10/17 [04:48<03:20, 28.63s/it]

 65%|██████▍   | 11/17 [05:16<02:50, 28.39s/it]

 71%|███████   | 12/17 [05:44<02:21, 28.24s/it]

 76%|███████▋  | 13/17 [06:12<01:52, 28.20s/it]

 82%|████████▏ | 14/17 [06:41<01:24, 28.23s/it]

 88%|████████▊ | 15/17 [07:09<00:56, 28.17s/it]

 94%|█████████▍| 16/17 [07:39<00:28, 28.73s/it]

100%|██████████| 17/17 [07:47<00:00, 22.59s/it]

100%|██████████| 17/17 [07:47<00:00, 27.49s/it]


channel means: [0.50634537 0.42580516 0.3831881 ]
channel stds: [0.31064245 0.29035588 0.28972666]





In [1]:
from common.data import get_loader
from common.cifar_100 import CIFAR100 as cifar

loader = get_loader('cifar_100', 10, True)
for data, target in loader:
    print(target, cifar.get_coarse_class_ids(target))
    break

tensor([80, 77,  7, 14, 95, 12, 71, 47, 15, 45]) [16 13  7  7  0  9 10 17 11 13]


In [7]:
cifar._idx_to_fine_class[71]

'sea'

In [10]:
cifar._label_encoder.inverse_transform([10])

20