## On Measuring Excess Capacity in Neural Networks

Florian Graf, Sebastian Zeng, Bastian Rieck, Marc Niethammer and Roland Kwitt

### Dataset Norm

This notebook computes the norm of the training data for CIFAR10, CIFAR100 and Tiny-ImageNet-200.

In [12]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
import sys
import os

import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import CIFAR100, CIFAR10

# Put the parent of the current working directory on the import path; the
# `utils` imports that follow are presumably resolved from there.
PARENT_DIR = os.path.abspath(os.path.join('..'))
if PARENT_DIR not in sys.path:
    # Append the directory computed above (the previous code referenced an
    # undefined name here, raising NameError on a fresh kernel).
    sys.path.append(PARENT_DIR)

from utils.misc import get_ds_and_dl
from utils.tiny import TinyImageNetDataset

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
# Per-dataset settings, keyed by dataset name; every dataset currently reads
# from the same relative data directory. Each entry gets its own dict so the
# settings can be changed independently.
dataset_info = {
    name: {'data_dir': '../data'}
    for name in ('cifar10', 'cifar100', 'tiny-imagenet-200')
}

In [18]:
def compute_dataset_norm(dl):
    """Compute the L2 norm of all images yielded by a data loader.

    The norm (square root of the sum of all squared entries) is computed in
    two ways so the results can be cross-checked: once from per-sample
    squared norms and once from per-batch squared norms.

    Args:
        dl: Iterable yielding ``(images, labels)`` pairs. ``images`` must be
            a tensor with at least two dimensions whose first dimension
            indexes the samples of the batch. Labels are ignored.

    Returns:
        Tuple ``(norm_from_samples, norm_from_batches)``. Both are 0.0 when
        ``dl`` yields no batches.
    """
    sample_sq_norms = []
    total_sq = 0.0
    for img, _ in dl:
        # Square and sum directly in float64 instead of taking a float32 norm
        # and squaring it again; this avoids a needless sqrt and limits
        # rounding error when summing over a whole dataset.
        squared = img.detach().flatten(start_dim=1).double().pow(2)
        sample_sq_norms.extend(squared.sum(dim=1).tolist())
        # Accumulate as a Python float so no NumPy function is ever applied
        # to a torch tensor.
        total_sq += squared.sum().item()
    return np.sqrt(np.sum(sample_sq_norms)), float(np.sqrt(total_sq))

In [20]:
# Print the training-set norm for every dataset listed in `dataset_info`.
for ds_name in dataset_info:
    info = dataset_info[ds_name]
    # get_ds_and_dl is a project helper; of its five return values only the
    # training loader is used below.
    ds_trn, ds_tst, dl_trn, dl_tst, num_classes = get_ds_and_dl(
        ds_name, info['data_dir'],
        batch_size=128, num_workers=4, limit_to=-1, randomize=False,
    )

    # Two values are returned by compute_dataset_norm; only the first is shown.
    v0, v1 = compute_dataset_norm(dl_trn)
    print('{:20s}: ||X||={:.2f}'.format(ds_name, v0))

Files already downloaded and verified
Files already downloaded and verified
cifar10             : ||X||=12411.64
Files already downloaded and verified
Files already downloaded and verified
cifar100            : ||X||=12390.75
tiny-imagenet-200   : ||X||=42840.31
