## Imports

In [1]:
import numpy as np
import torch

## Configuration

In [2]:
from omegaconf import OmegaConf

# Single source of truth for every tunable value used by the cells below.
CONFIG = OmegaConf.create({
    # Dataset
    "dataset": "CIFAR-10-LT",
    "ir_ratio": 100,  # forwarded to IMBALANCECIFAR10(ir_ratio=...)
    "num_classes": 10,
    # Transform specs are stored as strings and turned into transforms by
    # initialize_transforms().
    "train_transform_reprs": [
        "RandomHorizontalFlip()",
        "RandomCrop(32, padding=4)",
        "ToTensor()",
        "Cutout(n_holes=1, length=16)",
        "RandomApply([ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8)",
        "RandomGrayscale(p=0.2)",
        "RandomApply([GaussianBlur(kernel_size=3, sigma=[.1, 2.])], p=0.5)",
    ],
    "valid_transform_reprs": ["ToTensor()"],

    # Mean and std for normalization (one value per image channel)
    "normalize_mean": [0.4914, 0.4822, 0.4465],
    "normalize_std": [0.2023, 0.1994, 0.2010],

    # DataLoader
    "batch_size": 128,
    "num_workers": 8,
    "enable_pin_memory": True,

    # Model
    "model": "ResNet-32-akamaster",
    "dropout_rate": 0.3,

    # OPeNz (arguments forwarded to replace_with_pure_noise)
    # "pure_noise_image_size" used to be listed twice in this section; a dict
    # literal silently keeps only the last occurrence, so it is declared once.
    "pure_noise_image_size": 32,
    "delta": 0.333333,
    "pure_noise_mean": [0.4914, 0.4822, 0.4465],
    "pure_noise_std": [0.2023, 0.1994, 0.2010],

    # BN (name of a NoiseBnOption member)
    "noise_bn_option": "DARBN",

    # Checkpoint
    "checkpoint_filename": "ResNet__epoch_199.pt",
})

In [3]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Download checkpoint

In [4]:
import os
import gdown
from storage import CHECKPOINT_URLS

# Local cache location for the pretrained weights; the download is skipped
# when the file is already present.
checkpoint_filepath = f"checkpoints/{CONFIG.checkpoint_filename}"
if not os.path.exists(checkpoint_filepath):
    # On a fresh checkout the target directory may not exist yet, and the
    # download cannot write into a missing directory, so create it first.
    os.makedirs(os.path.dirname(checkpoint_filepath), exist_ok=True)
    gdown.download(CHECKPOINT_URLS[CONFIG.checkpoint_filename], checkpoint_filepath, quiet=False)

## Set Random Seed

In [5]:
# Seed NumPy's and PyTorch's global generators with one shared value so the
# sampling below is repeatable.
seed_value = 0
np.random.seed(seed_value)
torch.manual_seed(seed_value)
# torch.use_deterministic_algorithms(True)

<torch._C.Generator at 0x7f34facc5550>

## Initialize Model

In [6]:
from initializers import initialize_model, InputNormalize
from models.noise_bn_option import NoiseBnOption

# Build the network described by CONFIG and place it on the selected device.
net = initialize_model(
    model_name=CONFIG.model,
    num_classes=CONFIG.num_classes,
    noise_bn_option=NoiseBnOption[CONFIG.noise_bn_option],
    dropout_rate=CONFIG.dropout_rate,
).to(device)

# Normalization module built from the configured mean and std (in that order),
# both moved to the same device as the network.
normalizer = InputNormalize(
    *(
        torch.Tensor(statistic).to(device)
        for statistic in (CONFIG.normalize_mean, CONFIG.normalize_std)
    )
).to(device)

In [7]:
from checkpointing import load_checkpoint

# Load the downloaded checkpoint file for `net`. No optimizer exists in this
# notebook, hence optimizer=None. The return value is ignored, so this
# presumably restores the weights in place -- TODO confirm in checkpointing.py.
load_checkpoint(net, optimizer=None, checkpoint_filepath=checkpoint_filepath)

In [8]:
# Put the network in evaluation mode.
# NOTE(review): the gradient-collection cell further down calls net.train()
# before its loop, so this setting does not apply to that measurement.
net = net.eval()

## Load Dataset

In [9]:
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision.datasets import CIFAR10, CIFAR100
from datasets.imbalanced_cifar import IMBALANCECIFAR10, IMBALANCECIFAR100
from datasets.sampling import count_class_frequency, compute_class_weights_on_effective_num_samples, compute_sample_weights
from models.noise_bn_option import NoiseBnOption

from datasets.cifar10lt import build_train_dataset, build_valid_dataset
from initializers import initialize_transforms

DATA_ROOT = "./data"
NUM_CLASSES = CONFIG.num_classes

# Training split: the imbalanced CIFAR-10 dataset with the transforms named in CONFIG.
train_transform = initialize_transforms(CONFIG.train_transform_reprs)
train_dataset = IMBALANCECIFAR10(
    root=DATA_ROOT,
    train=True,
    transform=train_transform,
    download=True,
    ir_ratio=CONFIG.ir_ratio,
)

# Class weights are the reciprocal of each class's frequency; they are then
# turned into one sampling weight per training example.
class_frequency = count_class_frequency(train_dataset.targets, NUM_CLASSES)
class_weights = 1. / class_frequency
sample_weights = compute_sample_weights(train_dataset.targets, class_weights)

# Draw (with replacement) as many examples per pass as the dataset contains.
# See https://stackoverflow.com/a/67802529
num_samples = len(train_dataset)
train_sampler = WeightedRandomSampler(sample_weights, num_samples, replacement=True)

# The sampler decides the order, so shuffling stays off.
train_oversampling_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG.batch_size,
    sampler=train_sampler,
    shuffle=False,
    num_workers=CONFIG.num_workers,
    pin_memory=CONFIG.enable_pin_memory,
)
train_loader = train_oversampling_loader

# Validation split: the standard CIFAR-10 test set with the validation transforms from CONFIG.
valid_transform = initialize_transforms(CONFIG.valid_transform_reprs)
valid_dataset = CIFAR10(
    root=DATA_ROOT,
    train=False,
    download=True,
    transform=valid_transform,
)
valid_loader = DataLoader(
    valid_dataset,
    pin_memory=CONFIG.enable_pin_memory,
    num_workers=CONFIG.num_workers,
    batch_size=CONFIG.batch_size,
)

Files already downloaded and verified
Files already downloaded and verified


## Setup Loss Function

In [10]:
import torch.nn as nn

# Cross-entropy averaged over the examples of a minibatch.
criterion = nn.CrossEntropyLoss(
    reduction="mean",
)

## Compute Gradients

In [11]:
def get_gradients(net):
    """Return all parameter gradients of ``net`` as one flat CPU tensor.

    Gradients are concatenated in ``net.parameters()`` order. A parameter
    whose ``.grad`` is ``None`` (for example because it did not take part in
    the last backward pass) contributes zeros, so the result always has one
    entry per parameter element and vectors from different calls line up.

    Args:
        net: Module whose ``.grad`` fields were filled by a backward pass.

    Returns:
        1-D tensor on the CPU; empty when ``net`` has no parameters.
    """
    flat_grads = []
    for param in net.parameters():
        if param.grad is None:
            # No gradient recorded: treat it as zero instead of crashing.
            flat_grads.append(
                torch.zeros(param.numel(), dtype=param.dtype, device=param.device)
            )
        else:
            flat_grads.append(torch.flatten(param.grad))

    if not flat_grads:
        # torch.cat rejects an empty list.
        return torch.empty(0)

    grads = torch.cat(flat_grads).cpu()
    return grads

In [12]:
# TODO: Setup train dataset with correct transforms
# Tensors passed to replace_with_pure_noise: per-class sample counts plus the
# configured pure-noise mean and std, all moved to the compute device.
num_samples_per_class, pure_noise_mean, pure_noise_std = (
    torch.Tensor(values).to(device)
    for values in (class_frequency, CONFIG.pure_noise_mean, CONFIG.pure_noise_std)
)

In [13]:
from replace_with_pure_noise import replace_with_pure_noise

# When True, replace_with_pure_noise is applied to every minibatch and its
# result is handed to the network as `noise_mask`; when False the network
# receives noise_mask=None.
ENABLE_OPEN = False

# Gradients are measured in training mode.
# NOTE(review): this leaves `net` in training mode after the cell has run.
net.train()
train_labels = []
train_gradients = []
train_batch_size = []
for inputs, labels in train_loader:
    inputs = inputs.float().to(device)
    labels = labels.to(device)

    inputs = normalizer(inputs)

    # Start from clean gradients so each row holds exactly one minibatch.
    net.zero_grad()
    noise_mask = replace_with_pure_noise(
        images=inputs,
        targets=labels,
        delta=CONFIG.delta,
        num_samples_per_class=num_samples_per_class,
        dataset_mean=pure_noise_mean,
        dataset_std=pure_noise_std,
        image_size=CONFIG.pure_noise_image_size,
    ) if ENABLE_OPEN else None
    outputs = net(inputs, noise_mask=noise_mask)
    losses = criterion(outputs, labels)
    losses.backward()

    gradients = get_gradients(net)

    train_labels.extend(labels.cpu().detach().tolist())
    # Convert straight to a float64 NumPy row. Widening to float64 is exact,
    # so the values match what a round-trip through a Python list would give,
    # without building a Python float object per parameter.
    train_gradients.append(gradients.detach().cpu().to(torch.float64).numpy())
    train_batch_size.append(len(labels))

# One entry per sample for the labels; one row / entry per minibatch for the
# gradients and batch sizes.
train_labels = np.array(train_labels)
train_gradients = np.array(train_gradients)
train_batch_size = np.array(train_batch_size)

In [14]:
# Sanity check: one row per minibatch, one column per flattened gradient entry.
train_gradients.shape

(97, 464154)

In [15]:
# L2 norm of each minibatch's flattened gradient.
train_magnitudes = np.linalg.norm(train_gradients, axis=1, ord=2)
# Batch-size-weighted mean of those norms. The weights are normalized by the
# number of samples actually processed (the sum of the batch sizes), the same
# denominator compute_mean_gradient uses, so the result stays a true weighted
# mean even if the loader yields a different number of samples than
# len(train_dataset).
mean_gradient_magnitude = np.sum(train_magnitudes * train_batch_size) / np.sum(train_batch_size)
print(f"Mean gradient magnitude: {mean_gradient_magnitude}")

Mean gradient magnitude: 4.990390937790069


In [16]:
def compute_mean_gradient(train_gradients, train_batch_size):
    """Return the batch-size-weighted mean of per-minibatch gradients.

    Args:
        train_gradients: Array-like of shape (num_batches, num_params); row i
            is the gradient vector of minibatch i.
        train_batch_size: Array-like of shape (num_batches,); entry i is the
            number of samples in minibatch i and acts as the weight of row i.

    Returns:
        float64 array of shape (num_params,) equal to
        ``sum_i(size_i * gradient_i) / sum_i(size_i)``.
    """
    gradients = np.asarray(train_gradients, dtype=np.float64)
    batch_sizes = np.asarray(train_batch_size, dtype=np.float64)

    # A vector-matrix product computes the weighted sum directly, without
    # materializing a full-size weight matrix and a full-size product array.
    mean_gradient = (batch_sizes @ gradients) / batch_sizes.sum()

    return mean_gradient

In [17]:
# https://stackoverflow.com/a/13849249
def unit_vector(vector):
    """Scale ``vector`` to Euclidean length one and return the result."""
    length = np.linalg.norm(vector)
    return np.divide(vector, length)

def angle_between(v1, v2):
    """Compute the angle, in radians, separating vectors 'v1' and 'v2'.

    Both vectors are normalized first; their dot product is clipped to
    [-1, 1] before the arccosine so rounding error cannot push it out of the
    function's domain.

            >>> angle_between((1, 0, 0), (0, 1, 0))
            1.5707963267948966
            >>> angle_between((1, 0, 0), (1, 0, 0))
            0.0
            >>> angle_between((1, 0, 0), (-1, 0, 0))
            3.141592653589793
    """
    cosine = np.dot(unit_vector(v1), unit_vector(v2))
    return np.arccos(np.clip(cosine, -1.0, 1.0))

In [18]:
# Angle (radians) between every minibatch gradient and the weighted mean gradient.
mean_gradient = compute_mean_gradient(train_gradients, train_batch_size)
angles = [
    angle_between(batch_gradient, mean_gradient)
    for batch_gradient in train_gradients
]

In [19]:
# Variance of the per-minibatch angles, with every minibatch counted equally.
directional_variance = np.asarray(angles).var()
print(f"Directional variance: {directional_variance}")

Directional variance: 0.0017780182707009143
