# Variance of Gradients

Variance of Gradients (VoG) is a qualitative metric to measure the "difficulty" of a given sample.

### Imports

In [None]:
import argparse
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import accuracy_score

%matplotlib inline

Set up log directory.

In [None]:
# Relative path of the log directory (one level above the working directory).
log_dir = '../logs'

### Helper functions

In [None]:
def binarize_labels(labels):
    """Binarize ``labels`` with a freshly fitted ``LabelBinarizer``.

    If the encoder produces a single column, the complementary column
    ``1 - column`` is placed in front of it so the result has two columns.
    Otherwise the encoder output is returned unchanged.
    """
    encoded = LabelBinarizer().fit_transform(labels)
    n_columns = encoded.shape[1]
    if n_columns != 1:
        return encoded
    complement = 1 - encoded
    return np.hstack((complement, encoded))

In [None]:
def init_weights(m):
    """Initialise an ``nn.Linear`` module in place; leave other modules alone.

    Weights get Xavier-uniform initialisation and the bias, when the layer
    has one, is set to zero. The function has the signature expected by
    ``nn.Module.apply`` and returns ``None``.

    Args:
        m: Any ``nn.Module``; only ``nn.Linear`` instances are modified.
    """
    if not isinstance(m, nn.Linear):
        return
    torch.nn.init.xavier_uniform_(m.weight)
    # A layer built with ``bias=False`` stores ``bias`` as None, so guard
    # before touching it.
    if m.bias is not None:
        torch.nn.init.zeros_(m.bias)

## Create toy dataset

Create isotropic Gaussian clusters.

In [None]:
# Toy dataset parameters
n_samples, n_features = 500, 2  # number of points, dimensions per point
n_classes = 3                   # number of clusters
cluster_std = 2                 # standard deviation of each cluster
random_state = 212              # seed for dataset generation

# Fraction of the samples held out for testing
test_size = 0.1

In [None]:
# Draw the points (x) and their integer cluster labels (y)
x, y = make_blobs(
    n_samples,
    n_features,
    centers=n_classes,
    cluster_std=cluster_std,
    shuffle=True,
    random_state=random_state,
)

x.shape, y.shape

Plot dataset.

In [None]:
# Scatter the first two feature columns, coloured by cluster label
_, ax = plt.subplots()
first_feature, second_feature = x[:, 0], x[:, 1]
sc = ax.scatter(first_feature, second_feature, c=y)

# One legend entry per distinct colour (i.e. per label)
handles, legend_labels = sc.legend_elements()
ax.legend(handles, legend_labels)
plt.show()

Binarize the labels.

In [None]:
# Turn the integer labels into a binary indicator matrix (one column per class).
y_bin = binarize_labels(y)

Split the data into train and test subsets.

In [None]:
# Seed the split with the same random_state used to generate the dataset,
# so re-running the notebook yields the same train/test partition.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y_bin,
    test_size=test_size,
    random_state=random_state,
)

x_train.shape, x_test.shape, y_train.shape, y_test.shape

Finally, convert the arrays to `torch.FloatTensor`s.

In [None]:
# Copy each split into a float32 tensor
x_train, x_test, y_train, y_test = (
    torch.tensor(split, dtype=torch.float32)
    for split in (x_train, x_test, y_train, y_test)
)

x_train.shape, x_test.shape, y_train.shape, y_test.shape

## FFN classifier

### Define a simple FFN module

In [None]:
class FFN(nn.Module):
    """Two stacked linear layers for point classification.

    ``forward`` returns raw, unnormalised class scores (logits). No softmax
    is applied inside the model because ``nn.CrossEntropyLoss`` performs
    log-softmax internally; normalising here as well would squash the
    scores twice and flatten the gradients. Use
    ``torch.softmax(logits, dim=1)`` to obtain probabilities and
    ``logits.argmax(dim=1)`` to obtain predicted classes.

    NOTE: there is no activation between the two linear layers, so the
    network as a whole is an affine function of its input.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of the hidden layer.
        n_classes: Number of output scores per sample.
    """

    def __init__(self, input_dim, hidden_dim, n_classes):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.Linear(hidden_dim, n_classes),
        )

    def forward(self, x):
        """Return logits with ``n_classes`` entries along the last dimension."""
        return self.model(x)

In [None]:
# Hidden layer is four times as wide as the input
hidden_dim = 4 * n_features

model = FFN(
    input_dim=n_features,
    hidden_dim=hidden_dim,
    n_classes=n_classes,
)

### Train FFN

In [None]:
# Training hyperparameters
n_epochs, lr = 15, 0.001

# Loss function and plain SGD over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [None]:
# Re-initialise every layer in place so each run of this cell starts from
# freshly initialised weights.
model.apply(init_weights)

for epoch in range(n_epochs):
    ### TRAINING ###
    # Prepare model and optimizer for training
    model.train()
    optimizer.zero_grad()

    # Forward pass over the whole training set at once (no mini-batches)
    y_pred = model(x_train)

    # Calculate loss and accuracy. accuracy_score takes (y_true, y_pred);
    # the class index on each side is the argmax over the class columns.
    loss = criterion(y_pred, y_train)
    acc = accuracy_score(
        y_train.detach().numpy().argmax(axis=1),
        y_pred.detach().numpy().argmax(axis=1),
    )

    # Print updates (metrics are from before this epoch's parameter update)
    print(f"epoch {epoch+1}/{n_epochs}\tloss : {loss.item()}\tacc : {acc}")

    # Update model parameters
    loss.backward()
    optimizer.step()

    ### GET GRADIENTS ###
    # model.eval()
    # class_pred = torch.argmax(y_pred, dim=1)

In [None]:
# First five model outputs from the last epoch next to their targets
y_pred[:5], y_train[:5]

### Visualize samples with highest and lowest VoG