In [1]:
import os
import torch
from torchvision import datasets, transforms
from utils import loaders_by_classes, filter_loaders
from classNet import ConvNet
from target import fast_normalization_method

In [2]:
model = torch.load('./models/all_class.pth', weights_only=False)
model.eval()
model

ConvNet(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_drop): Dropout2d(p=0.25, inplace=False)
  (fc1): Linear(in_features=3136, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert images to PyTorch tensors
    transforms.Normalize((0,), (1,))  # Normalize images
])

test_set = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

batch_size = 64
num_workers = os.cpu_count()
test_loaders = loaders_by_classes(test_set, batch_size=batch_size, shuffle=True, num_workers=num_workers) 

In [4]:
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

device

device(type='mps')

In [5]:
class_name = '0 - zero'

In [6]:
# Calcul des gradients pour Loss_1

filtered_loader = filter_loaders(test_loaders, class_name, batch_size, shuffle=True, num_workers=num_workers)

In [7]:

last_layer

NameError: name 'last_layer' is not defined

In [8]:
import torch

def get_layer_grad(model, layer, input, target, criterion):
    """
    Returns the gradient of the criterion with respect to the input of a specified layer.
    
    Args:
        model (torch.nn.Module): The neural network model.
        layer (torch.nn.Module): The layer at which to get the gradient (e.g., the last layer).
        input (torch.Tensor): The input data for the model.
        target (torch.Tensor): The true target for the input data.
        criterion (callable): The loss function (e.g., torch.nn.CrossEntropyLoss).

    Returns:
        torch.Tensor: The gradient of the loss with respect to the input of the specified layer.
    """
    layer_input_grad = None

    # Hook to capture the gradient of the input at the specified layer
    def backward_hook(module, grad_input, grad_output):
        nonlocal layer_input_grad
        layer_input_grad = grad_input[0]  # The gradient w.r.t. the input activations (128 dimensions)
        
    # Register the backward hook on the layer
    hook = layer.register_full_backward_hook(backward_hook)

    # Perform a forward pass and calculate the loss
    output = model(input)
    loss = criterion(output, target)

    # Perform a backward pass to calculate gradients
    model.zero_grad()  # Clear previous gradients
    loss.backward()

    # Remove the hook
    hook.remove()

    if layer_input_grad is None:
        raise RuntimeError("Layer input gradient is None; ensure the layer has `requires_grad=True`.")

    # Return the gradient of the layer's input (128 dimensions)
    return layer_input_grad.clone()

In [9]:
input, label = next(iter(filtered_loader))

In [11]:
input, label = input.to(device), label.to(device)
last_layer_name, last_layer = list(model.named_children())[-1]
get_layer_grad(model, last_layer, input, label, torch.nn.CrossEntropyLoss()).size()

torch.Size([64, 128])

In [18]:
_, layer = list(model.named_children())[-1]
layer.requires_grad_()

Linear(in_features=128, out_features=10, bias=True)

In [12]:
import torch

def get_target_grad(model, input):
    class_num = 0

    """
    Returns the gradient of the loss with respect to the input of the last layer.
    The target label is dynamically generated from the output of the model after the last layer.

    Args:
        model (torch.nn.Module): The neural network model.
        input (torch.Tensor): The input data for the model.

    Returns:
        torch.Tensor: The gradient of the loss with respect to the input of the last layer.
    """

    # Retrieve the last layer of the model
    _, layer = list(model.named_children())[-1]
    layer_input_grad = None

    ll_output = None

    # Hook to capture the output of the last layer
    def forward_hook_last(module, inp, output):
        nonlocal ll_output
        ll_output = output

    # Hook to capture the gradient of the input at the last layer
    def backward_hook(module, grad_input, grad_output):
        nonlocal layer_input_grad
        layer_input_grad = grad_input[0]  # Gradient w.r.t. the input activations

    # Register the forward hook on the last layer to capture its output
    fwd_hook = layer.register_forward_hook(forward_hook_last)
    bwd_hook = layer.register_full_backward_hook(backward_hook)

    # Perform a forward pass to calculate the model output and trigger hooks
    model.zero_grad()
    _ = model(input)

    # Dynamically generate the target based on the last layer output
    output_excluded = torch.cat([ll_output[:, :class_num], ll_output[:, class_num+1:]], dim=1)
    mean_excluded = torch.mean(output_excluded, dim=1, keepdim=True)
    target = output_excluded - mean_excluded

    # Calculate the loss using the dynamically generated target
    criterion = torch.nn.MSELoss()
    loss = criterion(output_excluded, target)  # Convert target to float if necessary for MSELoss

    # Perform a backward pass to compute gradients
    loss.backward()

    # Remove hooks after backward pass
    fwd_hook.remove()
    bwd_hook.remove()

    if layer_input_grad is None:
        raise RuntimeError("Layer input gradient is None; ensure the layer has `requires_grad=True`.")

    # Return the gradient of the layer's input
    return layer_input_grad.clone()

get_target_grad(model, input)

RuntimeError: Layer input gradient is None; ensure the layer has `requires_grad=True`.

In [13]:
get_target_grad(model, input)

RuntimeError: Layer input gradient is None; ensure the layer has `requires_grad=True`.

In [79]:
def loss_function_1(model, input, target):
    output = model(input)
    loss = Loss_1(output, target)
    return loss


def loss_function_2(model, input, target):
    """
    Calculates the Mean Squared Error (MSE) between the output of a specified layer and a target.

    Args:
        model (torch.nn.Module): The neural network model.
        input (torch.Tensor): The input data for the model.
        target (torch.Tensor): The target values for the layer output.

    Returns:
        torch.Tensor: The computed MSE loss.
    """
    _, layer = list(model.named_children())[-1]

    layer_output = None

    # Define a forward hook to capture the output of the specified layer
    def forward_hook(module, inp, output):
        nonlocal layer_output
        layer_output = output
        
    # Register the hook on the layer
    hook = layer.register_forward_hook(forward_hook)

    # Forward pass through the model
    model(input)

    # Remove the hook after forward pass
    hook.remove()

    if layer_output is None:
        raise RuntimeError("Layer output is None; ensure the layer has been reached during forward pass.")

    # Calculate MSE loss between the layer output and the target
    loss = torch.mean(layer_output, target)

    return loss

torch.Size([64, 128])

In [6]:
class_name = '0 - zero'
target, output = fast_normalization_method(class_name, model, test_loaders, device)

In [21]:
L2_loss = torch.nn.MSELoss()
loss = L2_loss(output, target)
loss.retain_grad()

In [20]:
loss.backward()

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [11]:
loss.grad

  loss.grad
