In [1]:
import torch
import numpy as np
from torchvision import transforms
import numbers
import math

import torch.nn as nn
import torch.nn.functional as F

  from .autonotebook import tqdm as notebook_tqdm


In [75]:
# Create a neural net class
class Net(nn.Module):
    
    
    # Defining the Constructor
    def __init__(self, num_classes=2):
        super(Net, self).__init__()
        
        # In the init function, we define each layer we will use in our model
        
        # Our images are RGB, so we have input channels = 3. 
        # We will apply 12 filters in the first convolutional layer
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        
        # A second convolutional layer takes 12 input channels, and generates 24 outputs
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1)
        
        # We in the end apply max pooling with a kernel size of 2
        self.pool = nn.MaxPool2d(kernel_size=2)
        
        # A drop layer deletes 20% of the features to help prevent overfitting
        self.drop = nn.Dropout2d(p=0.2)
        
        # Our 128x128 image tensors will be pooled twice with a kernel size of 2. 128/2/2 is 32.
        # This means that our feature tensors are now 32 x 32, and we've generated 24 of them
        
        # We need to flatten these in order to feed them to a fully-connected layer
        self.fc = nn.Linear(in_features=32 * 32 * 24, out_features=num_classes)

    def forward(self, x):
        # In the forward function, pass the data through the layers we defined in the init function
        
        # Use a ReLU activation function after layer 1 (convolution 1 and pool)
        x = F.relu(self.pool(self.conv1(x))) 
        
        # Use a ReLU activation function after layer 2
        x = F.relu(self.pool(self.conv2(x)))  
        
        # Select some features to drop to prevent overfitting (only drop during training)
        x = F.dropout(self.drop(x), training=self.training)
        
        # Flatten
        # x = x.view(-1, 5400)
        x = x.view(-1, 32 * 32 * 24)
        # Feed to fully-connected layer to predict class
        x = self.fc(x)
        # Return class probabilities via a log_softmax function 
        return torch.log_softmax(x, dim=1)
    
device = "cpu"
if (torch.cuda.is_available()):
    # if GPU available, use cuda (on a cpu, training will take a considerable length of time!)
    device = "cuda"

# Create an instance of the model class and allocate it to the device
# model = Net(num_classes=len(classes)).to(device)

In [6]:
path = r"C:\Users\neeraj.saini\Desktop\New folder\DeepD\model_square_tri_script.h5"
#--------------------------------------------------------#
# Can't import torch saved model directly hence first intializing the model and then loading the parameters into it.
#--------------------------------------------------------#
model = Net() 
model.load_state_dict(torch.load(path))
model.eval()

Net(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(12, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop): Dropout2d(p=0.2, inplace=False)
  (fc): Linear(in_features=24576, out_features=2, bias=True)
)

In [76]:
path = r"C:\Users\neeraj.saini\Desktop\New folder\DeepD\model_square_tri.h5"
model = torch.load(path)

In [77]:
model.eval()

Net(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(12, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop): Dropout2d(p=0.2, inplace=False)
  (fc): Linear(in_features=24576, out_features=2, bias=True)
)

In [32]:
# def pre_process_numpy_img(img):
#     assert isinstance(img, np.ndarray), f'Expected numpy image got {type(img)}'

#     img = (img - IMAGENET_MEAN_1) / IMAGENET_STD_1  # normalize image
#     return img


# def post_process_numpy_img(img):
#     assert isinstance(img, np.ndarray), f'Expected numpy image got {type(img)}'

#     if img.shape[0] == 3:  # if channel-first format move to channel-last (CHW -> HWC)
#         img = np.moveaxis(img, 0, 2)

#     mean = IMAGENET_MEAN_1.reshape(1, 1, -1)
#     std = IMAGENET_STD_1.reshape(1, 1, -1)
#     img = (img * std) + mean  # de-normalize
#     img = np.clip(img, 0., 1.)  # make sure it's in the [0, 1] range

#     return img


def pytorch_input_adapter(img):
    # shape = (1, 3, H, W)
    # tensor = transforms.ToTensor()(img).to('cpu').unsqueeze(0)
    tensor = transforms.ToTensor()(img).to('cpu')
    tensor.requires_grad = True  # we need to collect gradients for the input image
    return tensor


def pytorch_output_adapter(tensor):
    # Push to CPU, detach from the computational graph, convert from (1, 3, H, W) tensor into (H, W, 3) numpy image
    return np.moveaxis(tensor.to('cpu').detach().numpy()[0], 0, 2)

In [10]:
class CascadeGaussianSmoothing(nn.Module):
    """
    Apply gaussian smoothing separately for each channel (depthwise convolution).

    Arguments:
        kernel_size (int, sequence): Size of the gaussian kernel.
        sigma (float, sequence): Standard deviation of the gaussian kernel.

    """
    def __init__(self, kernel_size, sigma):
        super().__init__()

        if isinstance(kernel_size, numbers.Number):
            kernel_size = [kernel_size, kernel_size]

        cascade_coefficients = [0.5, 1.0, 2.0]  # std multipliers, hardcoded to use 3 different Gaussian kernels
        sigmas = [[coeff * sigma, coeff * sigma] for coeff in cascade_coefficients]  # isotropic Gaussian

        self.pad = int(kernel_size[0] / 2)  # assure we have the same spatial resolution

        # The gaussian kernel is the product of the gaussian function of each dimension.
        kernels = []
        meshgrids = torch.meshgrid([torch.arange(size, dtype=torch.float32) for size in kernel_size])
        for sigma in sigmas:
            kernel = torch.ones_like(meshgrids[0])
            for size_1d, std_1d, grid in zip(kernel_size, sigma, meshgrids):
                mean = (size_1d - 1) / 2
                kernel *= 1 / (std_1d * math.sqrt(2 * math.pi)) * torch.exp(-((grid - mean) / std_1d) ** 2 / 2)
            kernels.append(kernel)

        gaussian_kernels = []
        for kernel in kernels:
            # Normalize - make sure sum of values in gaussian kernel equals 1.
            kernel = kernel / torch.sum(kernel)
            # Reshape to depthwise convolutional weight
            kernel = kernel.view(1, 1, *kernel.shape)
            kernel = kernel.repeat(3, 1, 1, 1)
            kernel = kernel.to(DEVICE)

            gaussian_kernels.append(kernel)

        self.weight1 = gaussian_kernels[0]
        self.weight2 = gaussian_kernels[1]
        self.weight3 = gaussian_kernels[2]
        self.conv = F.conv2d

    def forward(self, input):
        input = F.pad(input, [self.pad, self.pad, self.pad, self.pad], mode='reflect')

        # Apply Gaussian kernels depthwise over the input (hence groups equals the number of input channels)
        # shape = (1, 3, H, W) -> (1, 3, H, W)
        num_in_channels = input.shape[1]
        grad1 = self.conv(input, weight=self.weight1, groups=num_in_channels)
        grad2 = self.conv(input, weight=self.weight2, groups=num_in_channels)
        grad3 = self.conv(input, weight=self.weight3, groups=num_in_channels)

        return (grad1 + grad2 + grad3) / 3

In [11]:
# LOWER_IMAGE_BOUND = torch.tensor((-IMAGENET_MEAN_1 / IMAGENET_STD_1).reshape(1, -1, 1, 1)).to(DEVICE)
# UPPER_IMAGE_BOUND = torch.tensor(((1 - IMAGENET_MEAN_1) / IMAGENET_STD_1).reshape(1, -1, 1, 1)).to(DEVICE)


def gradient_ascent(model, input_tensor, layer_ids_to_use, iteration):
    # Step 0: Feed forward pass
    out = model(input_tensor)

    # Step 1: Grab activations/feature maps of interest
    activations = [out[layer_id_to_use] for layer_id_to_use in layer_ids_to_use]

    # Step 2: Calculate loss over activations
    losses = []
    for layer_activation in activations:
        # Use torch.norm(torch.flatten(layer_activation), p) with p=2 for L2 loss and p=1 for L1 loss. 
        # But I'll use the MSE as it works really good, I didn't notice any serious change when going to L1/L2.
        # using torch.zeros_like as if we wanted to make activations as small as possible but we'll do gradient ascent
        # and that will cause it to actually amplify whatever the network "sees" thus yielding the famous DeepDream look
        loss_component = torch.nn.MSELoss(reduction='mean')(layer_activation, torch.zeros_like(layer_activation))
        losses.append(loss_component)

    loss = torch.mean(torch.stack(losses))
    loss.backward()

    # Step 3: Process image gradients (smoothing + normalization, more an art then a science)
    grad = input_tensor.grad.data

    # Applies 3 Gaussian kernels and thus "blurs" or smoothens the gradients and gives visually more pleasing results
    # We'll see the details of this one in the next cell and that's all, you now understand DeepDream!
    sigma = ((iteration + 1) / 10) * 2.0 + 0.5
    smooth_grad = CascadeGaussianSmoothing(kernel_size=9, sigma=sigma)(grad)  # "magic number" 9 just works well

    # Normalize the gradients (make them have mean = 0 and std = 1)
    # I didn't notice any big difference normalizing the mean as well - feel free to experiment
    g_std = torch.std(smooth_grad)
    g_mean = torch.mean(smooth_grad)
    smooth_grad = smooth_grad - g_mean
    smooth_grad = smooth_grad / g_std

    # Step 4: Update image using the calculated gradients (gradient ascent step)
    input_tensor.data += 0.09 * smooth_grad

    # Step 5: Clear gradients and clamp the data (otherwise values would explode to +- "infinity")
    input_tensor.grad.data.zero_()
    # input_tensor.data = torch.max(torch.min(input_tensor, UPPER_IMAGE_BOUND), LOWER_IMAGE_BOUND)


In [43]:
def deep_dream_static_image():
    layer_ids_to_use = 'layer_0'
    
    img = np.random.uniform(low=0.0, high=1.0, size=[128, 128, 3]).astype(np.float32)
    shape = img.shape
    # img = pre_process_numpy_img(img)
    original_shape = img.shape[:-1]  # save initial height and width  
    for iteration in range(10):
        input_tensor = pytorch_input_adapter(img)  # convert to trainable tensor
        gradient_ascent(model, input_tensor, ['conv1'], iteration)
        img = pytorch_output_adapter(input_tensor)
    return img
  


In [44]:
img = deep_dream_static_image()

IndexError: too many indices for tensor of dimension 2

In [33]:
img = np.random.uniform(low=0.0, high=1.0, size=[128, 128, 3]).astype(np.float32)

In [34]:
input_tensor = pytorch_input_adapter(img)

In [35]:
input_tensor.shape

torch.Size([3, 128, 128])

In [78]:
out = model(input_tensor)
print(out[0])

tensor([-1.6012e+01, -1.1921e-07], grad_fn=<SelectBackward0>)


In [66]:
# Reduced time form 60 min to some seconds.
def get_neuron_acts_layer( input_tensor, layer):
    # getting activation one layer at a time
    # Return activation corresponding to each neuron in a layer having dimension eaual to number of tokens
    cache = {}
    activation = []

    def caching_hook(input, output):
        cache["act"] = output
        # temp = []
        # for j in range(len(cache["act"][0][0])):
        #     temp.append(cache["act"][0,:,j].tolist())
        # temp = np.array(temp).squeeze()
        # print(temp.shape)
        print(output.shape)
        activation.append(output)
    
    
    model.fc.register_forward_hook(caching_hook)
    out = model(input_tensor)

    # model.run_with_hooks(
    #     fwd_hooks=[(f"blocks.{layer}.mlp.hook_post", caching_hook)]
    # )
    return activation
 

In [67]:
get_neuron_acts_layer(input_tensor, 'conv1')

TypeError: caching_hook() takes 2 positional arguments but 3 were given

In [None]:
def run_with_hooks(
        self,
        *model_args,
        fwd_hooks: List[Tuple[Union[str, Callable], Callable]] = [],
        bwd_hooks: List[Tuple[Union[str, Callable], Callable]] = [],
        reset_hooks_end=True,
        clear_contexts=False,
        **model_kwargs,
    ):

        with self.hooks(
            fwd_hooks, bwd_hooks, reset_hooks_end, clear_contexts
        ) as hooked_model:
            return hooked_model.forward(*model_args, **model_kwargs)

In [83]:
# import torch
# import torch.nn as nn

# # Define a simple neural network (just an example)
# class SimpleNet(nn.Module):
#     def __init__(self):
#         super(SimpleNet, self).__init__()
#         self.fc1 = nn.Linear(10, 20)
#         self.fc2 = nn.Linear(20, 10)

#     def forward(self, x):
#         x = torch.relu(self.fc1(x))
#         x = self.fc2(x)
#         return x

# # Create an instance of the model
# model = SimpleNet()

# # Sample input data
# input_data = torch.randn(1, 10)  # Assuming input size is (1, 10)

# Dictionary to store activations for each layer
activations = {}

# Define a function to store the activations
def get_activation(name):
    def hook(model, input, output):
        activations[name] = output.detach()
    return hook

# Register hooks to store activations
target_layer = 1  # Choose the layer you want to examine
layer_name = f'conv{target_layer}'
model.conv2.register_forward_hook(get_activation(layer_name))

# Perform forward pass
with torch.no_grad():
    model.eval()
    _ = model(input_tensor)

# Access the stored activations for the chosen layer
if layer_name in activations:
    activation_value = activations[layer_name]
    print(f'Activations for layer {layer_name}:\n{activation_value}')
else:
    print(f'Layer {layer_name} activations not found.')

Activations for layer conv1:
tensor([[[-1.3593e-01, -3.4356e-01, -2.8973e-01,  ..., -3.8275e-01,
          -4.5603e-01, -2.8351e-01],
         [-4.2713e-01, -6.4379e-01, -7.7101e-01,  ..., -8.3010e-01,
          -7.0007e-01, -5.3686e-01],
         [-4.3008e-01, -7.8576e-01, -7.6857e-01,  ..., -8.4137e-01,
          -7.6311e-01, -5.9168e-01],
         ...,
         [-4.5650e-01, -7.8799e-01, -7.1777e-01,  ..., -6.8976e-01,
          -7.0851e-01, -4.3310e-01],
         [-4.0287e-01, -7.4784e-01, -7.3499e-01,  ..., -7.8667e-01,
          -7.3654e-01, -4.9855e-01],
         [-3.3910e-01, -5.1722e-01, -5.4743e-01,  ..., -6.4671e-01,
          -6.3124e-01, -4.7718e-01]],

        [[-2.3386e-01, -4.1002e-01, -4.4086e-01,  ..., -4.1223e-01,
          -5.0240e-01, -3.9036e-01],
         [-3.9593e-01, -6.4662e-01, -6.3938e-01,  ..., -8.2506e-01,
          -8.2114e-01, -5.8833e-01],
         [-3.9094e-01, -6.3415e-01, -8.1795e-01,  ..., -7.2057e-01,
          -7.5762e-01, -5.4745e-01],
         .



In [84]:
activation_value.shape

torch.Size([24, 64, 64])