In [1]:
import torch
import numpy as np
from torchvision import transforms
import numbers
import math

import torch.nn as nn
import torch.nn.functional as F

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Create a neural net class
class Net(nn.Module):
    
    
    # Defining the Constructor
    def __init__(self, num_classes=2):
        super(Net, self).__init__()
        
        # In the init function, we define each layer we will use in our model
        
        # Our images are RGB, so we have input channels = 3. 
        # We will apply 12 filters in the first convolutional layer
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        
        # A second convolutional layer takes 12 input channels, and generates 24 outputs
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1)
        
        # We in the end apply max pooling with a kernel size of 2
        self.pool = nn.MaxPool2d(kernel_size=2)
        
        # A drop layer deletes 20% of the features to help prevent overfitting
        self.drop = nn.Dropout2d(p=0.2)
        
        # Our 128x128 image tensors will be pooled twice with a kernel size of 2. 128/2/2 is 32.
        # This means that our feature tensors are now 32 x 32, and we've generated 24 of them
        
        # We need to flatten these in order to feed them to a fully-connected layer
        self.fc = nn.Linear(in_features=32 * 32 * 24, out_features=num_classes)

    def forward(self, x):
        # In the forward function, pass the data through the layers we defined in the init function
        
        # Use a ReLU activation function after layer 1 (convolution 1 and pool)
        x = F.relu(self.pool(self.conv1(x))) 
        
        # Use a ReLU activation function after layer 2
        x = F.relu(self.pool(self.conv2(x)))  
        
        # Select some features to drop to prevent overfitting (only drop during training)
        x = F.dropout(self.drop(x), training=self.training)
        
        # Flatten
        # x = x.view(-1, 5400)
        x = x.view(-1, 32 * 32 * 24)
        # Feed to fully-connected layer to predict class
        x = self.fc(x)
        # Return class probabilities via a log_softmax function 
        return torch.log_softmax(x, dim=1)
    
device = "cpu"
if (torch.cuda.is_available()):
    # if GPU available, use cuda (on a cpu, training will take a considerable length of time!)
    device = "cuda"

# Create an instance of the model class and allocate it to the device
# model = Net(num_classes=len(classes)).to(device)

In [6]:
#--------------------------------------------------------#
# Can't import torch saved model directly hence first intializing the model and then loading the parameters into it.
#--------------------------------------------------------#

In [3]:
path = r"C:\Users\neeraj.saini\Desktop\New folder\DeepD\model_square_tri.h5"
model = torch.load(path)

In [4]:
model.eval()

Net(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(12, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop): Dropout2d(p=0.2, inplace=False)
  (fc): Linear(in_features=24576, out_features=2, bias=True)
)

In [5]:
def pytorch_input_adapter(img):
    # shape = (1, 3, H, W)
    # tensor = transforms.ToTensor()(img).to('cpu').unsqueeze(0)
    tensor = transforms.ToTensor()(img).to('cpu')
    tensor.requires_grad = True  # we need to collect gradients for the input image
    return tensor


def pytorch_output_adapter(tensor):
    # Push to CPU, detach from the computational graph, convert from (1, 3, H, W) tensor into (H, W, 3) numpy image
    return np.moveaxis(tensor.to('cpu').detach().numpy(), 0, 2)

In [6]:
class CascadeGaussianSmoothing(nn.Module):
    """
    Apply gaussian smoothing separately for each channel (depthwise convolution).

    Arguments:
        kernel_size (int, sequence): Size of the gaussian kernel.
        sigma (float, sequence): Standard deviation of the gaussian kernel.

    """
    def __init__(self, kernel_size, sigma):
        super().__init__()

        if isinstance(kernel_size, numbers.Number):
            kernel_size = [kernel_size, kernel_size]

        cascade_coefficients = [0.5, 1.0, 2.0]  # std multipliers, hardcoded to use 3 different Gaussian kernels
        sigmas = [[coeff * sigma, coeff * sigma] for coeff in cascade_coefficients]  # isotropic Gaussian

        self.pad = int(kernel_size[0] / 2)  # assure we have the same spatial resolution

        # The gaussian kernel is the product of the gaussian function of each dimension.
        kernels = []
        meshgrids = torch.meshgrid([torch.arange(size, dtype=torch.float32) for size in kernel_size])
        for sigma in sigmas:
            kernel = torch.ones_like(meshgrids[0])
            for size_1d, std_1d, grid in zip(kernel_size, sigma, meshgrids):
                mean = (size_1d - 1) / 2
                kernel *= 1 / (std_1d * math.sqrt(2 * math.pi)) * torch.exp(-((grid - mean) / std_1d) ** 2 / 2)
            kernels.append(kernel)

        gaussian_kernels = []
        for kernel in kernels:
            # Normalize - make sure sum of values in gaussian kernel equals 1.
            kernel = kernel / torch.sum(kernel)
            # Reshape to depthwise convolutional weight
            kernel = kernel.view(1, 1, *kernel.shape)
            kernel = kernel.repeat(3, 1, 1, 1)
            kernel = kernel.to(device)

            gaussian_kernels.append(kernel)

        self.weight1 = gaussian_kernels[0]
        self.weight2 = gaussian_kernels[1]
        self.weight3 = gaussian_kernels[2]
        self.conv = F.conv2d

    def forward(self, input):
        input = F.pad(input, [self.pad, self.pad, self.pad, self.pad], mode='reflect')

        # Apply Gaussian kernels depthwise over the input (hence groups equals the number of input channels)
        # shape = (1, 3, H, W) -> (1, 3, H, W)
        num_in_channels = input.shape[1]
        grad1 = self.conv(input, weight=self.weight1, groups=num_in_channels)
        grad2 = self.conv(input, weight=self.weight2, groups=num_in_channels)
        grad3 = self.conv(input, weight=self.weight3, groups=num_in_channels)

        return (grad1 + grad2 + grad3) / 3

In [6]:
def get_neuron_act(input_tensor):
    # Dictionary to store activations for each layer
    activations = {}
    # Define a function to store the activations
    def get_activation(name):
        def hook(model, input, output):
            activations[name] = output
        return hook

    # Register hooks to store activations
    target_layer = 1  # Choose the layer you want to examine
    layer_name = f'conv{target_layer}'
    model.conv1.register_forward_hook(get_activation(layer_name))

    # Perform forward pass
    # with torch.no_grad():
    model.eval()
    _ = model(input_tensor)
    # print("Helooooooooooooooooooooooooooooo")

    # Access the stored activations for the chosen layer
    if layer_name in activations:
        activation_value = activations[layer_name]
        return activation_value
    else:
        print(f'Layer {layer_name} activations not found.')

In [25]:
# LOWER_IMAGE_BOUND = torch.tensor((-IMAGENET_MEAN_1 / IMAGENET_STD_1).reshape(1, -1, 1, 1)).to(DEVICE)
# UPPER_IMAGE_BOUND = torch.tensor(((1 - IMAGENET_MEAN_1) / IMAGENET_STD_1).reshape(1, -1, 1, 1)).to(DEVICE)


def gradient_ascent(model, input_tensor, layer_ids_to_use, iteration):
    # Step 0: Feed forward pass
    out = model(input_tensor)
    # CHeck out 
    '''
    y = input_tensor**2
    loss = y.mean()
    loss.backward()
    print(input_tensor.grad.data) # This is working and giving the value of the gradients
    '''
    # Step 1: Grab activations/feature maps of interest
    activations = get_neuron_act(input_tensor)
    # input_tensor.retain_grad()
    ################################33 Check grad
    #print(activations[0].requires_grad) #############################----------------- Giving True

    # Step 2: Calculate loss over activations
    losses = []
    for layer_activation in activations[0]:
        '''
        Use torch.norm(torch.flatten(layer_activation), p) with p=2 for L2 loss and p=1 for L1 loss. 
        But I'll use the MSE as it works really good, I didn't notice any serious change when going to L1/L2.
        using torch.zeros_like as if we wanted to make activations as small as possible but we'll do gradient ascent
        and that will cause it to actually amplify whatever the network "sees" thus yielding the famous DeepDream look
        '''
        loss_component = torch.nn.MSELoss(reduction='mean')(layer_activation, input_tensor) ###torch.zeros_like(layer_activation, requires_grad=True)
        losses.append(loss_component)
    # losses = torch.tensor(losses, requires_grad= True)
    loss = torch.mean(torch.stack(losses))
    # print(losses[0]) ########################################----tensor(0.1189, grad_fn=<MseLossBackward0>)
    # print(len(losses)) #############################----12
    # print(loss) ################################################----tensor(0.6373) ------tensor(0.6373, grad_fn=<MeanBackward0>)
    # Added requers_grad = True in loss_component. Hence next step is not needed.
    # loss.requires_grad = True # Was giving error "element 0 of variables does not require grad and does not have a grad_fn"
    loss.backward()

    # Step 3: Process image gradients (smoothing + normalization, more an art then a science)
    grad = input_tensor.grad.data # Giving error nonetype object has no attribute data. Means there is some problem in loss.backward()
    # print(grad.shape) ################################################------None ------torch.Size([12, 128, 128])
    
    # Applies 3 Gaussian kernels and thus "blurs" or smoothens the gradients and gives visually more pleasing results
    # We'll see the details of this one in the next cell and that's all, you now understand DeepDream!
    # sigma = ((iteration + 1) / 10) * 2.0 + 0.5
    # smooth_grad = CascadeGaussianSmoothing(kernel_size=9, sigma=sigma)(grad)  # "magic number" 9 just works well

    # Normalize the gradients (make them have mean = 0 and std = 1)
    # I didn't notice any big difference normalizing the mean as well - feel free to experiment
    g_std = torch.std(grad)
    g_mean = torch.mean(grad)
    smooth_grad = grad - g_mean
    smooth_grad = grad / g_std
    # smooth_grad = smooth_grad - g_mean
    # smooth_grad = smooth_grad / g_std

    # Step 4: Update image using the calculated gradients (gradient ascent step)
    # print(input_tensor.data.shape)
    # print(smooth_grad.shape)
    input_tensor.data += 0.09 * smooth_grad

    # Step 5: Clear gradients and clamp the data (otherwise values would explode to +- "infinity")
    input_tensor.grad.data.zero_()
    #input_tensor.data = torch.max(torch.min(input_tensor, 255), 0)


In [19]:
def deep_dream_static_image():
    
    img = np.random.uniform(low=0.0, high=1.0, size=[128, 128, 3]).astype(np.float32)
    shape = img.shape
    # img = pre_process_numpy_img(img)
    original_shape = img.shape[:-1]  # save initial height and width  
    for iteration in range(5000): ########## Giving more clear image at 5K. Crashing at 10K.
        input_tensor = pytorch_input_adapter(img)  # convert to trainable tensor
        # print(input_tensor.requires_grad)
        gradient_ascent(model, input_tensor, ['conv1'], iteration)
        #print(input_tensor.shape)
        img = pytorch_output_adapter(input_tensor)
    return img
  


In [26]:
img = deep_dream_static_image()

: 

In [38]:
img.shape

(128, 128, 3)

In [10]:
img

array([[[  406.70422 ,  -347.87036 ,  -344.84933 ],
        [  380.93396 ,  -476.81946 ,  -369.7387  ],
        [   90.81843 ,  -312.31415 ,  -124.41073 ],
        ...,
        [  205.78888 ,  -272.91647 ,  -161.27852 ],
        [    8.63181 ,  -127.70345 ,    11.395815],
        [  -31.049728,   -13.489065,    82.99378 ]],

       [[ -675.10297 ,   596.4533  ,   477.43005 ],
        [ -815.11554 ,  1109.0106  ,   777.89294 ],
        [ -564.91565 ,  1221.6641  ,   713.6283  ],
        ...,
        [ -342.40335 ,   672.325   ,   375.50977 ],
        [  -61.71721 ,   484.60358 ,   240.70494 ],
        [   22.231123,   187.42734 ,    52.254265]],

       [[  512.40717 ,  -368.02957 ,  -371.61823 ],
        [  623.8247  , -1099.5259  ,  -866.298   ],
        [  424.78494 , -1575.9645  , -1175.3018  ],
        ...,
        [   40.274994,  -526.94073 ,  -551.08813 ],
        [ -110.64669 ,  -592.8635  ,  -558.0601  ],
        [ -166.48653 ,  -300.7506  ,  -207.21577 ]],

       ...,

      

In [21]:
normalized_image = img / np.max(img)

# Scale the normalized image to the integer range [0, 255]
integer_image = (normalized_image * 255).astype(np.uint8)

In [22]:
integer_image

array([[[115,  25, 130],
        [168, 238, 172],
        [115,  25, 127],
        ...,
        [ 84,  24,  98],
        [181, 243, 181],
        [107,   8, 106]],

       [[169,  75, 138],
        [ 56, 136, 141],
        [234, 100, 152],
        ...,
        [134,  68, 102],
        [ 95, 158, 161],
        [203,  61, 144]],

       [[175,  75, 141],
        [ 44, 124, 147],
        [237,  96, 157],
        ...,
        [132,  62, 101],
        [ 82, 146, 162],
        [206,  61, 143]],

       ...,

       [[177,  78, 144],
        [ 34, 121, 137],
        [246, 102, 162],
        ...,
        [122,  64,  98],
        [ 86, 145, 158],
        [205,  61, 144]],

       [[158,  96, 140],
        [ 45,  92, 128],
        [243, 128, 175],
        ...,
        [106,  74,  98],
        [ 94, 126, 161],
        [204,  78, 150]],

       [[126, 114,  98],
        [ 68,  75, 137],
        [207, 152, 130],
        ...,
        [ 77,  74,  60],
        [123, 116, 162],
        [168, 103, 110]]

In [12]:
from PIL import Image

In [23]:
# Convert the NumPy array to a Pillow Image
pillow_image = Image.fromarray(integer_image)

# Save the image to a file (e.g., in PNG format)
pillow_image.save(r"C:\Users\neeraj.saini\Desktop\New folder\DeepD\conv1_10_5k.png")

In [25]:
img = np.random.uniform(low=0.0, high=1.0, size=[128, 128, 3]).astype(np.float32)

In [26]:
input_tensor = pytorch_input_adapter(img)

In [27]:
input_tensor.shape

torch.Size([3, 128, 128])

In [78]:
out = model(input_tensor)
print(out[0])

tensor([-1.6012e+01, -1.1921e-07], grad_fn=<SelectBackward0>)


In [29]:
activation_value = get_neuron_act(input_tensor)



In [30]:
activation_value.shape

torch.Size([12, 128, 128])

In [34]:
activation_value[0].shape

torch.Size([128, 128])

In [76]:
loss = torch.nn.MSELoss(reduction='mean')(activation_value, torch.zeros_like(activation_value, requires_grad=True))

In [77]:
loss

tensor(0.6373, grad_fn=<MseLossBackward0>)

In [78]:
loss.backward()

In [84]:
print(activation_value.grad)

None


In [50]:
import torch

# Create a tensor and enable gradient tracking
x = torch.tensor([3.0], requires_grad=True)
# Some computation
y = x ** 2
# Define a scalar loss
loss = y.mean()

# Perform backpropagation
loss.backward()

# Check if gradients are computed for the tensor
if x.grad is not None:
    print("Gradients for x computed successfully!")
    print("Gradients for x:", x.grad)
else:
    print("No gradients computed for x. Ensure backpropagation.")

Gradients for x computed successfully!
Gradients for x: tensor([6.])


In [47]:
input_tensor

tensor(9., grad_fn=<MeanBackward0>)

tensor([9.], grad_fn=<PowBackward0>)

In [51]:
x

tensor([3.], requires_grad=True)