In [1]:
# module import
import os
import sys
from datetime import datetime
import pickle

import numpy as np
import PIL.Image
import torch
import torchvision
                
sys.path.append('../cnn_preferred')
from utils import normalise_img, clip_extreme_pixel,  get_cnn_features, img_deprocess, get_target_feature_shape
from activation_maximization import generate_preferred

In [2]:
## load network
# VGG19 from torchvision with pretrained weights.
# NOTE(review): newer torchvision releases replace `pretrained=True` with a
# `weights=` argument — confirm against the installed version before changing.
net = torchvision.models.vgg19(pretrained=True)
# eval() switches the module to inference mode and returns the module itself,
# which is why this cell also displays the architecture as its output.
net.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (17): ReLU(inplace)

In [3]:
# Per-channel image mean and std used to pre-/de-process images fed to the network.
# Both are plain float arrays of shape (3,). (A trailing comma after the first
# assignment would silently turn img_mean into a 1-tuple wrapping the array, and
# the `np.float` alias no longer exists in recent NumPy, so builtin `float` is used.)
img_mean = np.array([0.485, 0.456, 0.406], dtype=float)
img_std = np.array([0.229, 0.224, 0.225], dtype=float)

# if the model input is for 0-1 range, norm = 255, elif 0-255, norm = 1
norm = 255

In [4]:
# Build a timestamped results folder under save_dir and make sure it exists.
# NOTE(review): the folder prefix says "simpleCNN" although the network loaded
# in this notebook is VGG19 — confirm the intended naming.
save_dir = '../result'
run_stamp = datetime.now().strftime('%Y%m%dT%H%M%S')
save_folder = '_'.join(['jupyter_demo_torch_simpleCNN_conv', run_stamp])
save_path = os.path.join(save_dir, save_folder)
# exist_ok=True keeps a re-run within the same second from raising
os.makedirs(save_path, exist_ok=True)

In [5]:
# Initial image for the optimization: uniform random integers in [0, 255],
# laid out as (height, width, 3).
# A dedicated, seeded generator is used so that Restart & Run All starts the
# optimization from the same image every time, without touching NumPy's
# global random state.
h, w = 224, 224
init_seed = 0
initial_input = np.random.RandomState(init_seed).randint(0, 256, (h, w, 3))

In [6]:
# Display the module tree so that layer indices (e.g. features[10]) can be
# looked up when choosing the target layer.
net

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (17): ReLU(inplace)

In [7]:
# Layer whose units are visualised, given as a string naming a sub-module of
# `net` (presumably resolved relative to `net` by the helper functions — confirm).
# "features[10]" is the Conv2d(128, 256, ...) module in the printed architecture;
# another conv layer such as "features[2]" can be substituted here.
target_layer = "features[10]"

In [8]:
# Target layer setting: the helpers receive the layer(s) as a list of strings
exec_str_list = [target_layer]

## Obtain the target feature shape
# Rearrange the (h, w, 3) initial image into the (1, 3, h, w) layout before
# wrapping it in a torch tensor
batched_chw = np.expand_dims(np.transpose(initial_input, (2, 0, 1)), axis=0)
initial_torch_input = torch.Tensor(batched_chw)
# Shape of the target layer's features for this input
feat_shape = get_target_feature_shape(net, initial_torch_input, exec_str_list)

In [9]:
# Inspect the target-layer feature shape; the generation loop indexes a mask of
# this shape as [0, channel, y_index, x_index].
feat_shape

(1, 256, 56, 56)

In [10]:
# Options forwarded to generate_preferred as keyword arguments.
# The "default" values quoted below are carried over from the original notes.
opts = dict(
    # --- input pre-processing ---
    img_mean=img_mean,  # mean used to preprocess the input image (default: [0.485, 0.456, 0.406])
    img_std=img_std,    # std used to preprocess the input image (default: [0.229, 0.224, 0.225])
    norm=norm,          # 255 if the model expects inputs in the 0-1 range, 1 if 0-255 (default: 255)

    # --- optimization length and logging ---
    iter_n=200,     # total number of gradient-descent iterations (default: 200)
    disp_every=1,   # print progress to the terminal every n iterations (default: 1)

    # --- intermediate results ---
    save_intermediate=True,            # whether to save intermediate images (default: None)
    save_intermediate_every=10,        # save an intermediate image every n iterations (default: 10)
    save_intermediate_path=save_path,  # where intermediate images are saved (default: None)

    # --- learning rate (default: 1.) ---
    # the learning rate can be changed linearly by setting these two parameters
    lr_start=1.,
    lr_end=1.,

    # --- gradient momentum (default: 0.001) ---
    # momentum can be changed linearly in the same way
    momentum_start=0.001,
    momentum_end=0.001,

    # --- pixel decay applied at each iteration (default: 0.001) ---
    # pixel decay can also be changed linearly
    decay_start=0.001,
    decay_end=0.001,

    # --- image smoothing ---
    image_blur=True,   # use image smoothing or not (default: True)
    sigma_start=2.5,   # size of the gaussian filter used for smoothing (default: 2.5)
    sigma_end=0.5,

    # --- image jitter ---
    image_jitter=True,  # jitter the image during optimization (default: True)
    jitter_size=4,      # size of the jitter (default: 32)

    # --- p-norm regularization ---
    use_p_norm_reg=False,  # use p-norm regularization (default: False)
    p=2,

    # --- total variation norm ---
    use_TV_norm_reg=False,  # use the total variation norm (default: False)
    TVbeta1=1,              # order in the spatial domain
    TVbeta2=1.2,            # order in the temporal domain (for video input)

    # --- clipping by pixel norm ---
    # original note: "clip or not the pixels with extreme high or low value" (default: True)
    # NOTE(review): the option name suggests small-norm pixels — confirm against generate_preferred
    clip_small_norm=True,
    clip_small_norm_every=1,
    n_pct_start=5,
    n_pct_end=5,

    # --- clipping by contribution ---
    # clip or not the pixels with a small contribution (norm over the RGB channels)
    clip_small_contribution=True,
    clip_small_contribution_every=1,
    c_pct_start=5,
    c_pct_end=5,

    # --- starting point ---
    # initial image for the optimization (None makes generate_preferred start from random noise)
    initial_input=initial_input,
)



In [11]:
# Channels to visualise, and the central position of the feature map along
# axes 2 and 3 of feat_shape (used when building the feature mask)
channel_list = [14, 56]
y_index, x_index = (int(extent / 2) for extent in feat_shape[2:4])

In [13]:
for channel in channel_list:
    # Console header for this channel, surrounded by blank lines
    print('')
    print('channel={}'.format(channel))
    print('')

    # Instead of a target channel, generate_preferred is given a feature mask:
    # a binary array (1: target unit; 0: irrelevant unit) with the same shape
    # as the target layer.
    feature_mask = np.zeros(feat_shape)
    # Only the unit at the centre position of this channel is maximised
    feature_mask[0, channel, y_index, x_index] = 1.

    # Activation maximization
    preferred_stim = generate_preferred(net, exec_str_list, feature_mask=feature_mask, **opts)

    # Both output files share the same name stem
    file_stem = 'preferred_img_layer_{}_channel_{}'.format(target_layer, channel)

    # Raw result as a NumPy array
    save_name = file_stem + '.npy'
    np.save(os.path.join(save_path, save_name), preferred_stim)

    # Display-friendly JPEG. Per the original notes, extreme pixels are clipped
    # first (0.02% lowest and 0.02% highest values, pct=0.04) and the image is
    # then normalised so pixel values lie within [0, 255].
    # NOTE(review): confirm the pct semantics against clip_extreme_pixel.
    save_name = file_stem + '.jpg'
    display_img = normalise_img(clip_extreme_pixel(preferred_stim, pct=0.04))
    PIL.Image.fromarray(display_img).save(os.path.join(save_path, save_name))


channel=14

iter=1; mean(abs(feat))=2.92657;
iter=2; mean(abs(feat))=1098.65;
iter=3; mean(abs(feat))=136.144;
iter=4; mean(abs(feat))=863.489;
iter=5; mean(abs(feat))=195.371;
iter=6; mean(abs(feat))=910.724;
iter=7; mean(abs(feat))=1620.39;
iter=8; mean(abs(feat))=1059.04;
iter=9; mean(abs(feat))=3080.9;
iter=10; mean(abs(feat))=3885.07;
iter=11; mean(abs(feat))=1914.79;
iter=12; mean(abs(feat))=2960.41;
iter=13; mean(abs(feat))=2530.98;
iter=14; mean(abs(feat))=4043.27;
iter=15; mean(abs(feat))=4022;
iter=16; mean(abs(feat))=4747.41;
iter=17; mean(abs(feat))=498.877;
iter=18; mean(abs(feat))=1162.73;
iter=19; mean(abs(feat))=3137.27;
iter=20; mean(abs(feat))=2678.14;
iter=21; mean(abs(feat))=355.895;
iter=22; mean(abs(feat))=996.265;
iter=23; mean(abs(feat))=1927.91;
iter=24; mean(abs(feat))=142.08;
iter=25; mean(abs(feat))=3189.11;
iter=26; mean(abs(feat))=1857.26;
iter=27; mean(abs(feat))=1308.52;
iter=28; mean(abs(feat))=4303.65;
iter=29; mean(abs(feat))=2952.85;
iter=30; mean(a

iter=40; mean(abs(feat))=4207.56;
iter=41; mean(abs(feat))=37.8559;
iter=42; mean(abs(feat))=3717.15;
iter=43; mean(abs(feat))=3411.84;
iter=44; mean(abs(feat))=4056.09;
iter=45; mean(abs(feat))=4709.8;
iter=46; mean(abs(feat))=2849.47;
iter=47; mean(abs(feat))=755.085;
iter=48; mean(abs(feat))=2342.39;
iter=49; mean(abs(feat))=3013.83;
iter=50; mean(abs(feat))=5127.58;
iter=51; mean(abs(feat))=3379.27;
iter=52; mean(abs(feat))=982.992;
iter=53; mean(abs(feat))=598.288;
iter=54; mean(abs(feat))=1997.14;
iter=55; mean(abs(feat))=1255.25;
iter=56; mean(abs(feat))=2657.99;
iter=57; mean(abs(feat))=1083.29;
iter=58; mean(abs(feat))=2317.17;
iter=59; mean(abs(feat))=1047.89;
iter=60; mean(abs(feat))=3101.8;
iter=61; mean(abs(feat))=2907.18;
iter=62; mean(abs(feat))=6405.22;
iter=63; mean(abs(feat))=3908.75;
iter=64; mean(abs(feat))=1517.66;
iter=65; mean(abs(feat))=3878.66;
iter=66; mean(abs(feat))=4657.07;
iter=67; mean(abs(feat))=1604.66;
iter=68; mean(abs(feat))=3359.98;
iter=69; mean(ab