In [12]:
from torchvision.models import alexnet
from torchvision.models.feature_extraction import create_feature_extractor

import numpy as np

from pathlib import Path

In [2]:
# Load the ImageNet-pretrained parameters. Calling `alexnet()` with no
# `weights` argument builds a randomly initialised network, whose activations
# are not meaningful as "AlexNet features".
# NOTE: the first call downloads the checkpoint into the local torch hub cache.
model = alexnet(weights="IMAGENET1K_V1")

# Switch to inference mode (disables dropout); as the last expression of the
# cell this also displays the architecture.
model.eval()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [3]:
# Pull every array out of the .npz archive while it is still open, so that
# `dat` is a plain dict that remains usable after the file is closed.
with np.load("../data/kay_images.npz") as archive:
    dat = {name: archive[name] for name in archive.files}

# Show the dimensions of the stimulus array.
dat['stimuli'].shape


(1750, 128, 128)

In [4]:
from torchvision import transforms

# Per-channel mean / std used by the final Normalize step
# (these match the commonly published ImageNet statistics).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing applied to one image before it is fed to the network:
# array -> PIL image -> 224x224 resize -> tensor -> per-channel normalisation.
preprocessing_steps = [
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
resize_normalize = transforms.Compose(preprocessing_steps)


In [5]:
# Nodes to expose: every entry of `model.features` from index 1 to 12
# (index 0, the first convolution, is not requested).
layer_names = [f"features.{idx}" for idx in range(1, 13)]

# Wrap the network so that a forward pass returns the requested intermediate outputs.
model2 = create_feature_extractor(model, return_nodes=layer_names)
model2

AlexNet(
  (features): Module(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
)

In [10]:
import torch
from torch.autograd import Variable  # no longer used in this cell; kept in case other cells rely on it
from tqdm import tqdm


def _to_rgb(img):
    """Return `img` as an (H, W, 3) array, replicating a single grey channel if needed.

    Accepts (H, W) or (H, W, 1) arrays, which are expanded to three identical
    channels. Arrays that already have more than one channel are returned
    unchanged.
    """
    if img.ndim == 2:
        img = img[..., np.newaxis]
    if img.shape[2] == 1:
        # np.repeat keeps the result 3-D; np.stack on an (H, W, 1) array would
        # produce (H, W, 1, 3) instead.
        img = np.repeat(img, 3, axis=2)
    return img


stimuli = dat['stimuli']

# One slot per image, filled with whatever `model2` returns for that image
# (per the torchvision docs, a dict mapping node name -> activation tensor).
# Pre-allocating avoids re-copying the whole array on every iteration, which
# is what growing it with np.append does.
features = np.empty(len(stimuli), dtype=object)

# Inference only: without no_grad every stored activation would keep its
# autograd graph alive for the lifetime of `features`.
with torch.no_grad():
    # for all images in the list generate and store activations
    for image_no in tqdm(range(len(stimuli))):
        # Convert grayscale image to RGB by repeating the channel
        img = _to_rgb(stimuli[image_no])

        # apply transformations and add a batch dimension before feeding to model
        input_img = resize_normalize(img).unsqueeze(0)
        features[image_no] = model2(input_img)

features.shape

100%|██████████| 1750/1750 [00:14<00:00, 119.70it/s]


(1750,)

In [13]:
# Write the extracted features to disk only when no file is there yet;
# an existing file is left untouched.
features_path = Path('../data/alexnet_features.npy')
if not features_path.exists():
    np.save(str(features_path), features)
