# Feature extraction from all kids images with block5 max pool layer

In [15]:
# Here we use tensorflow.keras API to process the data by using VGG19 
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.models import Model

# to import filenames
from os import listdir
from os.path import isfile, join

# to handle data
import numpy as np

In [16]:
# Instantiate VGG19 with the weights pre-trained on ImageNet.
# NOTE(review): Keras presumably downloads the weight file on first use if it
# is not cached locally, so this cell may need network access once — confirm.
base_model = VGG19(weights='imagenet')

In [17]:
# Print the layer-by-layer architecture of the network.
# The "Output Shape" column shows how the spatial size of the image shrinks
# (and the number of channels grows) as it passes through the layers.
base_model.summary()

Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

## Feature extraction for a single image with block5 max pool layer

In [None]:
####################################
# To get the data If you're in Colab
####################################

# Images are stored on GitHub. Running this clones the data into Colab
# ! git clone https://github.com/pabloinsente/CovNet_Human_Drawings
# Run this just once per session

# Now repo data is available in Colab local environment
# !ls CovNet_Human_Drawings/data

In [18]:
# List the first few filenames in dap-drawings-kids
!ls ../data/dap-drawings-kids | head

DAM001_F.jpg
DAM001_P.jpg
DAM001_T.jpg
DAM002_F.jpg
DAM002_P.jpg
DAM002_T.jpg
DAM003_F.jpg
DAM003_P.jpg
DAM003_T.jpg
DAM004_F.jpg


In [19]:
# Read one drawing, resize it to 224x224 and turn it into a
# preprocessed batch of size 1 that can be fed into the model
img_path = '../data/dap-drawings-kids/DAM001_F.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = preprocess_input(
    np.expand_dims(image.img_to_array(img), axis=0)
)

In [20]:
################################################
# Choose which VGG19 layer supplies the features
################################################

# Use the max-pooling layer of block 5 as the feature layer
feature_layer = "block5_pool"

# Alternative layers to try instead:
# feature_layer = "block1_pool"
# feature_layer = "block1_conv1"

# Truncated network: same input as VGG19, output taken at the chosen layer
model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer(name=feature_layer).output,
)

In [21]:
#####################################################
# Do the feature extraction with block5 pooling layer
#####################################################

# Run the preprocessed single-image batch `x` through the truncated model;
# the result is the output of the selected feature layer for that image.
block5_pool_features = model.predict(x)

In [22]:
# Sanity check: the extracted feature should have the same shape as the
# output of the block 5 max-pooling layer, with a leading batch axis of 1
# because a single image was passed in.

# Display the tensor shape
block5_pool_features.shape

(1, 7, 7, 512)

In [23]:
# Uncomment the next line to print the extracted feature as a raw tensor
# print(block5_pool_features)

In [24]:
# Display the extracted feature flattened into a 1-D vector.
# Only the displayed value is flattened; vgg19_feature_np itself keeps the
# original multi-dimensional shape of the prediction.
vgg19_feature_np = np.array(block5_pool_features)
vgg19_feature_np.flatten()

array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)

## Feature extraction from ALL images with block5 max pool layer

In [25]:
# Collect the drawing filenames from the data directory

# If the relative path doesn't work, change `path` as needed
path = '../data/dap-drawings-kids/'

# listdir() returns entries in arbitrary order, so sort them: the row order of
# the exported feature matrix then maps to filenames reproducibly across runs
# and machines.
filenames = sorted(f for f in listdir(path) if isfile(join(path, f)))
len(filenames) # This should yield 257

257

In [26]:
#########################################
# Select the layer for feature extraction
#########################################

# A list of the layers' names is obtained by running "base_model.summary()".
# This is the same layer choice as in the single-image section above, so
# running this cell rebuilds an equivalent truncated model.
feature_layer = "block5_pool" # take the last max-pooling layer as an example
model = Model(inputs=base_model.input, 
              outputs=base_model.get_layer(feature_layer).output)

In [27]:
##########################################
# Do the feature extraction for all images
##########################################

# One flattened feature vector per image, in the same order as `filenames`
vgg19_feature_list = []

for fname in filenames:
    # Load, resize and preprocess the image into a batch of size 1.
    # join() (as used when listing the files) keeps this working even if
    # `path` is changed to a value without a trailing separator.
    img_path = join(path, fname)
    img = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)

    # Extract the features and flatten the tensor into a 1-D vector.
    # verbose=0 keeps Keras from printing progress output for every image.
    vgg19_feature = model.predict(x, verbose=0)
    vgg19_feature_list.append(vgg19_feature.flatten())

In [28]:
########################################################
# Convert the list of feature vectors into a numpy array
########################################################

vgg19_feature_list_np = np.array(vgg19_feature_list)

# Report the container type, then the array dimensions (one per line)
print(type(vgg19_feature_list_np), vgg19_feature_list_np.shape, sep="\n")

<class 'numpy.ndarray'>
(257, 25088)


In [31]:
###################################
# Export numpy array as a .npy file
###################################

# .npy files are lightweight and easy to load back into Python

save_path = '../data/vectors-features/'
filename = 'vgg19_vectors_drawings_block5_pool_kids_257_raw'
np.save(join(save_path, filename), vgg19_feature_list_np)

In [29]:
##################################
# Export numpy array as a csv file
##################################

# CSV exports of this array are very large. Zip the file afterwards to stay
# under GitHub's 100 MB per-file limit.

save_path = '../data/vectors-features/'
filename = 'vgg19_vectors_drawings_block5_pool_kids_257_raw.csv'

# np.savetxt defaults to fmt='%.18e', which spends ~25 characters on every
# value (zeros included). Writing only as many significant digits as the dtype
# needs to round-trip (9 for float32, 17 otherwise) gives a much smaller file
# that still parses back to the same values at the array's own precision.
csv_fmt = "%.9g" if vgg19_feature_list_np.dtype == np.float32 else "%.17g"
np.savetxt(join(save_path, filename), vgg19_feature_list_np,
           delimiter=",", fmt=csv_fmt)