# Extract Features from the Image Tiles
With the artwork images tiled, we can now process each tile to extract feature data.

In [1]:
import warnings
warnings.filterwarnings("ignore", message=r"Passing", category=FutureWarning)
import tensorflow as tf
import os
import logging
import tensorflow_hub as hub
import pandas as pd
import PIL.Image as Image
import numpy as np
import sqlite3
from numpy import savez_compressed
from glob import glob
import sys

In [2]:
# only show TensorFlow log messages at WARN level or above (hides INFO)
tf.logging.set_verbosity(tf.logging.WARN)

Set some key processing parameters

In [3]:
# tile dimensions (tiles are square)
TILE_WIDTH = TILE_HEIGHT = 96

# folders holding the project data and the image tiles
data_file_path = "./data/"
tile_file_path = "./data/tiles/"

Set the feature extractor.

In [4]:
# TensorFlow Hub module used as the feature extractor
module_url = (
    "https://tfhub.dev/google/imagenet/mobilenet_v2_100_96/feature_vector/3"
)

## Helper Function

The function <code>get_image_tile_features()</code> opens a tile image and uses the feature extractor module <code>https://tfhub.dev/google/imagenet/mobilenet_v2_100_96/feature_vector/3</code> to extract the image's features.<br/>
__NOTE:__ The feature extraction process is run on the GPU device <code>/device:GPU:0</code>. (_Just as an experiment_).

In [5]:
# Initialised extractors, keyed by module URL. Building the graph, loading the
# TF-Hub module and creating a session are expensive, so they are done once
# and re-used for every tile instead of being repeated (and leaked) per call.
_FEATURE_EXTRACTOR_CACHE = {}


def _get_feature_extractor():
    """Return ``(session, input placeholder, feature tensor)`` for ``module_url``.

    The graph and session are created on first use and cached, so subsequent
    calls only pay for a dictionary lookup. The cached session stays open for
    the lifetime of the kernel.
    """
    cached = _FEATURE_EXTRACTOR_CACHE.get(module_url)
    if cached is not None:
        return cached

    # create graph
    img_graph = tf.Graph()

    # NOTE(review): the device scope is entered before img_graph becomes the
    # default graph (same order as the original code). In TF1 graph mode
    # tf.device() appears to bind to whichever graph is the default at that
    # moment, so confirm that the GPU pinning really applies to img_graph.
    with tf.device("/device:GPU:0"):

        with img_graph.as_default():

            feature_extractor = hub.Module(module_url)

            # input placeholder: a batch of RGB tiles (batch, height, width, 3)
            input_imgs = tf.placeholder(
                dtype=tf.float32, shape=[None, TILE_HEIGHT, TILE_WIDTH, 3]
            )

            # a node with the features
            imgs_features = feature_extractor(input_imgs)

            # collect initializers
            init_op = tf.group(
                [tf.global_variables_initializer(), tf.tables_initializer()]
            )

        # no further ops may be added once the graph is finalized
        img_graph.finalize()

        # create the session and initialise it once
        sess = tf.Session(graph=img_graph)
        try:
            sess.run(init_op)
        except Exception:
            # do not leak the session if initialisation fails
            sess.close()
            raise

    extractor = (sess, input_imgs, imgs_features)
    _FEATURE_EXTRACTOR_CACHE[module_url] = extractor
    return extractor


def get_image_tile_features(image_name):
    """Extract the feature data of a single image tile.

    Parameters
    ----------
    image_name : str
        Path of the tile image file to open.

    Returns
    -------
    numpy.ndarray
        The output of the feature extractor for a batch containing only this
        tile (presumably shape ``(1, n_features)`` -- TODO confirm).
    """
    # Close the file handle as soon as the pixels are read. Converting to RGB
    # guarantees the 3 channels the placeholder expects even when a tile was
    # saved with an alpha channel or as greyscale.
    with Image.open(image_name) as tile_image:
        pixels = np.array(tile_image.convert("RGB"), dtype=np.float32)

    # add the batch dimension and scale pixel values to [0, 1]
    img_batch = pixels[np.newaxis, :, :, :] / 255

    sess, input_imgs, imgs_features = _get_feature_extractor()

    # extract and return the features
    return sess.run(imgs_features, feed_dict={input_imgs: img_batch})

## Connect to the Database
Establish a connection to the RDBMS.

In [6]:
# Build the database path from its parts; plain concatenation produced a
# doubled "/" ("./data//database/artist.db").
database_file = os.path.join(data_file_path, "database", "artist.db")

# Fail fast with a clear message: sqlite3.connect() would otherwise silently
# create a new, empty database file and the query would fail later.
if not os.path.isfile(database_file):
    raise FileNotFoundError(f"Database file not found: {database_file}")

# Create a DB connection between python and the file system
conn = sqlite3.connect(database_file)

Select all image_tags from the RDBMS. <br/>
__NOTE:__ The additional clause <code>WHERE ARTWORK_ID >= 0</code> allows the process to be restarted from a given <code>artwork_id</code> value. This is useful when testing, or when you cannot leave the process running to completion: you can stop it and, at a later point, pick up from where you left off.

In [7]:
# select all image tags
#
# - the explicit alias pins the result column name to "image_tag", the name
#   used when the tags are read back from the DataFrame
# - ORDER BY makes the processing order deterministic, so restarting from a
#   given ARTWORK_ID continues with the images not yet processed
image_data = pd.read_sql_query("""SELECT IMAGE_TAG AS image_tag
                                  FROM   ARTWORK_IMAGE
                                  WHERE  ARTWORK_ID >= 0
                                  ORDER  BY ARTWORK_ID;""", conn)

# NOTE: the where statement allows the process to be restarted
#       to continue from where it finished should the process
#       be stopped. the feature extraction process is quite
#       slow. the default value is 0, to start at the beginning.

## Process the Image Tiles
Extract the image feature data and save it to compressed data files. As the process runs, the "Processed image" and "tile" indicators are updated. This is useful should you wish to stop the process and restart it at a later point in time. <br/>When the tiles of all images have been processed, the text <code>Completed</code> is displayed.

In [8]:
# Folder receiving one compressed feature archive per image. It is created up
# front so that a missing folder is not discovered only after all tiles of
# the first image have been processed.
features_file_path = os.path.join(data_file_path, "features")
os.makedirs(features_file_path, exist_ok=True)

image_count = 0

# loop through all images
for image_count, image_tag in enumerate(image_data["image_tag"], start=1):

    # lists to store the features and tile tags of this image; both are
    # appended to together so entry i of one belongs to entry i of the other
    images_features = []
    image_tile_tags = []

    # number of tiles processed for this image
    image_tile_count = 0

    # all tiles of this image; sorted so the archive content is reproducible
    # (glob() returns files in arbitrary order)
    tile_pattern = os.path.join(tile_file_path, image_tag, image_tag + "*.png")

    for file_name in sorted(glob(tile_pattern)):

        # archive image features
        images_features.append(get_image_tile_features(file_name))

        # archive image tile tag: the 13 characters before the ".png" suffix
        image_tile_tags.append(file_name[-17:-4])

        image_tile_count += 1

        # progress indicator: overwrite the same output line with the
        # current image and tile counter
        sys.stdout.write(f"\rProcessed image: {image_tag[-7:]} tile: {str(image_tile_count).zfill(3)}")
        sys.stdout.flush()

    # write file after each image has been processed (in case a later one fails);
    # the arrays are stored positionally, i.e. as "arr_0" and "arr_1"
    savez_compressed(
        os.path.join(features_file_path, image_tag + "_feature_set"),
        images_features,
        image_tile_tags,
    )

# indicate that process is finished
print('\nCompleted')

Processed image: 0103 tile: 064
Completed
