---
# <span style="color:blue">**Feature Extraction**</span>
---

### **Function to count the number of images in the dataset directories**
---

In [1]:
import os

def fileCount(folder):
    
    '''
        Counts the number of files inside the subdirectories of the different sets.
        Returns number of images for training, validation and test sets.

        The expected layout is folder/<set>/<subfolder>/<files>, where <set> is a
        directory named exactly 'train', 'valid' or 'test'. Only regular files
        found directly inside a <subfolder> are counted; nested directories and
        files lying directly in a <set> directory are ignored. Directories with
        any other name (e.g. 'pretrain') are skipped entirely.

        Parameters:
            folder: path of the parent directory holding the set directories.

        Returns:
            Tuple (train_cnt, valid_cnt, test_cnt).
    '''
    # Counters for the total number of images, keyed by the set directory name
    counts = {'train': 0, 'valid': 0, 'test': 0}

    # Get the set directories from the parent folder
    set_dirs = [entry for entry in os.scandir(folder) if entry.is_dir()]

    for set_dir in set_dirs:

        # Match the directory *name* exactly: a suffix test on the whole path
        # would also accept names such as 'pretrain' or 'mytest'
        if set_dir.name not in counts:
            continue

        # Get the subfolders of this set
        class_dirs = [entry.path for entry in os.scandir(set_dir.path) if entry.is_dir()]

        # Count only regular files, so a nested directory is not taken for an image
        for class_dir in class_dirs:
            counts[set_dir.name] += sum(1 for item in os.scandir(class_dir) if item.is_file())

    return counts['train'], counts['valid'], counts['test']


---
### **Function to load the training, validation and test sets and extract their high-level features with a given model**
---

In [2]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from numpy import savez_compressed
import numpy as np


def get_high_level_features(model_url, model_name, train_batch_size, valid_batch_size, test_batch_size,
                            data_dir='swissroads'):
    
    '''
        Extract high level features for the train, valid and test image sets with
        a TF-Hub module and save everything to compressed npz files.

        Parameters:
            model_url: handle passed to hub.Module to load the feature extractor.
            model_name: suffix used in the output file names
                ('trainfile_<model_name>.npz', 'validfile_<model_name>.npz',
                'testfile_<model_name>.npz', written to the working directory).
            train_batch_size, valid_batch_size, test_batch_size: batch sizes given
                to the image generator of each set. Only the first batch of each
                set is read, so each value has to cover the whole set for all
                images to be processed.
            data_dir: parent directory containing the 'train', 'valid' and 'test'
                subdirectories (defaults to 'swissroads').

        Returns:
            None. Each npz file stores the keys 'data', 'labels', 'names',
            'features' and 'filename'.
    '''
    # ---- Open model with high level features and create necessary initializers ----
    img_graph = tf.Graph()

    with img_graph.as_default():
        # Download module
        feature_extractor = hub.Module(model_url)

        # Get expected height, width 
        img_height, img_width = hub.get_expected_image_size(feature_extractor) 

        # Create input placeholder
        input_imgs = tf.placeholder(dtype=tf.float32, shape=[None, img_height, img_width, 3])

        # A node with the features
        imgs_features = feature_extractor(input_imgs)

        # Collect initializers
        init_op = tf.group([
            tf.global_variables_initializer(), tf.tables_initializer()
        ])

    img_graph.finalize() # make the graph "read-only" 

    # ---- Collect images in different sets ----
    img_generator = ImageDataGenerator(rescale=1/255) # same image generator for all sets
    target_size = (img_height, img_width)

    X_tr, y_tr, tr_labels, X_tr_filenames = _collect_image_set(
        img_generator, os.path.join(data_dir, 'train'), train_batch_size, target_size)

    X_val, y_val, val_labels, X_val_filenames = _collect_image_set(
        img_generator, os.path.join(data_dir, 'valid'), valid_batch_size, target_size)

    X_te, y_te, te_labels, X_te_filenames = _collect_image_set(
        img_generator, os.path.join(data_dir, 'test'), test_batch_size, target_size)

    # ---- Extract high level features per set ----
    # The context manager closes the session (and releases its resources) even
    # if feature extraction fails, so repeated calls do not pile up open sessions
    with tf.Session(graph=img_graph) as sess:

        # Initialize it
        sess.run(init_op)

        # Extract features
        tr_features = sess.run(imgs_features, feed_dict={input_imgs: X_tr})
        val_features = sess.run(imgs_features, feed_dict={input_imgs: X_val})
        te_features = sess.run(imgs_features, feed_dict={input_imgs: X_te})

    # ---- Save sets and features to npz files ----
    # Create dictionaries for each set
    training_data = { 'data': X_tr, 'labels': y_tr, 'names':tr_labels, 'features': tr_features, 'filename': X_tr_filenames}
    validation_data = { 'data': X_val, 'labels': y_val, 'names':val_labels, 'features': val_features, 'filename': X_val_filenames}
    test_data = {'data': X_te, 'labels': y_te, 'names':te_labels, 'features': te_features, 'filename': X_te_filenames}
    
    # Save dictionaries to disk in npz format
    np.savez_compressed('trainfile_'+model_name+'.npz', **training_data)
    np.savez_compressed('validfile_'+model_name+'.npz', **validation_data)
    np.savez_compressed('testfile_'+model_name+'.npz', **test_data)


def _collect_image_set(img_generator, directory, batch_size, target_size):

    '''
        Read the first batch of one image set from `directory`.
        Returns (images, labels, class names, file names).
    '''
    # shuffle=False keeps the batch in the same order as the iterator's filenames
    image_set = img_generator.flow_from_directory(
        directory, batch_size=batch_size, target_size=target_size, shuffle=False)

    # Collect the data of the set (a single batch)
    images, labels = image_set.next()

    # Collect the class names
    class_names = np.array(list(image_set.class_indices))

    # Keep only the file name; basename works with any path separator and any
    # nesting depth, unlike splitting on '/' and taking the second part
    filenames = [os.path.basename(fname) for fname in image_set.filenames]

    return images, labels, class_names, filenames



---
### **Get total number of images per set**
---

In [3]:
# get_high_level_features reads a single batch per set, so the per-set image
# counts are used as the batch sizes to load each set in one go
train_batch_size, valid_batch_size, test_batch_size = fileCount('swissroads')

---
### **Get the training, validation and test data and their high-level features**
---

---
#### **1. Mobilenet_v2 model**
---

In [4]:
# TF-Hub handle of the feature extractor; the 'mobile_v2' name ends up in the output
# file names: trainfile_mobile_v2.npz, validfile_mobile_v2.npz, testfile_mobile_v2.npz
model_url = 'https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/2'
get_high_level_features(model_url, 'mobile_v2', train_batch_size, valid_batch_size, test_batch_size)

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Found 280 images belonging to 6 classes.
Found 139 images belonging to 6 classes.
Found 50 images belonging to 6 classes.


---
#### **2. Inception_v3 model**
---

In [None]:
# TF-Hub handle of the feature extractor; the 'inception_v3' name ends up in the output
# file names: trainfile_inception_v3.npz, validfile_inception_v3.npz, testfile_inception_v3.npz
# NOTE: this rebinds the notebook-level model_url set for the previous model
model_url = 'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1'
get_high_level_features(model_url, 'inception_v3', train_batch_size, valid_batch_size, test_batch_size)

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Found 280 images belonging to 6 classes.
Found 139 images belonging to 6 classes.
Found 50 images belonging to 6 classes.
