# Feature Extraction
 - Extract high-level features from each image using MobileNet v2 and save them, together with the class labels, as trainfeature.npz, validfeature.npz and testfeature.npz.
 - Save the raw pixel data of each image, together with the class labels, as trainpixels.npz, validpixels.npz and testpixels.npz.

## 1. Import necessary libraries

In [1]:
import tensorflow_hub as hub
import numpy as np
import tensorflow as tf
import PIL.Image as Image
import numpy as np
import os



## 2. Create a node to extract the high level features of the batch image

In [2]:
# Build a dedicated TensorFlow graph that maps a batch of images to
# MobileNet v2 feature vectors, then open a session on it. The names
# `sess`, `input_imgs` and `imgs_features` are used by the extraction
# cells further down.
img_graph = tf.Graph()

with img_graph.as_default():
    # Download module
    module_url = 'https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/2'
    feature_extractor = hub.Module(module_url)

    # Create input placeholder
    # Float32 batch of 224x224 3-channel images; the batch size is left open (None).
    input_imgs = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])

    # A node with the features
    imgs_features = feature_extractor(input_imgs)

    # Collect initializers
    # Grouped into one op so a single sess.run() initializes variables and tables.
    init_op = tf.group([
        tf.global_variables_initializer(), tf.tables_initializer()
    ])

# Finalize the graph so no further ops can be added to it by accident.
img_graph.finalize()

# The session is intentionally left open: later cells call sess.run() on it.
sess = tf.Session(graph=img_graph)
sess.run(init_op)

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


## 3. Extract high-level features of the training dataset using MobileNet v2

In [3]:
# Walk swissroads/train/<class folder>/<image>, run every image through the
# feature extractor, and save two archives:
#   trainfeature.npz - 1280-value feature vector per image + integer label
#   trainpixels.npz  - raw pixel array per image + integer label
basepath = 'swissroads/train/'
# Capacity of the preallocated arrays (expected number of training images).
numberoffiles = 280
trainfeatures = np.empty([numberoffiles,1280])
# Assumes every image is 256x256 with 3 channels; other sizes fail on assignment.
trainpixels = np.empty([numberoffiles,256,256,3])
traintarget = np.empty([numberoffiles])
traincount = 0
trainlabel = 0
# NOTE(review): labels follow os.listdir() order, which is not guaranteed to be
# the same for the train/valid/test folders - confirm the mapping is consistent.
for folders in os.listdir(basepath):
    folderpath = os.path.join(basepath, folders)
    # Skip stray files next to the class folders (e.g. .DS_Store); they would
    # otherwise crash os.listdir() below and must not consume a label index.
    if not os.path.isdir(folderpath):
        continue
    for files in os.listdir(folderpath):
        filepath = os.path.join(folderpath, files)
        if not os.path.isfile(filepath):
            continue
        # Context manager closes the image file once the pixels are read.
        with Image.open(filepath) as img:
            img_resized = img.resize([224, 224], resample=Image.BILINEAR) # Expected size
            imgpix = np.array(img)
        # Add a leading batch axis and scale pixel values to [0, 1].
        img_batch = np.array(img_resized, dtype=np.float32)[np.newaxis, :, :, :]/255
        features = sess.run(imgs_features, feed_dict={input_imgs: img_batch})
        trainfeatures[traincount,:] = features
        trainpixels[traincount,:] = imgpix
        traintarget[traincount] = trainlabel
        traincount = traincount + 1
    trainlabel = trainlabel + 1

# np.empty() leaves unfilled rows uninitialised; keep only the rows actually
# written so garbage is never saved when fewer images than expected are found.
trainfeatures = trainfeatures[:traincount]
trainpixels = trainpixels[:traincount]
traintarget = traintarget[:traincount]

np.savez('trainfeature.npz', features=trainfeatures, targets=traintarget)

# Reload the archive as a sanity check and keep an in-memory copy.
with np.load('trainfeature.npz', allow_pickle=False) as npz_file:
    print(list(npz_file.keys()))
    print('x:', npz_file['features'])
    print('y:', npz_file['targets'])
    traindata = dict(npz_file.items())

traindata['features'].shape

np.savez('trainpixels.npz', features=trainpixels, targets=traintarget)

with np.load('trainpixels.npz', allow_pickle=False) as npz_file:
    print(list(npz_file.keys()))
    print('x:', npz_file['features'])
    print('y:', npz_file['targets'])
    trainpixeldata = dict(npz_file.items())

trainpixeldata['features'].shape

['features', 'targets']
x: [[2.23556256 0.55137801 0.5815767  ... 0.         0.         0.22853832]
 [0.68180573 1.93020523 0.         ... 0.         0.         1.36249781]
 [0.         0.31788585 0.         ... 0.         0.         1.27356517]
 ...
 [0.02454308 0.03523652 0.21965986 ... 0.03195597 0.54452211 0.        ]
 [0.         0.19772795 0.06374191 ... 0.4075481  0.85632575 0.09293769]
 [0.00976264 0.         0.         ... 0.         0.46101904 0.10263751]]
y: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2.

(280, 256, 256, 3)

## 4. Extract high-level features of the validation dataset using MobileNet v2

In [4]:
# Walk swissroads/valid/<class folder>/<image>, run every image through the
# feature extractor, and save two archives:
#   validfeature.npz - 1280-value feature vector per image + integer label
#   validpixels.npz  - raw pixel array per image + integer label
basepath = 'swissroads/valid/'
# Capacity of the preallocated arrays (expected number of validation images).
numberoffiles = 139
validfeatures = np.empty([numberoffiles,1280])
# Assumes every image is 256x256 with 3 channels; other sizes fail on assignment.
validpixels = np.empty([numberoffiles,256,256,3])
validtarget = np.empty([numberoffiles])
validcount = 0
validlabel = 0
# NOTE(review): labels follow os.listdir() order, which is not guaranteed to be
# the same for the train/valid/test folders - confirm the mapping is consistent.
for folders in os.listdir(basepath):
    folderpath = os.path.join(basepath, folders)
    # Skip stray files next to the class folders (e.g. .DS_Store); they would
    # otherwise crash os.listdir() below and must not consume a label index.
    if not os.path.isdir(folderpath):
        continue
    for files in os.listdir(folderpath):
        filepath = os.path.join(folderpath, files)
        if not os.path.isfile(filepath):
            continue
        # Context manager closes the image file once the pixels are read.
        with Image.open(filepath) as img:
            img_resized = img.resize([224, 224], resample=Image.BILINEAR) # Expected size
            imgpix = np.array(img)
        # Add a leading batch axis and scale pixel values to [0, 1].
        img_batch = np.array(img_resized, dtype=np.float32)[np.newaxis, :, :, :]/255
        features = sess.run(imgs_features, feed_dict={input_imgs: img_batch})
        validfeatures[validcount,:] = features
        validpixels[validcount,:] = imgpix
        validtarget[validcount] = validlabel
        validcount = validcount + 1
    validlabel = validlabel + 1

# np.empty() leaves unfilled rows uninitialised; keep only the rows actually
# written so garbage is never saved when fewer images than expected are found.
validfeatures = validfeatures[:validcount]
validpixels = validpixels[:validcount]
validtarget = validtarget[:validcount]

np.savez('validfeature.npz', features=validfeatures, targets=validtarget)

# Reload the archive as a sanity check and keep an in-memory copy.
with np.load('validfeature.npz', allow_pickle=False) as npz_file:
    print(list(npz_file.keys()))
    print('x:', npz_file['features'])
    print('y:', npz_file['targets'])
    validdata = dict(npz_file.items())

validdata['features'].shape

# Pair the validation pixels with the validation labels (the training labels
# were saved here before, which mislabels every validation image).
np.savez('validpixels.npz', features=validpixels, targets=validtarget)

with np.load('validpixels.npz', allow_pickle=False) as npz_file:
    print(list(npz_file.keys()))
    print('x:', npz_file['features'])
    print('y:', npz_file['targets'])
    validpixeldata = dict(npz_file.items())

validpixeldata['features'].shape

['features', 'targets']
x: [[0.97644001 0.09868726 0.09768744 ... 0.         0.         0.03392261]
 [0.03949111 0.56884599 0.08042092 ... 0.01384979 0.         0.66017067]
 [0.01346042 0.69432169 0.43024927 ... 0.043381   0.         1.34175074]
 ...
 [0.11006478 0.         0.00320201 ... 0.         1.20225084 0.02104218]
 [0.         0.07008512 0.         ... 0.         0.00583633 0.10463603]
 [0.27085328 0.         0.         ... 0.07852338 0.72697669 0.        ]]
y: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 3. 3. 3. 3. 3. 3.
 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 4. 4. 4. 4. 4. 4. 4. 4. 4. 4. 4. 4. 4. 4.
 4. 4. 4. 4. 4. 4. 4. 5. 5. 5. 5. 5. 5. 5. 5. 5. 5. 5. 5.]
['features', 'targets']
x: [[[[127. 138. 146.]
   [123. 133. 141.]
   [118. 129. 138.]
   ...
   [251.

(139, 256, 256, 3)

## 5. Extract high-level features of the test dataset using MobileNet v2

In [5]:
# Walk swissroads/test/<class folder>/<image>, run every image through the
# feature extractor, and save:
#   testfeature.npz  - 1280-value feature vector per image + integer label
#   testpixels.npz   - raw pixel array per image + integer label
#   testfilename.txt - names of the processed files, in processing order
basepath = 'swissroads/test/'
# Capacity of the preallocated arrays (expected number of test images).
numberoffiles = 50
testfeatures = np.empty([numberoffiles,1280])
# Assumes every image is 256x256 with 3 channels; other sizes fail on assignment.
testpixels = np.empty([numberoffiles,256,256,3])
testtarget = np.empty([numberoffiles])
testcount = 0
testlabel = 0

# The with-block guarantees the filename log is flushed and closed.
with open("testfilename.txt", "w") as a:
    # NOTE(review): labels follow os.listdir() order, which is not guaranteed
    # to be the same for the train/valid/test folders - confirm the mapping.
    for folders in os.listdir(basepath):
        folderpath = os.path.join(basepath, folders)
        # Skip stray files next to the class folders (e.g. .DS_Store); they
        # would otherwise crash os.listdir() below and must not consume a label.
        if not os.path.isdir(folderpath):
            continue
        for files in os.listdir(folderpath):
            filepath = os.path.join(folderpath, files)
            if not os.path.isfile(filepath):
                continue
            # Context manager closes the image file once the pixels are read.
            with Image.open(filepath) as img:
                img_resized = img.resize([224, 224], resample=Image.BILINEAR) # Expected size
                imgpix = np.array(img)
            # Add a leading batch axis and scale pixel values to [0, 1].
            img_batch = np.array(img_resized, dtype=np.float32)[np.newaxis, :, :, :]/255
            features = sess.run(imgs_features, feed_dict={input_imgs: img_batch})
            testfeatures[testcount,:] = features
            testpixels[testcount,:] = imgpix
            testtarget[testcount] = testlabel
            testcount = testcount + 1
            # NOTE(review): names are written back-to-back with no separator,
            # as before - confirm whether a newline delimiter is wanted.
            a.write(str(files))
        testlabel = testlabel + 1

# np.empty() leaves unfilled rows uninitialised; keep only the rows actually
# written so garbage is never saved when fewer images than expected are found.
testfeatures = testfeatures[:testcount]
testpixels = testpixels[:testcount]
testtarget = testtarget[:testcount]

np.savez('testfeature.npz', features=testfeatures, targets=testtarget)
# Reload the archive as a sanity check and keep an in-memory copy.
with np.load('testfeature.npz', allow_pickle=False) as npz_file:
    print(list(npz_file.keys()))
    print('x:', npz_file['features'])
    print('y:', npz_file['targets'])
    testdata = dict(npz_file.items())

testdata['features'].shape

# Pair the test pixels with the test labels (the training labels were saved
# here before, which mislabels every test image).
np.savez('testpixels.npz', features=testpixels, targets=testtarget)
with np.load('testpixels.npz', allow_pickle=False) as npz_file:
    print(list(npz_file.keys()))
    print('x:', npz_file['features'])
    print('y:', npz_file['targets'])
    # Stored under its own name so the training pixel data is not overwritten.
    testpixeldata = dict(npz_file.items())

testpixeldata['features'].shape

['features', 'targets']
x: [[2.54068851e-01 2.95447975e-01 2.07443088e-01 ... 0.00000000e+00
  0.00000000e+00 3.17556769e-01]
 [2.43644327e-01 9.55323428e-02 7.70627230e-05 ... 0.00000000e+00
  0.00000000e+00 1.30346262e+00]
 [1.10596471e-01 3.82976443e-01 4.66268510e-02 ... 0.00000000e+00
  0.00000000e+00 1.64622748e+00]
 ...
 [2.35005207e-02 3.82157266e-02 0.00000000e+00 ... 6.76090196e-02
  1.52491653e+00 3.78509462e-02]
 [6.35092616e-01 6.86126411e-01 2.52151370e-01 ... 0.00000000e+00
  1.40069813e-01 1.67501062e-01]
 [8.52249097e-03 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  1.84372500e-01 9.80177522e-02]]
y: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 3. 3. 3. 3. 3. 3. 4. 4. 4. 4. 4. 4. 4. 5. 5. 5.
 5. 5.]
['features', 'targets']
x: [[[[121. 116. 112.]
   [122. 117. 113.]
   [123. 118. 114.]
   ...
   [ 51.  59.  61.]
   [ 69.  77.  78.]
   [ 75.  81.  81.]]

  [[122. 117. 113.]
   [122. 117. 113.]
   [122. 117. 113.]


(50, 256, 256, 3)