# Extracting visual features

In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
import h5py
from keras.applications import vgg16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.models import Model
% matplotlib notebook
from ipywidgets import FloatProgress
from IPython.display import display
from skimage.transform import resize
import time

In [None]:
# Seed NumPy's global random number generator so that any NumPy-based
# randomness in this session is repeatable between runs.
np.random.seed(42) 

## Loading downscaled dataset

In [None]:
def display_cifar10_image(img_flat):
    """Turn a flat, plane-ordered image vector into a 32x32x3 array.

    The first 1024 values are taken as the first colour plane, the next
    1024 as the second and the following 1024 as the third. Each plane is
    reshaped to 32x32, a per-plane constant is subtracted, and the three
    planes are stacked along a new last axis.

    NOTE(review): the constants match the BGR-ordered ImageNet means used
    by Keras' VGG16 preprocessing, yet they are applied here to the planes
    in storage order (named R, G, B in the original code), and
    ``extract_features`` additionally calls ``preprocess_input`` on the
    result. Confirm that this ordering and the repeated mean subtraction
    are intended.

    Args:
        img_flat: 1-D array-like holding at least 3072 values.

    Returns:
        Array of shape (32, 32, 3) with the mean-shifted planes.
    """
    plane_size = 32 * 32
    channel_means = (103.939, 116.779, 123.68)  # mean pixels for VGG16

    planes = []
    for channel, mean in enumerate(channel_means):
        begin = channel * plane_size
        plane = img_flat[begin:begin + plane_size].reshape((32, 32))
        planes.append(plane - mean)
    return np.dstack(planes)

# Truncated feature-extraction models, keyed by (id(model), layer name).
# The source model is stored next to its extractor so the id cannot be
# recycled for a different object while the cache entry is alive.
_FEATURE_EXTRACTORS = {}


def extract_features(model,img,input_size=224,exit_layer = 'fc2'):
    """Return the activations of one layer of `model` for a single image.

    The image is resized to ``input_size`` x ``input_size``, given a leading
    batch axis, passed through Keras' VGG16 ``preprocess_input`` and then
    through a sub-model that ends at ``exit_layer``.

    Compared with the previous version this no longer runs an extra full
    forward pass whose result was discarded, and the truncated sub-model is
    built once per (model, layer) pair instead of on every call.

    Args:
        model: Keras model that contains a layer named ``exit_layer``.
        img: Image array accepted by ``skimage.transform.resize``.
        input_size: Side length the image is resized to before prediction.
        exit_layer: Name of the layer whose output is returned.

    Returns:
        The layer output with singleton dimensions squeezed out.
    """
    x = resize(img, (input_size, input_size))
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)

    cache_key = (id(model), exit_layer)
    cached = _FEATURE_EXTRACTORS.get(cache_key)
    if cached is None:
        # Positional (inputs, outputs) works across Keras versions, unlike
        # the legacy ``input=`` / ``output=`` keyword names.
        extractor = Model(model.input, model.get_layer(exit_layer).output)
        _FEATURE_EXTRACTORS[cache_key] = (model, extractor)
    else:
        extractor = cached[1]

    return np.squeeze(extractor.predict(x))

In [None]:
def extract(X, model):
    """Extract a 4096-wide feature vector for every row of `X`.

    Each row is converted with ``display_cifar10_image`` and passed to
    ``extract_features``; progress and elapsed time are printed every 100
    samples.

    The loop now runs over ``X`` itself. It previously iterated over the
    global ``X_train``, which broke or truncated extraction for any other
    dataset. The printed percentage is computed from the number of rows
    instead of assuming exactly 10,000 samples.

    Args:
        X: 2-D array with one flattened image per row.
        model: Keras model forwarded to ``extract_features``.

    Returns:
        Array of shape ``(X.shape[0], 4096)`` with one feature row per image.
    """
    n_samples = X.shape[0]
    extracted_features = np.zeros((n_samples, 4096))

    start = time.time()
    for i in range(n_samples):
        extracted_features[i] = extract_features(model, img=display_cifar10_image(X[i]))
        if i % 100 == 0:
            print("{} %".format(100.0 * i / n_samples))
            t = time.time() - start
            print("time elapsed:\nSeconds: {}\nMinutes: {}\nHours: {}".format(t, t/60, t/3600))
    print("TOTAL TIME: {}".format(time.time() - start))

    return extracted_features


In [None]:
# Read the training and test sets fully into memory. The `[:]` slices copy
# the HDF5 datasets into NumPy arrays, so the files can be closed as soon as
# the arrays are built; the context managers guarantee the handles are
# released even if a key is missing.
with h5py.File('datasets/datatraining.h5','r') as train:
    X_train, y_train = train['data'][:], train['labels'][:]
with h5py.File('datasets/datatest.h5','r') as test:
    X_test, y_test = test['data'][:], test['labels'][:]

In [None]:
# Build VGG16 with ImageNet weights. include_top=True keeps the classifier
# head, which is where the 'fc2' layer used by extract_features by default
# lives.
model = vgg16.VGG16(weights='imagenet', include_top=True)

In [None]:
# One row per training sample. The 4096 columns presumably match the width
# of the 'fc2' output requested from extract_features — adjust if a
# different exit layer is used.
extracted_features = np.zeros((X_train.shape[0],4096))

In [None]:
# Extract features for every training image, updating a notebook progress
# bar and printing progress/elapsed time every 100 samples.
start = time.time()
n_train = len(X_train)
f = FloatProgress(min=0, max=n_train)
display(f)
for i in range(n_train):
    extracted_features[i] = extract_features(model,img=display_cifar10_image(X_train[i]))
    f.value+=1
    if i%100 == 0:
        # Percentage is derived from the dataset size rather than a
        # hard-coded divisor, so it stays correct for any number of samples.
        print("{} %".format(100.0 * i / n_train))
        t = time.time() - start
        print("time elapsed:\nSeconds: {}\nMinutes: {}\nHours: {}".format(t,t/60,t/3600))
print("TOTAL TIME: {}".format(time.time()-start))

In [None]:
# Persist the training features. The context manager closes the file even
# if create_dataset raises, so no half-open handle is left behind.
with h5py.File('extracted_tsne.h5', 'w') as h5f:
    h5f.create_dataset('extracted_data', data=extracted_features)

In [None]:
# One row per test sample. The 4096 columns presumably match the width of
# the 'fc2' output requested from extract_features — adjust if a different
# exit layer is used.
extracted_features_test = np.zeros((X_test.shape[0],4096))

In [None]:
# Extract features for every test image, updating a notebook progress bar
# and printing progress/elapsed time every 100 samples.
# The timer is restarted here so the reported times cover only the test
# set, not the time since the training extraction began.
start = time.time()
n_test = len(X_test)
f = FloatProgress(min=0, max=n_test)
display(f)
# Iterate over the test set itself; looping over len(X_train) indexed past
# the end of X_test / extracted_features_test (or stopped early) whenever
# the two sets differ in size.
for i in range(n_test):
    extracted_features_test[i] = extract_features(model,img=display_cifar10_image(X_test[i]))
    f.value+=1
    if i%100 == 0:
        print("{} %".format(100.0 * i / n_test))
        t = time.time() - start
        print("time elapsed:\nSeconds: {}\nMinutes: {}\nHours: {}".format(t,t/60,t/3600))
print("TOTAL TIME: {}".format(time.time()-start))

In [None]:
# Persist the test features. The context manager closes the file even if
# create_dataset raises, so no half-open handle is left behind.
with h5py.File('extracted_tsne_test.h5', 'w') as h5f:
    h5f.create_dataset('extracted_data', data=extracted_features_test)