In [1]:
from __future__ import print_function
from keras.models import Model

Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5110)


### The Data

In [2]:
# Root directory for the dataset and all files written by this notebook. The
# trailing slash matters: later cells build paths by plain string concatenation
# (e.g. data_path + "train/").
data_path = "/home/ubuntu/data/"

In [3]:
#List of training images

import os, shutil
import pandas as pd
dataset_path = data_path + "train/"
# Collect one record per file and build the DataFrame in a single call.
# DataFrame.append copies the whole frame on every call (quadratic in the number
# of files) and was removed in pandas 2.0.
train_records = []
for (folder, subs, files) in os.walk(dataset_path):
    # The class label is the name of the directory that directly contains the file.
    label = folder.split("/")[-1]
    for filename in files:
        imagepath = os.path.join(folder, filename)
        train_records.append({"Class": label,
                              "Image": filename,
                              "Imagepath": imagepath})
# Passing columns= keeps the column order (and the columns themselves) stable
# even when no files were found.
train_images = pd.DataFrame(train_records, columns=["Class", "Image", "Imagepath"])

In [4]:
# Drop the rows whose file name is ".DS_Store"; they were picked up by the
# directory walk but are not images.
train_is_ds_store = train_images["Image"] == ".DS_Store"
train_images = train_images[~train_is_ds_store]

In [5]:
# Write the training image listing to CSV, leaving the DataFrame index out.
train_csv_path = data_path + "train_set.csv"
train_images.to_csv(train_csv_path, index=False)

In [6]:
#List of test images

dataset_path = data_path + "test_stg1/"
# Collect one record per file and build the DataFrame in a single call.
# DataFrame.append copies the whole frame on every call (quadratic in the number
# of files) and was removed in pandas 2.0.
test_records = []
for (folder, subs, files) in os.walk(dataset_path):
    for filename in files:
        imagepath = os.path.join(folder, filename)
        test_records.append({"Image": filename,
                             "Imagepath": imagepath})
# Passing columns= keeps the column order (and the columns themselves) stable
# even when no files were found.
test_images = pd.DataFrame(test_records, columns=["Image", "Imagepath"])

In [7]:
# Drop the rows whose file name is ".DS_Store"; they were picked up by the
# directory walk but are not images.
test_is_ds_store = test_images["Image"] == ".DS_Store"
test_images = test_images[~test_is_ds_store]

In [8]:
# Write the test image listing to CSV, leaving the DataFrame index out.
test_csv_path = data_path + "test_set.csv"
test_images.to_csv(test_csv_path, index=False)

In [9]:
#Image lists
# Plain NumPy arrays of image paths, in DataFrame row order. Series.as_matrix()
# was deprecated in pandas 0.23 and removed in 1.0; .values returns the same
# ndarray and is available in old and new pandas alike.
train_image_list = train_images["Imagepath"].values
test_image_list = test_images["Imagepath"].values

# Report how many images each split contains.
print(len(train_image_list))
print(len(test_image_list))

3777
1000


### Feature Extraction

Extracting features with pretrained deep models

In [10]:
from keras import backend as K
# Use channels-last image tensors. set_image_data_format('channels_last') is the
# Keras 2 spelling of the legacy set_image_dim_ordering('tf') alias and matches
# the K.image_data_format() call used when images are loaded.
K.set_image_data_format('channels_last')

In [11]:
#Function to load an image as a numpy matrix

import numpy as np
from keras.preprocessing.image import img_to_array, load_img
def get_image_as_X(path_to_image_file, target_size):
    """Load one image file as a single-image batch array.

    The file is opened with ``load_img`` (``grayscale=False``, the given
    ``target_size``), converted with ``img_to_array`` using the backend's
    current image data format, and returned with a new leading axis of
    length 1.
    """
    pixels = img_to_array(
        load_img(path_to_image_file, grayscale=False, target_size=target_size),
        K.image_data_format(),
    )
    # Prepend the batch axis: (...) -> (1, ...).
    return pixels[np.newaxis, ...]

In [12]:
#Function to extract features from an image with a pretrained model

def extract_features(image_list, image_size, model, preprocessing_function, batch_size = 32):
    """Run ``model`` over ``image_list`` in batches and collect its outputs.

    Args:
        image_list: sequence of image paths; it is sliced as
            ``image_list[start:end]`` to form each batch.
        image_size: target size forwarded to ``get_image_as_X`` for every image.
        model: object exposing ``predict_on_batch(X)``.
        preprocessing_function: callable applied to each single-image array
            returned by ``get_image_as_X`` before the batch is assembled.
        batch_size: maximum number of images per ``predict_on_batch`` call.

    Returns:
        A list with one entry per successfully processed image, in input
        order. A batch that raises is reported on stdout and skipped, so the
        result can be shorter than ``image_list``; compare lengths if row
        alignment with ``image_list`` matters.
    """
    features = []
    total_size = len(image_list)
    print("Extracting features from images...")
    # Stepping by batch_size avoids float division (which breaks range() on
    # Python 3) and never produces an empty trailing batch when total_size is
    # zero or an exact multiple of batch_size.
    for start in range(0, total_size, batch_size):
        end = min(start + batch_size, total_size)
        print(start,end)
        image_subset = image_list[start:end]
        try:
            # Each preprocessed image carries a leading axis of length 1, so
            # joining along axis 0 yields one array with a row per image.
            batch = [preprocessing_function(get_image_as_X(image, image_size))
                     for image in image_subset]
            X = np.concatenate(batch, axis=0)

            p = model.predict_on_batch(X)
            features.extend(p)

        except Exception as e:
            # Deliberate best effort: report the failing index range and move on.
            print("Fail with images index:", (start, end))
            print(e)
            continue

    return features

In [13]:
import warnings
# Install a global "ignore" filter so no Python warnings are shown by later cells.
# NOTE(review): this also hides deprecation warnings from keras/pandas -- confirm
# that is intended.
warnings.filterwarnings("ignore")

Model: Inception V3, with weights pre-trained on ImageNet.

In [14]:
# Target size handed to extract_features for image loading: 598 x 598
# (299 doubled on each side).
img_size=(299*2, 299*2)

In [15]:
from keras.applications import inception_v3
# ImageNet weights, no classifier head (include_top=False) and pooling='avg';
# the recorded run below yields one 2048-value vector per image.
model = inception_v3.InceptionV3(weights='imagenet', include_top=False, pooling='avg')
# Print the layer-by-layer architecture.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
conv2d_1 (Conv2D)                (None, None, None, 32 864                                          
____________________________________________________________________________________________________
batch_normalization_1 (BatchNorm (None, None, None, 32 96                                           
____________________________________________________________________________________________________
activation_1 (Activation)        (None, None, None, 32 0                                            
___________________________________________________________________________________________

In [17]:
%%time
# Run the InceptionV3 model over every training image in batches of 64, using
# the model's own preprocess_input; the result is a Python list of outputs.
features_train = extract_features(train_image_list, img_size, model, inception_v3.preprocess_input, batch_size = 64)

Extracting features from images...
0 64
64 128
128 192
192 256
256 320
320 384
384 448
448 512
512 576
576 640
640 704
704 768
768 832
832 896
896 960
960 1024
1024 1088
1088 1152
1152 1216
1216 1280
1280 1344
1344 1408
1408 1472
1472 1536
1536 1600
1600 1664
1664 1728
1728 1792
1792 1856
1856 1920
1920 1984
1984 2048
2048 2112
2112 2176
2176 2240
2240 2304
2304 2368
2368 2432
2432 2496
2496 2560
2560 2624
2624 2688
2688 2752
2752 2816
2816 2880
2880 2944
2944 3008
3008 3072
3072 3136
3136 3200
3200 3264
3264 3328
3328 3392
3392 3456
3456 3520
3520 3584
3584 3648
3648 3712
3712 3776
3776 3777
CPU times: user 20min 26s, sys: 5min 55s, total: 26min 21s
Wall time: 26min 20s


In [18]:
# Convert the list returned by extract_features into one array and show its
# shape (rows = images processed, columns = feature length).
features_train = np.array(features_train)
print(features_train.shape)

(3777, 2048)


In [19]:
%%time
# Same InceptionV3 extraction for the test images, converted to an array and
# shape-checked in the same cell.
features_test = extract_features(test_image_list, img_size, model, inception_v3.preprocess_input, batch_size = 64)
features_test = np.array(features_test)
print(features_test.shape)

Extracting features from images...
0 64
64 128
128 192
192 256
256 320
320 384
384 448
448 512
512 576
576 640
640 704
704 768
768 832
832 896
896 960
960 1000
(1000, 2048)
CPU times: user 5min 24s, sys: 1min 34s, total: 6min 58s
Wall time: 6min 58s


In [20]:
# Write the InceptionV3 train and test feature arrays to .npy files under data_path.
np.save(data_path+"features_inception_v3_2a_train.npy", features_train)
np.save(data_path+"features_inception_v3_2a_test.npy", features_test)

Model: VGG19, with weights pre-trained on ImageNet.

In [21]:
# Target size for image loading: 448 x 448 (224 doubled on each side).
img_size=(224*2, 224*2)
from keras.applications import vgg19
# ImageNet weights, no classifier head (include_top=False) and pooling='avg';
# the recorded run below yields one 512-value vector per image.
# Note: this rebinds `model`, replacing the InceptionV3 instance.
model = vgg19.VGG19(weights='imagenet', include_top=False, pooling='avg')
# Print the layer-by-layer architecture.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________

In [22]:
%%time
# Run the VGG19 model over every training image in batches of 64, using
# vgg19.preprocess_input; overwrites the previous features_train.
features_train = extract_features(train_image_list, img_size, model, vgg19.preprocess_input, batch_size = 64)

Extracting features from images...
0 64
64 128
128 192
192 256
256 320
320 384
384 448
448 512
512 576
576 640
640 704
704 768
768 832
832 896
896 960
960 1024
1024 1088
1088 1152
1152 1216
1216 1280
1280 1344
1344 1408
1408 1472
1472 1536
1536 1600
1600 1664
1664 1728
1728 1792
1792 1856
1856 1920
1920 1984
1984 2048
2048 2112
2112 2176
2176 2240
2240 2304
2304 2368
2368 2432
2432 2496
2496 2560
2560 2624
2624 2688
2688 2752
2752 2816
2816 2880
2880 2944
2944 3008
3008 3072
3072 3136
3136 3200
3200 3264
3264 3328
3328 3392
3392 3456
3456 3520
3520 3584
3584 3648
3648 3712
3712 3776
3776 3777
CPU times: user 5min 46s, sys: 1min 19s, total: 7min 6s
Wall time: 7min 6s


In [23]:
# Convert the list returned by extract_features into one array and show its
# shape (rows = images processed, columns = feature length).
features_train = np.array(features_train)
print(features_train.shape)

(3777, 512)


In [24]:
%%time
# Same VGG19 extraction for the test images, converted to an array and
# shape-checked in the same cell.
features_test = extract_features(test_image_list, img_size, model, vgg19.preprocess_input, batch_size = 64)
features_test = np.array(features_test)
print(features_test.shape)

Extracting features from images...
0 64
64 128
128 192
192 256
256 320
320 384
384 448
448 512
512 576
576 640
640 704
704 768
768 832
832 896
896 960
960 1000
(1000, 512)
CPU times: user 1min 31s, sys: 20.7 s, total: 1min 52s
Wall time: 1min 52s


In [26]:
# Write the VGG19 train and test feature arrays to .npy files under data_path.
np.save(data_path+"features_vgg19_2a_train.npy", features_train)
np.save(data_path+"features_vgg19_2a_test.npy", features_test)

Model: ResNet50, with weights pre-trained on ImageNet.

In [27]:
# Target size for image loading: 448 x 448 (224 doubled on each side).
img_size=(224*2, 224*2)
from keras.applications import resnet50
# ImageNet weights, no classifier head (include_top=False) and pooling='avg';
# the recorded run below yields one 2048-value vector per image.
# Note: this rebinds `model`, replacing the VGG19 instance.
model = resnet50.ResNet50(weights='imagenet', include_top=False, pooling='avg')
# Print the layer-by-layer architecture.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_3 (InputLayer)             (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D) (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
conv1 (Conv2D)                   (None, None, None, 64 9472                                         
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, None, None, 64 256                                          
___________________________________________________________________________________________

In [28]:
%%time
# Run the ResNet50 model over every training image in batches of 64, using
# resnet50.preprocess_input; overwrites the previous features_train.
features_train = extract_features(train_image_list, img_size, model, resnet50.preprocess_input, batch_size = 64)

Extracting features from images...
0 64
64 128
128 192
192 256
256 320
320 384
384 448
448 512
512 576
576 640
640 704
704 768
768 832
832 896
896 960
960 1024
1024 1088
1088 1152
1152 1216
1216 1280
1280 1344
1344 1408
1408 1472
1472 1536
1536 1600
1600 1664
1664 1728
1728 1792
1792 1856
1856 1920
1920 1984
1984 2048
2048 2112
2112 2176
2176 2240
2240 2304
2304 2368
2368 2432
2432 2496
2496 2560
2560 2624
2624 2688
2688 2752
2752 2816
2816 2880
2880 2944
2944 3008
3008 3072
3072 3136
3136 3200
3200 3264
3264 3328
3328 3392
3392 3456
3456 3520
3520 3584
3584 3648
3648 3712
3712 3776
3776 3777
CPU times: user 9min 4s, sys: 2min 4s, total: 11min 9s
Wall time: 11min 9s


In [29]:
# Convert the list returned by extract_features into one array and show its
# shape (rows = images processed, columns = feature length).
features_train = np.array(features_train)
print(features_train.shape)

(3777, 2048)


In [30]:
%%time
# Same ResNet50 extraction for the test images, converted to an array and
# shape-checked in the same cell.
features_test = extract_features(test_image_list, img_size, model, resnet50.preprocess_input, batch_size = 64)
features_test = np.array(features_test)
print(features_test.shape)

Extracting features from images...
0 64
64 128
128 192
192 256
256 320
320 384
384 448
448 512
512 576
576 640
640 704
704 768
768 832
832 896
896 960
960 1000
(1000, 2048)
CPU times: user 2min 13s, sys: 35.7 s, total: 2min 49s
Wall time: 2min 49s


In [31]:
# Write the ResNet50 train and test feature arrays to .npy files under data_path.
np.save(data_path+"features_resnet50_2a_train.npy", features_train)
np.save(data_path+"features_resnet50_2a_test.npy", features_test)

Model: Xception V1, with weights pre-trained on ImageNet.

In [None]:
# The Jupyter kernel must be restarted after setting KERAS_BACKEND=tensorflow
# for the backend switch to take effect.
# Use channels-last image tensors. set_image_data_format('channels_last') is the
# Keras 2 spelling of the legacy set_image_dim_ordering('tf') alias.
K.set_image_data_format('channels_last')

In [None]:
# Target size for image loading: 299 x 299.
img_size=(299, 299)
from keras.applications import xception
# include_top=False so the network returns pooled feature vectors, consistent
# with the InceptionV3, VGG19 and ResNet50 cells earlier in this notebook. With
# include_top=True the output is the 1000-way ImageNet classifier and the
# pooling argument is ignored, so the saved "features" would be class scores.
# Note: this rebinds `model`, replacing the ResNet50 instance.
model = xception.Xception(weights='imagenet', include_top=False, pooling='avg')
# Print the layer-by-layer architecture.
model.summary()

In [None]:
%%time
# Run the Xception model over every training image in batches of 64, using
# xception.preprocess_input; overwrites the previous features_train.
features_train = extract_features(train_image_list, img_size, model, xception.preprocess_input, batch_size = 64)

In [None]:
# Quick shape check without rebinding features_train; the following cell does
# the actual conversion and prints the same shape.
np.array(features_train).shape

In [None]:
# Convert the list returned by extract_features into one array and show its
# shape (rows = images processed, columns = output length).
features_train = np.array(features_train)
print(features_train.shape)

In [None]:
%%time
# Same Xception extraction for the test images, converted to an array and
# shape-checked in the same cell.
features_test = extract_features(test_image_list, img_size, model, xception.preprocess_input, batch_size = 64)
features_test = np.array(features_test)
print(features_test.shape)

In [None]:
# Write the Xception train and test arrays to .npy files under data_path.
# NOTE(review): these names lack the "_2a" tag used by the other three models'
# files -- confirm whether that is intentional.
np.save(data_path+"features_xception_train.npy", features_train)
np.save(data_path+"features_xception_test.npy", features_test)