In [2]:
from keras.applications import VGG16

In [3]:
# Instantiate VGG16 with ImageNet weights and without its top layers
# (include_top=False), sized for 150x150 RGB inputs.
conv_base = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(150, 150, 3),
)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [5]:
# Print the layer-by-layer summary (output shapes and parameter counts)
# of the convolutional base.
conv_base.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0         
__________

In [6]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

In [11]:
# Root of the dataset. Set the DOGS_CATS_DATA_DIR environment variable to run
# the notebook on another machine; when it is unset, the original local path
# is used so existing behaviour is unchanged.
base_dir = os.environ.get(
    'DOGS_CATS_DATA_DIR',
    '/Users/Misko/PycharmProjects/Playfield/data/DogsCatsData',
)
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

# Generator that multiplies every pixel value by 1/255 as images are read.
datagen = ImageDataGenerator(rescale=1./255)
# Number of images requested per batch from the directory iterator.
batch_size = 20

In [16]:
def extract_features(directory, sample_count):
    '''
    Flows images from file using an ImageDataGenerator into the convnet base
    and saves the output features (and corresponding labels) in numpy arrays.

    Relies on the notebook-level ``datagen``, ``batch_size`` and ``conv_base``.

    Parameters
    ----------
    directory : str
        Directory passed to ``datagen.flow_from_directory``.
    sample_count : int
        Number of samples to extract; determines the length of both
        returned arrays.

    Returns
    -------
    features : numpy.ndarray, shape (sample_count, 4, 4, 512)
        Outputs of ``conv_base.predict`` for the first ``sample_count``
        images yielded by the generator.
    labels : numpy.ndarray, shape (sample_count,)
        The labels yielded alongside those images.
    '''
    features = np.zeros(shape=(sample_count, 4, 4, 512))
    labels = np.zeros(shape=(sample_count))
    if sample_count == 0:
        # Nothing to extract; avoid pulling a batch that has nowhere to go.
        return features, labels

    generator = datagen.flow_from_directory(
        directory,
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode='binary',
    )

    # Running write offset. Unlike ``i * batch_size`` it stays correct even
    # when a batch comes back shorter than ``batch_size``.
    filled = 0
    for i, (inputs_batch, labels_batch) in enumerate(generator):
        print("i =", i)
        features_batch = conv_base.predict(inputs_batch)

        # Clip the batch so it never overruns the preallocated arrays when
        # sample_count is not a multiple of the batch size.
        take = min(len(features_batch), sample_count - filled)
        features[filled:filled + take] = features_batch[:take]
        labels[filled:filled + take] = labels_batch[:take]
        filled += take

        # Keras directory iterators keep yielding batches indefinitely, so
        # stop as soon as the arrays are full. The original `>` comparison
        # pulled one batch too many and tried to write it into an empty slice.
        if filled >= sample_count:
            break
    return features, labels

In [17]:
# Run each data split through the convolutional base once, keeping the
# resulting feature maps together with their labels.
train_features, train_labels = extract_features(
    directory=train_dir, sample_count=2000)
validation_features, validation_labels = extract_features(
    directory=validation_dir, sample_count=1000)
test_features, test_labels = extract_features(
    directory=test_dir, sample_count=1000)

Found 2000 images belonging to 2 classes.
i = 0
i = 1
i = 2
i = 3


KeyboardInterrupt: 

In [None]:
# Flatten arrays for input in a densely connected layer.
# extract_features allocates (sample_count, 4, 4, 512) arrays, so each sample
# flattens to 4 * 4 * 512 values; any other size makes np.reshape raise.
train_features = np.reshape(train_features, (2000, 4 * 4 * 512))
validation_features = np.reshape(validation_features, (1000, 4 * 4 * 512))
test_features = np.reshape(test_features, (1000, 4 * 4 * 512))