# Initialize VGG16 Model

In [1]:
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
import numpy as np

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Load VGG16 with ImageNet weights. With include_top=False the model is used
# below purely as a feature extractor: its predictions on 224x224 images have
# shape (N, 7, 7, 512) (see train_features.shape further down).
model = VGG16(weights='imagenet', include_top=False)

In [3]:
# Print the layer-by-layer architecture of the loaded model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________

# Initialize image generators for train and validation data

In [4]:
from keras.preprocessing.image import ImageDataGenerator

In [5]:
# Root directories handed to flow_from_directory below (paths are relative to
# the notebook's working directory).
train_data_path = "../catsdogs/data/train/"
val_data_path = "../catsdogs/data/valid/"

In [6]:
train_datagen = ImageDataGenerator()
train_generator = train_datagen.flow_from_directory(train_data_path, 
                                    batch_size=64, 
                                    class_mode="categorical",
                                    shuffle=False,
                                    target_size=(224,224))

Found 23000 images belonging to 2 classes.


In [7]:
val_datagen = ImageDataGenerator()
val_generator = val_datagen.flow_from_directory(val_data_path, 
                                    batch_size=64, 
                                    class_mode="categorical",
                                    shuffle=False,
                                    target_size=(224,224))

Found 2000 images belonging to 2 classes.


# Check whether labels are correct

In [8]:
# Number of images each generator discovered, one count per line
# (training first, then validation).
print(train_generator.samples, val_generator.samples, sep="\n")

23000
2000


In [9]:
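# Uncomment to list every training file with its integer class label
# (left disabled because it prints one line per image).
# for filename, label in zip(train_generator.filenames, train_generator.classes):
#    print("Filename: {}\tLabel:{}".format(filename,label))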

In [10]:
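# Uncomment to list every validation file with its integer class label
# (left disabled because it prints one line per image).
# for filename, label in zip(val_generator.filenames, val_generator.classes):
#     print("Filename: {}\tLabel:{}".format(filename,label))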

In [11]:
# Count the validation images whose class index is 1 (class-balance check).
sum(1 for idx in range(val_generator.samples) if val_generator.classes[idx] == 1)

1000

# One-hot encode labels

In [12]:
from keras.utils import to_categorical

In [13]:
# Turn each generator's integer class indices into a two-column one-hot array.
train_labels, val_labels = (
    to_categorical(generator.classes, num_classes=2)
    for generator in (train_generator, val_generator)
)

In [14]:
# Sanity check: the batch size the training generator was configured with.
train_generator.batch_size

64

In [15]:
# Sanity check: one label row per validation image (compare with
# val_generator.samples printed earlier).
len(val_labels)

2000

# Pickle the one-hot encoded labels for later use

In [16]:
import pickle

In [17]:
!rm train_labels.pkl
!rm val_labels.pkl

In [18]:
# Save the one-hot training labels. The with-block closes the file on exit,
# so no explicit f.close() is needed afterwards.
with open("train_labels.pkl", "wb") as f:
    pickle.dump(train_labels, f)

In [19]:
# Save the one-hot validation labels. The with-block closes the file on exit,
# so no explicit f.close() is needed afterwards.
with open("val_labels.pkl", "wb") as f:
    pickle.dump(val_labels, f)

# Extract the train and validation features

In [20]:
# `steps` is a number of batches, so it must be a whole number: samples /
# batch_size is fractional here (23000 / 64), and rounding up makes sure the
# final, smaller batch is included so every image gets a feature row.
train_steps = int(np.ceil(train_generator.samples / train_generator.batch_size))
train_features = model.predict_generator(train_generator, steps=train_steps)

In [21]:
# `steps` is a number of batches, so it must be a whole number: samples /
# batch_size is fractional here (2000 / 64), and rounding up makes sure the
# final, smaller batch is included so every image gets a feature row.
val_steps = int(np.ceil(val_generator.samples / val_generator.batch_size))
val_features = model.predict_generator(val_generator, steps=val_steps)

In [22]:
# Sanity check: expect one feature entry per training image
# (compare with train_generator.samples).
len(train_features)

23000

# Store the features in bcolz arrays

In [23]:
import bcolz

In [24]:
!rm -r train_store
!rm -r val_store
!mkdir train_store
!mkdir val_store

In [25]:
# Directory names used as bcolz `rootdir` for the on-disk feature arrays.
train_store = "train_store"
val_store = "val_store"

In [26]:
# Write the training features to their on-disk store and flush them out.
c = bcolz.carray(train_features, rootdir=train_store, mode='w')
c.flush()

# Same for the validation features.
d = bcolz.carray(val_features, rootdir=val_store, mode='w')
d.flush()

In [27]:
# Shape of the in-memory features returned by predict_generator.
train_features.shape

(23000, 7, 7, 512)

In [28]:
# Reopen the on-disk store read-only. This rebinds train_features from the
# predict_generator result to the bcolz-backed array; the shape shown should
# match the in-memory one displayed in the previous cell.
train_features = bcolz.open(rootdir=train_store, mode='r')
train_features.shape

(23000, 7, 7, 512)

In [29]:
# Reopen the validation store read-only. This rebinds val_features from the
# predict_generator result to the bcolz-backed array.
val_features = bcolz.open(rootdir=val_store, mode='r')
val_features.shape

(2000, 7, 7, 512)

In [30]:
# NOTE(review): both arrays were reopened with mode='r' in the cells above, so
# there should be nothing pending to write — confirm these flush() calls are
# still needed.
train_features.flush()
val_features.flush()