In [6]:
from keras import applications

from keras.preprocessing.image import ImageDataGenerator

from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import (Dropout, Flatten, Dense, Conv2D, 
                          Activation, MaxPooling2D)

from sklearn.cross_validation import train_test_split

import os, glob
from tqdm import tqdm
import pandas as pd
from sklearn.utils import shuffle
import numpy as np
import shutil

from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input

In [7]:
# dimensions of our images. label = bezos, gates 2 labels
img_width, img_height = 128, 128

model = VGG16(weights='imagenet', include_top=False)

train_data_dir = 'more_train_handlabel/'
validation_data_dir = 'more_valid_handlabel/'

epochs = 50
batch_size = 16

In [9]:
# Should be (x,x,x,512d) vectors -- better than our 80ish% scratch convnet
# First, did we really randomize our data properly, or did our validation set give us a boost

train_datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')


test_datagen = ImageDataGenerator(rescale=1. / 255)


train_gen = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(128, 128),
        batch_size=1,
        class_mode=None,  # only data, no labels -- we're not trying to predict anything here
        shuffle=True)  # keep data in same order as labels

valid_gen = train_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(128, 128),
        batch_size=1,
        class_mode=None,  # only data, no labels -- we're not trying to predict anything here
        shuffle=False)  # keep data in same order as labels

Found 811 images belonging to 2 classes.
Found 186 images belonging to 2 classes.


In [None]:
train_probs = model.predict_generator(train_gen, 811, workers=3, verbose=1)
valid_probs = model.predict_generator(valid_gen, 186, workers=3, verbose=1)



In [10]:
print(train_probs.shape)
print(valid_probs.shape)

print(train_gen.classes.shape)
print(valid_gen.classes.shape)

(1099, 4, 4, 512)
(100, 4, 4, 512)
(811,)
(186,)


In [11]:
bn_model = Sequential()
bn_model.add(Flatten(input_shape=train_probs.shape[1:]))

bn_model.add(Dense(128, activation='relu'))
bn_model.add(Dropout(0.5))

bn_model.add(Dense(256, activation='relu'))
bn_model.add(Dropout(0.5))

bn_model.add(Dense(512, activation='relu'))
bn_model.add(Dropout(0.5))

bn_model.add(Dense(1, activation='sigmoid'))

bn_model.compile(optimizer='adam',
              loss='binary_crossentropy', metrics=['accuracy'])

bn_model.fit(train_probs, train_gen.classes,
          epochs=50,
          batch_size=16,
          validation_data=(valid_probs, valid_gen.classes), shuffle=True)

ValueError: Input arrays should have the same number of samples as target arrays. Found 1099 input samples and 811 target samples.