In [1]:
'''
In our setup, we:
- created a data/ folder
- created train/ and validation/ subfolders inside data/
- created forests/ and notforests/ subfolders inside train/ and validation/
- put the forest pictures index 0-999 in data/train/forests
- put the forest pictures index 1000-1400 in data/validation/forests
- put the notforest pictures index 12500-13499 in data/train/notforests
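- put the notforest pictures index 13500-13900 in data/validation/notforests
So that we have 1000 training examples for each class, and about 400 validation examples for each class
(each validation index range above spans 401 files).
Note: these counts describe the initial setup; the runs recorded in this notebook found
6115 training images and 1251 validation images in the same directory layout.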
In summary, this is our directory structure:

data/
    train/
        forests/
            forest001.jpg
            forest002.jpg
            ...
        notforests/
            notforest001.jpg
            notforest002.jpg
            ...
    validation/
        forests/
            forest001.jpg
            forest002.jpg
            ...
        notforests/
            notforest001.jpg
            notforest002.jpg
            ...
'''
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
import datetime
import time

# Local-time stamp (YYYYmmddHHMMSS) used to tag the files written by this run.
st = datetime.datetime.now().strftime('%Y%m%d%H%M%S')

Using TensorFlow backend.


In [2]:
# Side lengths (in pixels) that images are resized to when read from disk.
img_width = img_height = 100

# Root folders; each one holds one sub-folder per class.
train_data_dir, validation_data_dir = 'data/train', 'data/validation'

# Sample counts used to derive the number of steps per epoch, plus the
# training schedule.
nb_train_samples, nb_validation_samples = 6000, 1200
epochs, batch_size = 50, 16

# The channel axis goes first or last depending on the backend's image data format.
input_shape = ((3, img_width, img_height)
               if K.image_data_format() == 'channels_first'
               else (img_width, img_height, 3))

#K.set_image_dim_ordering('th')
# Three convolution -> ReLU -> max-pooling stages, followed by a dense head
# whose single sigmoid unit produces the binary prediction.
model = Sequential([
    Conv2D(32, (3, 3), input_shape=input_shape),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),  # turns the 3D feature maps into 1D feature vectors
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [3]:
# Alternative augmentation configuration for training (disabled):
#train_datagen = ImageDataGenerator(
#    rescale=1. / 255,
#    shear_range=0.2,
#    zoom_range=0.2,
#    horizontal_flip=True)
# ...with a rescale-only configuration for testing:
#test_datagen = ImageDataGenerator(rescale=1. / 255)

# Active configuration: both streams only rescale pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by the training and validation directory readers.
flow_options = dict(target_size=(img_width, img_height),
                    batch_size=batch_size,
                    class_mode='binary')
train_generator = train_datagen.flow_from_directory(train_data_dir, **flow_options)
validation_generator = test_datagen.flow_from_directory(validation_data_dir, **flow_options)

# Whole batches only: the step counts come from the configured sample counts.
train_steps = nb_train_samples // batch_size
validation_steps = nb_validation_samples // batch_size

model.fit_generator(train_generator,
                    steps_per_epoch=train_steps,
                    epochs=epochs,
                    validation_data=validation_generator,
                    validation_steps=validation_steps)

# Persist the trained weights under a name tagged with this run's timestamp.
weights_path = 'simple_cnn_trial_1_{}.h5'.format(st)
model.save_weights(weights_path)

# Show which integer label was assigned to each class folder.
print(validation_generator.class_indices)

Found 6115 images belonging to 2 classes.
Found 1251 images belonging to 2 classes.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
{'forests': 0, 'notforests': 1}


In [23]:
#VGG16 retraining
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications

# Images are resized to this width/height (pixels) before being fed to VGG16.
img_width = 100
img_height = 100

# File that receives the weights of the classifier trained on the VGG16 outputs.
top_model_weights_path = 'vgg16_retrained_weights.h5'

# Data locations (one sub-folder per class).
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'

# Sample counts and training schedule.
nb_train_samples, nb_validation_samples = 6000, 1200
epochs, batch_size = 50, 16

# File names shared by the two steps below: the cached VGG16 outputs and the
# class label that belongs to every cached row.
_TRAIN_FEATURES_PATH = 'bottleneck_features_train.npy'
_TRAIN_LABELS_PATH = 'bottleneck_labels_train.npy'
_VALIDATION_FEATURES_PATH = 'bottleneck_features_validation.npy'
_VALIDATION_LABELS_PATH = 'bottleneck_labels_validation.npy'


def _cache_bottleneck_features(model, datagen, data_dir, nb_samples,
                               features_path, labels_path):
    """Run `model` over the images under `data_dir` and save outputs and labels.

    Args:
        model: network whose `predict_generator` output is cached.
        datagen: `ImageDataGenerator` used to read the images.
        data_dir: folder containing one sub-folder per class.
        nb_samples: sample budget; `nb_samples // batch_size` batches are predicted.
        features_path: `.npy` file that receives the model outputs.
        labels_path: `.npy` file that receives the class index of every output row.
    """
    # class_mode=None yields images only. shuffle=False keeps the generator's
    # file order, so output row i corresponds to generator.classes[i].
    generator = datagen.flow_from_directory(data_dir,
                                            target_size=(img_width, img_height),
                                            batch_size=batch_size,
                                            class_mode=None,
                                            shuffle=False)

    features = model.predict_generator(generator, nb_samples // batch_size)

    # Pass paths rather than open() handles so numpy opens and closes the files.
    np.save(features_path, features)
    # Store the true labels of the rows that were actually predicted instead of
    # assuming that each class fills exactly half of them.
    np.save(labels_path, generator.classes[:len(features)])


def save_bottlebeck_features():
    """Cache the VGG16 outputs (and labels) for the training and validation images.

    Reads the module-level settings `train_data_dir`, `validation_data_dir`,
    `nb_train_samples`, `nb_validation_samples`, `img_width`, `img_height` and
    `batch_size`, and writes four `.npy` files into the working directory.
    """
    datagen = ImageDataGenerator(rescale=1. / 255)

    # build the VGG16 network (ImageNet weights, without its top layers)
    model = applications.VGG16(include_top=False, weights='imagenet')

    _cache_bottleneck_features(model, datagen, train_data_dir, nb_train_samples,
                               _TRAIN_FEATURES_PATH, _TRAIN_LABELS_PATH)
    _cache_bottleneck_features(model, datagen, validation_data_dir, nb_validation_samples,
                               _VALIDATION_FEATURES_PATH, _VALIDATION_LABELS_PATH)


def train_top_model():
    """Fit a small dense classifier on the cached VGG16 outputs and save its weights.

    Expects the files written by `save_bottlebeck_features()` to exist. The
    trained weights are written to `top_model_weights_path`.
    """
    train_data = np.load(_TRAIN_FEATURES_PATH)
    train_labels = np.load(_TRAIN_LABELS_PATH)

    validation_data = np.load(_VALIDATION_FEATURES_PATH)
    validation_labels = np.load(_VALIDATION_LABELS_PATH)

    # Flatten the cached feature maps, then one hidden layer and a sigmoid output.
    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)

# Step 1 writes the VGG16 outputs to .npy files; step 2 loads those files and
# fits the small classifier on them.
save_bottlebeck_features()
train_top_model()

Found 6115 images belonging to 2 classes.
Found 1251 images belonging to 2 classes.
Train on 6000 samples, validate on 1200 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [4]:
##<JUNK>###
%matplotlib inline
import os
from matplotlib.pyplot import figure, axis
from os import listdir
from os.path import isfile, join
from skimage.external.tifffile import imread, imshow


def showImagesHorizontally(list_of_files):
    """Display at most the first five images of `list_of_files`.

    A figure is opened first; each file is then read with `imread`, shown with
    `imshow`, and the axes are switched off.
    """
    fig = figure()
    #a=fig.add_subplot(100,number_of_files,i+1)
    for image_path in list_of_files[0:5]:
        imshow(imread(image_path))
        axis('off')

# Working directory at the time this cell runs.
home = os.getcwd()

train_data_dir = 'data/train'
validation_data_dir = 'data/validation'

# Build 'data/train/<class>/<name>' for every regular file in each class folder.
forest_dir = train_data_dir + '/forests'
not_forest_dir = train_data_dir + '/notforests'
forestImages = [forest_dir + '/' + name
                for name in listdir(forest_dir)
                if isfile(join(forest_dir, name))]
notForestImages = [not_forest_dir + '/' + name
                   for name in listdir(not_forest_dir)
                   if isfile(join(not_forest_dir, name))]
#showImagesHorizontally(forestImages)
#showImagesHorizontally(notForestImages)

# Image side lengths in pixels.
img_width = 100
img_height = 100
# Sample counts for the training and validation sets.
nb_train_samples, nb_validation_samples = 150, 50
# Training schedule.
epochs, batch_size = 10, 16
###</JUNK>####