Import dataset images of forest and foliage 

In [None]:
import matplotlib.pyplot as ply
import cv2
from sklearn.datasets import load_files
from skimage.io import imread_collection 
from keras.utils import np_utils
import numpy as np
import pandas as pd 
from glob import glob
import os
import math

In [None]:
from sklearn.model_selection import train_test_split

# Directory holding the training images; passed to load_training() below.
path = 'Datasets/train/'

def load_training(path, labels_csv='Datasets/train_labels.csv'):
    """Load the training image paths together with their labels.

    Parameters
    ----------
    path : str
        Directory containing the training images. Every entry in it must be
        named ``<integer>.<extension>`` (for example ``12.jpg``).
    labels_csv : str, optional
        CSV file holding the labels. It must provide an ``invasive`` column
        and one row per image.

    Returns
    -------
    train_files : list of str
        Image paths sorted by increasing numeric file name.
    train_targets_data : pandas.DataFrame
        The label table, with its ``name`` column set to the image paths.
    train_targets : numpy.ndarray
        Values of the ``invasive`` column.

    Raises
    ------
    ValueError
        If a file name is not an integer, or if the number of images differs
        from the number of label rows.
    """
    # Load in training labels as a Pandas dataframe
    train_targets_data = pd.read_csv(labels_csv, index_col=False)

    # Plain column access replaces `.ix`, which no longer exists in pandas.
    train_targets = np.array(train_targets_data['invasive'])

    def _image_number(file_path):
        # Numeric part of the file name, independent of the directory length.
        return int(os.path.splitext(os.path.basename(file_path))[0])

    # Glob is unordered, sort in increasing numeric file name
    train_files = sorted(glob(os.path.join(path, '*')), key=_image_number)

    if len(train_files) != len(train_targets_data):
        raise ValueError(
            'Found %d images in %r but %d label rows in %r'
            % (len(train_files), path, len(train_targets_data), labels_csv))

    # Insert filepath as name for training set.
    # NOTE(review): this pairs row i with the i-th sorted file, so it assumes
    # the CSV rows are already in increasing image-number order -- confirm.
    train_targets_data['name'] = train_files

    return train_files, train_targets_data, train_targets
    
# Sorted image paths, the label table and the raw label vector.
(train_files,
 train_targets_data,
 train_targets) = load_training(path)

In [None]:
# Preview the first five rows of the training label table.
train_targets_data.head(n=5)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training images for validation, with a fixed seed.
# Files and targets are passed to the same call, so they are split together.
# TODO: still verify that the training and validation files match their labels.
# NOTE: this cell overwrites train_files / train_targets, so re-running it
# without re-running the loading cell splits the already reduced training set.
train_files, valid_files, train_targets, valid_targets = train_test_split(
    train_files,
    train_targets,
    test_size=0.2,
    random_state=42
)

In [None]:
# Directory holding the test images; passed to load_testing() below.
path = 'Datasets/test'

def load_testing(path, submission_csv='Datasets/sample_submission.csv'):
    """Load the test image paths and the sample submission table.

    Parameters
    ----------
    path : str
        Directory containing the test images. Every entry in it must be
        named ``<integer>.<extension>`` (for example ``12.jpg``).
    submission_csv : str, optional
        CSV file with the sample submission.

    Returns
    -------
    test_files : list of str
        Image paths sorted by increasing numeric file name.
    test_targets : pandas.DataFrame
        Contents of ``submission_csv``.

    Raises
    ------
    ValueError
        If a file name in ``path`` is not an integer.
    """
    # Load in testing labels as a Pandas dataframe
    test_targets = pd.read_csv(submission_csv)

    def _image_number(file_path):
        # Numeric part of the file name, independent of the directory length.
        return int(os.path.splitext(os.path.basename(file_path))[0])

    # Glob is unordered, sort in increasing numeric file name
    test_files = sorted(glob(os.path.join(path, '*')), key=_image_number)

    return test_files, test_targets

# Sorted test image paths and the sample submission table.
(test_files,
 test_targets) = load_testing(path)

In [None]:
# Preview the first five rows of the sample submission table.
test_targets.head(n=5)

### File Statistics

In [None]:
# Count each split once, then report the total and the per-split sizes.
n_train, n_valid, n_test = len(train_files), len(valid_files), len(test_files)

print('There are %s total forest images.\n' % (n_train + n_valid + n_test))
print('There are %d training forest images.' % n_train)
print('There are %d validation forest images.' % n_valid)
print('There are %d test forest images.'% n_test)

### Pre-process the Data - Conversion into 4D Tensor

In [None]:
# Functions to pre-process the data into a 4D array

from keras.preprocessing import image
from tqdm import tqdm

def path_tensor(img_path, target_size=(224, 224)):
    """Load one image file as a 4D tensor with a leading batch axis.

    Parameters
    ----------
    img_path : str
        Path of the image file to load.
    target_size : tuple of int, optional
        Size the image is resized to on load, forwarded to
        ``image.load_img``. Defaults to ``(224, 224)``, the value that was
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1,) + x.shape`` where ``x`` is the result of
        ``image.img_to_array``; with the default size and an RGB image this
        is ``(1, 224, 224, 3)``.
    """
    # Takes RGB image and loads as PIL.Image.Image type
    img = image.load_img(img_path, target_size=target_size)

    # Convert the PIL.Image.Image type to a 3D tensor
    x = image.img_to_array(img)

    # Add the batch axis in front so single images can be stacked later
    return np.expand_dims(x, axis=0)

def paths_tensor(img_paths):
    """Convert several image paths into one stacked 4D tensor.

    Each path is loaded with ``path_tensor`` (progress shown through
    ``tqdm``) and the per-image tensors are stacked along the first axis.
    """
    per_image = []
    for single_path in tqdm(img_paths):
        per_image.append(path_tensor(single_path))
    return np.vstack(per_image)
    

### Pre-process the Data - RGB to BGR, Mean Pixel 

In [None]:
# Under consideration

### Run Data Pre-processing

In [None]:
from PIL import ImageFile

# PIL flag: tolerate truncated image files while loading.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Pre-process the data for Keras: each split becomes a float32 tensor
# divided by 255, in the order train, validation, test.
train_tensors, valid_tensors, test_tensors = [
    paths_tensor(split_files).astype('float32') / 255
    for split_files in (train_files, valid_files, test_files)
]

### Check Shape of Inputs

In [None]:
# Report the shape of every input and target, one line per object.
shape_report = [
    ("The shape of the training tensor is:", train_tensors),
    ("The shape of the training targets is:", train_targets),
    ("The shape of the validation tensor is:", valid_tensors),
    ("The shape of the validation targets is:", valid_targets),
    ("The shape of the testing tensor is:", test_tensors),
    ("The shape of the testing targets is:", test_targets),
]
for caption, data in shape_report:
    print(caption, data.shape)

## Obtain Bottleneck Features

In [38]:
# Input dimensions passed as `input_shape` to every pretrained network below.
img_width = 224
img_height = 224
img_channels = 3

### Obtain Bottleneck Features - VGG16

In [None]:
# `applications` is first used in this cell, but the notebook's only other
# import of it sits in the later "Model Architecture" section. Importing it
# here keeps a fresh-kernel top-to-bottom run from failing with a NameError.
from keras import applications

# Load VGG16 without its top layers (include_top=False), using ImageNet weights.
VGG16_bottleneck_features = applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(img_width, img_height, img_channels)
)

### Obtain Bottleneck Features - VGG19

In [None]:
# Load VGG19 without its top layers (include_top=False), using ImageNet weights.
VGG19_bottleneck_features = applications.VGG19(
    include_top=False,
    weights='imagenet',
    input_shape=(img_width, img_height, img_channels)
)

### Obtain Bottleneck Features - Xception

In [None]:
# Load Xception without its top layers (include_top=False), using ImageNet weights.
Xception_bottleneck_features = applications.Xception(
    include_top=False,
    weights='imagenet',
    input_shape=(img_width, img_height, img_channels)
)

### Obtain Bottleneck Features - Inception V3

In [None]:
# Load Inception V3 without its top layers (include_top=False), using ImageNet weights.
Inception_bottleneck_features = applications.InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=(img_width, img_height, img_channels)
)

### Obtain Bottleneck Features - ResNet50

In [37]:
# Load ResNet50 without its top layers (include_top=False), using ImageNet weights.
ResNet50_bottleneck_features = applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(img_width, img_height, img_channels)
)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


## Model Architecture

In [None]:
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential
from keras.models import Sequential, Model, load_model
from keras import applications
from keras import optimizers

### Model Architecture VGG16

In [None]:

# Classifier head for the VGG16 base:
# flatten -> 256-unit ReLU layer -> dropout 0.5 -> single sigmoid unit.
VGG16_model = Sequential([
    Flatten(input_shape=VGG16_bottleneck_features.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Full network: the base's input, with the head applied to the base's output.
model_VGG16 = Model(
    inputs=VGG16_bottleneck_features.input,
    outputs=VGG16_model(VGG16_bottleneck_features.output)
)

### Model Architecture VGG19

In [None]:

# Classifier head for the VGG19 base:
# flatten -> 256-unit ReLU layer -> dropout 0.5 -> single sigmoid unit.
VGG19_model = Sequential([
    Flatten(input_shape=VGG19_bottleneck_features.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Full network: the base's input, with the head applied to the base's output.
model_VGG19 = Model(
    inputs=VGG19_bottleneck_features.input,
    outputs=VGG19_model(VGG19_bottleneck_features.output)
)

### Model Architecture Xception

In [None]:

# Classifier head for the Xception base:
# flatten -> 256-unit ReLU layer -> dropout 0.5 -> single sigmoid unit.
Xception_model = Sequential([
    Flatten(input_shape=Xception_bottleneck_features.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Full network: the base's input, with the head applied to the base's output.
model_Xception = Model(
    inputs=Xception_bottleneck_features.input,
    outputs=Xception_model(Xception_bottleneck_features.output)
)

### Model Architecture Inception V3

In [None]:

# Classifier head for the Inception V3 base:
# flatten -> 256-unit ReLU layer -> dropout 0.5 -> single sigmoid unit.
Inception_model = Sequential([
    Flatten(input_shape=Inception_bottleneck_features.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Full network: the base's input, with the head applied to the base's output.
model_Inception = Model(
    inputs=Inception_bottleneck_features.input,
    outputs=Inception_model(Inception_bottleneck_features.output)
)

### Model Architecture ResNet50

In [None]:

# Classifier head for the ResNet50 base:
# flatten -> 256-unit ReLU layer -> dropout 0.5 -> single sigmoid unit.
ResNet50_model = Sequential([
    Flatten(input_shape=ResNet50_bottleneck_features.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Full network: the base's input, with the head applied to the base's output.
model_ResNet50 = Model(
    inputs=ResNet50_bottleneck_features.input,
    outputs=ResNet50_model(ResNet50_bottleneck_features.output)
)

## Compile the Model(s)

In [39]:
# SGD learning rate and momentum used by every compile call below.
lr, momentum = 1e-4, 0.9

### Compile the VGG16 Model

In [None]:
# SGD with momentum, binary cross-entropy loss, accuracy reported as metric.
model_VGG16.compile(
    optimizer=optimizers.SGD(lr=lr, momentum=momentum),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

### Compile the VGG19 Model

In [None]:
# SGD with momentum, binary cross-entropy loss, accuracy reported as metric.
model_VGG19.compile(
    optimizer=optimizers.SGD(lr=lr, momentum=momentum),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

### Compile the Xception Model

In [None]:
# SGD with momentum, binary cross-entropy loss, accuracy reported as metric.
model_Xception.compile(
    optimizer=optimizers.SGD(lr=lr, momentum=momentum),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

### Compile the Inception V3 Model

In [None]:
# SGD with momentum, binary cross-entropy loss, accuracy reported as metric.
model_Inception.compile(
    optimizer=optimizers.SGD(lr=lr, momentum=momentum),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

### Compile the ResNet 50 model

In [None]:
# SGD with momentum, binary cross-entropy loss, accuracy reported as metric.
model_ResNet50.compile(
    optimizer=optimizers.SGD(lr=lr, momentum=momentum),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

### Model Summary

## Save the Best Performing Model(s)

In [None]:
from keras.callbacks import ModelCheckpoint

# Every checkpoint below writes into 'saved_models/'. Create the directory up
# front so the first save does not fail on a fresh checkout.
if not os.path.isdir('saved_models'):
    os.makedirs('saved_models')

### Save the Best Performing Model VGG16

In [None]:
# Writes the VGG16 weights file, keeping only the best result seen so far.
VGG16_checkpointer = ModelCheckpoint(
    filepath='saved_models/weights.best.VGG16.hdf5',
    save_best_only=True,
    verbose=1
)

### Save the Best Performing Model VGG19

In [None]:
# Writes the VGG19 weights file, keeping only the best result seen so far.
VGG19_checkpointer = ModelCheckpoint(
    filepath='saved_models/weights.best.VGG19.hdf5',
    save_best_only=True,
    verbose=1
)

### Save the Best Performing Model Xception

In [None]:
# Writes the Xception weights file, keeping only the best result seen so far.
Xception_checkpointer = ModelCheckpoint(
    filepath='saved_models/weights.best.Xception.hdf5',
    save_best_only=True,
    verbose=1
)

### Save the Best Performing Model Inception V3

In [None]:
# Writes the Inception V3 weights file, keeping only the best result seen so far.
Inception_checkpointer = ModelCheckpoint(
    filepath='saved_models/weights.best.Inception.hdf5',
    save_best_only=True,
    verbose=1
)

### Save the Best Performing Model ResNet50

In [None]:
# Writes the ResNet50 weights file, keeping only the best result seen so far.
ResNet50_checkpointer = ModelCheckpoint(
    filepath='saved_models/weights.best.ResNet50.hdf5',
    save_best_only=True,
    verbose=1
)

### Image Augmentation

In [None]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Random geometric augmentation applied to the training batches:
# rotation, horizontal/vertical shifts, shear, zoom and both flips.
# Pixels exposed by a transform are filled with the nearest pixel value.
train_datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest'
)

# No train_datagen.fit(train_tensors) call here: fit() only computes the
# statistics used by featurewise_center, featurewise_std_normalization and
# zca_whitening. None of those options is enabled above, so the call only
# made an extra copy of the whole training tensor.

## Fit the Model(s)

In [44]:
# Number of epochs and mini-batch size used by every fit call below.
epochs, batch_size = 50, 64

### Fit the VGG16 Model

In [None]:
# Augmented training batches for this run.
VGG16_train_flow = train_datagen.flow(train_tensors, train_targets, batch_size=batch_size)

# Train on the augmented batches, validate on the untouched validation split,
# and let the VGG16 checkpointer save the weights.
VGG16_history = model_VGG16.fit_generator(
    VGG16_train_flow,
    steps_per_epoch=train_tensors.shape[0] // batch_size,
    epochs=epochs,
    validation_data=(valid_tensors, valid_targets),
    callbacks=[VGG16_checkpointer]
)

### Fit the VGG19 Model

In [None]:
# Train VGG19 on augmented batches and validate on the untouched validation split.
# The callback must be VGG19_checkpointer: the VGG16 checkpointer used here
# before overwrote 'weights.best.VGG16.hdf5' with VGG19 weights and never
# produced the VGG19 weights file that is loaded later in the notebook.
VGG19_history = model_VGG19.fit_generator(
        train_datagen.flow(train_tensors, train_targets, batch_size = batch_size),
        epochs = epochs,
        steps_per_epoch = train_tensors.shape[0] // batch_size,
        validation_data = (valid_tensors, valid_targets),
        callbacks=[VGG19_checkpointer])

### Fit the Xception Model

In [None]:
# Augmented training batches for this run.
Xception_train_flow = train_datagen.flow(train_tensors, train_targets, batch_size=batch_size)

# Train on the augmented batches, validate on the untouched validation split,
# and let the Xception checkpointer save the weights.
Xception_history = model_Xception.fit_generator(
    Xception_train_flow,
    steps_per_epoch=train_tensors.shape[0] // batch_size,
    epochs=epochs,
    validation_data=(valid_tensors, valid_targets),
    callbacks=[Xception_checkpointer]
)

### Fit the Inception V3 Model

In [None]:
# Train Inception V3 on augmented batches and validate on the untouched validation split.
# The callback must be Inception_checkpointer: the Xception checkpointer used
# here before overwrote 'weights.best.Xception.hdf5' with Inception weights and
# never produced the Inception weights file that is loaded later in the notebook.
Inception_history = model_Inception.fit_generator(
        train_datagen.flow(train_tensors, train_targets, batch_size = batch_size),
        epochs = epochs,
        steps_per_epoch = train_tensors.shape[0] // batch_size,
        validation_data = (valid_tensors, valid_targets),
        callbacks=[Inception_checkpointer])

### Fit the ResNet50 Model

In [None]:
# Train ResNet50 on augmented batches and validate on the untouched validation split.
# The callback must be ResNet50_checkpointer: the Xception checkpointer used
# here before overwrote 'weights.best.Xception.hdf5' with ResNet50 weights and
# never produced the ResNet50 weights file that is loaded later in the notebook.
ResNet50_history = model_ResNet50.fit_generator(
        train_datagen.flow(train_tensors, train_targets, batch_size = batch_size),
        epochs = epochs,
        steps_per_epoch = train_tensors.shape[0] // batch_size,
        validation_data = (valid_tensors, valid_targets),
        callbacks=[ResNet50_checkpointer])

## Load the Best Saved Weights

In [None]:
# Restore, for each network, the weights file written by its checkpoint.
for net, weights_file in (
        (model_VGG16, 'saved_models/weights.best.VGG16.hdf5'),
        (model_VGG19, 'saved_models/weights.best.VGG19.hdf5'),
        (model_Xception, 'saved_models/weights.best.Xception.hdf5'),
        (model_Inception, 'saved_models/weights.best.Inception.hdf5'),
        (model_ResNet50, 'saved_models/weights.best.ResNet50.hdf5')):
    net.load_weights(weights_file)

In [None]:
# Keep every model's test predictions instead of overwriting one variable
# five times, which threw away four of the five inference passes.
model_predictions = {
    'VGG16': model_VGG16.predict(test_tensors),
    'VGG19': model_VGG19.predict(test_tensors),
    'Xception': model_Xception.predict(test_tensors),
    'Inception': model_Inception.predict(test_tensors),
    'ResNet50': model_ResNet50.predict(test_tensors),
}

# `predictions` is what the submission cell reads. It stays bound to the
# ResNet50 output, the value it ended up with before this change.
predictions = model_predictions['ResNet50']

In [None]:
sample_submission = pd.read_csv("Datasets/sample_submission.csv")
img_path = "Datasets/test/"

# The first CSV column holds the image names. Reading the column directly
# replaces the row-by-row `.loc[i][0]` loop, which relied on positional
# indexing of a label-indexed row and upcast the names when the row mixed
# integer and float columns.
name_column = sample_submission.iloc[:, 0]

# Image names, kept in the column's own dtype.
test_names = np.array(name_column)

# One image path per submission row: <img_path><integer name>.jpg
file_paths = [img_path + str(int(name)) + '.jpg' for name in name_column]

In [None]:
sample_submission = pd.read_csv("Datasets/sample_submission.csv")

# The old loop iterated over `test_targets`, which yields the DataFrame's
# column names, so no row ever matched and the sample values were written
# out unchanged.
# predictions[i] belongs to test_files[i] (the test tensors are built from
# test_files in order), so key each prediction by the numeric file name.
prediction_by_name = {
    int(os.path.splitext(os.path.basename(file_path))[0]): float(score)
    for file_path, score in zip(test_files, np.ravel(predictions))
}

# Look up the prediction for every row through its 'name' value.
sample_submission['invasive'] = (
    sample_submission['name'].astype(int).map(prediction_by_name)
)

# Refuse to write a submission containing rows without a prediction.
if sample_submission['invasive'].isnull().any():
    raise ValueError(
        "No prediction available for some names in sample_submission.csv")

sample_submission.to_csv("submit_XXX.csv", index=False)