Import dataset images of forest and foliage 

In [1]:
import matplotlib.pyplot as ply
import cv2
from sklearn.datasets import load_files
from skimage.io import imread_collection 
from keras.utils import np_utils
import numpy as np
import pandas as pd 
from glob import glob
import os
import math

Using TensorFlow backend.


In [2]:
from sklearn.model_selection import train_test_split

path = "Datasets/train/"
# Function to load train, test and validation datasets

def load_training(path):
    
    # Load in training labels as a Pandas dataframe
    train_targets_data = pd.read_csv('Datasets/train_labels.csv',index_col = False)
    
    # Convert Dataframe into Numpy array
    train_targets = np.array(train_targets_data['invasive'].ix[:,])
    
    # Training image file paths stored as numpy array 'forest_files'
    train_files = np.array(glob("Datasets/train/*"))
    
    # Glob is unordered, sort in increasing numeric file name
    train_files = sorted(train_files, key=lambda name: int(name[15:-4]))
    
    # Insert filepath as name for training set
    train_targets_data['name'] = train_files
    
    return train_files, train_targets_data, train_targets
    
train_files, train_targets_data, train_targets = load_training(path)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  if sys.path[0] == '':


In [3]:
train_targets_data.head()

Unnamed: 0,name,invasive
0,Datasets/train/1.jpg,0
1,Datasets/train/2.jpg,0
2,Datasets/train/3.jpg,1
3,Datasets/train/4.jpg,0
4,Datasets/train/5.jpg,1


In [3]:
from sklearn.model_selection import train_test_split

# Need to check that both training and valid files match the target labels.

train_files, valid_files, train_targets, valid_targets = train_test_split(train_files, train_targets, test_size = 0.2)

In [4]:
path = "Datasets/test"

def load_testing(path):
    
    # Load in testing labels as a Pandas dataframe
    test_targets = pd.read_csv('Datasets/sample_submission.csv')
    
    # Forest testing image file paths stored as numpy array 'testing_images'
    test_files = np.array(glob('Datasets/test/*'))
    
    # Glob is unordered, sort in increasing numeric file name
    test_files = sorted(test_files, key = lambda name: int(name[14:-4]))
    
    return test_files, test_targets

test_files, test_targets = load_testing(path)

In [6]:
test_targets.head()

Unnamed: 0,name,invasive
0,1,0.5
1,2,0.5
2,3,0.5
3,4,0.5
4,5,0.5


### File Statistics

In [5]:
print('There are %s total forest images.\n' % len(np.hstack([train_files, valid_files, test_files])))
print('There are %d training forest images.' % len(train_files))
print('There are %d validation forest images.' % len(valid_files))
print('There are %d test forest images.'% len(test_files))

There are 3826 total forest images.

There are 1836 training forest images.
There are 459 validation forest images.
There are 1531 test forest images.


### Pre-process the Data - Conversion into 4D Tensor

In [6]:
# Function to pre-proces the data into 4D array

from keras.preprocessing import image
from tqdm import tqdm

def path_tensor(img_path):
    
    # Takes RGB image and loads as PIL.Image.Image type
    img = image.load_img(img_path, target_size = (224, 224))
    
    # Convert the PIL.Image.Image type to a 3D tensor with the shape (224, 224, 3)
    x = image.img_to_array(img)
    
    # 3D tensor conversion to 4D tensor, shape (1, 224, 224, 3). Returns 4D tensor
    return np.expand_dims(x, axis = 0)

def paths_tensor(img_paths):
    list_tensors = [path_tensor(img_path) for img_path in tqdm(img_paths)]
    return np.vstack(list_tensors)   
    

### Pre-process the Data - RGB to BGR, Mean Pixel 

In [None]:
# Under consideration

### Run Data Pre-processing

In [7]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Pre-Process the data for Keras
train_tensors = paths_tensor(train_files).astype('float32')/255
valid_tensors = paths_tensor(valid_files).astype('float32')/255
test_tensors = paths_tensor (test_files).astype('float32')/255

100%|██████████| 1836/1836 [00:52<00:00, 34.84it/s]
100%|██████████| 459/459 [00:13<00:00, 35.11it/s]
100%|██████████| 1531/1531 [00:41<00:00, 36.83it/s]


### Check Shape of Inputs

In [8]:
print("The shape of the training tensor is:",train_tensors.shape)
print("The shape of the training targets is:",train_targets.shape)
print("The shape of the validation tensor is:",valid_tensors.shape)
print("The shape of the validation targets is:",valid_targets.shape)
print("The shape of the testing tensor is:",test_tensors.shape)
print("The shape of the testing targets is:",test_targets.shape)

The shape of the training tensor is: (1836, 224, 224, 3)
The shape of the training targets is: (1836,)
The shape of the validation tensor is: (459, 224, 224, 3)
The shape of the validation targets is: (459,)
The shape of the testing tensor is: (1531, 224, 224, 3)
The shape of the testing targets is: (1531, 2)


### Obtain Bottleneck Features - VGG16

In [20]:
# Load in Xception bottleneck features

from keras import applications
from keras.preprocessing import image


VGG16_bottleneck_features = applications.VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

### Obtain Bottleneck Features - Xception

In [21]:
# Load in Xception bottleneck features

from keras import applications
from keras.preprocessing import image


Xception_bottleneck_features = applications.Xception(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

### Model Architecture VGG16

In [22]:
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential
from keras.models import Sequential, Model, load_model
from keras import applications
from keras import optimizers

VGG16_model = Sequential()
VGG16_model.add(Flatten(input_shape = VGG16_bottleneck_features.output_shape[1:]))
VGG16_model.add(Dense(256, activation = 'relu'))
VGG16_model.add(Dropout(0.2))
VGG16_model.add(Dense(1, activation = 'sigmoid'))

model_VGG16 = Model(inputs = VGG16_bottleneck_features.input, outputs = VGG16_model(VGG16_bottleneck_features.output))


### Model Architecture Xception

In [23]:
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential
from keras.models import Sequential, Model, load_model
from keras import applications
from keras import optimizers

Xception_model = Sequential()
Xception_model.add(Flatten(input_shape = Xception_bottleneck_features.output_shape[1:]))
Xception_model.add(Dropout(0.2))
Xception_model.add(Dense(1, activation = 'sigmoid'))

model_Xception = Model(inputs = Xception_bottleneck_features.input, outputs = Xception_model(Xception_bottleneck_features.output))



### Compile the VGG16 Model

In [24]:
model_VGG16.compile(optimizer = 'rmsprop', loss = 'binary_crossentropy', metrics =['accuracy'])

### Compile the Xception Model

In [25]:
model_Xception.compile(optimizer = 'rmsprop', loss = 'binary_crossentropy', metrics =['accuracy'])

### Save the Best Performing Model VGG16

In [26]:
from keras.callbacks import ModelCheckpoint

VGG16_checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.VGG16.hdf5', verbose = 1, save_best_only=True)

### Save the Best Performing Model Xception

In [27]:
from keras.callbacks import ModelCheckpoint

Xception_checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.Xception.hdf5', verbose = 1, save_best_only=True)

### Image Augementation

In [28]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

train_datagen = ImageDataGenerator(
            rotation_range = 20,
            width_shift_range = 0.2,
            height_shift_range = 0.2,
            shear_range = 0.2,
            zoom_range = 0.2,
            horizontal_flip = True,
            fill_mode = 'nearest'
)

train_datagen.fit(train_tensors)

### Fit the VGG16 Model

In [None]:
epochs = 10
batch_size = 32

VGG16_history = model_VGG16.fit_generator(
        train_datagen.flow(train_tensors, train_targets, batch_size = batch_size),
        epochs = epochs,
        steps_per_epoch = train_tensors.shape[0] // batch_size,
        validation_data = (valid_tensors, valid_targets),
        callbacks=[VGG16_checkpointer])

Epoch 1/20
 2/28 [=>............................] - ETA: 4328s - loss: 5.8748 - acc: 0.4219

### Fit the Xception Model

In [29]:
epochs = 10
batch_size = 32

Xception_history = model_Xception.fit_generator(
        train_datagen.flow(train_tensors, train_targets, batch_size = batch_size),
        epochs = epochs,
        steps_per_epoch = train_tensors.shape[0] // batch_size,
        validation_data = (valid_tensors, valid_targets),
        callbacks=[Xception_checkpointer])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
model_Xception.load_weights('saved_models/weights.best.Xception.hdf5')

In [None]:
predictions = model_Xception.predict(test_tensors)

In [None]:
sample_submission = pd.read_csv("Datasets/sample_submission.csv")

for i, name in enumerate(test_targets):
    sample_submission.loc[sample_submission['name'] == name, 'invasive'] = predictions[i]

sample_submission.to_csv("submit.csv", index=False)