# Training an Image Classification model for NACTI

Fine-tune an ImageNet-pretrained ResNet50 to classify the species categories in the NACTI camera-trap dataset.

### Set up the environment

In [1]:
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras.utils.training_utils import multi_gpu_model
from keras.callbacks import ModelCheckpoint, TensorBoard

from collections import Counter

import pandas as pd

import os
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
# os.environ["CUDA_VISIBLE_DEVICES"]="0"

Using TensorFlow backend.


In [2]:
# Input resolution fed to the network, in pixels.
img_width = 224
img_height = 224

# CSV files describing the training and validation images.
# NOTE: despite its name, `validation_data_dir` holds the path of a CSV file.
train_dataframe_path = '/data/dataframes/speciesTrain.csv'
validation_data_dir = '/data/dataframes/speciesTest.csv'

# Output locations.
# NOTE: despite its name, `checkpoint_dir` is the path of the .h5 weights file itself.
checkpoint_dir = '/data/ResNet50/ResNet50_20190404_species_weights.h5'
tensorboard_dir = '/data/ResNet50/species/logs'

# Training schedule: sample budgets per epoch, number of epochs, batch size.
nb_train_samples = 50000
nb_validation_samples = 10000
epochs = 25
batch_size = 64
# gpu_count = 2

### Import the dataframes

In [3]:
# Load the train/validation image tables and add an `abs_file_path` column
# by prefixing every relative `file_path` with the dataset root.
train_df = pd.read_csv(train_dataframe_path).assign(
    abs_file_path=lambda frame: '/data/nacti/' + frame['file_path'])
val_df = pd.read_csv(validation_data_dir).assign(
    abs_file_path=lambda frame: '/data/nacti/' + frame['file_path'])

# No camera-trap location may contribute rows to both splits.
shared_location_rows = train_df['cam_location'].isin(val_df['cam_location'].unique())
assert not shared_location_rows.any(), "Train and validation are not disjoint"

# Class count: the smaller of the two splits' distinct-category counts.
min(train_df['category_name'].nunique(), val_df['category_name'].nunique())

16

In [4]:
# Distinct category labels present in the training table.
pd.unique(train_df['category_name'])

array(['american_black_bear', 'bobcat', 'cougar', 'coyote',
       'domestic_cow', 'domestic_dog', 'elk', 'gray_fox', 'moose',
       'mule_deer', 'red_deer', 'red_fox', 'vehicle', 'white_tailed_deer',
       'wild_turkey', 'wolf'], dtype=object)

In [11]:
# Give each distinct category name a consecutive integer id, numbered in the
# order the names are returned by `unique()`.
category_names = train_df['category_name'].unique()
dict(zip(category_names, range(len(category_names))))

{'american_black_bear': 0,
 'bobcat': 1,
 'cougar': 2,
 'coyote': 3,
 'domestic_cow': 4,
 'domestic_dog': 5,
 'elk': 6,
 'gray_fox': 7,
 'moose': 8,
 'mule_deer': 9,
 'red_deer': 10,
 'red_fox': 11,
 'vehicle': 12,
 'white_tailed_deer': 13,
 'wild_turkey': 14,
 'wolf': 15}

### Build the model

In [4]:
# Build the ResNet50 convolutional base with ImageNet weights and without its
# original classification head (include_top=False).
ResNet50 = applications.ResNet50(weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3))
print('Model loaded.')

# Size the output layer from the training labels instead of hard-coding the
# class count, so the head follows the dataframe if the category set changes.
num_classes = train_df['category_name'].nunique()

# Classifier head stacked on top of the convolutional base:
# flatten -> 256-unit ReLU -> dropout -> one softmax unit per category.
model = Sequential()
model.add(ResNet50)
model.add(Flatten(input_shape=model.output_shape[1:]))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Make it a multi-gpu model if available
# model = multi_gpu_model(model, gpus=gpu_count)

# Compile with SGD + momentum and a very small learning rate.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])

# Callbacks: write the weights file only when the monitored metric improves
# (save_best_only=True), and log to TensorBoard.
checkpointer = ModelCheckpoint(filepath=checkpoint_dir, verbose=1, save_best_only=True)
tboard = TensorBoard(tensorboard_dir)

Instructions for updating:
Colocations handled automatically by placer.




Model loaded.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


### Create Data Generators

In [11]:
# Training images get random augmentation on top of the 1/255 rescale;
# validation images are only rescaled.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

validation_datagen = ImageDataGenerator(rescale=1. / 255)

# Settings shared by both dataframe-backed generators.
flow_options = dict(
    x_col='abs_file_path',
    y_col='category_name',
    target_size=(img_width, img_height),
    batch_size=batch_size,
    shuffle=True,
    class_mode='categorical')

train_generator = train_datagen.flow_from_dataframe(train_df, **flow_options)
validation_generator = validation_datagen.flow_from_dataframe(val_df, **flow_options)

# Weight every class by (largest class count / its own count): the most
# frequent class gets 1.0 and rarer classes get proportionally larger weights.
counter = Counter(train_generator.classes)
max_val = float(max(counter.values()))
class_weights = {class_id: max_val / counter[class_id] for class_id in counter}
print(class_weights)

Found 115923 images belonging to 16 classes.
Found 31833 images belonging to 16 classes.
{0: 1.0, 1: 1.0, 2: 1.0283833813245578, 3: 1.0, 4: 1.0, 5: 21.008403361344538, 6: 1.0, 7: 1.5780337699226763, 8: 1.639881928501148, 9: 1.0, 10: 1.0, 11: 8.19000819000819, 12: 1.0, 13: 1.1978917105893627, 14: 2.986857825567503, 15: 26.954177897574123}


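Define the class weights: if the classes are nearly balanced (largest weight below 2), replace the computed weights with unit weights.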

In [12]:
# When the classes are already close to balanced (no weight reaches 2),
# drop the re-weighting and give every class a weight of 1.
# The fallback has to cover every class id present in `class_weights`: a
# hard-coded {0: 1, 1: 1} only fits a two-class problem and would leave all
# other classes without a weight.
if max(class_weights.values()) < 2:
    class_weights = dict.fromkeys(class_weights, 1)

### Train the model

In [13]:
# Fine-tune the model. The number of batches per epoch is derived from the
# configured sample budgets rather than from the generators' full length.
train_steps = nb_train_samples // batch_size
val_steps = nb_validation_samples // batch_size

# NOTE(review): max_queue_size is passed as batch_size*4; presumably the queue
# is counted in batches rather than images - confirm this size is intended.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=val_steps,
    class_weight=class_weights,
    max_queue_size=batch_size * 4,
    callbacks=[checkpointer, tboard])

Instructions for updating:
Use tf.cast instead.
Epoch 1/25

Epoch 00001: val_loss improved from inf to 1.08408, saving model to /data/ResNet50/ResNet50_20190404_species_weights.h5
Epoch 2/25

Epoch 00002: val_loss improved from 1.08408 to 0.88733, saving model to /data/ResNet50/ResNet50_20190404_species_weights.h5
Epoch 3/25

Epoch 00003: val_loss improved from 0.88733 to 0.84743, saving model to /data/ResNet50/ResNet50_20190404_species_weights.h5
Epoch 4/25

Epoch 00004: val_loss improved from 0.84743 to 0.78951, saving model to /data/ResNet50/ResNet50_20190404_species_weights.h5
Epoch 5/25

Epoch 00005: val_loss improved from 0.78951 to 0.78319, saving model to /data/ResNet50/ResNet50_20190404_species_weights.h5
Epoch 6/25

Epoch 00006: val_loss improved from 0.78319 to 0.70561, saving model to /data/ResNet50/ResNet50_20190404_species_weights.h5
Epoch 7/25

Epoch 00007: val_loss improved from 0.70561 to 0.65613, saving model to /data/ResNet50/ResNet50_20190404_species_weights.h5
Epoc