## Initial import

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
import shutil
from glob import glob 

from sklearn.utils import shuffle 
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

## Specify a GPU
import os
# os.environ["CUDA_VISIBLE_DEVICES"]="0"

Using TensorFlow backend.


In [2]:
base_dir = '/data/liu/dog_breeds/'
train_dir = '/data/liu/dog_breeds/train/'
test_dir = '/data/liu/dog_breeds/test/'
origin_data_dir = '/data/liu/all_images/'
# train_folder = os.path.join(base_dir,'base_dir/train')
# valid_folder = os.path.join(base_dir,'base_dir/valid')
# test_folder = os.path.join(base_dir,'base_dir/test')
train_folder = os.path.join(base_dir,'split_set/train')
valid_folder = os.path.join(base_dir,'split_set/valid')
test_folder = os.path.join(base_dir,'split_set/test')

In [3]:
# test_df = pd.read_csv(os.path.join(base_dir,"test_label_name.csv"))
# df_train = pd.read_csv(os.path.join(base_dir,'df_train.csv'))
# df_val = pd.read_csv(os.path.join(base_dir,'df_val.csv'))
test_df = pd.read_csv(os.path.join(base_dir,"df_test.csv"))
df_train = pd.read_csv(os.path.join(base_dir,'df_train.csv'))
df_val = pd.read_csv(os.path.join(base_dir,'df_val.csv'))

## Only run for the first time: split training data into training and validation

In [5]:
# df = pd.DataFrame({'path': glob(os.path.join(base_dir,'*.jpg'))})
train_df = pd.read_csv(os.path.join(base_dir,"train_label_name.csv"))
# df_data = df.merge(labels, on = "id")
train_df .head()

In [4]:
test_df = pd.read_csv(os.path.join(base_dir,"test_label_name.csv"))
# df_data = df.merge(labels, on = "id")
test_df.head()

Unnamed: 0,id,breed_id,breed
0,n02108000_2464.jpg,n02108000,EntleBucher
1,n02112350_9195.jpg,n02112350,keeshond
2,n02088238_10072.jpg,n02088238,basset
3,n02091244_3631.jpg,n02091244,Ibizan_hound
4,n02095570_3534.jpg,n02095570,Lakeland_terrier


In [6]:
## split tarining data into training set and validation set
train_y = train_df['breed']
test_y = test_df['breed']
df_train, df_val = train_test_split(train_df, test_size=0.10, random_state=101, stratify=train_y)

In [14]:
## save training set and validation set into files
df_train.to_csv(os.path.join(base_dir,'df_train.csv'),index=False)
df_val.to_csv(os.path.join(base_dir,'df_val.csv'),index=False)

In [9]:
# Set the id as the index in train_df
train_df.set_index('id', inplace=True)
train_df.head()

Unnamed: 0_level_0,breed_id,breed
id,Unnamed: 1_level_1,Unnamed: 2_level_1
n02106382_112.jpg,n02106382,Bouvier_des_Flandres
n02096294_4535.jpg,n02096294,Australian_terrier
n02096294_6785.jpg,n02096294,Australian_terrier
n02107312_3673.jpg,n02107312,miniature_pinscher
n02113023_7914.jpg,n02113023,Pembroke


In [10]:
# Set the id as the index in test_df
test_df.set_index('id', inplace=True)
test_df.head()

Unnamed: 0_level_0,breed_id,breed
id,Unnamed: 1_level_1,Unnamed: 2_level_1
n02108000_2464.jpg,n02108000,EntleBucher
n02112350_9195.jpg,n02112350,keeshond
n02088238_10072.jpg,n02088238,basset
n02091244_3631.jpg,n02091244,Ibizan_hound
n02095570_3534.jpg,n02095570,Lakeland_terrier


In [9]:
#Create folder fistly
train_y = pd.read_csv(os.path.join(base_dir,'all_dogs.csv'))['breed']

train_folder = os.path.join(base_dir,'split_set/train')
valid_folder = os.path.join(base_dir,'split_set/valid')
test_folder = os.path.join(base_dir,'split_set/test')
for fold in [train_folder, valid_folder, test_folder]:
    for subf in train_y.unique():
        os.makedirs(os.path.join(fold, subf))

In [24]:
## copy images into new generated training and validation directory. src, dsg -> original path, destination path
# df_train.set_index('id', inplace=True)
# df_val.set_index('id', inplace=True)
# test_df.set_index('id', inplace=True)

for image in df_train.index.values:
    breed = str(df_train.loc[image,'breed']) # get the label for a certain image
    src = os.path.join(origin_data_dir, image)
    dst = os.path.join(train_folder, breed, image)
    shutil.copyfile(src, dst)
    
for image in df_val.index.values:
    breed = str(df_val.loc[image,'breed']) # get the label for a certain image
    src = os.path.join(origin_data_dir, image)
    dst = os.path.join(valid_folder, breed, image)
    shutil.copyfile(src, dst)


In [25]:
## copy test images (breeds as folders' names)

for image in test_df.index.values:
    breed = str(test_df.loc[image,'breed']) # get the label for a certain image
    src = os.path.join(origin_data_dir, image)
    dst = os.path.join(test_folder, breed, image)
    shutil.copyfile(src, dst)

In [6]:
## train model

# import matplotlib.pyplot as plt
# import matplotlib.image as mpimg

## Image Data Preprocessing

In [4]:
IMAGE_SIZE = 331
num_train_samples = len(df_train)
num_val_samples = len(df_val)
num_test_samples = len(test_df)

train_batch_size = 32
val_batch_size = 32
test_batch_size = 32
print("Num of train samples: %d" % num_train_samples)
print("Num of validation samples: %d" % num_val_samples)
print("Num of test samples: %d" % num_test_samples)

train_steps = np.ceil(num_train_samples / train_batch_size)
val_steps = np.ceil(num_val_samples / val_batch_size)
test_steps = np.ceil(num_test_samples / test_batch_size)
print("train_steps: %d" % train_steps)
print("val_steps: %d" % val_steps)
print("test_steps: %d" % test_steps)

Num of train samples: 13171
Num of validation samples: 3293
Num of test samples: 4116
train_steps: 412
val_steps: 103
test_steps: 129


In [5]:
from keras.preprocessing.image import ImageDataGenerator
## Noramlize RGB values
datagen = ImageDataGenerator(
    rescale=1./255,
    # preprocessing_function=lambda x:(x - x.mean()) / x.std() if x.std() > 0 else x,
    horizontal_flip=True,
    vertical_flip=True)

In [6]:
train_gen = datagen.flow_from_directory(train_folder,
                                        target_size=(IMAGE_SIZE,IMAGE_SIZE),
                                        batch_size=train_batch_size,
                                        class_mode='categorical')

val_gen = datagen.flow_from_directory(valid_folder,
                                      target_size=(IMAGE_SIZE,IMAGE_SIZE),
                                      batch_size=val_batch_size,
                                      class_mode='categorical')

test_gen = datagen.flow_from_directory(test_folder,
                                       target_size=(IMAGE_SIZE,IMAGE_SIZE),
                                       batch_size=test_batch_size,
                                       class_mode='categorical',
                                       shuffle=False)

Found 13171 images belonging to 120 classes.
Found 3293 images belonging to 120 classes.
Found 4116 images belonging to 120 classes.


## InceptionResNetV2 Model

In [7]:
## InceptionResNetV2
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.models import Sequential, Model
from keras import layers
from keras import optimizers
from keras.layers import Concatenate, BatchNormalization, Flatten, Dense, Input, Dropout, MaxPooling2D, Concatenate, GlobalMaxPooling2D, GlobalAveragePooling2D, Lambda, Multiply, LSTM, Bidirectional, PReLU, MaxPooling1D

IMG_SIZE = (IMAGE_SIZE, IMAGE_SIZE)
IN_SHAPE = (*IMG_SIZE, 3)

dropout_dense=0.5

conv_base = InceptionResNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=IN_SHAPE
)

model = Sequential()
model.add(conv_base)
# model.add(GlobalAveragePooling2D())
# # model.add(layers.Dense(120, activation = "sigmoid"))
# model.add(layers.Dense(1024, activation = "relu"))
# model.add(layers.Dense(120, activation = "sigmoid"))
model.add(BatchNormalization())
model.add(Dropout(0.001))
model.add(GlobalMaxPooling2D())
#model.add(layers.Dense(120, activation = "sigmoid"))
model.add(layers.Dense(512, activation = "relu"))
model.add(BatchNormalization())
model.add(Dropout(0.001))
model.add(layers.Dense(120, activation = "sigmoid"))


# conv_base.summary()
conv_base.Trainable=True

from keras import optimizers

# conv_base.trainable = False
model.compile(optimizers.Adam(0.0001), loss = 'categorical_crossentropy', metrics=["accuracy"])

TypeError: ('Invalid keyword argument: %s', 'dropout')

In [12]:
## Train inception_resnet model
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
h5_path = "inception_resnet_v2_relu_softmax_10.h5"
checkpoint = ModelCheckpoint(h5_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
earlystopper = EarlyStopping(monitor='val_loss', patience=2, verbose=1,restore_best_weights=True)
reducel = ReduceLROnPlateau(monitor='val_loss', patience=2, verbose=1, factor=0.5, min_lr = 0.000000001)
history = model.fit_generator(train_gen, steps_per_epoch=train_steps, 
                              validation_data=val_gen, validation_steps=val_steps,
                              epochs=10,
                              callbacks=[reducel, checkpoint])

Epoch 1/10

Epoch 00001: val_acc improved from -inf to 0.69967, saving model to inception_resnet_v2_relu_softmax_10.h5
Epoch 2/10

Epoch 00002: val_acc improved from 0.69967 to 0.70544, saving model to inception_resnet_v2_relu_softmax_10.h5
Epoch 3/10

Epoch 00003: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.

Epoch 00003: val_acc did not improve from 0.70544
Epoch 4/10

Epoch 00004: val_acc improved from 0.70544 to 0.72578, saving model to inception_resnet_v2_relu_softmax_10.h5
Epoch 5/10

Epoch 00005: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.

Epoch 00005: val_acc did not improve from 0.72578
Epoch 6/10

Epoch 00006: val_acc did not improve from 0.72578
Epoch 7/10

Epoch 00007: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.

Epoch 00007: val_acc improved from 0.72578 to 0.73459, saving model to inception_resnet_v2_relu_softmax_10.h5
Epoch 8/10

Epoch 00008: val_acc did not improve from 0.73459
Epoch 9/10

Epoch 00009: 

## LARGEFILE

In [8]:
## LARGEFILE
from keras.applications.nasnet import NASNetLarge
from keras.models import Sequential, Model
from keras import layers
from keras import optimizers
from keras.layers import Concatenate, BatchNormalization, Flatten, Dense, Input, Dropout, MaxPooling2D, Concatenate, GlobalMaxPooling2D, GlobalAveragePooling2D, Lambda, Multiply, LSTM, Bidirectional, PReLU, MaxPooling1D

IMG_SIZE=331
IMG_SIZE = (IMAGE_SIZE, IMAGE_SIZE)
IN_SHAPE = (*IMG_SIZE, 3)


conv_base= NASNetLarge(input_shape=IN_SHAPE, 
                       include_top=False, 
                       weights='imagenet')


model = Sequential()
model.add(conv_base)
model.add(BatchNormalization())
model.add(Dropout(0.001))
model.add(GlobalAveragePooling2D())
#model.add(layers.Dense(120, activation = "sigmoid"))
model.add(layers.Dense(512, activation = "relu"))
model.add(BatchNormalization())
model.add(Dropout(0.001))
model.add(layers.Dense(120, activation = "sigmoid"))

# conv_base.summary()
conv_base.Trainable=True

from keras import optimizers

# conv_base.trainable = False
model.compile(optimizers.Adam(0.0001), loss = 'categorical_crossentropy', metrics=["accuracy"])

In [9]:
## Train inception_resnet model
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
h5_path = "NASNetLarge_relu.h5"
checkpoint = ModelCheckpoint(h5_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
earlystopper = EarlyStopping(monitor='val_loss', patience=2, verbose=1,restore_best_weights=True)
reducel = ReduceLROnPlateau(monitor='val_loss', patience=2, verbose=1, factor=0.5, min_lr = 0.000000001)
history = model.fit_generator(train_gen, steps_per_epoch=train_steps, 
                              validation_data=val_gen, validation_steps=val_steps,
                              epochs=10,
                              callbacks=[reducel, checkpoint])

Epoch 1/10


ResourceExhaustedError: OOM when allocating tensor with shape[32,42,42,168]
	 [[Node: NASNet/separable_conv_2_normal_right1_4/separable_conv2d = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](NASNet/separable_conv_2_normal_right1_4/separable_conv2d/depthwise, normal_A_block_4/block_1/separable_conv_block_normal_right1_4/separable_conv_2_normal_right1_4/pointwise_kernel/read)]]
	 [[Node: metrics/acc/Mean/_20299 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_94703_metrics/acc/Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'NASNet/separable_conv_2_normal_right1_4/separable_conv2d', defined at:
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/asyncio/base_events.py", line 421, in run_forever
    self._run_once()
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/asyncio/base_events.py", line 1425, in _run_once
    handle._run()
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/asyncio/events.py", line 127, in _run
    self._callback(*self._args)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/tornado/gen.py", line 781, in inner
    self.run()
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/tornado/gen.py", line 742, in run
    yielded = self.gen.send(value)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2843, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2869, in _run_cell
    return runner(coro)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 3044, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 3209, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 3291, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-5a2d8b2c6ec1>", line 19, in <module>
    model.add(conv_base)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/keras/engine/sequential.py", line 165, in add
    layer(x)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/keras/engine/base_layer.py", line 457, in __call__
    output = self.call(inputs, **kwargs)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/keras/engine/network.py", line 564, in call
    output_tensors, _, _ = self.run_internal_graph(inputs, masks)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/keras/engine/network.py", line 721, in run_internal_graph
    layer.call(computed_tensor, **kwargs))
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/keras/layers/convolutional.py", line 1386, in call
    dilation_rate=self.dilation_rate)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py", line 3811, in separable_conv2d
    data_format=tf_data_format)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/ops/nn_impl.py", line 497, in separable_conv2d
    name=name)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 631, in conv2d
    data_format=data_format, name=name)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/home/liu/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[32,42,42,168]
	 [[Node: NASNet/separable_conv_2_normal_right1_4/separable_conv2d = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](NASNet/separable_conv_2_normal_right1_4/separable_conv2d/depthwise, normal_A_block_4/block_1/separable_conv_block_normal_right1_4/separable_conv_2_normal_right1_4/pointwise_kernel/read)]]
	 [[Node: metrics/acc/Mean/_20299 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_94703_metrics/acc/Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]


In [13]:
# accuracy metric 1
from keras.metrics import categorical_accuracy
result = model.evaluate_generator(test_gen,steps = test_steps)
print(model.metrics_names)
print(result)

['loss', 'acc']
[1.199461695056615, 0.7429543245290529]


In [64]:
# accuracy metric 2
## return the probability of each class
# probs_120 = model.predict_generator(test_gen,steps = test_steps,verbose=1)
# y_pred = np.argmax(probs_120,axis=1)
# accuracy_score(test_gen.classes, y_pred)

0.738581146744412

In [9]:
import seaborn as sns

from sklearn.metrics import confusion_matrix
conf_mat = confusion_matrix(test_gen.classes, y_pred)
fig, ax = plt.subplots(figsize=(20,20))
sns.heatmap(conf_mat, annot=True, fmt='d',
            xticklabels=range(0,120), yticklabels=range(0,120))
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

In [31]:
#!python -c 'import keras; print(keras.__version__)'