# Fast Feature Extraction With Data Augmentation

Is it a triangle, circle, or square?

https://www.kaggle.com/cactus3/basicshapes/home
28x28

     There are two parts of an imagine classification network:
     it begins witn a series of pooling and convolution layers, then ends with a densely connect classifier
     The convolutional base of a model is the first part
     pg. 143 - 144
     
    

### Initialize convolutional base

In [1]:
from keras.applications.mobilenetv2 import MobileNetV2


conv_base = MobileNetV2(
    weights='imagenet',
    input_shape=(224,224, 3),
    include_top=False # exclude the densely connected classifer, which sits on top of hte convolutional network
)
conv_base.trainable = False
conv_base.summary()

Using TensorFlow backend.
  return f(*args, **kwds)


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu

### Add a densely connected classifier on top of the convolutional base

In [2]:
from keras import models, layers

model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu', input_dim=(7*7*1280)))
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_224 (Model) (None, 7, 7, 1280)        2257984   
_________________________________________________________________
flatten_1 (Flatten)          (None, 62720)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               16056576  
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257       
Total params: 18,314,817
Trainable params: 16,056,833
Non-trainable params: 2,257,984
_________________________________________________________________


### Load samples into Pandas DataFrame

In [3]:
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
import os
import pandas as pd
from sklearn.model_selection import train_test_split

# initialize model

# augment training data to produce a model capable of handling data variations
train_datagen = ImageDataGenerator(
    rescale=1./255, # rescale to target values between 0 and 255 (default between 0 and 1)
    rotation_range=40, # train on variations rotated up to 40 degrees
    width_shift_range=0.2, # train using variations off-center on x-axis by factor of 0.2
    height_shift_range=0.2, # train using variations off-center on y-axis by a factor of 0.2
    shear_range=0.2, # train using variations sheared/warped by a factor of 0.2
    zoom_range=0.2, # train using variations zoomed by a factor of 0.2
    horizontal_flip=True, # x-axis flip
    vertical_flip=True # y-axis flip
)

test_datagen = ImageDataGenerator(
    rescale=1./255, # rescale to target values between 0 and 255 (default between 0 and 1)
)

# walk through './data/shapes' and load filenames into a dataframe with labels

root, dirs, files = next(os.walk('../trainers/shapes/data/'))

samples_df = pd.DataFrame([
    {
        'label': file.split('_')[0], # filename format '<label>_<int>.png'
        'filename': file
    } for file in files
])
# samples_df = pd.concat([square_sample_df, circle_sample_df, triangle_sample_df])
# samples_df.reset_index(inplace=True, drop=True)


samples_df


Unnamed: 0,filename,label
0,triangle_23.png,triangle
1,triangle_37.png,triangle
2,circle_6.png,circle
3,square_17.png,square
4,circle_10.png,circle
5,circle_38.png,circle
6,circle_39.png,circle
7,triangle_1.png,triangle
8,circle_11.png,circle
9,square_16.png,square


In [4]:
from keras.callbacks import Callback
import time

train_df, validation_df = train_test_split(samples_df, test_size=0.25)

train_df.reset_index(inplace=True, drop=True)
validation_df.reset_index(inplace=True, drop=True)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory='../trainers/shapes/data/',
    batch_size=len(train_df),
    class_mode='binary', # use binary labels for binary_crossentropy loss calculations
    target_size=(224,224),
    y_col='label',
)


validation_generator = test_datagen.flow_from_dataframe(
    dataframe=validation_df,
    directory='../trainers/shapes/data/',
    batch_size=len(validation_df),
    class_mode='binary', # use binary labels for binary_crossentropy loss calculations
    target_size=(224,224),
    y_col='label',
    classes=['square', 'circle', 'triangle']

)

model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.Adam()
    metrics=['binary_accuracy'] # accuracy
)

class TimeHistory(Callback):
    def on_train_begin(self, logs={}):
        self.times = []

    def on_epoch_begin(self, batch, logs={}):
        self.epoch_time_start = time.time()

    def on_epoch_end(self, batch, logs={}):
        self.times.append(time.time() - self.epoch_time_start)

time_callback = TimeHistory()

# https://stackoverflow.com/questions/43178668/record-the-computation-time-for-each-epoch-in-keras-during-model-fit
history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50,
    callbacks=[time_callback]
)

model.save('./data/shapes_model.h5')


Found 225 images belonging to 3 classes.
Found 75 images belonging to 3 classes.
Epoch 1/30


ResourceExhaustedError: OOM when allocating tensor with shape[225,225,225,3] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu
	 [[Node: mobilenetv2_1.00_224/Conv1_pad/Pad = Pad[T=DT_FLOAT, Tpaddings=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_mobilenetv2_1.00_224_input_0_0, mobilenetv2_1.00_224/Conv1_pad/Pad/paddings)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
