The most popular method to prevent overfitting in neural networks is adding dropout layers.

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
from keras.callbacks import EarlyStopping, TensorBoard, ModelCheckpoint
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.models import Sequential
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [None]:
# Number of target classes; it sizes the final softmax Dense layer of the
# models built below. Presumably equals the number of class sub-directories
# under 'data' -- verify against the dataset.
n_classes: int = 5

In [None]:
# Mini-batch size produced by the directory iterators. It must be defined
# before the iterators are created because they read it at construction time.
batch_size = 128

# Spatial size every image is resized to; matches the models' input shape.
image_size = (150, 150)

# Fraction of the images in each class folder that is held out for validation.
validation_fraction = 0.25

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1,
                                   horizontal_flip=True,
                                   vertical_flip=False,
                                   validation_split=validation_fraction)

# Validation images are only rescaled. Drawing them from the augmenting
# generator would distort them as well and make the validation metrics noisy.
# The split is taken from the sorted file list of each class, so using the same
# fraction on a second generator yields the complementary, non-overlapping
# subset.
validation_datagen = ImageDataGenerator(rescale=1./255,
                                        validation_split=validation_fraction)

# Plain rescaling generator kept for evaluating on a separate test set.
test_datagen = ImageDataGenerator(rescale=1./255)

training_set = train_datagen.flow_from_directory('data',
                                                 target_size=image_size,
                                                 batch_size=batch_size,
                                                 class_mode='categorical',
                                                 subset="training")

validation_set = validation_datagen.flow_from_directory('data',
                                                        target_size=image_size,
                                                        batch_size=batch_size,
                                                        class_mode='categorical',
                                                        subset="validation")

To monitor training and prevent overfitting, we introduce callbacks:

In [None]:
# Checkpoints are written into this directory, so make sure it exists.
os.makedirs('checkpoints', exist_ok=True)

# "~" is not expanded by the TensorBoard callback; passing the raw string would
# create a literal "~" directory under the current working directory.
log_dir = os.path.expanduser('~/notebooks/logs-lrscheduler')

# NOTE(review): this one list is passed to both fit() calls further down, so
# both runs write to the same checkpoint files and TensorBoard log directory,
# and the ModelCheckpoint instance is shared between them. Consider building a
# fresh list per run.
callbacks = [
    # Stop once validation accuracy has not improved for 5 consecutive epochs.
    EarlyStopping(monitor='val_acc', patience=5, verbose=2),
    # Save a checkpoint only when the monitored quantity improves.
    ModelCheckpoint('checkpoints/{epoch:02d}.h5', save_best_only=True),
    # Log training curves (and graph/images) for inspection in TensorBoard.
    TensorBoard(log_dir, write_graph=True, write_grads=True, write_images=True, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None),
]

Next, we define our model architecture **without dropout** and compile our model:

In [None]:
# Baseline CNN for 150x150 RGB images: two convolutional blocks followed by a
# dense classifier head. No Dropout layers are used here on purpose; this is
# the reference model the dropout variant is compared against.
model = Sequential([
    # Convolutional block 1: two 32-filter 3x3 convolutions, then 2x2 pooling.
    Conv2D(32, (3, 3), padding='same', input_shape=(150, 150, 3)),
    Activation('relu'),
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Convolutional block 2: two 64-filter 3x3 convolutions, then 2x2 pooling.
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier head: 512-unit hidden layer and an n_classes-way softmax.
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dense(n_classes),
    Activation('softmax'),
])
model.summary()

In [None]:
# Use the same optimizer as the dropout model so that dropout is the only
# difference between the two runs (SGD was also never imported).
optimizer = Adam()

# The metric is named 'acc' so the logged keys are 'acc' / 'val_acc', which is
# what the EarlyStopping callback and the plots below read. With 'accuracy',
# current Keras versions log 'accuracy' / 'val_accuracy' instead.
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])

In [None]:
# Upper bound on the number of epochs; EarlyStopping may end training sooner.
n_epochs = 50
# Kept because the directory iterators reference this name for their batch size.
batch_size = 128

# training_set / validation_set already yield complete batches, so batch_size
# must not be passed to fit() for generator inputs.
history = model.fit(training_set,
                    epochs=n_epochs,
                    validation_data=validation_set,
                    verbose=1,
                    callbacks=callbacks)

Now, let's add dropout layers to our model architecture. We add one after each convolutional block and one after the first fully connected layer:

In [None]:
# Same architecture as the baseline model, with a Dropout layer after each
# convolutional block (rate 0.25) and after the hidden dense layer (rate 0.5).
model_dropout = Sequential()

# Convolutional block 1. The input shape is given explicitly (150x150 RGB, the
# target_size of the directory iterators) because no X_train array exists in
# this notebook.
model_dropout.add(Conv2D(32, (3, 3), padding='same', input_shape=(150, 150, 3)))
model_dropout.add(Activation('relu'))
model_dropout.add(Conv2D(32, (3, 3)))
model_dropout.add(Activation('relu'))
model_dropout.add(MaxPooling2D(pool_size=(2, 2)))
model_dropout.add(Dropout(0.25))

# Convolutional block 2.
model_dropout.add(Conv2D(64, (3, 3), padding='same'))
model_dropout.add(Activation('relu'))
model_dropout.add(Conv2D(64, (3, 3)))
model_dropout.add(Activation('relu'))
model_dropout.add(MaxPooling2D(pool_size=(2, 2)))
model_dropout.add(Dropout(0.25))

# Classifier head.
model_dropout.add(Flatten())
model_dropout.add(Dense(512))
model_dropout.add(Activation('relu'))
model_dropout.add(Dropout(0.5))
model_dropout.add(Dense(n_classes))
model_dropout.add(Activation('softmax'))

# Summarize the dropout model (not the baseline `model`).
model_dropout.summary()

In [None]:
optimizer = Adam()

# The metric is named 'acc' so the logged keys are 'acc' / 'val_acc', which is
# what the EarlyStopping callback and the plots below read. With 'accuracy',
# current Keras versions log 'accuracy' / 'val_accuracy' instead.
model_dropout.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])

Train the model again, this time with dropout:

In [None]:
# Generous upper bound on the number of epochs; the EarlyStopping callback is
# expected to end training long before it is reached.
n_epochs = 1000
# Kept because the directory iterators reference this name for their batch size.
batch_size = 128

# training_set / validation_set already yield complete batches, so batch_size
# must not be passed to fit() for generator inputs.
history_dropout = model_dropout.fit(training_set,
                                    epochs=n_epochs,
                                    validation_data=validation_set,
                                    verbose=1,
                                    callbacks=callbacks)

In [None]:
# Training vs. validation accuracy per epoch for the model without dropout.
plt.plot(np.arange(len(history.history['acc'])), history.history['acc'], label='training')
plt.plot(np.arange(len(history.history['val_acc'])), history.history['val_acc'], label='validation')
plt.title('Accuracy of model without dropouts')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend(loc=0)
plt.show()

In [None]:
# Training vs. validation accuracy per epoch for the model with dropout.
dropout_curves = history_dropout.history
for metric_key, curve_label in (('acc', 'training'), ('val_acc', 'validation')):
    epoch_axis = np.arange(len(dropout_curves[metric_key]))
    plt.plot(epoch_axis, dropout_curves[metric_key], label=curve_label)

plt.title('Accuracy of model with dropouts')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend(loc=0)
plt.show()

In [None]:
# Report the lowest validation loss of the baseline model and when it occurred.
val_losses = history.history['val_loss']
# np.argmin returns a 0-based index, so add 1 to get the number of epochs
# that had been completed when the minimum was reached.
best_epoch = int(np.argmin(val_losses)) + 1
print('Minimum loss: ', min(val_losses),
 '\nAfter ', best_epoch, ' epochs')

In [None]:
# Report the lowest validation loss of the dropout model and when it occurred.
val_losses_dropout = history_dropout.history['val_loss']
# np.argmin returns a 0-based index, so add 1 to get the number of epochs
# that had been completed when the minimum was reached.
best_epoch_dropout = int(np.argmin(val_losses_dropout)) + 1
print('Minimum loss: ', min(val_losses_dropout),
 '\nAfter ', best_epoch_dropout, ' epochs')