# 图像增强 Data Augmentation

In [8]:
import os
import sys
import cv2
import h5py
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from time import time
from datetime import datetime
from tqdm import tqdm
from utils import get_params_count

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from keras.applications import inception_v3, xception, resnet50, vgg16, vgg19
from keras.applications import InceptionV3, Xception, ResNet50, VGG16, VGG19
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.layers import Input, Dense, Dropout, Activation, Flatten, Lambda
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.models import Model
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator

In [2]:
def _load_images(folder, image_ids, height, width, input_type):
    """Read ``<folder>/<id>.jpg`` for each id into one (N, height, width, 3) RGB array."""
    images = np.zeros((len(image_ids), height, width, 3), dtype=input_type)
    for row, image_id in enumerate(tqdm(image_ids)):
        path = '%s/%s.jpg' % (folder, image_id)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread does not raise on a missing/corrupt file, it returns None;
            # fail here with the offending path instead of deep inside cv2.resize.
            raise IOError('Could not read image file: %s' % path)
        img = cv2.resize(img, dsize=(width, height))  # dsize is (width, height)
        images[row] = img[:, :, ::-1]  # OpenCV decodes as BGR; flip channels to RGB
    return images


def load_input(height, width, input_type):
    """Load the train/test images and the one-hot training labels.

    Reads ``labels.csv`` (columns ``id`` and ``breed``) and
    ``sample_submission.csv`` (column ``id``) from the working directory and
    the matching ``./train/<id>.jpg`` / ``./test/<id>.jpg`` files.

    The number of samples is taken from the CSV files, because those are what
    actually index the images; counting directory entries instead breaks as
    soon as the folder holds a stray file or misses one.

    Args:
        height: Target image height in pixels.
        width: Target image width in pixels.
        input_type: NumPy dtype of the returned image arrays (e.g. ``np.uint8``).

    Returns:
        Tuple ``(X_train, X_test, Y_train, Y_test)``:
        ``X_train`` / ``X_test`` are RGB arrays of shape (N, height, width, 3),
        ``Y_train`` is a uint8 one-hot array of shape (N_train, n_breeds) whose
        columns follow the alphabetically sorted breed names, and ``Y_test`` is
        the DataFrame read from ``sample_submission.csv``.

    Raises:
        IOError: If an image listed in a CSV file cannot be read.
    """
    labels = pd.read_csv('labels.csv')
    Y_test = pd.read_csv('sample_submission.csv')
    train_size = len(labels)
    test_size = len(Y_test)

    # Sorted unique breeds define the column order of the one-hot encoding.
    breeds = sorted(labels['breed'].unique())
    breed_to_column = {breed: column for column, breed in enumerate(breeds)}

    # Labels: one vectorised assignment instead of a linear list search per row.
    Y_train = np.zeros((train_size, len(breeds)), dtype=np.uint8)
    hot_columns = labels['breed'].map(breed_to_column).values
    Y_train[np.arange(train_size), hot_columns] = 1

    # Train / test data
    X_train = _load_images('./train', labels['id'].values, height, width, input_type)
    X_test = _load_images('./test', Y_test['id'].values, height, width, input_type)

    print("Train: %d, Test: %d" % (train_size, test_size))
    print("Total Dog Breeds:", len(breeds))
    # nbytes is the exact size of the pixel buffer (sys.getsizeof measures the
    # Python object and is not reliable for array views).
    print('Training Data Size = %.2f GB' % (X_train.nbytes/1024**3))
    print('Testing Data Size = %.2f GB' % (X_test.nbytes/1024**3))

    return X_train, X_test, Y_train, Y_test

In [3]:
# Load every image at 299x299 (the input size the InceptionV3 model below is
# built with) and keep the pixels as uint8 to limit memory use.
height = width = 299
img_train, img_test, lb_train, lb_test = load_input(
    height=height,
    width=width,
    input_type=np.uint8,
)

100%|█████████████████████████████████████████████████████| 10222/10222 [00:54<00:00, 187.50it/s]
100%|█████████████████████████████████████████████████████| 10357/10357 [00:55<00:00, 187.35it/s]


Train: 10222, Test: 10357
Total Dog Breeds: 120
Training Data Size = 2.55 GB
Testing Data Size = 2.59 GB


In [4]:
# Hold out 20% of the labelled images for validation; the fixed random_state
# keeps the shuffled split reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    img_train,
    lb_train,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

In [13]:
# Sanity check: show the shapes of the training and validation image arrays.
X_train.shape, X_val.shape

((8177, 299, 299, 3), (2045, 299, 299, 3))

In [11]:
# Batch size shared by both generators (must match the step counts used when
# calling fit_generator).
batch_size = 64

# Augment Training Data: small random rotations, shifts, shears and zooms,
# followed by the InceptionV3 input scaling.
train_gen = ImageDataGenerator(
    preprocessing_function=inception_v3.preprocess_input,
    rotation_range=10.,
    width_shift_range=0.05,
    height_shift_range=0.05,
    shear_range=0.1,
    zoom_range=0.1,
)

# Leave Validation Data intact (only the InceptionV3 input scaling is applied)
test_gen = ImageDataGenerator(
    preprocessing_function=inception_v3.preprocess_input,
)

# NOTE: ImageDataGenerator.fit() is deliberately not called. It only computes
# statistics for featurewise_center / featurewise_std_normalization /
# zca_whitening, none of which are enabled here, so it would just cast and copy
# the full image arrays to float for nothing.

# Patch X and Y together
gen1 = train_gen.flow(X_train, y_train, batch_size)
gen2 = test_gen.flow(X_val, y_val, batch_size)

In [19]:
# Transfer-learning model: an ImageNet-pretrained InceptionV3 backbone with
# global average pooling, kept frozen, plus a small trainable classifier head.
base_model = InceptionV3(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(299, 299, 3),
)
for layer in base_model.layers:
    layer.trainable = False  # only the new head below is trained

# Head: dropout on the pooled features, then a 120-way softmax (one per breed).
pooled_features = Dropout(0.2)(base_model.output)
y = Dense(120, kernel_initializer='he_normal', activation='softmax')(pooled_features)

model = Model(inputs=base_model.input, outputs=y)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [18]:
# Train on the augmented generator (gen1) and validate on the un-augmented one (gen2).

# One timestamped output folder per run. os.path.join keeps the path portable
# and makedirs also creates the parent ./models folder when it is missing.
log_dir = os.path.join('.', 'models', datetime.now().strftime('transfer_model_%Y%m%d_%H%M'))
os.makedirs(log_dir, exist_ok=True)

es = EarlyStopping(monitor='val_loss', patience=20)
mc = ModelCheckpoint(os.path.join(log_dir, 'DogBreed-EP{epoch:02d}-LOSS{val_loss:.4f}.h5'),
                     monitor='val_loss', save_best_only=True)

# Must match the batch size gen1 / gen2 were created with.
batch_size = 64

# Derive the step counts from the data instead of hard-coding 8177 / 2045, and
# round up so the last partial batch is included rather than dropped.
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))
validation_steps = int(np.ceil(len(X_val) / batch_size))

# `epochs` is the Keras 2 argument name; `nb_epoch` is the deprecated Keras 1 spelling.
model.fit_generator(generator=gen1, steps_per_epoch=steps_per_epoch, epochs=1,
                    validation_data=gen2, validation_steps=validation_steps, callbacks=[es, mc])

  if sys.path[0] == '':
  if sys.path[0] == '':


Epoch 1/1


<keras.callbacks.History at 0x2a29c097f98>

In [20]:
# Baseline run WITHOUT augmentation, for comparison with the augmented run.

# One timestamped output folder per run. os.path.join keeps the path portable
# and makedirs also creates the parent ./models folder when it is missing.
log_dir = os.path.join('.', 'models', datetime.now().strftime('transfer_model_%Y%m%d_%H%M'))
os.makedirs(log_dir, exist_ok=True)

es = EarlyStopping(monitor='val_loss', patience=20)
mc = ModelCheckpoint(os.path.join(log_dir, 'DogBreed-EP{epoch:02d}-LOSS{val_loss:.4f}.h5'),
                     monitor='val_loss', save_best_only=True)

batch_size = 64

# The model has no preprocessing layer, so the raw uint8 images must not be fed
# directly: the pretrained InceptionV3 weights expect inputs scaled by
# inception_v3.preprocess_input. A generator with no augmentation applies that
# scaling batch by batch, avoiding a float copy of the whole dataset in memory.
plain_gen = ImageDataGenerator(preprocessing_function=inception_v3.preprocess_input)

model.fit_generator(generator=plain_gen.flow(X_train, y_train, batch_size),
                    steps_per_epoch=int(np.ceil(len(X_train) / batch_size)),
                    epochs=1,
                    validation_data=plain_gen.flow(X_val, y_val, batch_size),
                    validation_steps=int(np.ceil(len(X_val) / batch_size)),
                    callbacks=[es, mc])

Train on 8177 samples, validate on 2045 samples
Epoch 1/1


<keras.callbacks.History at 0x2a56350d978>