In [2]:
from google.colab import drive
drive.mount('/content/drive/')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive/


In [1]:
import numpy as np
from skimage.io import imread, imsave, imshow
import matplotlib.pyplot as plt
import pandas as pd
import os
from skimage.color import rgb2gray
import math
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import BatchNormalization
from keras.optimizers import Adam
%matplotlib inline

Using TensorFlow backend.


In [0]:
root_dir  = '/content/drive/My Drive/BirdsCV/'
train_dir = os.path.join(root_dir, 'data', 'train')
data_dir  = os.path.join(train_dir, 'images')
y_path    = os.path.join(train_dir, 'gt.csv')
model_dir = os.path.join(root_dir, 'models')
image_size = 224

In [0]:
df = pd.read_csv(y_path)
df['class_id'] = df['class_id'].apply(lambda x: str(x).zfill(2))
valdf = df.sample(frac=0.2, random_state=42)
traindf = pd.concat([df, valdf]).drop_duplicates(keep=False)

In [5]:
from keras.preprocessing.image import ImageDataGenerator

common_datagen_params = dict(
    samplewise_center=True,
    samplewise_std_normalization=True,
    rescale=1./255
)

augmentation_params = dict(
    rotation_range=20,
    width_shift_range=0,
    height_shift_range=0,
    zoom_range=[1, 1.3],
    horizontal_flip=True,
    fill_mode='reflect'
)

generator_params = dict(
    directory=data_dir,
    x_col="filename",
    y_col="class_id",
    target_size=(image_size, image_size),
    batch_size=62,
    class_mode='categorical'
)

train_datagen = ImageDataGenerator(
    **common_datagen_params,
    **augmentation_params
)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=traindf,
    **generator_params
)

full_train_generator = train_datagen.flow_from_dataframe(
    dataframe=df,
    **generator_params
)

val_generator = ImageDataGenerator(
    **common_datagen_params
).flow_from_dataframe(
    dataframe=valdf,
    **generator_params
)

Found 2000 validated image filenames belonging to 50 classes.
Found 2500 validated image filenames belonging to 50 classes.
Found 500 validated image filenames belonging to 50 classes.


In [6]:
from keras.applications import ResNet50

main_conv = ResNet50(weights='imagenet', include_top=False, input_shape=(image_size, image_size, 3))

for layer in main_conv.layers[:-4]:
    layer.trainable = False

















Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [7]:
from keras import models
from keras import layers
from keras import optimizers

model = models.Sequential()

model.add(main_conv)

model.add(layers.Flatten())
model.add(layers.Dropout(0.1))
model.add(layers.Dense(300, activation='relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(100, activation='relu'))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(50, activation='softmax'))

model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.Adam(lr=2e-4),
    metrics=['acc']
)

model.summary()

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.



Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 7, 7, 2048)        23587712  
_________________________________________________________________
flatten_1 (Flatten)          (None, 100352)            0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 100352)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 300)               30105900  
_________________________________________________________________
dropout_2 (Dropout)          (None, 300)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               30100     
__

In [8]:
t_gen = train_generator
history = model.fit_generator(
        t_gen,
        steps_per_epoch=t_gen.samples/t_gen.batch_size,
        epochs=20,
        validation_data=val_generator,
        validation_steps=val_generator.samples/val_generator.batch_size
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [0]:
model_path = os.path.join(model_dir, 'model5.hdf5')

In [0]:
model.save(model_path)

In [0]:
from tensorflow.keras.models import load_model
model = load_model(model_path)

In [0]:
def classify(model, test_img_dir):
    filenames = sorted(os.listdir(test_img_dir))
    
    test_generator = ImageDataGenerator(
        samplewise_center=True,
        samplewise_std_normalization=True,
        rescale=1./255
    ).flow_from_dataframe(
        dataframe=pd.DataFrame(
            {'filename': filenames}
        ),
        directory=test_img_dir,
        class_mode='input',
        x_col="filename",
        shuffle=False,
        target_size=(image_size, image_size),
        batch_size=125
    )
        
    prediction = model.predict_generator(test_generator)
    answers = prediction.argmax(axis=1)
    
    return {
        fname: answer
        for fname, answer in zip(filenames, answers)
    }


In [87]:
prediction = classify(model, data_dir)

pred_df = pd.DataFrame({
    'filename': sorted(prediction.keys()),
    'class_id': list(map(lambda x: prediction[x], prediction))
})

(pred_df.class_id == df.class_id.apply(int)).mean()

Found 2500 validated image filenames.


0.9436