In [16]:
from keras import backend as K
K.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0']

### HOW TO
* Download data by fastai

* Copy data to correct folder 
     
     data/pets/_pet_type_>/_image_
* Load ResNet50 with weights
* ① Replace top layer 
* ② Extract feature from ResNet50, then classify in other network

In [17]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import os, json, re
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt
import shutil 

In [18]:
from fastai.vision import *
import tensorflow.keras as K

In [19]:
current_dir = os.getcwd()
data_folder = "data/pets"
path = untar_data(URLs.PETS); path

PosixPath('/home/jupyter/.fastai/data/oxford-iiit-pet')

In [21]:
def preprocess_data(download_path):
    data_path = os.path.join(current_dir, data_folder)
    try:
        # Create target Directory
        os.mkdir(data_path)
        print("Directory ", data_path, " created ") 
    except FileExistsError:
        print("Directory ", data_path, " already exists")

    label_count = 0
    labels = []
    last_label = ''
    
    # image file name format
    pat = r'/([^/]+)_\d+.jpg$'
    pat = re.compile(pat)
    
    g = glob(download_path + '/*.jpg')
    for fn in g:
        res = pat.search(fn)
        if res.group(1) not in labels:
            try:
                os.mkdir(os.path.join(data_folder, res.group(1)))
            except FileExistsError:
                print("folder exist")
                
            label_count += 1
            labels.append(res.group(1))
        
        shutil.copy2(fn, os.path.join(data_folder, res.group(1)))
    
    return label_count, labels

def remove_data(data_path):
    remove_path = os.path.join(current_dir, data_folder)
    try:
        shutil.rmtree(remove_path)
    except:
        print('Error while deleting directory')

        
label_count, labels = preprocess_data(str(path/'images'))
print(label_count, labels)
# label_count=37
# labels=['wheaten_terrier', 'newfoundland', 'Russian_Blue', 'havanese', 'Siamese',
#         'chihuahua', 'pomeranian', 'Birman', 'staffordshire_bull_terrier', 'scottish_terrier',
#         'Bengal', 'american_pit_bull_terrier', 'Persian', 'shiba_inu', 'British_Shorthair', 
#         'yorkshire_terrier', 'english_setter', 'Sphynx', 'Bombay', 'german_shorthaired', 'samoyed', 
#         'saint_bernard', 'keeshond', 'american_bulldog', 'beagle', 'boxer', 'leonberger', 'miniature_pinscher', 
#         'Ragdoll', 'pug', 'english_cocker_spaniel', 'basset_hound', 'Abyssinian', 'Egyptian_Mau', 
#         'Maine_Coon', 'great_pyrenees', 'japanese_chin']

In [20]:
# !pip install keras --upgrade
# !pip install Keras-Applications
# import keras
print(K.__version__)

2.2.4-tf


In [22]:
import tensorflow as tf
from keras.applications import ResNet50
from keras.models import Sequential
from keras.layers import Dense
from keras.preprocessing.image import ImageDataGenerator

## Create ResNet50 model with customized top layer

In [10]:
conv_base=ResNet50(include_top=False, input_shape=(224, 224, 3), pooling='avg')

Instructions for updating:
Colocations handled automatically by placer.


In [11]:
conv_base.trainable=False
model = Sequential()
model.add(conv_base)
model.add(Dense(37, activation='softmax'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2048)              23587712  
_________________________________________________________________
dense_1 (Dense)              (None, 37)                75813     
Total params: 23,663,525
Trainable params: 75,813
Non-trainable params: 23,587,712
_________________________________________________________________


In [12]:
datagen = ImageDataGenerator(rescale=1./255)
bs=256
generator=datagen.flow_from_directory(
    directory=os.path.join(current_dir, data_folder),
    target_size=(224,224),
    batch_size=bs,
    classes=labels,
    class_mode='categorical')

Found 7394 images belonging to 37 classes.


In [13]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

In [15]:
%%time
steps_per_epoch=np.ceil(7393/bs)
print(steps_per_epoch)
history = model.fit_generator(generator, steps_per_epoch=steps_per_epoch, epochs=10)

29.0
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CPU times: user 21min 4s, sys: 58.8 s, total: 22min 3s
Wall time: 7min 1s


## Use ResNet50 to extracfeature

In [23]:
conv_base2=ResNet50(include_top=False, input_shape=(224, 224, 3), pooling='avg')

In [24]:
%%time
def extract_feature(sample_count):
    batch_size=64
    features=np.zeros(shape=(sample_count, 2048))
    labels=np.zeros(shape=(sample_count))
    datagen2 = ImageDataGenerator(rescale=1./255)
    generator=datagen2.flow_from_directory(
        directory=os.path.join(current_dir, data_folder),
        target_size=(224,224),
        batch_size=batch_size,
        class_mode='binary')
    
    i = 0
    for inputs_batch, labels_batch in generator:
        features_batch = conv_base2.predict(inputs_batch)
        features[i* batch_size : (i+1) * batch_size] = features_batch
        labels[i * batch_size: (i+1) * batch_size] = labels_batch
        i += 1
        if i*batch_size >= sample_count:
            break
    return features, labels

train_features, train_labels = extract_feature(7394)

Found 7394 images belonging to 37 classes.
CPU times: user 3min 10s, sys: 14.8 s, total: 3min 25s
Wall time: 1min 21s


In [25]:
print(train_features.shape)
print(train_labels.shape)
print(train_labels[100:130])

(7394, 2048)
(7394,)
[15. 11. 29. 31. 21.  7.  2.  8.  4. 34. 27. 15. 28. 10. 36.  6. 21.  4. 29. 28.  9. 18. 22. 28.
  3. 35. 20.  9.  0.  4.]


In [27]:
%%time
model2 = Sequential()
model2.add(Dense(256, activation="relu", input_dim=2048))
model2.add(Dense(37, activation="softmax"))

model2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])
history2 = model2.fit(train_features, train_labels, epochs=1000, batch_size=64, verbose=0)

CPU times: user 11min 28s, sys: 2min 11s, total: 13min 40s
Wall time: 7min 16s
