In [26]:
# Reload the autoreload extension and re-import edited modules before each cell runs.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

### Download data (following the fast.ai course)
- Run the script on a Google Images search page
- Save the result as a .csv file
- Run the script in the notebook to download the images
- Put the data in the folder data/bears/ (subfolders: "black", "grizzly", "teddys")

In [142]:
# Dataset root, relative to the notebook's working directory.
path = "data/bears"
# Sub-folder names; a class's position in this list is its integer label.
classes = [
    "teddys",
    "grizzly",
    "black",
]

In [143]:
import cv2
import numpy as np
from tensorflow.keras.applications import ResNet50
# Spatial input size, and the same size with the 3 colour channels appended.
size = (224, 224)
size_3d = size + (3,)

# Headless ResNet50 with ImageNet weights; pooling='avg' adds a pooling layer
# that reduces the output to (None, 2048).
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=size_3d,
    pooling='avg',
)

In [144]:
# Print the layer-by-layer architecture of the headless ResNet50.
base_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_7[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalizationV1) (None, 112, 112, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

### Use ResNet50 as a fixed feature extractor (features are precomputed, so data augmentation cannot be used)

In [145]:
# Output tensor of the last ResNet50 layer (the pooling layer added by pooling='avg').
resNet_output = base_model.layers[-1].output
# Second dimension of that output is the per-image feature length.
resNet_output_d = resNet_output.shape[1]
print(resNet_output_d)

2048


In [146]:
import os
# Collect every image path together with its integer class label.
# `data[i]` is an absolute file path and `labels[i]` is the index of that
# file's class inside `classes`.
data = []
labels = []
for idx, c in enumerate(classes):
    folder_p = os.path.join(os.getcwd(), path, c)
    # os.listdir returns entries in arbitrary order; sorting keeps the file
    # order (and therefore the seeded shuffles applied later) reproducible.
    for f in sorted(os.listdir(folder_p)):
        file_p = os.path.join(folder_p, f)
        # Skip sub-directories: only regular files can be loaded as images.
        if not os.path.isfile(file_p):
            continue
        data.append(file_p)
        labels.append(idx)

In [147]:
def get_feature(image_list, model):
    """Load a batch of image files and return ``model.predict`` on them.

    Each file is read with OpenCV as a 3-channel image, resized to 224x224,
    divided by 255 and stacked into one ``(len(image_list), 224, 224, 3)``
    array, which is then passed to ``model.predict``.

    NOTE(review): OpenCV loads channels in BGR order and the scaling here is
    a plain ``/ 255`` rather than the Keras ResNet50 ``preprocess_input`` --
    confirm this is the intended preprocessing for the ImageNet weights.

    Args:
        image_list: sequence of image file paths.
        model: object exposing ``predict(batch)``.

    Returns:
        Whatever ``model.predict`` returns for the stacked batch.

    Raises:
        ValueError: if a file cannot be read as an image.
    """
    imgs = np.zeros((len(image_list), 224, 224, 3))
    for i, img_path in enumerate(image_list):
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports a missing/undecodable file by returning None;
            # fail here with the offending path instead of inside cv2.resize.
            raise ValueError("Could not read image: {}".format(img_path))
        img = cv2.resize(img, (224, 224))
        imgs[i] = img / 255.

    return model.predict(imgs)
    
def feature_extraction(image_list, model, bs):
    """Extract one feature vector per image, processing ``bs`` images at a time.

    The images are split into consecutive chunks of at most ``bs`` paths;
    each chunk is passed to ``get_feature`` and the result is written into
    the matching rows of the output array.

    Args:
        image_list: sequence of image file paths.
        model: model handed to ``get_feature``; the feature length is read
            from ``model.layers[-1].output.shape[1]``.
        bs: maximum number of images per ``get_feature`` call; must be > 0.

    Returns:
        Array of shape ``(len(image_list), feature_length)``. For an empty
        ``image_list`` this is an empty array and the model is never called.

    Raises:
        ValueError: if ``bs`` is not positive (it would never advance).
    """
    if bs <= 0:
        raise ValueError("bs must be a positive integer, got {}".format(bs))

    # int() also accepts the Dimension objects older TensorFlow shapes hold.
    output_len = int(model.layers[-1].output.shape[1])
    example_count = len(image_list)
    features = np.zeros((example_count, output_len))

    # range() steps through full chunks and the shorter final chunk alike.
    for start in range(0, example_count, bs):
        stop = min(start + bs, example_count)
        features[start:stop, :] = get_feature(image_list[start:stop], model)

    return features

In [148]:
# Precompute the ResNet50 features for every image, 32 images per predict call.
all_feature = feature_extraction(data, base_model, 32)

In [149]:
# Sanity check: one row per image, one column per feature.
print(all_feature.shape)

(286, 2048)


In [153]:
%%time
from sklearn.utils import shuffle
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Small fully-connected classifier trained directly on the precomputed features.
model = Sequential([
    Dense(256, activation="relu", input_dim=all_feature.shape[1]),
    Dense(256, activation="relu"),
    Dense(3, activation="softmax"),
])
model.summary()

# One call shuffles features and labels with the same seeded permutation,
# so each feature row stays paired with its label.
x, y = shuffle(all_feature, labels, random_state=99)

# Labels are integer class indices, hence the sparse loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)
history = model.fit(x, y, epochs=10, batch_size=32)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_21 (Dense)             (None, 256)               524544    
_________________________________________________________________
dense_22 (Dense)             (None, 256)               65792     
_________________________________________________________________
dense_23 (Dense)             (None, 3)                 771       
Total params: 591,107
Trainable params: 591,107
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CPU times: user 28.9 s, sys: 148 ms, total: 29 s
Wall time: 28.6 s


### Add a classification head on top of ResNet50 (images pass through the full model, so data augmentation can be used)

In [154]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten


# Fresh headless ResNet50, frozen so that only the new dense head is trained.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=size_3d,
    pooling='avg',
)
base_model.trainable = False

# No Flatten layer: with pooling='avg' the base model output is already 2-D.
model = Sequential([
    base_model,
    Dense(256, activation="relu"),
    Dense(256, activation="relu"),
    Dense(3, activation="softmax"),
])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2048)              23587712  
_________________________________________________________________
dense_24 (Dense)             (None, 256)               524544    
_________________________________________________________________
dense_25 (Dense)             (None, 256)               65792     
_________________________________________________________________
dense_26 (Dense)             (None, 3)                 771       
Total params: 24,178,819
Trainable params: 591,107
Non-trainable params: 23,587,712
_________________________________________________________________


In [155]:
import random
import cv2
import numpy as np
from sklearn.utils import shuffle
class DataGenerator():
    """Endless mini-batch generator that loads images from disk.

    Paths and labels are shuffled together once at construction (seed 99)
    and again, with seed 50, every time the data has been consumed. Only
    full batches are produced: a trailing remainder smaller than
    ``batch_size`` is dropped before the reshuffle.

    Args:
        image_list: sequence of image file paths.
        label_list: sequence of integer class indices, aligned with
            ``image_list``.
        img_size: ``(height, width, channels)`` of the arrays to produce.
        batch_size: number of images per batch; must be > 0 and no larger
            than the dataset.
        num_classes: width of the one-hot label rows (default 3).

    Raises:
        ValueError: if the two lists differ in length, ``batch_size`` is not
            positive, or there are fewer images than ``batch_size``.
    """

    def __init__(self, image_list, label_list, img_size, batch_size, num_classes=3):
        if len(image_list) != len(label_list):
            raise ValueError(
                "image_list and label_list must have the same length, got {} and {}".format(
                    len(image_list), len(label_list)))
        if batch_size <= 0:
            raise ValueError("batch_size must be positive, got {}".format(batch_size))
        if len(image_list) < batch_size:
            # next_batch only yields full batches, so it could never produce one.
            raise ValueError(
                "need at least batch_size={} images, got {}".format(
                    batch_size, len(image_list)))

        # A single call applies one permutation to both lists, keeping each
        # path paired with its label.
        self.image_list, self.label_list = shuffle(image_list, label_list, random_state=99)
        self.img_size = tuple(img_size)
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.current_index = 0

    def get_batch(self):
        """Load the batch starting at ``current_index`` (without advancing it).

        Returns:
            Tuple ``(images, labels)``: ``images`` has shape
            ``(n,) + img_size`` with pixel values divided by 255, ``labels``
            has shape ``(n, num_classes)`` and is one-hot encoded, where
            ``n`` is the number of items in the slice.

        Raises:
            ValueError: if an image file cannot be read.
        """
        start = self.current_index
        stop = start + self.batch_size
        batch_paths = self.image_list[start:stop]
        batch_labels = self.label_list[start:stop]

        height, width = self.img_size[0], self.img_size[1]
        img_arr = np.zeros((len(batch_paths),) + self.img_size)
        lbl_arr = np.zeros((len(batch_paths), self.num_classes))
        for i, (img_path, label) in enumerate(zip(batch_paths, batch_labels)):
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
            if img is None:
                # cv2.imread returns None for a missing or undecodable file.
                raise ValueError("Could not read image: {}".format(img_path))
            # cv2.resize takes (width, height), the reverse of the array shape.
            img = cv2.resize(img, (width, height))
            img_arr[i] = img / 255.
            lbl_arr[i, label] = 1

        return (img_arr, lbl_arr)

    def next_batch(self):
        """Yield ``(images, labels)`` batches forever, reshuffling when exhausted."""
        while True:
            # Not enough items left for a full batch: reshuffle and start over.
            if self.current_index > len(self.image_list) - self.batch_size:
                self.image_list, self.label_list = shuffle(
                    self.image_list, self.label_list, random_state=50)
                self.current_index = 0

            batch = self.get_batch()
            self.current_index += self.batch_size
            yield batch
        

In [156]:
# Sanity check: there must be exactly one label per image path.
print(len(data), len(labels))

286 286


In [157]:
# DataGenerator yields one-hot label rows, hence the non-sparse categorical loss.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [158]:
# Keep the batch size in one place: the generator and the steps-per-epoch
# computation must agree on it.
batch_size = 32
datagen = DataGenerator(data, labels, size_3d, batch_size)
# Number of full batches per pass over the data (any remainder is not counted).
step = len(data) // batch_size
print(step)

8


In [159]:
# Train the dense head on batches streamed from disk by the generator.
history = model.fit_generator(
    generator=datagen.next_batch(),
    steps_per_epoch=step,
    epochs=10,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
