In [1]:
# Import libraries
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from imutils import paths
from keras.applications import VGG16
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.optimizers import RMSprop
from keras.layers import Input
from keras.models import Model
from keras.layers.core import Dense
from keras.layers.core import Dropout
from keras.layers.core import Flatten
import numpy as np
import random
import os

In [13]:
# 1. Preprocess image labels
# 1.1 Collect the path of every image under the dataset directory.
# The location can be overridden with the DATASET_DIR environment variable;
# when it is not set, the original hard-coded location is used.
DATASET_DIR = os.environ.get(
    'DATASET_DIR',
    '/home/coffeeaddicted/Documents/Document/AI/Course/Ai4e/Cifar100/dataset/',
)
# Sort first so the starting order is deterministic before shuffling.
image_path = sorted(paths.list_images(DATASET_DIR))

# 1.2 Shuffle the paths with a fixed seed so that a fresh run reproduces the
# same ordering (the later train/test split also uses a fixed random_state).
random.seed(42)
random.shuffle(image_path)

In [None]:
# 1.3 Derive each image's label from its path: the label is the
# second-to-last path component, i.e. the name of the containing directory.
labels = [path.split(os.path.sep)[-2] for path in image_path]

# Print a compact summary instead of dumping the label of every single image.
print(len(labels), 'labels;', len(set(labels)), 'classes:', sorted(set(labels)))

In [20]:
# 1.4 Encode the class names as integer ids.
# The fitted encoder is kept in `le`.
le = LabelEncoder()
le.fit(labels)
labels = le.transform(labels)

In [21]:
# 1.5 Binarize the integer ids into indicator vectors.
# The fitted binarizer is kept in `lb`.
lb = LabelBinarizer()
lb.fit(labels)
labels = lb.transform(labels)

In [26]:
# 2. Preprocess images
# Load every image resized to 224x224 (the input size declared for VGG16
# below) and apply the ImageNet preprocessing from keras.applications.
list_images = []
# The loop variable must NOT be called `image_path`: that would rebind the
# list of paths to a single path string and break any re-run of this cell or
# of the label-extraction cell.
for single_path in image_path:
    image = load_img(single_path, target_size=(224, 224))
    image = img_to_array(image)
    # Add a leading batch axis so the per-image arrays can be stacked below.
    image = np.expand_dims(image, 0)
    image = imagenet_utils.preprocess_input(image)

    list_images.append(image)
# Stack the per-image arrays along the batch axis into one array.
list_images = np.vstack(list_images)

In [28]:
# Sanity check: the stacked array should be (num_images, 224, 224, channels).
print(list_images.shape)

(1360, 224, 224, 3)


In [30]:
# Load VGG16 with ImageNet weights; include_top=False drops the fully
# connected layers at the end of the network.
baseModel = VGG16(weights='imagenet', include_top=False, input_tensor=Input(shape=(224, 224, 3)))

# Size the output layer from the classes seen by the fitted LabelBinarizer
# instead of hard-coding the number of classes.
num_classes = len(lb.classes_)

# Build the additional layers.
# Start from the output of the VGG16 ConvNet.
fcHead = baseModel.output

# Flatten before feeding the fully connected layers.
fcHead = Flatten(name='flatten')(fcHead)

# Add a fully connected layer followed by dropout.
fcHead = Dense(256, activation='relu')(fcHead)
fcHead = Dropout(0.5)(fcHead)

# Output layer with softmax activation, one unit per class.
fcHead = Dense(num_classes, activation='softmax')(fcHead)

# Assemble the full model by joining the VGG16 ConvNet and fcHead.
model = Model(inputs=baseModel.input, outputs=fcHead)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [31]:
# Hold out part of the data: 80% of the samples go to training and the rest
# to testing; random_state is fixed so the split itself is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    list_images,
    labels,
    train_size=0.8,
    random_state=42,
)

In [32]:
# Augmentation for the training data.
# No `rescale` here: the images were already run through
# imagenet_utils.preprocess_input when they were loaded, and dividing that
# result by 255 would hand the pretrained VGG16 layers inputs in a different
# range from the one that preprocessing produces.
aug_train = ImageDataGenerator(rotation_range=30, width_shift_range=0.1, height_shift_range=0.1, shear_range=0.2,
                         zoom_range=0.2, horizontal_flip=True, fill_mode='nearest')
# Generator for the test data: no augmentation and no extra scaling.
aug_test = ImageDataGenerator()

In [None]:
# Stage 1: freeze every layer of the VGG16 base so that only the newly added
# head is trained.
for layer in baseModel.layers:
    layer.trainable = False

BATCH_SIZE = 32
numOfEpoch = 25

opt = RMSprop(0.001)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# NOTE(review): fit_generator is kept to match the Keras API already used in
# this notebook; newer Keras releases expect model.fit here instead.
H = model.fit_generator(
    aug_train.flow(x_train, y_train, batch_size=BATCH_SIZE),
    steps_per_epoch=len(x_train) // BATCH_SIZE,
    validation_data=aug_test.flow(x_test, y_test, batch_size=BATCH_SIZE),
    # Ceil-divide so the final, partial validation batch is evaluated too
    # instead of being skipped every epoch.
    validation_steps=(len(x_test) + BATCH_SIZE - 1) // BATCH_SIZE,
    epochs=numOfEpoch,
)

In [None]:
# Stage 2: unfreeze the layers from index 15 onward in baseModel.layers and
# fine-tune them together with the head.
# NOTE(review): presumably index 15+ is the last convolutional block of
# VGG16 - confirm with baseModel.summary().
for layer in baseModel.layers[15:]:
    layer.trainable = True

BATCH_SIZE = 32
numOfEpoch = 35

# Re-compile so the changed `trainable` flags take effect.
opt = SGD(0.001)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# NOTE: this rebinds H, so the history object of the previous training stage
# is no longer reachable through that name.
H = model.fit_generator(
    aug_train.flow(x_train, y_train, batch_size=BATCH_SIZE),
    steps_per_epoch=len(x_train) // BATCH_SIZE,
    validation_data=aug_test.flow(x_test, y_test, batch_size=BATCH_SIZE),
    # Ceil-divide so the final, partial validation batch is evaluated too
    # instead of being skipped every epoch.
    validation_steps=(len(x_test) + BATCH_SIZE - 1) // BATCH_SIZE,
    epochs=numOfEpoch,
)