In [0]:
from google_drive_downloader import GoogleDriveDownloader as gdd

# Download the dataset archive from Google Drive to ./dataset.zip;
# unzip=True asks the downloader to extract it after the download.
gdd.download_file_from_google_drive(
    file_id='1L_NwmgsMjPCarLpo_WVbWs_wGn0PvqZw',
    dest_path='./dataset.zip',
    unzip=True,
)

In [0]:
from imutils import paths
import random

# Collect the path of every image under dataset/. Sorting first gives a fixed
# starting order in case the directory listing order varies between machines.
image_path = sorted(paths.list_images('dataset/'))

# Shuffle with a dedicated fixed-seed generator so the image order (and every
# train/test split derived from it) is identical on each Restart & Run All,
# without touching the global `random` state.
random.Random(42).shuffle(image_path)

In [0]:
import os

# The flower species name is the directory that directly contains each image,
# i.e. the second-to-last component of its path.
labels = []
for p in image_path:
  path_parts = p.split(os.path.sep)
  labels.append(path_parts[-2])

In [7]:
from sklearn.preprocessing import LabelEncoder

# Convert the species names into integer class ids.
# `le` keeps the name <-> id mapping for later lookups.
le = LabelEncoder()
le.fit(labels)
labels = le.transform(labels)
labels

array([15, 11, 11, ...,  6,  8,  8])

In [8]:
from keras.applications import VGG16

# Pre-trained VGG16 used as a fixed feature extractor:
# ImageNet weights, with the fully connected classifier head left out.
pre_trained_model = VGG16(
    weights='imagenet',
    include_top=False,
)













In [9]:
from keras.preprocessing.image import load_img, img_to_array
import numpy as np
from keras.applications import imagenet_utils

# Load every image at the 224x224 size VGG16 expects and apply the ImageNet
# preprocessing, collecting one single-image batch per file.
single_batches = []
for img_file in image_path:
  pixels = img_to_array(load_img(img_file, target_size=(224,224)))  # (224, 224, 3)
  batch_of_one = np.expand_dims(pixels, 0)  # (1, 224, 224, 3) for preprocess_input
  single_batches.append(imagenet_utils.preprocess_input(batch_of_one))

# Stack the single-image batches along the first axis into one array.
list_image = np.vstack(single_batches)
list_image.shape

(1360, 224, 224, 3)

# Feature Extraction

In [0]:
# Run all preprocessed images through the headless VGG16 to obtain their feature maps.
features = pre_trained_model.predict(x=list_image)

In [0]:
# Flatten each image's feature map into one vector per image.
# -1 lets NumPy infer the vector length (512*7*7 for the 224x224 VGG16 output
# used here), so the cell keeps working if the input size or backbone changes.
features = features.reshape((features.shape[0], -1))

In [0]:
# Sanity check: display the shape of the flattened feature matrix.
features.shape

## Feature extraction - Softmax

In [0]:
import tensorflow as tf

# One-hot encode the integer class ids; the vector width equals the number of
# distinct classes present in `labels`.
num_classes = np.unique(labels).size
labels_ohe = tf.keras.utils.to_categorical(
    labels,
    num_classes=num_classes,
)

In [0]:
from sklearn.model_selection import train_test_split

# Split features and one-hot labels into training and test sets (80% / 20%).
X_train, X_test, Y_train, Y_test = train_test_split(
    features,
    labels_ohe,
    test_size=0.2,
    random_state=42,
)

In [0]:
# Sanity check: display the shape of the training feature matrix.
X_train.shape

In [0]:
# Sanity check: display the shape of the one-hot training labels.
Y_train.shape

In [0]:
from keras import Sequential
from keras.layers import Dense, Dropout

# Softmax classifier on top of the extracted features:
# a single dense layer with one output unit per class.
model = Sequential([
    Dense(num_classes, activation='softmax'),
])

In [0]:
from keras.optimizers import Adam

epochs = 1000
batch_size = 64

# Use the Keras-native Adam optimizer (learning rate 0.01) rather than the
# TF1-only tf.train.AdamOptimizer, matching the keras.optimizers classes used
# in the fine-tuning cells. The rate is passed positionally, as those cells do.
model.compile(optimizer=Adam(0.01),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Report loss/accuracy on the held-out 20% split after every epoch so
# over-fitting during the long run is visible.
model.fit(X_train, Y_train,
          epochs=epochs, batch_size=batch_size,
          validation_data=(X_test, Y_test),
          verbose=1)

## Feature extraction - Logistic regression

In [0]:
# Split features and integer labels into training and test sets (80% / 20%).
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

In [0]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression

# Grid search over C to find the best setting for the model.
# C = 1/lambda, the coefficient of the regularisation term.
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
# Alternative estimator that was tried:
#   LogisticRegression(solver='lbfgs', multi_class='multinomial')
params = {'C' : [0.1, 1.0, 10.0, 100.0]}
model = GridSearchCV(estimator=LogisticRegression(), param_grid=params)
model.fit(X_train, y_train)

best_params_message = 'Best parameter for the model {}'.format(model.best_params_)
print(best_params_message)

In [0]:
from sklearn.metrics import classification_report

# Evaluate the fitted model on the test split and print per-class metrics.
preds = model.predict(X_test)
report_text = classification_report(y_true=y_test, y_pred=preds)
print(report_text)

# Fine Tuning

In [0]:
import tensorflow as tf

# Number of distinct classes, then the one-hot encoding of the integer labels
# used as targets for fine-tuning.
num_classes = np.unique(labels).size
labels_ohe = tf.keras.utils.to_categorical(
    labels,
    num_classes=num_classes,
)

In [11]:
from keras.applications import VGG16
from keras.layers import Flatten, Dense, Dropout
from keras.models import Model
from keras.layers import Input

# Convolutional base: VGG16 with ImageNet weights and no classifier head,
# wired to an explicit 224x224 RGB input tensor.
baseModel = VGG16(weights='imagenet', include_top=False, input_tensor=Input(shape=(224,224,3)))

# Build the new classification head on top of the VGG16 output.
fcHead = baseModel.output
fcHead = Flatten(name='flatten')(fcHead)
# Fully connected layer followed by dropout.
fcHead = Dense(256, activation='relu')(fcHead)
fcHead = Dropout(0.5)(fcHead)
# One softmax unit per class.
fcHead = Dense(num_classes, activation='softmax')(fcHead)

# `outputs` (plural) is the documented keyword of Model; the legacy singular
# `output=` spelling is rejected by newer Keras releases.
model = Model(inputs=baseModel.input, outputs=fcHead)


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.




In [0]:
from sklearn.model_selection import train_test_split

# Split the preprocessed images and their one-hot labels 80% / 20% for fine-tuning.
x_train, x_test, y_train, y_test = train_test_split(
    list_image,
    labels_ohe,
    test_size=0.2,
    random_state=2019,
)

In [0]:
# Augment the training data with random geometric transforms.
from keras.preprocessing.image import ImageDataGenerator

# No `rescale` here: the images in `list_image` have already been normalised
# with imagenet_utils.preprocess_input when they were loaded. Dividing by 255
# on top of that would feed VGG16 inputs in a range its ImageNet weights were
# not trained on, and would make training data differ from the un-rescaled
# test split.
datagen_train = ImageDataGenerator(rotation_range=30,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             shear_range=0.2,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             fill_mode='nearest')

In [18]:
# Sanity check: create a batch iterator over the training split and display
# its batch size.
x = datagen_train.flow(
    x=x_train,
    y=y_train,
    batch_size=32,
)
x.batch_size

32

In [0]:
from keras.optimizers import RMSprop

# Stage 1: freeze every layer of the VGG16 base so that only the newly added
# head is updated.
for base_layer in baseModel.layers:
  base_layer.trainable = False

# Compile after changing `trainable` so the freeze is in effect for training.
model.compile(
    optimizer=RMSprop(1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on augmented batches of 32 for 20 epochs.
head_batches = datagen_train.flow(x_train, y_train, batch_size=32)
model.fit_generator(
    head_batches,
    steps_per_epoch=len(x_train)//32,
    epochs=20,
)

In [0]:
from keras.optimizers import SGD

# Stage 2: make the base layers from index 15 onward trainable again,
# leaving the earlier layers frozen.
for layer_idx in range(15, len(baseModel.layers)):
  baseModel.layers[layer_idx].trainable = True

# Re-compile so the changed `trainable` flags take effect; SGD with a small
# learning rate is used for this stage.
model.compile(
    optimizer=SGD(1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Continue training on augmented batches of 32 for 20 more epochs.
finetune_batches = datagen_train.flow(x_train, y_train, batch_size=32)
model.fit_generator(
    finetune_batches,
    steps_per_epoch=len(x_train)//32,
    epochs=20,
)