In [5]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
# Thêm thư viện
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from imutils import paths
from keras.applications.vgg16 import VGG16
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import numpy as np
import random
import os

In [7]:
# Lấy các đường dẫn đến ảnh.
image_path = list(paths.list_images('/content/drive/MyDrive/DeepLearning/6. Technique/dataset/'))

# Đổi vị trí ngẫu nhiên các đường dẫn ảnh
random.shuffle(image_path)

In [8]:
print(image_path)

['/content/drive/MyDrive/DeepLearning/6. Technique/dataset/Tulip/image_0581.jpg', '/content/drive/MyDrive/DeepLearning/6. Technique/dataset/Sunflower/image_0738.jpg', '/content/drive/MyDrive/DeepLearning/6. Technique/dataset/Bluebell/image_0266.jpg', '/content/drive/MyDrive/DeepLearning/6. Technique/dataset/Iris/image_0470.jpg', '/content/drive/MyDrive/DeepLearning/6. Technique/dataset/Buttercup/image_1170.jpg', '/content/drive/MyDrive/DeepLearning/6. Technique/dataset/Sunflower/image_0728.jpg', '/content/drive/MyDrive/DeepLearning/6. Technique/dataset/Buttercup/image_1191.jpg', '/content/drive/MyDrive/DeepLearning/6. Technique/dataset/Crocus/image_0351.jpg', '/content/drive/MyDrive/DeepLearning/6. Technique/dataset/Cowslip/image_1104.jpg', '/content/drive/MyDrive/DeepLearning/6. Technique/dataset/Buttercup/image_1163.jpg', '/content/drive/MyDrive/DeepLearning/6. Technique/dataset/Cowslip/image_1110.jpg', '/content/drive/MyDrive/DeepLearning/6. Technique/dataset/Sunflower/image_0759.jp

In [9]:
# Đường dẫn ảnh sẽ là dataset/tên_loài_hoa/tên_ảnh. Ví dụ dataset/Bluebell/image_0241.jpg nên p.split(os.path.sep)[-2] sẽ lấy ra được tên loài hoa
labels = [p.split(os.path.sep)[-2] for p in image_path]

# Chuyển tên các loài hoa thành số
le = LabelEncoder()
labels = le.fit_transform(labels)

In [10]:
# Load pre-train model VGG 16 của ImageNet dataset, include_top=False để bỏ phần Fully connected layer ở cuối.
model = VGG16(weights='imagenet', include_top=False)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [11]:
# Load ảnh và resize về đúng kích thước mà VGG 16 cần là (224,224)
list_image = []
for (j, imagePath) in enumerate(image_path):
    image = load_img(imagePath, target_size=(224, 224))
    image = img_to_array(image)
    
    image = np.expand_dims(image, 0)
    image = imagenet_utils.preprocess_input(image)
    
    list_image.append(image)
    
list_image = np.vstack(list_image)

In [12]:
# Dùng pre-trained model để lấy ra các feature của ảnh
features = model.predict(list_image)

# Giống bước flatten trong CNN, chuyển từ tensor 3 chiều sau ConvNet sang vector 1 chiều
features = features.reshape((features.shape[0], 512*7*7))

In [13]:
# Chia traing set, test set tỉ lệ 80-20
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [14]:
# Grid search để tìm các parameter tốt nhất cho model. C = 1/lamda, hệ số trong regularisation. Solver là kiểu optimize
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
params = {'C' : [0.1, 1.0, 10.0, 100.0]} # L2 C = 1 / alpha
#model = GridSearchCV(LogisticRegression(solver='lbfgs', multi_class='multinomial'), params)
model = GridSearchCV(LogisticRegression(), params)
model.fit(X_train, y_train)
print('Best parameter for the model {}'.format(model.best_params_))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

Best parameter for the model {'C': 10.0}


In [15]:
# Đánh giá model
preds = model.predict(X_test)
print(classification_report(y_test, preds))

              precision    recall  f1-score   support

           0       0.79      0.92      0.85        12
           1       0.86      0.90      0.88        20
           2       1.00      0.67      0.80        15
           3       0.71      0.94      0.81        18
           4       0.91      0.95      0.93        21
           5       0.93      0.74      0.82        19
           6       1.00      0.93      0.96        14
           7       0.86      1.00      0.93        19
           8       1.00      0.88      0.94        17
           9       1.00      0.88      0.94        17
          10       0.93      1.00      0.96        13
          11       1.00      0.91      0.95        23
          12       0.67      0.86      0.75         7
          13       0.94      1.00      0.97        16
          14       1.00      1.00      1.00        12
          15       0.92      0.85      0.88        13
          16       1.00      1.00      1.00        16

    accuracy              