In [16]:
# import lib
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from imutils import paths
from keras.applications.vgg16 import VGG16
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
import random
import os

In [17]:
# Extract zipfile data
from zipfile import ZipFile

file_name = '/content/dataset.zip'
# Unpack next to the archive instead of into the current working directory,
# so the result does not depend on where the kernel was started.
# NOTE(review): the next cell reads '/content/dataset', which assumes the
# archive contains a top-level 'dataset/' folder -- confirm.
# The handle is called `archive` because `zip` would shadow the built-in zip().
with ZipFile(file_name, 'r') as archive:
  archive.extractall(os.path.dirname(file_name))

In [18]:
# Take image path and shuffle
# Sort first: the order in which files are listed is not guaranteed to be
# stable, and a seeded shuffle is only repeatable from a fixed starting order.
image_path = sorted(paths.list_images('/content/dataset'))
# A private, seeded generator yields the same permutation on every run and
# leaves the global `random` state untouched.
random.Random(42).shuffle(image_path)

In [19]:
# Derive each sample's label from the second-to-last component of its path,
# i.e. the name of the folder that directly contains the image.
# (assumes a <class_name>/<image_file> layout -- TODO confirm)
labels = [img.rsplit(os.path.sep, 2)[-2] for img in image_path]

# Map the label strings to integer codes; `le` keeps the mapping so the
# codes can be translated back to names later.
le = LabelEncoder()
labels = le.fit_transform(labels)

In [20]:
# Build VGG16 with ImageNet weights and without its top (classifier) layers,
# so it can be used below as a fixed feature extractor.
model = VGG16(include_top=False, weights='imagenet')

In [21]:
# Load image and resize image to VGG16 (224, 224)
if not image_path:
  # Fail early with an actionable message; otherwise np.vstack below would
  # raise a generic ValueError about having nothing to concatenate.
  raise ValueError('image_path is empty: no images were found to load')

list_image = []
for imagePath in image_path:
  # Decode the file and resize it to the 224x224 input size used here.
  image = load_img(imagePath, target_size=(224, 224))
  image = img_to_array(image)

  # Add a leading batch axis so every entry can be stacked along axis 0.
  image = np.expand_dims(image, 0)
  # Apply the ImageNet preprocessing expected by the pre-trained weights.
  image = imagenet_utils.preprocess_input(image)

  list_image.append(image)

# Merge the per-image batches of size 1 into a single array along axis 0.
list_image = np.vstack(list_image)

In [22]:
# Use pre_trained model to take image feature
features = model.predict(list_image)

# Similar to flatten in CNN: collapse every non-batch axis into one feature
# vector per image. -1 lets NumPy infer the vector length (512*7*7 for the
# current 224x224 inputs) instead of hard-coding it, so the cell keeps working
# if the input size or the backbone changes.
features = features.reshape((features.shape[0], -1))

In [23]:
# Split features/labels into a training set and a 20% held-out test set.
# The fixed random_state makes the split itself repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

Use grid search to find the best hyperparameter (the regularization parameter `C`) for the logistic regression classifier.


In [None]:
# Candidate values of LogisticRegression's `C` to try in the grid search.
params = {'C': [0.1, 1.0, 10.0, 100.0]}

# NOTE: this rebinds `model` -- from here on it is the grid-searched
# classifier, no longer the VGG16 feature extractor loaded earlier.
model = GridSearchCV(estimator=LogisticRegression(), param_grid=params)
model.fit(X_train, y_train)

best_params = model.best_params_
print('Best parameter for the model {}'.format(best_params))

In [None]:
# Evaluate Model
preds = model.predict(X_test)

# Show per-class metrics under the original class names instead of the
# integer codes produced by the LabelEncoder. `labels` is passed explicitly
# so the names stay aligned with their codes even if a class happens to be
# absent from this test split.
class_ids = np.arange(len(le.classes_))
print(classification_report(y_test, preds, labels=class_ids, target_names=list(le.classes_)))