# 部分採用，擷取特徵

In [None]:
# -*- coding: utf-8 -*-
#-*- coding: cp950 -*-

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from pathlib import Path
from typing import *
import torch
import torch.optim as optim
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import os

from google.colab import drive
# Mount Google Drive inside the Colab VM (a remount is forced every time)
# and make the tutorial folder the current working directory, so that the
# relative paths used by the rest of the notebook resolve inside it.
drive.mount('/content/gdrive', force_remount=True)
base_dir = '/content/gdrive/MyDrive'
project_subdir = '/Keras_tutorial'  # imgs
# the folder is expected to exist already; it is not created here
path = Path(base_dir + project_subdir)
os.chdir(path)

Mounted at /content/gdrive


In [None]:
>>特徵萃取
特徵萃取（feature extraction）的原理，就是將原來模型FC層(全連結層)的分類器移除然後用新的分類器取代，而負責萃取資料的CNN Base則維持不變。

>>特徵萃取有兩種作法：無論是第一或第二種方法，皆需要將後端FC層分類器移除
1.將特徵萃取(CNN Base)與分類器層視為不同的個體，
先將dataset的圖片在CNN Base跑過一次，取得所有的特徵向量後，再將這些特徵向量送到分類器進行分類訓練，
因此，CNN base僅負責初次的特徵萃取，之後便在分類層上進行訓練
此種方式的優點是整體的訓練時間短，因為實際訓練的僅在FC層，而CNN 層在取出特徵圖之後便不再使用，是故所有圖片在CNN層上只需run過一次，
後續在FC層的分類器訓練則視epoch次數而定，由於訓練時間很短，因此很適合在CPU或無GPU的環境訓練。
不過，此法的缺點則是無法透過ImageDataGenerator即時（on-the-fly）產生資料強化用的圖片（除非我們事先產生好）。

2.將新的FC層分類器附加在已去除分類器的CNN layers後方，
形成跟之前一樣完整的模型來進行訓練，跟之前模型差別只在於分類器的不同。
此法的好處是可以把它當成一般模型來使用，因此能夠透過ImageDataGenerator即時（on-the-fly）產生大量圖片來訓練，
缺點是就跟一般的CNN訓練一樣，圖片在CNN layers與FC layers持續的by batch訓練，相當的耗時，因此不適合在無GPU的環境下訓練。

>>下方以Keras所內建的VGG16模型為例，示範如何將用於softmax分類的FC layer移除。

In [None]:
##################################################################
If we were to stop propagation before the fully-connected layers in VGG16, 
the last layer in the network would become the max-pooling layer (Figure 2, right),
which will have an output shape of 7 x 7 x 512. Flattening this volume
into a feature vector, we would obtain a list of 7 x 7 x 512 = 25,088 values —
this list of numbers serves as our feature vector used to quantify the input image.

In [None]:
>>>Extracting features using Keras and pre-trained CNNs
With weights dialed in and by loading our model without the head, we are now ready for transfer learning.
We will use the output values of the network directly, storing the results as feature vectors.

In [None]:
#configuration file
# NOTE(review): later cells read these same names through
# `from pyimagesearch import config` - confirm that module mirrors the
# values defined here.
import os

# directory holding the *original*, not-yet-split input images
ORIG_INPUT_DATASET = "Food-5K"

# directory that will hold the images once the training / testing /
# validation split has been computed
BASE_PATH = "dataset"

# names of the per-split sub-directories
TRAIN, TEST, VAL = "training", "evaluation", "validation"

# class label names
CLASSES = ["non_food", "food"]

# batch size
BATCH_SIZE = 32

# directory where the extracted features (one CSV file per split) are stored
BASE_CSV_PATH = "output"

# files for the serialized label encoder and for the serialized model
# produced by training
LE_PATH = os.path.join("output", "le.cpickle")
MODEL_PATH = os.path.join("output", "model.cpickle")

In [None]:
#Building our dataset for feature extraction
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from pyimagesearch import config
from imutils import paths
import numpy as np
import pickle
import random
import os

In [None]:
#Let’s loop over our data splits:
# Load VGG16 with ImageNet weights but without its fully-connected head
# (include_top=False), and start with no label encoder. Both names are used
# by the extraction loop and were never defined before it, which raised a
# NameError on a fresh kernel.
model = VGG16(weights="imagenet", include_top=False)
le = None

# make sure the CSV output directory exists before any file is opened in it
os.makedirs(config.BASE_CSV_PATH, exist_ok=True)

# loop over the data splits
for split in (config.TRAIN, config.TEST, config.VAL):
	# grab all image paths in the current split
	print("[INFO] processing '{} split'...".format(split))
	p = os.path.sep.join([config.BASE_PATH, split])
	imagePaths = list(paths.list_images(p))

	# randomly shuffle the image paths, then take each image's class
	# label from the name of its parent directory
	random.shuffle(imagePaths)
	labels = [p.split(os.path.sep)[-2] for p in imagePaths]

	# the label encoder is created and fitted only once, on the labels of
	# the first split processed
	if le is None:
		le = LabelEncoder()
		le.fit(labels)

	# open the output CSV file for this split; it stays open while the
	# split's batches are written and is closed by csv.close() afterwards
	csvPath = os.path.sep.join([config.BASE_CSV_PATH,
		"{}.csv".format(split)])
	csv = open(csvPath, "w")

In [None]:
#The next step is to loop over our imagePaths in BATCH_SIZE chunks:
	# walk through the image paths one batch at a time
	for (b, i) in enumerate(range(0, len(imagePaths), config.BATCH_SIZE)):
		# report progress as "current batch / total number of batches"
		print("[INFO] processing batch {}/{}".format(b + 1,
			int(np.ceil(len(imagePaths) / float(config.BATCH_SIZE)))))

		# slice out this batch's paths and integer-encode the matching labels
		batchEnd = i + config.BATCH_SIZE
		batchPaths = imagePaths[i:batchEnd]
		batchLabels = le.transform(labels[i:batchEnd])

		# for every path: load the image resized to 224x224, convert it to
		# an array, add a leading batch axis and run preprocess_input on it
		batchImages = [
			preprocess_input(np.expand_dims(
				img_to_array(load_img(imagePath, target_size=(224, 224))),
				axis=0))
			for imagePath in batchPaths
		]

		# stack the single-image arrays into one batch, send it through the
		# network, and flatten each output volume into one feature vector
		batchImages = np.vstack(batchImages)
		features = model.predict(batchImages, batch_size=config.BATCH_SIZE)
		features = features.reshape((features.shape[0], 7 * 7 * 512))

In [None]:
Our batch of images is sent through the network by the `model.predict` call above.
Keep in mind that we have removed the fully-connected layer head of the network.
Instead, the forward propagation stops at the max-pooling layer.
We will treat the output of the max-pooling layer as a list of features, also known as a “feature vector”.

The output dimension of the max-pooling layer is (batch_size, 7, 7, 512).
We can thus reshape the features into a NumPy array of shape (batch_size, 7 * 7 * 512),
treating the output of the CNN as a feature vector.

In [None]:
#Let’s wrap up this script:
		# loop over the class labels and extracted features
		for (label, vec) in zip(batchLabels, features):
			# construct a row that exists of the class label and
			# extracted features
			vec = ",".join([str(v) for v in vec])
			csv.write("{},{}\n".format(label, vec))

	# close the CSV file
	csv.close()

# serialize the label encoder to disk; the context manager closes the file
# even if pickling or writing raises
with open(config.LE_PATH, "wb") as f:
	f.write(pickle.dumps(le))

In [None]:
Maintaining our batch efficiency, the features and associated class labels are written to our CSV file by the `zip(batchLabels, features)` loop above.

Inside the CSV file, the class label is the first field in each row (enabling us to easily extract it from the row during training). The feature vec follows.
Each CSV file is closed with `csv.close()` once all of its batches have been written. Recall that upon completion we will have one CSV file per data split.
Finally, we dump the label encoder to disk with `pickle`.

In [None]:
# Show the shape of the most recent `features` array.
# NOTE(review): the batch loop reshapes `features` to (N, 7 * 7 * 512), so the
# recorded (1, 7, 7, 512) output presumably comes from a run without that
# reshape - confirm and re-run.
print(features.shape)

(1, 7, 7, 512)


In [None]:
We can then repeat the process for our entire dataset of images.

Given a total of N images in our dataset, our dataset would now be represented as a list of N vectors, each of 25,088-dim.
Once we have our feature vectors, we can train off-the-shelf machine learning models 
such as Linear SVM, Logistic Regression, Decision Trees, or Random Forests on top of these features to obtain a classifier 
that can recognize new classes of images.

In [None]:
That said, the two most common machine learning models you’ll see for transfer learning via feature extraction are:
1. Logistic Regression
2. Linear SVM

Why those two models?
First, keep in mind our feature extractor is a CNN.
CNNs are non-linear models capable of learning non-linear features —
we are assuming that the features learned by the CNN are already robust and discriminative.

The second, and perhaps arguably more important reason, 
is that our feature vectors tend to be very large and have high dimensionality.
We, therefore, need a fast model that can be trained on top of the features — linear models tend to be very fast to train.
For example, our dataset of 5,000 images, each represented by a feature vector of 25,088-dim,
can be trained in a few seconds using a Logistic Regression model.

To wrap up this section, I want you to keep in mind that the CNN itself is not capable of recognizing these new classes.
Instead, we are using the CNN as an intermediary feature extractor.

In [None]:
>>>Implementing our training script
The final step for transfer learning via feature extraction is to implement a Python script 
that will take our extracted features from the CNN and then train a Logistic Regression model on top of the features.

In [None]:
# import the necessary packages
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from pyimagesearch import config
import numpy as np
import pickle
import os

def load_data_split(splitPath):
	"""Load one split's feature CSV into NumPy arrays.

	Each non-blank row holds the class label in its first field, followed
	by the comma-separated feature values.

	Args:
		splitPath: path of the CSV file to read.

	Returns:
		A ``(data, labels)`` tuple of NumPy arrays. ``data`` holds one row
		of float features per CSV row; ``labels`` holds the first field of
		each row, kept as the string read from the file.

	Raises:
		ValueError: if a feature field cannot be parsed as a float.
	"""
	# initialize the data and labels
	data = []
	labels = []

	# the context manager closes the file even if a row fails to parse
	with open(splitPath) as splitFile:
		for row in splitFile:
			# skip blank lines (e.g. a stray newline) instead of turning
			# them into an empty feature row that breaks the final array
			row = row.strip()
			if not row:
				continue

			# first field is the class label, the rest are the features
			fields = row.split(",")
			labels.append(fields[0])
			data.append(np.array(fields[1:], dtype="float"))

	# return a tuple of the data and labels as NumPy arrays
	return (np.array(data), np.array(labels))

#With the load_data_split function ready to go, let’s put it to work by loading our data:
# derive the paths to the training and testing CSV files
trainingPath = os.path.sep.join([config.BASE_CSV_PATH,
	"{}.csv".format(config.TRAIN)])
testingPath = os.path.sep.join([config.BASE_CSV_PATH,
	"{}.csv".format(config.TEST)])

# load the data from disk
print("[INFO] loading data...")
(trainX, trainY) = load_data_split(trainingPath)
(testX, testY) = load_data_split(testingPath)

# load the label encoder from disk; the context manager closes the handle.
# NOTE: unpickling can execute arbitrary code - only load files that this
# notebook produced itself.
with open(config.LE_PATH, "rb") as f:
	le = pickle.loads(f.read())

# train the model; multi_class="auto" is no longer passed because it is the
# default value and the parameter is deprecated in recent scikit-learn
print("[INFO] training model...")
model = LogisticRegression(solver="lbfgs", max_iter=150)
model.fit(trainX, trainY)

# evaluate the model on the testing split, reporting per-class metrics
# under the original class names stored in the label encoder
print("[INFO] evaluating...")
preds = model.predict(testX)
print(classification_report(testY, preds, target_names=le.classes_))

# serialize the model to disk; the file is closed even if pickling raises
print("[INFO] saving model...")
with open(config.MODEL_PATH, "wb") as f:
	f.write(pickle.dumps(model))