# Transfer Learning with Keras and Deep Learning
https://www.pyimagesearch.com/2019/05/20/transfer-learning-with-keras-and-deep-learning/

In [1]:
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from keras.applications import VGG16
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from pyimagesearch import config
from imutils import paths
import numpy as np
import pickle
import random
import os
import shutil

Using TensorFlow backend.


## Download dataset
```bash
$ wget --passive-ftp --prefer-family=ipv4 --ftp-user FoodImage@grebvm2.epfl.ch \
	--ftp-password Cahc1moo ftp://tremplin.epfl.ch/Food-5K.zip
```

## Configuration

In [2]:
# Root folder holding the *original* input images
ORIG_INPUT_DATASET = "Food-5K"

# Root folder of the *new* directory tree that receives the images once
# they have been sorted into their training/testing/validation splits
BASE_PATH = "dataset"

# Sub-directory names of the training, testing, and validation splits
TRAIN, TEST, VAL = "training", "evaluation", "validation"

# Class label names
CLASSES = ["non_food", "food"]

# Number of images handled per batch
BATCH_SIZE = 32

# Directory that receives the extracted features (one CSV file per split)
BASE_CSV_PATH = "output"

# Where the fitted label encoder is pickled
LE_PATH = os.path.join("output", "le.cpickle")

# Where the trained model is pickled
MODEL_PATH = os.path.join("output", "model.cpickle")

## Create dataset
The format to follow is `dataset_name/split_name/class_label/example_of_class_label.jpg`

In [3]:
# Copy every image from the original dataset into
# BASE_PATH/<split>/<class label>/<filename>
for split in (TRAIN, TEST, VAL):
    # Grab all image paths in the current split
    print("Processing '{} split'...".format(split))
    splitDir = os.path.sep.join([ORIG_INPUT_DATASET, split])
    imagePaths = list(paths.list_images(splitDir))

    # Loop over the image paths
    for imagePath in imagePaths:
        # The part of the filename before the first underscore is parsed as
        # an integer index into CLASSES (e.g. "1_0.jpg" -> CLASSES[1])
        filename = imagePath.split(os.path.sep)[-1]
        label = CLASSES[int(filename.split("_")[0])]

        # Construct the output directory and create it if needed;
        # exist_ok=True avoids the separate check-then-create step
        dirPath = os.path.sep.join([BASE_PATH, split, label])
        os.makedirs(dirPath, exist_ok=True)

        # Construct the path to the output image file and copy it
        # (copy2 also attempts to preserve file metadata)
        outputPath = os.path.sep.join([dirPath, filename])
        shutil.copy2(imagePath, outputPath)

Processing 'training split'...
Processing 'evaluation split'...
Processing 'validation split'...


In [4]:
!tree

[01;34m.[00m
├── [01;34mdataset[00m
│   ├── [01;34mevaluation[00m
│   │   ├── [01;34mfood[00m
│   │   │   ├── [01;35m1_0.jpg[00m
│   │   │   ├── [01;35m1_100.jpg[00m
│   │   │   ├── [01;35m1_101.jpg[00m
│   │   │   ├── [01;35m1_102.jpg[00m
│   │   │   ├── [01;35m1_103.jpg[00m
│   │   │   ├── [01;35m1_104.jpg[00m
│   │   │   ├── [01;35m1_105.jpg[00m
│   │   │   ├── [01;35m1_106.jpg[00m
│   │   │   ├── [01;35m1_107.jpg[00m
│   │   │   ├── [01;35m1_108.jpg[00m
│   │   │   ├── [01;35m1_109.jpg[00m
│   │   │   ├── [01;35m1_10.jpg[00m
│   │   │   ├── [01;35m1_110.jpg[00m
│   │   │   ├── [01;35m1_111.jpg[00m
│   │   │   ├── [01;35m1_112.jpg[00m
│   │   │   ├── [01;35m1_113.jpg[00m
│   │   │   ├── [01;35m1_114.jpg[00m
│   │   │   ├── [01;35m1_115.jpg[00m
│   │   │   ├── [01;35m1_116.jpg[00m
│   │   │   ├── [01;35m1_117.jpg[00m
│   │   │   ├── [01;35m1_118.jpg[00m
│   │   │   ├── [01;35m1_119.jpg[00m
│   │   │   ├── 

│   │   ├── [01;35m1_911.jpg[00m
│   │   ├── [01;35m1_912.jpg[00m
│   │   ├── [01;35m1_913.jpg[00m
│   │   ├── [01;35m1_914.jpg[00m
│   │   ├── [01;35m1_915.jpg[00m
│   │   ├── [01;35m1_916.jpg[00m
│   │   ├── [01;35m1_917.jpg[00m
│   │   ├── [01;35m1_918.jpg[00m
│   │   ├── [01;35m1_919.jpg[00m
│   │   ├── [01;35m1_91.jpg[00m
│   │   ├── [01;35m1_920.jpg[00m
│   │   ├── [01;35m1_921.jpg[00m
│   │   ├── [01;35m1_922.jpg[00m
│   │   ├── [01;35m1_923.jpg[00m
│   │   ├── [01;35m1_924.jpg[00m
│   │   ├── [01;35m1_925.jpg[00m
│   │   ├── [01;35m1_926.jpg[00m
│   │   ├── [01;35m1_927.jpg[00m
│   │   ├── [01;35m1_928.jpg[00m
│   │   ├── [01;35m1_929.jpg[00m
│   │   ├── [01;35m1_92.jpg[00m
│   │   ├── [01;35m1_930.jpg[00m
│   │   ├── [01;35m1_931.jpg[00m
│   │   ├── [01;35m1_932.jpg[00m
│   │   ├── [01;35m1_933.jpg[00m
│   │   ├── [01;35m1_934.jpg[00m
│   │   ├── [01;35m1_935.jpg[00m
│   │   ├── [01;35m1_936.jpg

## Extracting features from our dataset using Keras and pre-trained CNNs

In [5]:
# Load the VGG16 network and initialize the label encoder
# We do not include the fully-connected head with the softmax classifier.
# In other words, we chop off the head of the network.
model = VGG16(weights="imagenet", include_top=False)
le = None

# Make sure the CSV output directory exists before any file is opened
os.makedirs(BASE_CSV_PATH, exist_ok=True)

# Loop over the data splits
for split in (TRAIN, TEST, VAL):
    # Grab all image paths in the current split
    print("Processing '{} split'...".format(split))
    splitDir = os.path.sep.join([BASE_PATH, split])
    imagePaths = list(paths.list_images(splitDir))

    # Randomly shuffle the image paths and then extract the class
    # labels from the file paths (the label is the parent directory name)
    random.shuffle(imagePaths)
    labels = [imagePath.split(os.path.sep)[-2] for imagePath in imagePaths]

    # If the label encoder is None, create it; it is fitted once, on the
    # labels of the first split processed, and reused for the others
    if le is None:
        le = LabelEncoder()
        le.fit(labels)

    # The batch count is loop-invariant, so compute it once per split.
    # The notebook-level BATCH_SIZE is used everywhere in this cell so that
    # the range step, the slices, and the progress counter always agree.
    numBatches = int(np.ceil(len(imagePaths) / float(BATCH_SIZE)))

    # Open the output CSV file for writing; the context manager closes it
    # even if feature extraction fails part-way through
    csvPath = os.path.sep.join([BASE_CSV_PATH, "{}.csv".format(split)])
    with open(csvPath, "w") as csvFile:
        # Loop over the images in batches
        for (b, i) in enumerate(range(0, len(imagePaths), BATCH_SIZE)):
            # Extract the batch of images and labels, then initialize the
            # list of actual images that will be passed through the network
            # for feature extraction
            print("Processing batch {}/{}".format(b + 1, numBatches))
            batchPaths = imagePaths[i:i + BATCH_SIZE]
            batchLabels = le.transform(labels[i:i + BATCH_SIZE])
            batchImages = []

            # Loop over the images in the current batch
            for imagePath in batchPaths:
                # Load the input image using the Keras helper utility
                # while ensuring the image is resized to 224x224 pixels
                image = load_img(imagePath, target_size=(224, 224))
                image = img_to_array(image)

                # Preprocess the image by (1) expanding the dimensions and
                # (2) subtracting the mean RGB pixel intensity from the
                # ImageNet dataset
                image = np.expand_dims(image, axis=0)
                image = imagenet_utils.preprocess_input(image)

                # Add the image to the batch
                batchImages.append(image)

            # Pass the images through the network and use the outputs as our
            # actual features, then reshape the features into a flattened
            # volume of 7 * 7 * 512 values per image
            batchImages = np.vstack(batchImages)
            features = model.predict(batchImages, batch_size=BATCH_SIZE)
            features = features.reshape((features.shape[0], 7 * 7 * 512))

            # Loop over the class labels and extracted features
            for (label, vec) in zip(batchLabels, features):
                # Construct a row that consists of the class label followed
                # by the extracted features
                vec = ",".join([str(v) for v in vec])
                csvFile.write("{},{}\n".format(label, vec))

# Serialize the label encoder to disk
with open(LE_PATH, "wb") as f:
    f.write(pickle.dumps(le))

Processing 'training split'...
Processing batch 1/94
Processing batch 2/94
Processing batch 3/94
Processing batch 4/94
Processing batch 5/94
Processing batch 6/94
Processing batch 7/94
Processing batch 8/94
Processing batch 9/94
Processing batch 10/94
Processing batch 11/94
Processing batch 12/94
Processing batch 13/94
Processing batch 14/94
Processing batch 15/94
Processing batch 16/94
Processing batch 17/94
Processing batch 18/94
Processing batch 19/94
Processing batch 20/94
Processing batch 21/94
Processing batch 22/94
Processing batch 23/94
Processing batch 24/94
Processing batch 25/94
Processing batch 26/94
Processing batch 27/94
Processing batch 28/94
Processing batch 29/94
Processing batch 30/94
Processing batch 31/94
Processing batch 32/94
Processing batch 33/94
Processing batch 34/94
Processing batch 35/94
Processing batch 36/94
Processing batch 37/94
Processing batch 38/94
Processing batch 39/94
Processing batch 40/94
Processing batch 41/94
Processing batch 42/94
Processing b

## Training

In [6]:
def load_data_split(splitPath):
    """Load the labels and feature vectors stored in one split's CSV file.

    Every non-blank row holds the class label in its first column, followed
    by the feature values. Blank lines are skipped.

    Args:
        splitPath: Path to the CSV file to read.

    Returns:
        A tuple ``(data, labels)`` where ``data`` is a NumPy array holding
        one float feature row per CSV row and ``labels`` is a NumPy array of
        the label strings exactly as they appear in the file.
    """
    # Initialize the data and labels
    data = []
    labels = []

    # The context manager guarantees the file handle is released
    with open(splitPath) as splitFile:
        # Loop over the rows in the data split file
        for line in splitFile:
            line = line.strip()

            # A blank line carries no label or features; keeping it would
            # add an empty feature row and make the stacked array ragged
            if not line:
                continue

            # Extract the class label and features from the row
            row = line.split(",")
            labels.append(row[0])
            data.append(np.array(row[1:], dtype="float"))

    # Convert the data and labels to NumPy arrays and return them as a tuple
    return (np.array(data), np.array(labels))

In [7]:
# Derive the paths to the training and testing CSV files
trainingPath = os.path.sep.join([BASE_CSV_PATH, "{}.csv".format(TRAIN)])
testingPath = os.path.sep.join([BASE_CSV_PATH, "{}.csv".format(TEST)])

# Load the data from disk
(trainX, trainY) = load_data_split(trainingPath)
(testX, testY) = load_data_split(testingPath)

# Load the label encoder from disk; the context manager closes the file
# handle instead of leaving it to the garbage collector.
# NOTE: unpickling can execute arbitrary code, so only load a pickle file
# that comes from a trusted source.
with open(LE_PATH, "rb") as f:
    le = pickle.loads(f.read())

In [8]:
# Train a logistic regression classifier on the extracted features
print("Training model...")
model = LogisticRegression(solver="lbfgs", multi_class="auto", max_iter=1000)
model.fit(trainX, trainY)

# Evaluate the model on the testing split, reporting per-class metrics
# under the label encoder's class names
print("Evaluating...")
preds = model.predict(testX)
print(classification_report(testY, preds, target_names=le.classes_))

# Serialize the model to disk; the context manager closes the file even if
# pickling or writing raises
print("Saving model...")
with open(MODEL_PATH, "wb") as f:
    f.write(pickle.dumps(model))

Training model...
Evaluating...
              precision    recall  f1-score   support

        food       0.99      0.98      0.98       500
    non_food       0.98      0.99      0.99       500

    accuracy                           0.98      1000
   macro avg       0.99      0.98      0.98      1000
weighted avg       0.99      0.98      0.98      1000

Saving model...
