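CNN là mạng phi tuyến có khả năng học các đặc trưng phi tuyến, giả sử là các đặc trưng mạnh và độc lập. Bản thân phần trích xuất đặc trưng của CNN không có khả năng phân lớp, vì vậy các đặc trưng này sẽ được đưa vào một bộ phân lớp riêng (ở đây là Logistic Regression).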

In [1]:
import os

In [2]:
# Where the raw Food-5K download lives and where the re-organised copy goes.
ORIG_INPUT_DATASET = 'Food-5K'
BASE_PATH = 'dataset'

# Names of the three split sub-directories.
TRAIN, TEST, VAL = 'training', 'evaluation', 'validation'

# Class names, indexed by the numeric prefix of each image file name.
CLASSES = ['non_food', 'food']
# Number of images pushed through the network per batch.
BATCH_SIZE = 16

# Output artefacts: per-split feature CSVs, the pickled label encoder and
# the pickled classifier all live in the same directory.
BASE_CSV_PATH = 'output'
LE_PATH = os.path.join(BASE_CSV_PATH, 'le.cpickle')
MODEL_PATH = os.path.join(BASE_CSV_PATH, 'model.cpickle')

# Building dataset for feature extraction

In [3]:
# from imutils import paths
import shutil
import os
 
# Copy every image of the raw Food-5K splits into
# BASE_PATH/<split>/<class name>/, where the class name is looked up in
# CLASSES using the numeric prefix of the file name ("<index>_...").
for tp in (TRAIN, TEST, VAL):
    print(f'[INFO] processing {tp} split ...')
    splitDir = os.path.join(ORIG_INPUT_DATASET, tp)
    for fileName in os.listdir(splitDir):
        path = os.path.join(splitDir, fileName)
        classIdx = fileName.split('_')[0]
        # Skip entries that are not "<class index>_..." files (hidden files,
        # sub-directories, ...) instead of crashing on int() or copy2().
        if not (os.path.isfile(path) and classIdx.isdecimal()
                and int(classIdx) < len(CLASSES)):
            print(f'[WARN] skipping unexpected entry {path}')
            continue
        label = CLASSES[int(classIdx)]
        dirPath = os.path.join(BASE_PATH, tp, label)
        # exist_ok replaces the separate exists() check and its race window.
        os.makedirs(dirPath, exist_ok=True)
        shutil.copy2(path, os.path.join(dirPath, fileName))

[INFO] processing training split ...
[INFO] processing evaluation split ...
[INFO] processing validation split ...


# Extracting features

In [4]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import numpy as np
import pickle
import random

In [5]:
# Load VGG16 with ImageNet weights and without its top (classifier) layers
# (include_top=False); it is only used below to produce features via predict().
model = VGG16(weights='imagenet', include_top=False)

In [6]:
# Label encoder shared by all splits; stays None until the extraction loop
# creates and fits it on the first split it processes.
le = None

In [8]:
# Run every image of each split through the network held in `model` and write
# one CSV row per image: the encoded label followed by the flattened feature
# vector. The fitted label encoder is pickled to LE_PATH afterwards.
for tp in (TRAIN, TEST, VAL):
    print(f'[INFO] preprocessing {tp} split...')
    p = os.path.join(BASE_PATH, tp)
    # Gather <split>/<class>/<file> for every known class directory.
    imagePaths = [
        os.path.join(p, lb, name)
        for lb in CLASSES
        for name in os.listdir(os.path.join(p, lb))
    ]
    random.shuffle(imagePaths)
    # The class name is the parent directory of each image.
    labels = [os.path.basename(os.path.dirname(ip)) for ip in imagePaths]

    # Fit the encoder only once so every split uses the same label mapping.
    if le is None:
        le = LabelEncoder()
        le.fit(labels)

    os.makedirs(BASE_CSV_PATH, exist_ok=True)
    csvPath = os.path.join(BASE_CSV_PATH, f'{tp}.csv')
    numBatches = int(np.ceil(len(imagePaths) / BATCH_SIZE))
    # `with` guarantees the CSV is flushed and closed even if a batch fails.
    with open(csvPath, 'w') as csvFile:
        for (b, i) in enumerate(range(0, len(imagePaths), BATCH_SIZE)):
            print(f'[INFO] processing batch {b+1}/{numBatches}')
            batchPaths = imagePaths[i:i+BATCH_SIZE]
            batchLabels = le.transform(labels[i:i+BATCH_SIZE])
            batchImages = []
            for batchPath in batchPaths:
                image = load_img(batchPath, target_size=(224, 224))
                image = img_to_array(image)
                # Add a leading batch axis so the images can be stacked.
                image = np.expand_dims(image, axis=0)
                image = preprocess_input(image)
                batchImages.append(image)
            batchImages = np.vstack(batchImages)
            features = model.predict(batchImages, batch_size=BATCH_SIZE)
            # Flatten each feature map to one row. The size is inferred with
            # -1 rather than hard-coded: the previous 7*7*2048 does not match
            # the 25088 (= 7*7*512) feature columns the CSVs actually hold.
            features = features.reshape((features.shape[0], -1))
            for (label, vec) in zip(batchLabels, features):
                vec = ','.join(map(str, vec))
                csvFile.write(f'{label}, {vec}\n')

# Persist the fitted encoder so class names can be recovered at evaluation time.
with open(LE_PATH, 'wb') as f:
    pickle.dump(le, f)

[INFO] preprocessing training split...
[INFO] processing batch 1/188
[INFO] processing batch 2/188
[INFO] processing batch 3/188
[INFO] processing batch 4/188
[INFO] processing batch 5/188
[INFO] processing batch 6/188
[INFO] processing batch 7/188
[INFO] processing batch 8/188
[INFO] processing batch 9/188
[INFO] processing batch 10/188
[INFO] processing batch 11/188
[INFO] processing batch 12/188
[INFO] processing batch 13/188
[INFO] processing batch 14/188
[INFO] processing batch 15/188
[INFO] processing batch 16/188
[INFO] processing batch 17/188
[INFO] processing batch 18/188
[INFO] processing batch 19/188
[INFO] processing batch 20/188
[INFO] processing batch 21/188
[INFO] processing batch 22/188
[INFO] processing batch 23/188
[INFO] processing batch 24/188
[INFO] processing batch 25/188
[INFO] processing batch 26/188
[INFO] processing batch 27/188
[INFO] processing batch 28/188
[INFO] processing batch 29/188
[INFO] processing batch 30/188
[INFO] processing batch 31/188
[INFO] pr

[INFO] processing batch 11/63
[INFO] processing batch 12/63
[INFO] processing batch 13/63
[INFO] processing batch 14/63
[INFO] processing batch 15/63
[INFO] processing batch 16/63
[INFO] processing batch 17/63
[INFO] processing batch 18/63
[INFO] processing batch 19/63
[INFO] processing batch 20/63
[INFO] processing batch 21/63
[INFO] processing batch 22/63
[INFO] processing batch 23/63
[INFO] processing batch 24/63
[INFO] processing batch 25/63
[INFO] processing batch 26/63
[INFO] processing batch 27/63
[INFO] processing batch 28/63
[INFO] processing batch 29/63
[INFO] processing batch 30/63
[INFO] processing batch 31/63
[INFO] processing batch 32/63
[INFO] processing batch 33/63
[INFO] processing batch 34/63
[INFO] processing batch 35/63
[INFO] processing batch 36/63
[INFO] processing batch 37/63
[INFO] processing batch 38/63
[INFO] processing batch 39/63
[INFO] processing batch 40/63
[INFO] processing batch 41/63
[INFO] processing batch 42/63
[INFO] processing batch 43/63
[INFO] pro

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import pandas as pd


In [38]:
def load_data(splitPath):
    """Read a header-less feature CSV and split it into features and labels.

    Column 0 of the file holds the label; every remaining column is a
    feature value.

    Returns a ``(data, labels)`` tuple: ``data`` is a DataFrame with all
    columns except the first, ``labels`` is the first column as a Series.
    """
    frame = pd.read_csv(splitPath, header=None)
    labels = frame.iloc[:, 0]
    data = frame.iloc[:, 1:]
    return data, labels

In [23]:
# Feature CSVs for the training and evaluation splits. The file names are
# built from the split constants, exactly as the extraction step names the
# files it writes (f'{tp}.csv'), so the two cannot drift apart.
trainPath = os.path.join(BASE_CSV_PATH, f'{TRAIN}.csv')
testPath = os.path.join(BASE_CSV_PATH, f'{TEST}.csv')


In [39]:
# Load features (X) and encoded labels (y) for the training and evaluation splits.
X_train, y_train = load_data(trainPath)
X_test, y_test = load_data(testPath)

In [29]:
# Re-read the evaluation CSV (no header row) to inspect its layout by hand.
dff = pd.read_csv(testPath, header=None)

In [30]:
# Shape of column 0, the label written first on every CSV row.
dff[0].shape

(1000,)

In [37]:
# Shape of all columns after the first, i.e. the feature columns.
dff.iloc[:,1:].shape

(1000, 25088)

In [32]:
# Note: dff[1:] slices ROWS, not columns; it drops the first row and keeps
# every column, so it is not a way to select the features.
dff[1:].shape

(999, 25089)

In [25]:
# Restore the label encoder saved by the feature-extraction step.
# `with` closes the file handle, which the bare open(...).read() left open.
# NOTE: pickle must only be used on trusted files; this one is produced by
# this notebook itself.
with open(LE_PATH, 'rb') as leFile:
    le = pickle.load(leFile)

In [40]:
print(f'[INFO] training model...')
# NOTE: this rebinds `model`, which until now referred to the VGG16 feature
# extractor; re-running the extraction cell after this one would use the
# classifier instead.
model = LogisticRegression(solver='lbfgs')
# Fit on the training features; kept as the last expression of the cell.
model.fit(X_train, y_train)

[INFO] training model...




LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [43]:
print('[INFO] evaluating...')
# Predict on the evaluation split and print per-class metrics, naming the
# classes with the label encoder's classes_.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=le.classes_))

[INFO] evaluating...
              precision    recall  f1-score   support

        food       0.99      0.98      0.98       500
    non_food       0.98      0.99      0.99       500

    accuracy                           0.98      1000
   macro avg       0.99      0.98      0.98      1000
weighted avg       0.99      0.98      0.98      1000



In [44]:
print('[INFO] saving model...')
# Serialise the trained classifier to MODEL_PATH. The context manager closes
# the file even if pickling raises.
with open(MODEL_PATH, 'wb') as f:
    pickle.dump(model, f)

[INFO] saving model...
