#Computer Vision
# Phân loại hình ảnh sử dụng mô hình Bag-of-Words
Nguyễn Quốc Thái - 20212642M

Chu Văn Tiến - 20212164M

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
%cd /content/drive/MyDrive/ThaiNQ_WorkSpace/image_classification_bow

/content/drive/MyDrive/ThaiNQ_WorkSpace/image_classification_bow


#1.Chuẩn bị dữ liệu

Bộ dữ liệu phân loại hình ảnh chó và mèo gồm 25.000 ảnh

In [None]:
!wget https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip
!unzip kagglecatsanddogs_5340.zip

Từ 25.000 ảnh, chọn bộ dữ liệu nhỏ chia thành 2 tập:
- Tập huấn luyện: 3.000 ảnh thuộc nhãn chó, 3.000 ảnh thuộc nhãn mèo
- Tập đánh giá: 1.000 ảnh thuộc nhãn chó, 1.000 ảnh thuộc nhãn mèo

Tập dữ liệu 8.000 ảnh có thể download từ:

In [None]:
!gdown 1-0dkrkks7AiatR5EQCozdiE2Q9nfSUga
!unzip cats_and_dogs.zip

#2.Đọc dữ liệu từ thư mục

In [9]:
import os

def read_data_from_folder(input_dir, data_type):
    image_paths, image_labels = [], []
    input_path = os.path.join(input_dir, data_type)
    for class_name in os.listdir(input_path):
        class_path = os.path.join(input_path, class_name)
        for image_name in os.listdir(class_path):
            image_path = os.path.join(class_path, image_name)
            if class_name == 'cats':
                image_paths.append(image_path)
                image_labels.append(0)
            elif class_name == 'dogs':
                image_paths.append(image_path)
                image_labels.append(1)
            else:
                pass
    return image_paths, image_labels

#3.Save và load các dữ liệu (các đặc trưng và bộ từ điển trực quan)

In [2]:
import pickle

def save_data(data, save_file_path):
    """ Save data into save file path """
    with open(save_file_path, "wb") as f:
        pickle.dump(data, f)


def load_data(save_file_path):
    """Load data from the save file path"""
    with open(save_file_path, "rb") as f:
        data = pickle.load(f)
    return data

#4.Trích chọn đặc trưng cục bộ sử dụng SIFT và xây dựng bộ từ điển trực quan

In [3]:
import os
import pickle
import cv2
from tqdm import tqdm

class ImageProcesser:
    def __init__(self, feature_extractor='sift'):

        self.feature_extractor = feature_extractor

        if self.feature_extractor.lower() == 'sift':
            self.extractor = cv2.xfeatures2d.SIFT_create()

        elif self.feature_extractor.lower() == 'kaze':
            self.extractor = cv2.KAZE_create()
        else:
            print('Support two algorithms: sift | kaze')
    
    def convert_to_gray(self, image):
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return gray
    
    def convert_to_feature(self, image):
        keypoints, descriptors = self.extractor.detectAndCompute(image, None)
        return keypoints, descriptors


def extract_feature(image_paths, image_labels, save_feature_path, image_processer):
    """Extract the feature for all the images in input dir. """

    features, labels = [], []
    # Read images
    for image_path, image_label in tqdm(zip(image_paths, image_labels)):
        image = cv2.imread(image_path)
        if image is not None:
            image_gray = image_processer.convert_to_gray(image)
            keypoints, descriptors = image_processer.convert_to_feature(image_gray)
            if descriptors is not None:
                features.append(descriptors)
                labels.append(image_label)
            
    data_images = {
        'features': features,
        'labels': labels
    }
    save_data(data_images, save_feature_path)

    return features, labels
  
def build_codebook(save_feature_path, save_codebook_path, vocab_size=400):
    """Build the codebook (dictionary) for all the images in input dir. """

    data_image = load_data(save_feature_path)

    bow = cv2.BOWKMeansTrainer(vocab_size)
    # Read feature
    for feature in tqdm(data_image['features']):
        bow.add(feature)
    # Cluster all the descriptors and save it into output file
    codebook = bow.cluster()
    # Save code books
    save_data(codebook, save_codebook_path)

    return codebook

#5.Biểu diễn hình ảnh dựa vào bộ từ điển trực quan

In [4]:
import os
import time
import cv2
import pickle

def get_bow_extractor(image_processer, codebook):
    """Get the bag of words extractor object."""
    # Using FLANN matcher to match features
    FLANN_INDEX_KDTREE = 0
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)
    flann_matcher = cv2.FlannBasedMatcher(index_params, search_params)

    # Create the bow extractor
    bow_extractor = cv2.BOWImgDescriptorExtractor(image_processer.extractor, flann_matcher)
    bow_extractor.setVocabulary(codebook)
    return bow_extractor

def get_histogram(image_processer, bow_extractor, image):
    """Represent an image as histogram of visual codewords.
    """
    gray = image_processer.convert_to_gray(image)
    if gray is not None:
        keypoints = image_processer.extractor.detect(gray, None)
        histogram = bow_extractor.compute(gray, keypoints)
        return histogram
    else:
        return None

def get_cat_dog_data(image_processer, save_codebook_path, image_paths, train_image_labels):
    """Represent cat vs dog images as histogram of visual """

    codebook = load_data(save_codebook_path)
    bow_extractor = get_bow_extractor(image_processer, codebook)

    mega_data, mega_label = [], []
    for image_path, image_label in tqdm(zip(image_paths, train_image_labels)):
        image = cv2.imread(image_path)
        if image is not None:
            histogram = get_histogram(image_processer, bow_extractor, image)
            if histogram is not None:
                mega_data.append(histogram)
                mega_label.append(image_label)

    return mega_data, mega_label

#6.Huấn luyện và đánh giá mô hình

In [10]:
alg_extractor = 'sift'
input_dir = 'cats_and_dogs'
vocab_size = 200
save_feature_path = os.path.join('save_data', f'feature_{alg_extractor}.pkl')
save_codebook_path = os.path.join('save_data', f'codebook_{alg_extractor}_{str(vocab_size)}.pkl')

image_processer = ImageProcesser(alg_extractor)

In [11]:
alg_extractor = 'sift'
input_dir = 'cats_and_dogs'
vocab_size = 400
save_feature_path = os.path.join('save_data', f'feature_{alg_extractor}.pkl')
save_codebook_path = os.path.join('save_data', f'codebook_{alg_extractor}_{str(vocab_size)}.pkl')

image_processer = ImageProcesser(alg_extractor)

In [12]:
train_image_paths, train_image_labels = read_data_from_folder(input_dir, 'train')
val_image_paths, val_image_labels = read_data_from_folder(input_dir, 'test')

In [None]:
train_features, train_labels = extract_feature(train_image_paths, train_image_labels, save_feature_path, image_processer)
val_mega_data, val_labels = get_cat_dog_data(image_processer, save_codebook_path, val_image_paths, val_image_labels)

6000it [05:15, 19.02it/s]


**Xây dựng từ điển trực quan**

In [48]:
codebook = build_codebook(save_feature_path, save_codebook_path, vocab_size)

100%|██████████| 6000/6000 [00:00<00:00, 337406.81it/s]


**Logistic Regression Model**

In [13]:
train_mega_data, train_labels = get_cat_dog_data(image_processer, save_codebook_path, train_image_paths, train_image_labels)
val_mega_data, val_labels = get_cat_dog_data(image_processer, save_codebook_path, val_image_paths, val_image_labels)

6000it [45:34,  2.19it/s]
2000it [20:31,  1.62it/s]


In [14]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

clf = LogisticRegression(random_state=0)
clf.fit(np.squeeze(np.array(train_mega_data)), np.array(train_labels))

LogisticRegression(random_state=0)

**Đánh giá mô hình**

In [42]:
#200
pred = clf.predict(np.squeeze(np.array(val_mega_data)))
accuracy_score(pred, np.array(val_labels))

0.67

**Tương tự với vocab_size=250, 300, ..., 500**

In [56]:
#500
pred = clf.predict(np.squeeze(np.array(val_mega_data)))
accuracy_score(pred, np.array(val_labels))

0.665

In [61]:
#450
pred = clf.predict(np.squeeze(np.array(val_mega_data)))
accuracy_score(pred, np.array(val_labels))

0.667

In [15]:
#400
pred = clf.predict(np.squeeze(np.array(val_mega_data)))
accuracy_score(pred, np.array(val_labels))

0.6685

In [66]:
#350
pred = clf.predict(np.squeeze(np.array(val_mega_data)))
accuracy_score(pred, np.array(val_labels))

0.676

In [47]:
#300
from sklearn.metrics import confusion_matrix,accuracy_score
pred = clf.predict(np.squeeze(np.array(val_mega_data)))
accuracy_score(pred, np.array(val_labels))

0.6715

In [71]:
#250
pred = clf.predict(np.squeeze(np.array(val_mega_data)))
accuracy_score(pred, np.array(val_labels))

0.67