<H1>Extract features</H1>

<h3>Import required libraries</h3>

In [None]:
import numpy as np # linear algebra
import os # reading data
import cv2 # reading images
import pickle as cpickle # store data for fast processing

<h3>Set up the correct locations for the dataset folders</h3>
<p>Dataset can be found <a href="https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia">here</a>.</p>

In [None]:
# Root folder that contains the "train", "test" and "val" sub-folders.
# Replace the "..." placeholder with the local path of the downloaded dataset.
datasetRoot = "..."

# os.path.join inserts the separator of the current OS, so the notebook is not
# tied to Windows-style backslashes.
trainDataDir = os.path.join(datasetRoot, "train")
testDataDir = os.path.join(datasetRoot, "test")
validateDataDir = os.path.join(datasetRoot, "val")

<h3>Initialize dictionaries in which we will store the data of each category</h3>

In [3]:
# One independent, initially empty dictionary per dataset split.
training_data = dict()
testing_data = dict()
validate_data = dict()

In [None]:
# Category names handled by this notebook
# (presumably also the sub-folder names inside each split -- confirm).
categories = [
    "NORMAL",
    "PNEUMONIA",
]

<h3>Extract data using KAZE descriptor function</h3>

In [None]:
def Get_Kaze_features(image, vector_size=32):
    """Build a fixed-length KAZE feature vector for one image.

    The keypoints with the highest response are kept (at most ``vector_size``
    of them), their descriptors are computed and flattened into one vector,
    and the vector is zero-padded so that every image yields the same length.

    Parameters
    ----------
    image : image array accepted by ``cv2.KAZE.detect`` / ``compute``.
    vector_size : int, optional
        Maximum number of keypoints to keep. Defaults to 32.

    Returns
    -------
    numpy.ndarray or None
        1-D vector of length ``vector_size * 64``; all zeros when no keypoint
        descriptors could be computed. ``None`` if OpenCV raised ``cv2.error``.
    """
    # Length of a single KAZE descriptor (64 values per keypoint).
    descriptor_size = 64
    needed_size = vector_size * descriptor_size

    try:
        alg = cv2.KAZE_create()
        # Find the image keypoints.
        kps = alg.detect(image)
        # The number of keypoints varies with image size and content, so keep
        # only the strongest ones, sorted by response (bigger is better).
        kps = sorted(kps, key=lambda x: -x.response)[:vector_size]
        # Compute the descriptor vectors of the selected keypoints.
        kps, dsc = alg.compute(image, kps)
    except cv2.error as e:
        # The exception must be converted explicitly: str + exception raises
        # TypeError and would hide the original OpenCV error.
        print('Error: ' + str(e))
        return None

    if dsc is None:
        # No keypoints were found, so there are no descriptors to flatten;
        # the feature vector is padding only.
        return np.zeros(needed_size)

    # Flatten all descriptors into one big vector - our feature vector.
    dsc = dsc.flatten()
    if dsc.size < needed_size:
        # Fewer than vector_size descriptors: pad the end with zeros.
        dsc = np.concatenate([dsc, np.zeros(needed_size - dsc.size)])
    return dsc

<h3>Extract data using HOG descriptor function</h3>

In [None]:
def Get_Hog_Features(image):
    """Compute a flattened HOG (histogram of oriented gradients) vector.

    The descriptor uses 8x8-pixel cells, 2x2-cell blocks, a block stride of
    one cell and 9 orientation bins. The window size is the image size
    rounded down to a multiple of the cell size.

    Parameters
    ----------
    image : numpy.ndarray
        Image whose ``shape[0]`` is the height and ``shape[1]`` the width.

    Returns
    -------
    numpy.ndarray or None
        1-D feature vector, or ``None`` when ``image`` is ``None`` or OpenCV
        raised ``cv2.error``.
    """
    if image is None:
        # Without this guard, image.shape raises AttributeError, which the
        # cv2.error handler below does not catch.
        print('Error: image is None')
        return None

    try:
        cell_size = (8, 8)  # h x w in pixels
        block_size = (2, 2)  # h x w in cells
        nbins = 9  # number of orientation bins

        # Number of whole cells that fit in the image, as (rows, cols).
        n_cells = (image.shape[0] // cell_size[0], image.shape[1] // cell_size[1])

        # winSize is the image size cropped to a multiple of the cell size.
        # OpenCV sizes are given as (width, height), hence the swapped indices.
        hog = cv2.HOGDescriptor(_winSize=(n_cells[1] * cell_size[1],
                                          n_cells[0] * cell_size[0]),
                                _blockSize=(block_size[1] * cell_size[1],
                                            block_size[0] * cell_size[0]),
                                _blockStride=(cell_size[1], cell_size[0]),
                                _cellSize=(cell_size[1], cell_size[0]),
                                _nbins=nbins)

        # Blocks advance one cell at a time, so there are
        # (cells - block cells + 1) block positions along each axis.
        blocks_x = n_cells[1] - block_size[1] + 1
        blocks_y = n_cells[0] - block_size[0] + 1

        # Give the flat descriptor its block/cell/bin structure, then swap the
        # first two axes so that the vertical block index comes first.
        dsc = hog.compute(image) \
            .reshape(blocks_x, blocks_y,
                     block_size[0], block_size[1], nbins) \
            .transpose((1, 0, 2, 3, 4))
        return dsc.flatten()
    except cv2.error as e:
        # The exception must be converted explicitly: str + exception raises
        # TypeError and would hide the original OpenCV error.
        print('Error: ' + str(e))
        return None