In [2]:
# creating model and using feature extraction to find different features and reduce curse of dimensionality
import numpy as np
import cv2
import pandas as pd
import os
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from skimage.feature import hog
from skimage.feature import local_binary_pattern
from tqdm.auto import tqdm

In [3]:
def calculate_hog_features(image):
    hog_image = cv2.resize(image, (512, 256), interpolation=cv2.INTER_AREA)
    fd, hog_image = hog(hog_image, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1), visualize=True)
    return fd, hog_image

In [4]:
# https://www.pyimagesearch.com/2015/12/07/local-binary-patterns-with-python-opencv/

def lbp_feature_selection(image):
    lbp = local_binary_pattern(image, 24, 3, )
    

    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, 24 + 3), range=(0, 24 + 2))
    # normalize the histogram
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-7)
    # return the histogram of Local Binary Patterns
    
    return hist

In [5]:
# loading in images and turing it into an array to use as features, collecting labels
path = os.getcwd()

path = os.path.join(path, 'dataset')
df_labels = pd.read_csv(os.path.join(path, 'label.csv'))

# getting train and test dataframes
train = pd.read_csv(os.path.join(path, 'train.csv'))
test = pd.read_csv(os.path.join(path, 'test.csv'))

classes = {'meningioma_tumor': 0, 'no_tumor': 1, 'glioma_tumor': 2, 'pituitary_tumor': 3}

def feature_extraction(dataset, img_path, name):
    features = []
    hog_features = []
    lbp_features = []
    labels = []
    for row in tqdm(dataset.iterrows(), total=dataset.shape[0], desc="Progress"):
        image_name = row[1][0]
        label = row[1][1]
        # opening and flattening image
        img = cv2.imread(os.path.join(img_path, image_name))
        # images are gray scale so there is useless data using RGB
        grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # calculating hog features
        fd, hog_image = calculate_hog_features(grayscale)
        fd = []
        # calculating LBP
        lbp_hist = lbp_feature_selection(grayscale)
        
        hog_features.append(fd)
        lbp_features.append(lbp_features)

        features.append(np.concatenate((fd, lbp_hist)))
        labels.append(classes.get(label))
    # saving features for use later
    np.save(f'./features/{name}/hog_features.npy', hog_features)
    np.save(f'./features/{name}/lbp_features.npy', lbp_features)

    return features, labels

train_path = os.path.join(path, 'train')
test_path = os.path.join(path, 'test')

x_train, y_train = feature_extraction(train, train_path, 'train')
x_test, y_test = feature_extraction(test, test_path, 'test')

print('Train size: ', len(x_train))
print('Test size: ', len(x_test))


Progress: 100%|██████████| 2100/2100 [08:54<00:00,  3.93it/s]

In [None]:
# PCA fitting for dimensionality reduction
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

def dimensionality_reduction(x_train, x_test):
    scalar = StandardScaler()

    # scaling data to perform pca on it
    scalar.fit(x_train)

    # applying scale to xtrain and xtest
    x_train = scalar.transform(x_train)
    x_test = scalar.transform(x_test)


    # creating pca
    pca = PCA(.95)

    # fitting pca on training data
    pca.fit(x_train)

    # applying to train and test
    x_train = pca.transform(x_train)
    x_test = pca.transform(x_test)
    return x_train, x_test

In [None]:
x_train, x_test = dimensionality_reduction(x_train, x_test)

In [None]:
x_train.shape

(2100, 1055)

In [None]:
from sklearn.ensemble import RandomForestClassifier

model_random_forest = RandomForestClassifier()

model_random_forest.fit(x_train, y_train)

RandomForestClassifier()

In [None]:
# getting accuracy metrics and printing
pred = model_random_forest.predict(x_test)
score = accuracy_score(y_test, pred)
print('accuracy: ', score)

accuracy:  0.8166666666666667
