In [25]:
import cv2 as cv
import numpy as np
import os
from skimage.feature.texture import greycomatrix
from skimage.feature.texture import greycoprops
from skimage.measure import shannon_entropy
import pyfeats

In [26]:
def read_images(folder = "dataset/train",
                classes = [
                            "normal",
                            "fatty",
#                             "cirrhosis"
                        ]):
    image_names = {}
    images = {}
    # Get all image names in folders
    for cls in classes:
        image_names[cls] = os.listdir(f'{folder}/{cls}')

    # read all images to list
    for cls in classes:
        images[cls] = []
        for name in image_names[cls]:
            img = cv.imread(f'{folder}/{cls}/{name}', cv.IMREAD_GRAYSCALE)
            images[cls].append(img)
    return images

In [27]:
def extract_roi(img, start =(160,300) , size = (9,9)):
    roi = img[start[0]:start[0]+size[0],start[1]:start[1]+size[1]]
    mask = np.zeros(img.shape)
    mask[40:49,40:49] = 1
    return roi, mask

In [28]:
def feature_extraction(img):
    roi_pos = [
        (160,300),
        (118,224),
        (241,151),
        (120,420),
        (170,300),
        (400,200),
        (300,120),
        (240,240),
        (360,160)
    ]
    
    roi_arr = []
    mask_arr = []
    for pos in roi_pos:
        roi, mask = extract_roi(img, pos)
        roi_arr.append(roi)
        mask_arr.append(mask)
    
    features = {}
    # 0 45 90 135 degrees
    # angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
    for i in range(len(roi_pos)):
        glcm_mtx = greycomatrix(roi, distances = [1], angles = [0], levels = 256)

        features[f'r{i}_contrast'] = greycoprops(glcm_mtx, 'contrast')[0,0]
        features[f'r{i}_homogeneity'] = greycoprops(glcm_mtx, 'homogeneity')[0,0]
        features[f'r{i}_energy'] = greycoprops(glcm_mtx, 'energy')[0,0]
        features[f'r{i}_correlation'] = greycoprops(glcm_mtx, 'correlation')[0,0]


        features[f'r{i}_entropy'] = shannon_entropy(roi)



        feat, labels = pyfeats.glrlm_features(img, mask, 256)
        glrlm = {l : f for l,f in zip(labels,feat)}
        features[f'r{i}_longRunEmphasis'] = glrlm['GLRLM_LongRunEmphasis']
        features[f'r{i}_runPercentage'] = glrlm['GLRLM_RunPercentage']
        
    return features

In [29]:
import pandas as pd
img = cv.imread('dataset/train/normal/n1.jpg', cv.IMREAD_GRAYSCALE)
columns = feature_extraction(img).keys()

In [30]:
def build_dataframe(images, columns):
    # dataframe consists of features of 1 ROI per image
    # column name roiNum_feature
#     data = pd.DataFrame( columns = [
#         'contrast',
#         'homogeneity',
#         'energy',
#         'correlation',
#         'entropy',
#         'longRunEmphasis',
#         'runPercentage',
#         'target'
#     ])
    data = pd.DataFrame( columns = columns)

    for cls in images:
        for img in images[cls]:
            row = feature_extraction(img)
            row['target'] = cls
            data = data.append(row,ignore_index=True)
    return row, data

In [36]:
%%time
images = read_images()
row, data = build_dataframe(images,columns)

Wall time: 10min 46s


In [37]:
data.describe()

Unnamed: 0,r0_contrast,r0_homogeneity,r0_energy,r0_correlation,r0_entropy,r0_longRunEmphasis,r0_runPercentage,r1_contrast,r1_homogeneity,r1_energy,...,r7_entropy,r7_longRunEmphasis,r7_runPercentage,r8_contrast,r8_homogeneity,r8_energy,r8_correlation,r8_entropy,r8_longRunEmphasis,r8_runPercentage
count,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,...,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0
mean,53.700694,0.352659,0.1677,0.85759,4.156056,1.711326,0.974923,53.700694,0.352659,0.1677,...,4.156056,1.711326,0.974923,53.700694,0.352659,0.1677,0.85759,4.156056,1.711326,0.974923
std,194.561583,0.170415,0.109821,0.169179,0.942207,0.361938,0.056691,194.561583,0.170415,0.109821,...,0.942207,0.361938,0.056691,194.561583,0.170415,0.109821,0.169179,0.942207,0.361938,0.056691
min,1.916667,0.064052,0.117851,-0.030188,0.707324,1.403044,0.837976,1.916667,0.064052,0.117851,...,0.707324,1.403044,0.837976,1.916667,0.064052,0.117851,-0.030188,0.707324,1.403044,0.837976
25%,4.225694,0.198545,0.126913,0.829287,3.635395,1.51034,0.934854,4.225694,0.198545,0.126913,...,3.635395,1.51034,0.934854,4.225694,0.198545,0.126913,0.829287,3.635395,1.51034,0.934854
50%,8.506944,0.343895,0.147639,0.896952,4.069086,1.559616,0.971073,8.506944,0.343895,0.147639,...,4.069086,1.559616,0.971073,8.506944,0.343895,0.147639,0.896952,4.069086,1.559616,0.971073
75%,32.756944,0.460296,0.168673,0.945802,4.998307,1.775538,1.019465,32.756944,0.460296,0.168673,...,4.998307,1.775538,1.019465,32.756944,0.460296,0.168673,0.945802,4.998307,1.775538,1.019465
max,1233.513889,0.881947,0.822616,0.989035,5.74399,3.137791,1.079653,1233.513889,0.881947,0.822616,...,5.74399,3.137791,1.079653,1233.513889,0.881947,0.822616,0.989035,5.74399,3.137791,1.079653


In [38]:
%%time
from sklearn.model_selection import train_test_split

X = data.drop(['target'],axis=1).values
y = data['target'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Wall time: 997 µs


In [41]:
%%time
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

clf = DecisionTreeClassifier()
clf.fit(X_train,y_train)

y_pred = clf.predict(X_test)

accuracy_score(y_test, y_pred)

Wall time: 997 µs


0.5

In [42]:
%%time
from sklearn.svm import SVC

clf = SVC()
clf.fit(X_train,y_train)

y_pred = clf.predict(X_test)

accuracy_score(y_test, y_pred)

Wall time: 974 µs


0.375

In [None]:
%%time
from sklearn.svm import RandomForestClassifier

clf = SVC()
clf.fit(X_train,y_train)

y_pred = clf.predict(X_test)

accuracy_score(y_test, y_pred)