In [1]:
import cv2 as cv
import numpy as np
import os
from skimage.feature.texture import greycomatrix
from skimage.feature.texture import greycoprops
from skimage.measure import shannon_entropy
import pyfeats

In [2]:
def read_images(folder = "dataset/train",
                classes = [
                            "normal",
                            "fatty",
#                             "cirrhosis"
                        ]):
    image_names = {}
    images = {}
    # Get all image names in folders
    for cls in classes:
        image_names[cls] = os.listdir(f'{folder}/{cls}')

    # read all images to list
    for cls in classes:
        images[cls] = []
        for name in image_names[cls]:
            img = cv.imread(f'{folder}/{cls}/{name}', cv.IMREAD_GRAYSCALE)
            images[cls].append(img)
    return images

In [3]:
def extract_roi(img, start =(160,300) , size = (9,9)):
    roi = img[start[0]:start[0]+size[0],start[1]:start[1]+size[1]]
    mask = np.zeros(img.shape)
    mask[40:49,40:49] = 1
    return roi, mask

In [6]:
def feature_extraction(img):
    roi, mask = extract_roi(img)
    
    features = {}
    # 0 45 90 135 degrees
    # angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
    glcm_mtx = greycomatrix(roi, distances = [1], angles = [0], levels = 256)
    columns = 

    features['contrast'] = greycoprops(glcm_mtx, 'contrast')[0,0]
    features['homogeneity'] = greycoprops(glcm_mtx, 'homogeneity')[0,0]
    features['energy'] = greycoprops(glcm_mtx, 'energy')[0,0]
    features['correlation'] = greycoprops(glcm_mtx, 'correlation')[0,0]


    features['entropy'] = shannon_entropy(roi)



    feat, labels = pyfeats.glrlm_features(img, mask, 256)
    glrlm = {l : f for l,f in zip(labels,feat)}
    features['longRunEmphasis'] = glrlm['GLRLM_LongRunEmphasis']
    features['runPercentage'] = glrlm['GLRLM_RunPercentage']
    return features

In [7]:
import pandas as pd
img = cv.imread('dataset/train/normal/n1.jpg', cv.IMREAD_GRAYSCALE)
feature_extraction(img)

{'contrast': 4.0,
 'homogeneity': 0.44351432880844643,
 'energy': 0.15590239111558088,
 'correlation': 0.8917963295534402,
 'entropy': 3.9407808223346885,
 'longRunEmphasis': 2.356688326973246,
 'runPercentage': 0.8379758628597638}

In [47]:
def build_dataframe(images, columns=""):
    # dataframe consists of features of 1 ROI per image
    # column name roiNum_feature
    data = pd.DataFrame(index = "name", columns = [
        'contrast',
        'homogeneity',
        'energy',
        'correlation',
        'entropy',
        'longRunEmphasis',
        'runPercentage',
        'target'
    ])
    for cls in images:
        for img in images[cls]:
            row = feature_extraction(img)
            row['target'] = cls
            data = data.append(row,ignore_index=True)
    return row, data

In [48]:
%%time
images = read_images()
row, data = build_dataframe(images)

Wall time: 1min 10s


In [50]:
data.describe()

Unnamed: 0,contrast,homogeneity,energy,correlation,entropy,longRunEmphasis,runPercentage
count,40.0,40.0,40.0,40.0,40.0,40.0,40.0
mean,44.198264,0.360875,0.144961,0.869691,4.223898,1.711326,0.974923
std,185.012921,0.073984,0.015503,0.188307,0.34741,0.361938,0.056691
min,1.847222,0.218489,0.125769,-0.072373,3.555415,1.403044,0.837976
25%,6.184028,0.29622,0.136083,0.876219,4.006589,1.51034,0.934854
50%,8.395833,0.370773,0.140955,0.912296,4.251664,1.559616,0.971073
75%,12.21875,0.403515,0.153721,0.947545,4.42514,1.775538,1.019465
max,1159.055556,0.559722,0.206006,0.972567,4.936263,3.137791,1.079653


In [55]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
X = data.drop(['target'],axis=1).values
y = data['target'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = DecisionTreeClassifier()
clf.fit(X_train,y_train)

y_pred = clf.predict(X_test)

accuracy_score(y_test, y_pred)

0.375