In [1]:
import cv2 as cv
import numpy as np
import os
from skimage.feature.texture import greycomatrix
from skimage.feature.texture import greycoprops
from skimage.measure import shannon_entropy
import pyfeats

In [2]:
def read_images(folder = "dataset/train",
                classes = [
                            "normal",
                            "fatty",
#                             "cirrhosis"
                        ]):
    image_names = {}
    images = {}
    # Get all image names in folders
    for cls in classes:
        image_names[cls] = os.listdir(f'{folder}/{cls}')

    # read all images to list
    for cls in classes:
        images[cls] = []
        for name in image_names[cls]:
            img = cv.imread(f'{folder}/{cls}/{name}', cv.IMREAD_GRAYSCALE)
            images[cls].append(img)
    return images

In [3]:
def extract_roi(img, start =(160,300) , size = (9,9)):
    roi = img[start[0]:start[0]+size[0],start[1]:start[1]+size[1]]
    mask = np.zeros(img.shape)
    mask[40:49,40:49] = 1
    return roi, mask

In [6]:
def feature_extraction(img):
    roi, mask = extract_roi(img)
    
    features = {}
    # 0 45 90 135 degrees
    # angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
    glcm_mtx = greycomatrix(roi, distances = [1], angles = [0], levels = 256)
    columns = 

    features['contrast'] = greycoprops(glcm_mtx, 'contrast')[0,0]
    features['homogeneity'] = greycoprops(glcm_mtx, 'homogeneity')[0,0]
    features['energy'] = greycoprops(glcm_mtx, 'energy')[0,0]
    features['correlation'] = greycoprops(glcm_mtx, 'correlation')[0,0]


    features['entropy'] = shannon_entropy(roi)



    feat, labels = pyfeats.glrlm_features(img, mask, 256)
    glrlm = {l : f for l,f in zip(labels,feat)}
    features['longRunEmphasis'] = glrlm['GLRLM_LongRunEmphasis']
    features['runPercentage'] = glrlm['GLRLM_RunPercentage']
    return features

In [7]:
import pandas as pd
img = cv.imread('dataset/train/normal/n1.jpg', cv.IMREAD_GRAYSCALE)
feature_extraction(img)

{'contrast': 4.0,
 'homogeneity': 0.44351432880844643,
 'energy': 0.15590239111558088,
 'correlation': 0.8917963295534402,
 'entropy': 3.9407808223346885,
 'longRunEmphasis': 2.356688326973246,
 'runPercentage': 0.8379758628597638}

In [20]:
def build_dataframe(images, columns=""):
    # dataframe consists of features of 1 ROI per image
    # column name roiNum_feature
    data = pd.DataFrame(columns = [
        'target',
        'correlation',
        'homogeneity',
        'entropy',
        'variance',
        'contrast',
        'longRunEmphasis',
        'runePercentage'
    ])
    for cls in images:
        for img in images[cls]:
            row = feature_extraction(img)
            row['target'] = cls
            print(row)
            data.append(row,ignore_index=True)
            break
        break
    return data

In [21]:
%%time
images = read_images()
build_dataframe(images)

{'contrast': 4.0, 'homogeneity': 0.44351432880844643, 'energy': 0.15590239111558088, 'correlation': 0.8917963295534402, 'entropy': 3.9407808223346885, 'longRunEmphasis': 2.356688326973246, 'runPercentage': 0.8379758628597638, 'target': 'normal'}
Wall time: 1.73 s


Unnamed: 0,target,correlation,homogeneity,entropy,variance,contrast,longRunEmphasis,runePercentage
