In [88]:
import cv2 as cv
import numpy as np
import os
from skimage.feature.texture import greycomatrix
from skimage.feature.texture import greycoprops
from skimage.measure import shannon_entropy
import pyfeats
import pandas as pd

In [89]:
def read_images(folder = "dataset/train",
                classes = [
                            "normal",
                            "fatty",
#                             "cirrhosis"
                        ]):
    image_names = {}
    images = {}
    # Get all image names in folders
    for cls in classes:
        image_names[cls] = os.listdir(f'{folder}/{cls}')

    # read all images to list
    for cls in classes:
        images[cls] = []
        for name in image_names[cls]:
            img = cv.imread(f'{folder}/{cls}/{name}', cv.IMREAD_GRAYSCALE)
            images[cls].append(img)
    return images

In [90]:
def extract_roi(img, start =(160,300) , size = (9,9)):
    roi = img[start[0]:start[0]+size[0],start[1]:start[1]+size[1]]
    mask = np.zeros(img.shape)
    mask[40:49,40:49] = 1
    return roi, mask

In [91]:
def feature_extraction(img):
    roi_pos = [
        (160,300),
        (118,224),
        (241,151),
        (120,420),
        (170,300),
        (400,200),
        (300,120),
        (240,240),
        (360,160)
    ]
    
    roi_arr = []
    mask_arr = []
    for pos in roi_pos:
        roi, mask = extract_roi(img, pos)
        roi_arr.append(roi)
        mask_arr.append(mask)
    
    features = {}
    # 0 45 90 135 degrees
    angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
    
    da_dict = {
        0: "d1_0",
        1: "d1_45",
        2: "d1_90",
        3: "d1_135",
        
        4: "d2_0",
        5: "d2_45",
        6: "d2_90",
        7: "d2_135",
        
        8: "d3_0",
        9: "d3_45",
        10: "d3_90",
        11: "d3_135",
        
    }
    
    for i in range(len(roi_pos)):
        glcm_mtx = greycomatrix(roi, distances = [1,2,3], angles = angles, levels = 256)
        con = greycoprops(glcm_mtx, 'contrast').flatten()
        hom = greycoprops(glcm_mtx, 'homogeneity').flatten()
        en = greycoprops(glcm_mtx, 'energy').flatten()
        corr = greycoprops(glcm_mtx, 'correlation').flatten()
        
        for j in range(len(da_dict)):
            features[f'r{i}_contrast_{da_dict[j]}'] = con[j]
            features[f'r{i}_homogeneity_{da_dict[j]}'] = hom[j]
            features[f'r{i}_energy_{da_dict[j]}'] = en[j]
            features[f'r{i}_correlation_{da_dict[j]}'] = corr[j]
            
        features[f'r{i}_entropy'] = shannon_entropy(roi)

        feat, labels = pyfeats.glrlm_features(img, mask, 256)
        glrlm = {l : f for l,f in zip(labels,feat)}
        features[f'r{i}_longRunEmphasis'] = glrlm['GLRLM_LongRunEmphasis']
        features[f'r{i}_runPercentage'] = glrlm['GLRLM_RunPercentage']
    return features

In [92]:
img = cv.imread('dataset/train/normal/n1.jpg', cv.IMREAD_GRAYSCALE)
columns = feature_extraction(img).keys()

In [93]:
def build_dataframe(images, columns):
    # dataframe consists of features of 1 ROI per image
    # column name roiNum_feature
#     data = pd.DataFrame( columns = [
#         'contrast',
#         'homogeneity',
#         'energy',
#         'correlation',
#         'entropy',
#         'longRunEmphasis',
#         'runPercentage',
#         'target'
#     ])
    data = pd.DataFrame( columns = columns)

    for cls in images:
        for img in images[cls]:
            row = feature_extraction(img)
            row['target'] = cls
            data = data.append(row,ignore_index=True)
    return data

In [94]:
%%time
images = read_images()

Wall time: 163 ms


In [95]:
%%time
data = build_dataframe(images,columns)

Wall time: 20min 31s


In [96]:
data.describe()

Unnamed: 0,r0_contrast_d1_0,r0_homogeneity_d1_0,r0_energy_d1_0,r0_correlation_d1_0,r0_contrast_d1_45,r0_homogeneity_d1_45,r0_energy_d1_45,r0_correlation_d1_45,r0_contrast_d1_90,r0_homogeneity_d1_90,...,r8_homogeneity_d3_90,r8_energy_d3_90,r8_correlation_d3_90,r8_contrast_d3_135,r8_homogeneity_d3_135,r8_energy_d3_135,r8_correlation_d3_135,r8_entropy,r8_longRunEmphasis,r8_runPercentage
count,95.0,95.0,95.0,95.0,95.0,95.0,95.0,95.0,95.0,95.0,...,95.0,95.0,95.0,95.0,95.0,95.0,95.0,95.0,95.0,95.0
mean,22.610819,0.727436,0.649558,0.940038,29.808388,0.693362,0.648911,0.886455,45.214181,0.670119,...,0.64019,0.651442,0.609962,142.059291,0.634901,0.654827,0.592173,1.749918,1.475929,0.823281
std,128.12461,0.339539,0.419122,0.129889,144.461595,0.373565,0.419867,0.177482,143.400455,0.39956,...,0.434433,0.416297,0.519601,399.45036,0.439842,0.411915,0.537773,2.15027,0.3085,0.135401
min,0.0,0.064052,0.117851,-0.030188,0.0,0.066476,0.126938,-0.033761,0.0,0.039748,...,0.001389,0.136083,-0.69715,0.0,0.003818,0.142857,-0.732221,0.0,1.284691,0.707023
25%,0.0,0.408053,0.155281,0.919356,0.0,0.293125,0.152292,0.808571,0.0,0.233062,...,0.152623,0.157135,0.096619,0.0,0.144833,0.165792,0.036928,0.0,1.30196,0.711165
50%,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,...,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.315493,0.713206
75%,6.083333,1.0,1.0,1.0,11.34375,1.0,1.0,1.0,21.881944,1.0,...,1.0,1.0,1.0,37.387755,1.0,1.0,1.0,3.879655,1.542534,0.962127
max,1233.513889,1.0,1.0,1.0,1387.734375,1.0,1.0,1.0,1233.777778,1.0,...,1.0,1.0,1.0,2738.204082,1.0,1.0,1.0,5.74399,3.137791,1.079653


In [97]:
%%time
from sklearn.model_selection import train_test_split

X = data.drop(['target'],axis=1).values
y = data['target'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Wall time: 0 ns


In [98]:
%%time
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

dt = DecisionTreeClassifier()
dt.fit(X_train,y_train)

y_pred = dt.predict(X_test)

accuracy_score(y_test, y_pred)

Wall time: 0 ns


0.6842105263157895

In [99]:
%%time
from sklearn.svm import SVC

svc = SVC()
svc.fit(X_train,y_train)

y_pred = svc.predict(X_test)

accuracy_score(y_test, y_pred)

Wall time: 0 ns


0.5789473684210527

In [100]:
%%time
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier()
rf.fit(X_train,y_train)

y_pred = rf.predict(X_test)

accuracy_score(y_test, y_pred)

Wall time: 101 ms


0.631578947368421

In [101]:
report = classification_report(y_test,y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score    support
fatty          0.642857  0.818182  0.720000  11.000000
normal         0.600000  0.375000  0.461538   8.000000
accuracy       0.631579  0.631579  0.631579   0.631579
macro avg      0.621429  0.596591  0.590769  19.000000
weighted avg   0.624812  0.631579  0.611174  19.000000


In [102]:
data.to_csv("dataset/95img_9_roi_3d_4a.csv",index = False)