In [18]:
import cv2 as cv
import numpy as np
import os
from skimage.feature.texture import greycomatrix
from skimage.feature.texture import greycoprops
from skimage.measure import shannon_entropy
import pyfeats
import pandas as pd

In [19]:
def read_images(folder = "dataset/train",
                classes = [
                            "normal",
                            "fatty",
#                             "cirrhosis"
                        ]):
    image_names = {}
    images = {}
    # Get all image names in folders
    for cls in classes:
        image_names[cls] = os.listdir(f'{folder}/{cls}')

    # read all images to list
    for cls in classes:
        images[cls] = []
        for name in image_names[cls]:
            img = cv.imread(f'{folder}/{cls}/{name}', cv.IMREAD_GRAYSCALE)
            images[cls].append(img)
    return images

In [20]:
def extract_roi(img, start =(160,300) , size = (9,9)):
    roi = img[start[0]:start[0]+size[0],start[1]:start[1]+size[1]]
    mask = np.zeros(img.shape)
    mask[start[0]:start[0]+size[0],start[1]:start[1]+size[1]] = 1
    return roi, mask

In [21]:
def feature_extraction(img):
    roi_pos = [
        (160,300),
        (118,224),
        (241,151),
        (120,420),
        (170,300),
        (400,200),
        (300,120),
        (240,240),
        (360,160)
    ]
    
    roi_arr = []
    mask_arr = []
    for pos in roi_pos:
        roi, mask = extract_roi(img, pos)
        roi_arr.append(roi)
        mask_arr.append(mask)
    
    features = {}
    # 0 45 90 135 degrees
    angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
    
    da_dict = {
        0: "d1_0",
        1: "d1_45",
        2: "d1_90",
        3: "d1_135",
        
        4: "d2_0",
        5: "d2_45",
        6: "d2_90",
        7: "d2_135",
        
        8: "d3_0",
        9: "d3_45",
        10: "d3_90",
        11: "d3_135",
        
    }
    
    for i in range(len(roi_pos)):
        glcm_mtx = greycomatrix(roi, distances = [1,2,3], angles = angles, levels = 256)
        con = greycoprops(glcm_mtx, 'contrast').flatten()
        hom = greycoprops(glcm_mtx, 'homogeneity').flatten()
        en = greycoprops(glcm_mtx, 'energy').flatten()
        corr = greycoprops(glcm_mtx, 'correlation').flatten()
        
        for j in range(len(da_dict)):
            features[f'r{i}_contrast_{da_dict[j]}'] = con[j]
            features[f'r{i}_homogeneity_{da_dict[j]}'] = hom[j]
            features[f'r{i}_energy_{da_dict[j]}'] = en[j]
            features[f'r{i}_correlation_{da_dict[j]}'] = corr[j]
            
        features[f'r{i}_entropy'] = shannon_entropy(roi)

        feat, labels = pyfeats.glrlm_features(img, mask, 256)
        glrlm = {l : f for l,f in zip(labels,feat)}
        features[f'r{i}_longRunEmphasis'] = glrlm['GLRLM_LongRunEmphasis']
        features[f'r{i}_runPercentage'] = glrlm['GLRLM_RunPercentage']
    return features

In [22]:
img = cv.imread('dataset/train/normal/n1.jpg', cv.IMREAD_GRAYSCALE)
columns = feature_extraction(img).keys()

In [23]:
def build_dataframe(images, columns):
    # dataframe consists of features of 1 ROI per image
    # column name roiNum_feature
#     data = pd.DataFrame( columns = [
#         'contrast',
#         'homogeneity',
#         'energy',
#         'correlation',
#         'entropy',
#         'longRunEmphasis',
#         'runPercentage',
#         'target'
#     ])
    data = pd.DataFrame( columns = columns)

    for cls in images:
        for img in images[cls]:
            row = feature_extraction(img)
            row['target'] = cls
            data = data.append(row,ignore_index=True)
    return data

In [24]:
%%time
images = read_images()

Wall time: 78 ms


In [25]:
%%time
data = build_dataframe(images,columns)

Wall time: 10min 40s


In [26]:
data.describe()

Unnamed: 0,r0_contrast_d1_0,r0_homogeneity_d1_0,r0_energy_d1_0,r0_correlation_d1_0,r0_contrast_d1_45,r0_homogeneity_d1_45,r0_energy_d1_45,r0_correlation_d1_45,r0_contrast_d1_90,r0_homogeneity_d1_90,...,r8_homogeneity_d3_90,r8_energy_d3_90,r8_correlation_d3_90,r8_contrast_d3_135,r8_homogeneity_d3_135,r8_energy_d3_135,r8_correlation_d3_135,r8_entropy,r8_longRunEmphasis,r8_runPercentage
count,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,...,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0
mean,53.700694,0.352659,0.1677,0.85759,70.794922,0.271734,0.166164,0.730331,107.383681,0.216534,...,0.145452,0.172176,0.073659,337.390816,0.132889,0.180213,0.03141,4.156056,1.711326,0.974923
std,194.561583,0.170415,0.109821,0.169179,217.54073,0.146363,0.109755,0.180947,206.679941,0.142518,...,0.145862,0.103841,0.37576,563.017806,0.140821,0.099493,0.374125,0.942207,0.361938,0.056691
min,1.916667,0.064052,0.117851,-0.030188,2.46875,0.066476,0.126938,-0.033761,5.291667,0.039748,...,0.001389,0.136083,-0.69715,5.918367,0.003818,0.142857,-0.732221,0.707324,1.403044,0.837976
25%,4.225694,0.198545,0.126913,0.829287,8.238281,0.16417,0.132119,0.628852,14.475694,0.112088,...,0.059328,0.143444,-0.135037,28.887755,0.045308,0.147866,-0.118102,3.635395,1.51034,0.934854
50%,8.506944,0.343895,0.147639,0.896952,21.679688,0.254807,0.146575,0.758672,41.125,0.193292,...,0.127519,0.153822,0.048513,76.663265,0.112095,0.161985,-0.022227,4.069086,1.559616,0.971073
75%,32.756944,0.460296,0.168673,0.945802,57.097656,0.343898,0.165359,0.850744,99.541667,0.262496,...,0.180175,0.165635,0.207968,421.403061,0.164022,0.179081,0.19845,4.998307,1.775538,1.019465
max,1233.513889,0.881947,0.822616,0.989035,1387.734375,0.889065,0.830627,0.984288,1233.777778,0.866669,...,0.842596,0.801234,0.946874,2738.204082,0.826534,0.781662,0.905079,5.74399,3.137791,1.079653


In [27]:
%%time
from sklearn.model_selection import train_test_split

X = data.drop(['target'],axis=1).values
y = data['target'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Wall time: 0 ns


In [28]:
%%time
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

dt = DecisionTreeClassifier()
dt.fit(X_train,y_train)

y_pred = dt.predict(X_test)

accuracy_score(y_test, y_pred)

Wall time: 0 ns


0.5

In [29]:
%%time
from sklearn.svm import SVC

svc = SVC()
svc.fit(X_train,y_train)

y_pred = svc.predict(X_test)

accuracy_score(y_test, y_pred)

Wall time: 0 ns


0.375

In [30]:
%%time
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier()
rf.fit(X_train,y_train)

y_pred = rf.predict(X_test)

accuracy_score(y_test, y_pred)

Wall time: 84.6 ms


0.625

In [31]:
from sklearn.metrics import classification_report

report = classification_report(y_test,y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score  support
fatty           0.50000  0.666667  0.571429    3.000
normal          0.75000  0.600000  0.666667    5.000
accuracy        0.62500  0.625000  0.625000    0.625
macro avg       0.62500  0.633333  0.619048    8.000
weighted avg    0.65625  0.625000  0.630952    8.000


In [32]:
data.to_csv("dataset/40img_9_roi_3d_4a_1.csv",index = False)