In [1]:
import cv2 as cv
import numpy as np
import os
from skimage.feature.texture import greycomatrix
from skimage.feature.texture import greycoprops
from skimage.measure import shannon_entropy
import pyfeats
import pandas as pd

In [2]:
def read_images(folder = "dataset/train",
                classes = [
                            "normal",
                            "fatty",
#                             "cirrhosis"
                        ]):
    image_names = {}
    images = {}
    # Get all image names in folders
    for cls in classes:
        image_names[cls] = os.listdir(f'{folder}/{cls}')

    # read all images to list
    for cls in classes:
        images[cls] = []
        for name in image_names[cls]:
            img = cv.imread(f'{folder}/{cls}/{name}', cv.IMREAD_GRAYSCALE)
            images[cls].append(img)
    return images

In [3]:
def extract_roi(img, start , size = (32,32)):
    roi = img[start[0]:start[0]+size[0],start[1]:start[1]+size[1]]
    mask = np.zeros(img.shape)
    mask[start[0]:start[0]+size[0],start[1]:start[1]+size[1]] = 1
    return roi, mask

In [4]:
def feature_extraction(img):
    roi_pos = [
        (160,230),
        (118,224),
        (241,151),
        (120,420),
        (170,300),
        (400,200),
        (300,120),
        (240,240),
        (360,160)
    ]
    
    roi_arr = []
    mask_arr = []
    for pos in roi_pos:
        roi, mask = extract_roi(img, pos)
        roi_arr.append(roi)
        mask_arr.append(mask)
    
    features = {}
    # 0 45 90 135 degrees
    angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
    
    da_dict = {
        0: "d1_0",
        1: "d1_45",
        2: "d1_90",
        3: "d1_135",
        
        4: "d2_0",
        5: "d2_45",
        6: "d2_90",
        7: "d2_135",
        
        8: "d3_0",
        9: "d3_45",
        10: "d3_90",
        11: "d3_135",
        
    }
    
    for i in range(len(roi_pos)):
        glcm_mtx = greycomatrix(roi, distances = [1,2,3], angles = angles, levels = 256)
        con = greycoprops(glcm_mtx, 'contrast').flatten()
        hom = greycoprops(glcm_mtx, 'homogeneity').flatten()
        en = greycoprops(glcm_mtx, 'energy').flatten()
        corr = greycoprops(glcm_mtx, 'correlation').flatten()
        
        for j in range(len(da_dict)):
            features[f'r{i}_contrast_{da_dict[j]}'] = con[j]
            features[f'r{i}_homogeneity_{da_dict[j]}'] = hom[j]
            features[f'r{i}_energy_{da_dict[j]}'] = en[j]
            features[f'r{i}_correlation_{da_dict[j]}'] = corr[j]
            
        features[f'r{i}_entropy'] = shannon_entropy(roi)

        feat, labels = pyfeats.glrlm_features(img, mask, 256)
        glrlm = {l : f for l,f in zip(labels,feat)}
        features[f'r{i}_longRunEmphasis'] = glrlm['GLRLM_LongRunEmphasis']
        features[f'r{i}_runPercentage'] = glrlm['GLRLM_RunPercentage']
    return features

In [6]:
def build_dataframe(images):
    data = pd.DataFrame()

    for cls in images:
        for img in images[cls]:
            row = feature_extraction(img)
            row['target'] = cls
            data = data.append(row,ignore_index=True)
    return data

In [8]:
%%time
images = read_images()
data = build_dataframe(images)
data.describe()

Wall time: 10min 45s


In [10]:
%%time
from sklearn.model_selection import train_test_split

X = data.drop(['target'],axis=1).values
y = data['target'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Wall time: 2.83 s


In [None]:
# data = pd.read_csv("40img_9_roi_3d_4a.csv")

In [11]:
%%time
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

dt = DecisionTreeClassifier()
dt.fit(X_train,y_train)

y_pred = dt.predict(X_test)

accuracy_score(y_test, y_pred)

Wall time: 1.39 s


0.75

In [12]:
%%time
from sklearn.svm import SVC

svc = SVC()
svc.fit(X_train,y_train)

y_pred = svc.predict(X_test)

accuracy_score(y_test, y_pred)

Wall time: 169 ms


0.375

In [25]:
%%time
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier()
rf.fit(X_train,y_train)

y_pred = rf.predict(X_test)

accuracy_score(y_test, y_pred)

Wall time: 100 ms


0.625

In [26]:
from sklearn.metrics import classification_report

report = classification_report(y_test,y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score  support
fatty           0.50000  0.666667  0.571429    3.000
normal          0.75000  0.600000  0.666667    5.000
accuracy        0.62500  0.625000  0.625000    0.625
macro avg       0.62500  0.633333  0.619048    8.000
weighted avg    0.65625  0.625000  0.630952    8.000


In [27]:
# data.to_csv("dataset/40img_9_roi_3d_4a_32p.csv",index = False)

In [29]:
data.head()

Unnamed: 0,r0_contrast_d1_0,r0_homogeneity_d1_0,r0_energy_d1_0,r0_correlation_d1_0,r0_contrast_d1_45,r0_homogeneity_d1_45,r0_energy_d1_45,r0_correlation_d1_45,r0_contrast_d1_90,r0_homogeneity_d1_90,...,r8_energy_d3_90,r8_correlation_d3_90,r8_contrast_d3_135,r8_homogeneity_d3_135,r8_energy_d3_135,r8_correlation_d3_135,r8_entropy,r8_longRunEmphasis,r8_runPercentage,target
0,89.731855,0.939718,0.886578,0.033166,93.103018,0.914784,0.869653,0.027936,90.546371,0.904934,...,0.821731,-0.004318,102.082222,0.873623,0.832643,-0.000214,0.703808,2.356688,0.837976,normal
1,5.013105,0.395582,0.105698,0.869536,10.825182,0.318207,0.090925,0.718623,18.164315,0.24627,...,0.078774,0.306302,27.078889,0.227901,0.080416,0.290634,4.105821,1.559836,0.92063,normal
2,5.683468,0.400243,0.097256,0.875538,12.657648,0.285346,0.081585,0.712816,22.167339,0.227407,...,0.072206,0.275135,28.236667,0.206442,0.072538,0.342274,4.232786,1.49615,0.935151,normal
3,16.291331,0.284726,0.064169,0.920232,45.552549,0.178006,0.053557,0.776029,85.03629,0.126718,...,0.047365,0.112876,175.201111,0.085479,0.048177,0.12752,5.149397,1.502168,0.933964,normal
4,13.194556,0.297156,0.069724,0.927491,40.473465,0.196302,0.05843,0.777981,68.730847,0.149869,...,0.051069,0.076517,166.511111,0.099671,0.051759,0.093918,4.961775,1.821516,1.053454,normal
