In [1]:
import cv2
import os
from os import listdir
from os.path import isfile, join
import numpy as np
import imutils
import cv2
import numpy as np
from matplotlib import pyplot as plt
from skimage.feature import  greycomatrix, greycoprops
import re
from tabulate import tabulate


In [21]:
from sklearn import preprocessing
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier 
from sklearn.metrics import confusion_matrix  
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix
from skimage.feature import greycomatrix, greycoprops
from skimage.measure import shannon_entropy
import time
import threading
# Classifier instances shared by every evaluation run. They are looked up by
# their display name through `model_names`.
rf_class = RandomForestClassifier(n_estimators=100)

abc = AdaBoostClassifier(
    n_estimators=100,
    learning_rate=1,
)

xgb = XGBClassifier(
    learning_rate=0.1,
    n_estimators=500,
    max_depth=10,
    min_child_weight=1,
    gamma=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    nthread=4,
    seed=27,
)

svm_linear = svm.SVC(
    kernel='linear',
    gamma=0.001,
    C=100,
)

# Display name -> estimator instance.
model_names = {
    "Random Forest": rf_class,
    "AdaBoostClassifier": abc,
    "XGBClassifier": xgb,
    "SVM_linear": svm_linear,
}

# Names used as `target_names` when printing a classification report.
class_names = ['mild', 'normal', 'severe']


In [19]:
class model:
    """Build SIFT/GLCM feature sets from an image folder and benchmark classifiers.

    On construction the images found under ``dataset`` are read in grayscale
    and three feature matrices are stored on the instance:

    * ``SIFT``          - flattened SIFT descriptors, truncated to a common length
    * ``GLCM``          - six texture statistics per image (see ``glcm2D``)
    * ``SIFT_AND_GLCM`` - the two matrices above concatenated column-wise

    ``labels`` holds one integer per image: 0 when the file's parent folder is
    named ``mild``, 1 for ``normal`` and 2 for anything else.

    Classifiers are looked up by name in the module-level ``model_names`` dict.
    """

    def __init__(self,dataset,**kwargs):
        """Extract all feature sets for the images under ``dataset``.

        Args:
            dataset: path of the root folder that contains the class sub-folders.
            **kwargs: optional ``size`` (int); when given, every image is
                resized to ``size x size`` before feature extraction. Other
                keywords are ignored.
        """
        # .get() so that passing unrelated keywords no longer raises KeyError.
        size = kwargs.get('size')
        self.dataset = dataset
        self.SIFT, self.GLCM, self.SIFT_AND_GLCM,self.labels = self.generateFeatureDataset(self.dataset,size)

    def generateFeatureDataset(self,dataset,size):
        """Read every image and compute its SIFT and GLCM features.

        Args:
            dataset: root folder walked with ``os.walk``.
            size: target edge length for resizing, or ``None`` to keep the
                original resolution.

        Returns:
            ``(sift, glcm, sift_and_glcm, labels)`` where the first three are
            numpy arrays with one row per image and ``labels`` is a list of ints.

        Raises:
            ValueError: if no readable image was found.
        """
        started = time.time()
        subdirs = [entry[0] for entry in os.walk(dataset)]
        file_names = []
        # Entry 0 is the root itself; the next three walk entries are used as
        # the class folders.
        for sub_dir in subdirs[1:4]:
            file_names += [sub_dir+"/"+f for f in listdir(sub_dir) if isfile(join(sub_dir, f))]

        # SIFT lives in the main cv2 module in recent OpenCV builds and in
        # xfeatures2d in older contrib builds; support both.
        if hasattr(cv2, 'SIFT_create'):
            sift = cv2.SIFT_create()
        else:
            sift = cv2.xfeatures2d.SIFT_create()

        label_by_folder = {'mild': 0, 'normal': 1}
        images_sift = []
        glcm = []
        labels = []
        for file in file_names:
            image = cv2.imread(file,0)
            if image is None:
                # imread returns None for files it cannot decode; skip them so
                # features and labels stay aligned.
                print('Skipping unreadable image: {0}'.format(file))
                continue
            if size is not None:
                image = cv2.resize(image, (size, size), interpolation = cv2.INTER_AREA)
            glcm.append(self.glcm2D(image))
            images_sift.append(self.sift2D(image,sift))
            # The second-to-last path component is the folder holding the file.
            folder = re.split(r'[`[\]\'\\/]', file)[-2]
            labels.append(label_by_folder.get(folder, 2))

        if not images_sift:
            raise ValueError('No readable images found under {0!r}'.format(dataset))

        # Descriptor vectors differ in length per image; cut all of them to the
        # shortest one so they stack into a rectangular matrix.
        common_len = min(len(features) for features in images_sift)
        images_sift_final = np.array([features[:common_len] for features in images_sift])
        glcm = np.array(glcm)
        images_sift_glcm = np.concatenate((images_sift_final,glcm),axis=1)
        print(time.time()-started)
        return images_sift_final,glcm,images_sift_glcm,labels

    def sift2D(self,image,sift):
        """Return the SIFT descriptors of ``image`` flattened to one dimension.

        An empty float32 array is returned when the detector yields no
        descriptors (``detectAndCompute`` returns ``None`` in that case).
        """
        keypoints, descriptors = sift.detectAndCompute(image,None)
        if descriptors is None:
            return np.empty(0, dtype=np.float32)
        return np.array(descriptors).reshape(-1)

    def glcm2D(self,image):
        """Return ``[contrast, dissimilarity, homogeneity, energy, correlation, entropy]``.

        The first five values come from a grey-level co-occurrence matrix built
        with distance 1, angle 0 and 256 levels; the last one is the Shannon
        entropy of the image itself.
        """
        img_arr = np.array(image)
        gCoMat = greycomatrix(img_arr, [1], [0],256,symmetric=True, normed=True) # Co-occurance matrix
        props = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation')
        features = [greycoprops(gCoMat, prop=name)[0][0] for name in props]
        features.append(shannon_entropy(img_arr))
        return features

    @staticmethod
    def _format_accuracy(acc):
        """Format a 0..1 accuracy as a whole percentage, e.g. ``0.57 -> '57.0 %'``."""
        # Formatting with one decimal hides float artefacts such as
        # 0.57 * 100 == 56.99999999999999.
        return '{:.1f} %'.format(round(acc, 2) * 100)

    def _fit_predict(self,model_name,X_train, X_test, y_train):
        """Fit the classifier registered under ``model_name`` and predict ``X_test``."""
        estimator = model_names[model_name]
        estimator.fit(X_train,y_train)
        return estimator.predict(X_test)

    def accuracy(self,model_name,X_train, X_test, y_train, y_test):
        """Train ``model_name`` and return ``[model_name, '<accuracy> %']``."""
        yhat = self._fit_predict(model_name,X_train, X_test, y_train)
        return [model_name,self._format_accuracy(accuracy_score(y_test, yhat))]

    def classification_report(self,model_name,X_train, X_test, y_train, y_test):
        """Train ``model_name``, print its report and confusion matrix.

        Returns:
            ``[model_name, '<accuracy> %']``, same as ``accuracy``.
        """
        yhat = self._fit_predict(model_name,X_train, X_test, y_train)
        # Inside the method body this name resolves to the module-level
        # sklearn function, not to this method.
        print(classification_report(y_test, yhat, target_names=class_names))
        print(confusion_matrix(y_test, yhat))
        return [model_name,self._format_accuracy(accuracy_score(y_test, yhat))]

    def result(self,dataset,metrics,model_used = ["Random Forest","AdaBoostClassifier","XGBClassifier","SVM_linear"]):
        """Evaluate the selected classifiers on one feature matrix and print a table.

        Args:
            dataset: feature matrix with one row per image, aligned with
                ``self.labels``.
            metrics: name of the evaluation method to call, ``'accuracy'`` or
                ``'classification_report'``.
            model_used: keys of ``model_names`` to evaluate.
        """
        df=pd.DataFrame(data=dataset)
        df['label']=self.labels
        df=df.sample(frac=1)
        X=df.drop(['label'], axis = 1)
        y=df['label']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
        # Fit the scaler on the training rows only so that no statistics of the
        # held-out rows leak into training.
        scaler = preprocessing.StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        evaluate = getattr(self, metrics)
        model_result = [evaluate(name,X_train, X_test, y_train, y_test) for name in model_used]
        print(tabulate(model_result, headers=['Model', 'Result'], tablefmt='orgtbl'))

    def run(self,**kwargs):
        """Evaluate every requested feature set.

        Args:
            **kwargs: optional ``metrics`` (default ``'accuracy'``) and
                ``dataset_use``, a list of attribute names among ``'SIFT'``,
                ``'GLCM'`` and ``'SIFT_AND_GLCM'`` (default: all three).
        """
        metrics = kwargs.get('metrics', 'accuracy')
        dataset_use = kwargs.get('dataset_use', ['SIFT','GLCM','SIFT_AND_GLCM'])
        for dataset in dataset_use:
            print("\n")
            print('\t----{0}----'.format(dataset))
            print("\n")
            # No early return here: every entry of dataset_use must be evaluated.
            self.result(getattr(self, dataset),metrics)
            print("---------------------------------------------------------------------------------------")

              
                  

In [26]:

# Classification report on the combined SIFT + GLCM features; size=128 makes
# the class resize every image to 128x128 before feature extraction.
print("***************Running 224 preprocessed images******************")
model_224 = model('../Deep learning/preprocessed images_224', size=128)
model_224.run(dataset_use=['SIFT_AND_GLCM'], metrics='classification_report')

    

***************Running 224 preprocessed images******************
5.586017608642578


	----SIFT_AND_GLCM----


              precision    recall  f1-score   support

        mild       0.79      0.90      0.84        50
      normal       0.71      0.67      0.69        30
      severe       0.88      0.73      0.80        30

    accuracy                           0.79       110
   macro avg       0.79      0.77      0.78       110
weighted avg       0.79      0.79      0.79       110

[[45  5  0]
 [ 7 20  3]
 [ 5  3 22]]
              precision    recall  f1-score   support

        mild       0.84      0.94      0.89        50
      normal       0.82      0.77      0.79        30
      severe       0.92      0.80      0.86        30

    accuracy                           0.85       110
   macro avg       0.86      0.84      0.85       110
weighted avg       0.86      0.85      0.85       110

[[47  3  0]
 [ 5 23  2]
 [ 4  2 24]]
              precision    recall  f1-score   support


In [None]:
# Classification report on the GLCM texture features only; size=128 makes the
# class resize every image to 128x128 before feature extraction.
print("***************Running 224 preprocessed images******************")
model_224 = model('../Deep learning/preprocessed images_224', size=128)
model_224.run(dataset_use=['GLCM'], metrics='classification_report')

In [None]:
# run() is called without arguments, so it falls back to its defaults
# (metrics='accuracy'). Images are resized to 128x128 first.
print("***************Running 40 preprocessed images******************")
model_40 = model('../Deep learning/preprocessed_images_40', size=128)
model_40.run()

In [28]:
# No `size` is passed, so images keep their stored resolution; run() falls
# back to its defaults (metrics='accuracy').
print("***************Running original preprocessed images******************")
model_1000 = model('../Deep learning/preprocessed images')
model_1000.run()

***************Running original preprocessed images******************


	---SIFT---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 45.0%    |
| AdaBoostClassifier | 47.0%    |
| XGBClassifier      | 51.0%    |


	---Glcm---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 83.0%    |
| AdaBoostClassifier | 66.0%    |
| XGBClassifier      | 82.0%    |


	---SIFT + GLCM---
| Model              | Result             |
|--------------------+--------------------|
| Random Forest      | 66.0%              |
| AdaBoostClassifier | 56.99999999999999% |
| XGBClassifier      | 84.0%              |


In [42]:
# Same evaluation on the folder '../Deep learning/Dataset', without resizing;
# run() falls back to its defaults (metrics='accuracy').
print("***************Running original Dataset images******************")
model_org = model('../Deep learning/Dataset')
model_org.run()

***************Running original Dataset images******************


	---SIFT---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 39.0%    |
| AdaBoostClassifier | 43.0%    |
| XGBClassifier      | 45.0%    |


	---Glcm---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 95.0%    |
| AdaBoostClassifier | 63.0%    |
| XGBClassifier      | 93.0%    |


	---SIFT + GLCM---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 52.0%    |
| AdaBoostClassifier | 76.0%    |
| XGBClassifier      | 95.0%    |


In [None]:


# import pandas as pd
# target_df = pd.get_dummies(labels,prefix="label")
# target_df.head()