In [1]:
import cv2
import os
from os import listdir
from os.path import isfile, join
import numpy as np
import imutils
import cv2
import numpy as np
from matplotlib import pyplot as plt
from skimage.feature import  greycomatrix, greycoprops
import re
from tabulate import tabulate


In [2]:
from sklearn import preprocessing
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier 
from sklearn.metrics import confusion_matrix  
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier


# Candidate classifiers that are benchmarked on every feature set.
# The three estimators are independent of one another, so the order in which
# they are constructed does not matter; only the key order of `model_names`
# is kept as-is.
xgb = XGBClassifier(
    learning_rate=0.1,
    n_estimators=500,
    max_depth=10,
    min_child_weight=1,
    gamma=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    nthread=4,
    seed=27,
)
abc = AdaBoostClassifier(learning_rate=1, n_estimators=100)
rf_class = RandomForestClassifier(n_estimators=100)

# Display name -> estimator instance; the display names double as table labels.
model_names = {
    "Random Forest": rf_class,
    "AdaBoostClassifier": abc,
    "XGBClassifier": xgb,
}
    



In [3]:
class model:
    """Build SIFT and GLCM feature sets from an image folder and benchmark classifiers.

    Constructing an instance walks ``dataset``, extracts features from every
    usable image and stores four attributes:

    * ``images_sift``      -- flattened SIFT descriptors, truncated to a common length
    * ``glcm``             -- five GLCM texture properties per image
    * ``images_sift_glcm`` -- the two feature sets concatenated column-wise
    * ``labels``           -- one integer class label per image

    ``run()`` then trains and scores every estimator in ``model_names`` on each
    of the three feature sets and prints one accuracy table per feature set.
    """

    # Estimators are class attributes, so the same instances are shared by
    # every ``model`` object and are re-fitted on each call to ``testing``.
    rf_class = RandomForestClassifier(n_estimators=100)
    abc = AdaBoostClassifier(n_estimators=100, learning_rate=1)
    xgb = XGBClassifier(
        learning_rate=0.1,
        n_estimators=500,
        max_depth=10,
        min_child_weight=1,
        gamma=0.1,
        subsample=0.8,
        colsample_bytree=0.8,
        nthread=4,
        seed=27,
    )

    # Display name -> estimator; the names are also used as table row labels.
    model_names = {"Random Forest": rf_class, "AdaBoostClassifier": abc, "XGBClassifier": xgb}

    # Name of the folder holding an image -> class label. Any other folder
    # name falls back to ``_OTHER_LABEL``.
    _LABEL_BY_FOLDER = {'mild': 0, 'normal': 1}
    _OTHER_LABEL = 2

    # GLCM properties extracted per image, in feature-column order.
    _GLCM_PROPS = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation')

    def __init__(self, dataset, **kwargs):
        """Extract all feature sets for the images found under ``dataset``.

        Args:
            dataset: Path of the root folder whose sub-folders contain the images.
            **kwargs: Only ``size`` is read. When given, every image is resized
                to ``(size, size)`` before feature extraction. Other keywords
                are ignored (previously they raised ``KeyError``).
        """
        size = kwargs.get('size')
        self.dataset = dataset
        (self.images_sift,
         self.glcm,
         self.images_sift_glcm,
         self.labels) = self.generateFeatureDataset(self.dataset, size)

    def generateFeatureDataset(self, dataset, size):
        """Read every image under ``dataset`` and compute its SIFT and GLCM features.

        Files that OpenCV cannot decode, and images for which SIFT yields no
        descriptors, are skipped with a warning so that the feature rows and
        ``labels`` stay aligned and purely numeric.

        Args:
            dataset: Root folder to walk.
            size: Side length to resize each image to, or ``None`` to keep the
                image as loaded.

        Returns:
            Tuple ``(images_sift, glcm, images_sift_glcm, labels)``: the
            truncated SIFT matrix, the GLCM matrix, their column-wise
            concatenation, and the list of integer labels.

        Raises:
            ValueError: If no usable image was found.
        """
        import warnings  # function-local: nothing else in the class needs it

        # NOTE(review): only the first three directories that os.walk yields
        # after the root are scanned, which presumes three flat class folders
        # -- confirm against the dataset layout.
        subdirs = [entry[0] for entry in os.walk(dataset)]
        file_names = []
        for sub_dir in subdirs[1:4]:
            file_names += [sub_dir + "/" + f for f in listdir(sub_dir)
                           if isfile(join(sub_dir, f))]

        # Newer OpenCV builds expose SIFT in the main module; fall back to the
        # contrib location used by the original code otherwise.
        if hasattr(cv2, 'SIFT_create'):
            sift = cv2.SIFT_create()
        else:
            sift = cv2.xfeatures2d.SIFT_create()

        images_sift = []
        glcm = []
        labels = []
        for file in file_names:
            image = cv2.imread(file, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # imread returns None instead of raising for non-image files.
                warnings.warn("Skipping unreadable image file: {}".format(file))
                continue
            if size is not None:
                image = cv2.resize(image, (size, size), interpolation=cv2.INTER_AREA)

            keypoints, descriptors = sift.detectAndCompute(image, None)
            if descriptors is None or len(descriptors) == 0:
                # Without this guard a keypoint-free image would shrink every
                # feature vector to (almost) nothing during truncation below.
                warnings.warn("Skipping image without SIFT descriptors: {}".format(file))
                continue

            # Co-occurrence matrix: distance 1, angle 0, 256 grey levels.
            # NOTE(review): newer scikit-image releases spell these functions
            # graycomatrix/graycoprops -- confirm the installed version.
            gCoMat = greycomatrix(image, [1], [0], 256, symmetric=True, normed=True)
            glcm.append([greycoprops(gCoMat, prop=prop)[0][0] for prop in self._GLCM_PROPS])

            images_sift.append(np.array(descriptors).reshape(-1))

            # The label comes from the name of the folder that holds the file.
            folder = os.path.basename(os.path.dirname(file))
            labels.append(self._LABEL_BY_FOLDER.get(folder, self._OTHER_LABEL))

        if not images_sift:
            raise ValueError("No usable images found under {!r}".format(dataset))

        # Truncate every descriptor vector to the shortest one so the rows
        # form a rectangular matrix; the minimum is computed once.
        min_len = min(len(features) for features in images_sift)
        images_sift_final = np.array([features[:min_len] for features in images_sift])
        glcm = np.array(glcm)
        images_sift_glcm = np.concatenate((images_sift_final, glcm), axis=1)
        return images_sift_final, glcm, images_sift_glcm, labels

    def testing(self, model_name, X_train, X_test, y_train, y_test):
        """Fit one estimator and score it on the held-out split.

        Args:
            model_name: Key into ``model_names`` selecting the estimator.
            X_train, X_test: Feature matrices of the two splits.
            y_train, y_test: Matching label vectors.

        Returns:
            ``[model_name, accuracy]`` where accuracy is a string such as
            ``'55.0%'``.
        """
        # Use the class attribute rather than a notebook-level global so the
        # class does not depend on another cell having been executed.
        classifier = self.model_names[model_name]
        classifier.fit(X_train, y_train)
        yhat = classifier.predict(X_test)
        acc = accuracy_score(y_test, yhat)
        # Format with one decimal so float noise such as 55.00000000000001
        # never reaches the table.
        return [model_name, '{:.1f}%'.format(round(acc, 2) * 100)]

    def result(self, dataset):
        """Split, standardise and evaluate one feature set, then print the table.

        The split is made before scaling and the scaler is fitted on the
        training part only, so no test-set statistics leak into training.

        Args:
            dataset: Feature matrix with one row per entry of ``self.labels``.

        Returns:
            The list of ``[model_name, accuracy]`` rows that was printed.
        """
        labels = np.asarray(self.labels)
        # train_test_split shuffles by default, so no separate shuffle is needed.
        X_train, X_test, y_train, y_test = train_test_split(dataset, labels, test_size=0.20)

        scaler = preprocessing.StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        model_result = [self.testing(name, X_train, X_test, y_train, y_test)
                        for name in self.model_names]
        print(tabulate(model_result, headers=['Model', 'Result'], tablefmt='orgtbl'))
        return model_result

    def run(self):
        """Evaluate all estimators on the SIFT, GLCM and combined feature sets."""
        feature_sets = (
            ("\t---SIFT---", self.images_sift),
            ("\t---Glcm---", self.glcm),
            ("\t---SIFT + GLCM---", self.images_sift_glcm),
        )
        for title, features in feature_sets:
            print("\n")
            print(title)
            self.result(features)
              
                  

In [53]:
# Benchmark on the folder 'preprocessed images_224'; no resize is requested.
print("***************Running 224 preprocessed images******************")

model_224 = model(dataset='../Deep learning/preprocessed images_224')
model_224.run()

***************Running 224 preprocessed images******************


	---SIFT---
| Model              | Result             |
|--------------------+--------------------|
| Random Forest      | 53.0%              |
| AdaBoostClassifier | 40.0%              |
| XGBClassifier      | 55.00000000000001% |


	---Glcm---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 94.0%    |
| AdaBoostClassifier | 94.0%    |
| XGBClassifier      | 95.0%    |


	---SIFT + GLCM---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 81.0%    |
| AdaBoostClassifier | 85.0%    |
| XGBClassifier      | 95.0%    |


In [6]:
# Benchmark on the folder 'preprocessed_images_40'; each image is resized to 128x128.
print("***************Running 40 preprocessed images******************")

model_40 = model(dataset='../Deep learning/preprocessed_images_40', size=128)
model_40.run()

***************Running 40 preprocessed images******************


	---SIFT---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 33.0%    |
| AdaBoostClassifier | 40.0%    |
| XGBClassifier      | 45.0%    |


	---Glcm---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 85.0%    |
| AdaBoostClassifier | 81.0%    |
| XGBClassifier      | 85.0%    |


	---SIFT + GLCM---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 45.0%    |
| AdaBoostClassifier | 64.0%    |
| XGBClassifier      | 70.0%    |


In [28]:
# Benchmark on the folder 'preprocessed images'; no resize is requested.
print("***************Running original preprocessed images******************")

model_1000 = model(dataset='../Deep learning/preprocessed images')
model_1000.run()

***************Running original preprocessed images******************


	---SIFT---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 45.0%    |
| AdaBoostClassifier | 47.0%    |
| XGBClassifier      | 51.0%    |


	---Glcm---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 83.0%    |
| AdaBoostClassifier | 66.0%    |
| XGBClassifier      | 82.0%    |


	---SIFT + GLCM---
| Model              | Result             |
|--------------------+--------------------|
| Random Forest      | 66.0%              |
| AdaBoostClassifier | 56.99999999999999% |
| XGBClassifier      | 84.0%              |


In [42]:
# Benchmark on the folder 'Dataset'; no resize is requested.
print("***************Running original Dataset images******************")

model_org = model(dataset='../Deep learning/Dataset')
model_org.run()

***************Running original Dataset images******************


	---SIFT---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 39.0%    |
| AdaBoostClassifier | 43.0%    |
| XGBClassifier      | 45.0%    |


	---Glcm---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 95.0%    |
| AdaBoostClassifier | 63.0%    |
| XGBClassifier      | 93.0%    |


	---SIFT + GLCM---
| Model              | Result   |
|--------------------+----------|
| Random Forest      | 52.0%    |
| AdaBoostClassifier | 76.0%    |
| XGBClassifier      | 95.0%    |


In [None]:


# import pandas as pd
# target_df = pd.get_dummies(labels,prefix="label")
# target_df.head()