In [114]:
import cv2
import os
import random
import pandas as pd
import numpy as np
from skimage import feature
from sklearn.metrics import (f1_score, accuracy_score, confusion_matrix)
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import (RBF, Matern)
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from xgboost import XGBClassifier

In [115]:
def criarListaImagens(listaCategorias, data_dir, img_size):
  """Load every image found under ``data_dir/<category>`` as a resized, labelled sample.

  Args:
    listaCategorias: sequence of sub-directory names; the position of a
      category in this sequence is used as its integer class label.
    data_dir: directory that contains one sub-directory per category.
    img_size: side length, in pixels, of the square each image is resized to.

  Returns:
    A list of ``[image, class_num]`` pairs, in the order in which
    ``os.listdir`` yields the files of each category. Files that cannot be
    read or resized are reported on stdout and skipped.
  """
  lista = []
  for categoria in listaCategorias:
    path = os.path.join(data_dir, categoria)
    class_num = listaCategorias.index(categoria)
    # NOTE(review): the sample order follows os.listdir, which is not
    # guaranteed to be sorted; index files produced elsewhere presumably
    # rely on this order, so it is intentionally left unchanged here.
    for img in os.listdir(path):
      caminho = os.path.join(path, img)
      try:
        img_array = cv2.imread(caminho)
        if img_array is None:
          # cv2.imread reports an unreadable file by returning None rather
          # than raising, so handle it explicitly and name the file.
          print("Skipping unreadable image: " + caminho)
          continue
        new_array = cv2.resize(img_array, (img_size, img_size))
        lista.append([new_array, class_num])
      except Exception as e:
        # Best effort: one bad file must not abort the whole load.
        print(caminho + ": " + str(e))
  return lista

#----------------------------------------------------------------------#

def criaListaTreinoTeste(lista):
  """Split a list of ``[image, label]`` pairs into two parallel lists.

  Args:
    lista: iterable whose items expose the image at index 0 and the class
      label at index 1.

  Returns:
    ``(imagens, classes)``: the images and the labels, in input order.
  """
  pares = list(lista)
  imagens = [par[0] for par in pares]
  classes = [par[1] for par in pares]
  return imagens, classes

#----------------------------------------------------------------------#

def obtemCanaisCoresRGB(listaOrigem):
  """Split each image into its three colour planes plus a grayscale plane.

  Args:
    listaOrigem: iterable of 3-channel images; they are converted with
      ``cv2.COLOR_BGR2GRAY``, so they are expected to be in BGR order.

  Returns:
    A list with one entry per image. Each entry is a list of four planes:
    the three planes returned by ``cv2.split`` (in that order) followed by
    the grayscale conversion of the image.
  """
  listaDestino = []
  for imagem in listaOrigem:
    # cv2.split returns a tuple in newer OpenCV-Python releases, which has no
    # append(); list() makes this work whatever container type is returned.
    aux = list(cv2.split(imagem))
    aux.append(cv2.cvtColor(imagem, cv2.COLOR_BGR2GRAY))
    listaDestino.append(aux)

  return listaDestino

#----------------------------------------------------------------------#

def obterCanaisCores(listaOrigem, cvt):
  """Convert each BGR image with ``cvt`` and split the result into planes.

  Args:
    listaOrigem: iterable of 3-channel BGR images.
    cvt: OpenCV colour-conversion code applied to every image
      (the notebook passes ``cv2.COLOR_BGR2HSV`` / ``LAB`` / ``LUV``).

  Returns:
    A list with one entry per image. Each entry is a list of four planes:
    the three planes of the converted image followed by the grayscale
    version of the ORIGINAL image.
  """
  listaDestino = []
  for imagem in listaOrigem:
    convertida = cv2.cvtColor(imagem, cvt)
    # list(): cv2.split returns a tuple in newer OpenCV-Python releases.
    aux = list(cv2.split(convertida))
    # The gray plane must come from the BGR source: applying BGR2GRAY to the
    # already-converted image would weight its planes as if they were B, G, R.
    aux.append(cv2.cvtColor(imagem, cv2.COLOR_BGR2GRAY))
    listaDestino.append(aux)

  return listaDestino

#----------------------------------------------------------------------#

class LocalBinaryPatterns:
    """Describe a single-plane image by a normalised histogram of its LBP codes."""

    def __init__(self, numPoints, radius):
        """Remember the neighbour count and radius forwarded to skimage."""
        self.radius = radius
        self.numPoints = numPoints

    def describe(self, image, eps=1e-7):
        """Return the normalised LBP histogram of ``image``.

        The codes are computed with ``method="uniform"`` and counted into
        ``numPoints + 2`` unit-wide bins. ``eps`` is added to the total so
        that an all-zero histogram does not cause a division by zero.
        """
        codes = feature.local_binary_pattern(
            image, self.numPoints, self.radius, method="uniform")
        edges = np.arange(0, self.numPoints + 3)
        counts, _ = np.histogram(
            codes.ravel(), bins=edges, range=(0, self.numPoints + 2))
        counts = counts.astype("float")
        return counts / (counts.sum() + eps)

#----------------------------------------------------------------------#

def obtemHistogramaCanais(listaImagens, lbp):
  """Describe the four planes of every image with ``lbp``.

  Args:
    listaImagens: iterable of entries indexable at 0..3 (one plane each).
    lbp: object exposing ``describe(plane)``.

  Returns:
    A tuple of four lists ``(canal_1, canal_2, canal_3, canal_cinza)``;
    list ``k`` holds ``lbp.describe(entry[k])`` for every entry, in order.
  """
  canais = ([], [], [], [])
  for imagem in listaImagens:
    for indice, destino in enumerate(canais):
      destino.append(lbp.describe(imagem[indice]))
  canal_1, canal_2, canal_3, canal_cinza = canais
  return canal_1, canal_2, canal_3, canal_cinza

#----------------------------------------------------------------------#

def obterListaTreino(listaIndiceTreino, listaIndiceTeste, listaImagens, classes):
    """Pick the train and test samples designated by two index collections.

    Each index is cast with ``int()`` before use, so float indices (for
    example values read with ``np.loadtxt``) are accepted.

    Returns:
        ``(listaTreino, listaTeste, classesTreino, classesTeste)``.
    """
    def _selecionar(indices):
        # Cast once, then pull images and labels from the same positions.
        posicoes = [int(valor) for valor in indices]
        return ([listaImagens[p] for p in posicoes],
                [classes[p] for p in posicoes])

    listaTreino, classesTreino = _selecionar(listaIndiceTreino)
    listaTeste, classesTeste = _selecionar(listaIndiceTeste)
    return listaTreino, listaTeste, classesTreino, classesTeste

In [116]:

# Display names, parallel to `classifiers` below (same length, same order).
names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
         "Decision Tree", "Random Forest","AdaBoost", "Naive Bayes"]

# Estimator instances that are re-fitted for every channel and fold.
# Two arguments were adjusted so the cell also runs on current scikit-learn:
#   * `min_impurity_split=None` was dropped (the parameter no longer exists;
#     None was its default, so behaviour is unchanged on older releases);
#   * `max_features='auto'` became 'sqrt', which is what 'auto' meant for
#     RandomForestClassifier before the alias was removed.
classifiers = [
    KNeighborsClassifier(3),
    SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
        decision_function_shape='ovo', degree=3, gamma='scale', kernel='linear',
        max_iter=-1, probability=False, random_state=109, shrinking=True, tol=0.001,
        verbose=False),
    SVC(C=10000, cache_size=200, class_weight=None, coef0=0.0,
        decision_function_shape='ovo', degree=3, gamma=1, kernel='rbf', max_iter=-1,
        probability=False, random_state=109, shrinking=True, tol=0.001,verbose=False),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    # NOTE(review): random_state=None makes the two tree-based models below
    # non-deterministic between runs; left as in the original experiment.
    DecisionTreeClassifier(class_weight=None, criterion='entropy',
        max_depth=3, max_features=None, max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        min_samples_leaf=50, min_samples_split=2,
        min_weight_fraction_leaf=0.0,
        random_state=None, splitter='best'),
    RandomForestClassifier(bootstrap=False, class_weight=None,
        criterion='gini', max_depth=30, max_features='sqrt',
        max_leaf_nodes=None, min_impurity_decrease=0.0,
        min_samples_leaf=4, min_samples_split=2,
        min_weight_fraction_leaf=0.0, n_estimators=600,
        n_jobs=None, oob_score=False, random_state=None),
    AdaBoostClassifier(),
    GaussianNB()
]

#----------------------------------------------------------------------#

def xgboost(listaTreino, listaTeste, classesTreino, classesTeste):
    """Fit an XGBClassifier on the training data and score it on the test data.

    Args:
        listaTreino: training feature vectors.
        listaTeste: test feature vectors.
        classesTreino: labels for ``listaTreino``.
        classesTeste: labels for ``listaTeste``.

    Returns:
        ``(acc, f1, cm)``: accuracy, weighted F1 score and confusion matrix
        of the predictions on ``listaTeste``.
    """
    # NOTE(review): both `learning_rate` and `eta` are passed with different
    # values; they look like aliases of the same setting, so one probably
    # overrides the other. Kept as-is so results stay comparable; confirm
    # which value is intended.
    m = XGBClassifier(colsample_bytree= 0.7,
        gamma= 0.2, learning_rate= 0.15, max_depth= 5,
        min_child_weight= 1, eta= 0.3 ).fit(listaTreino, classesTreino)
    # Predict once; the original ran the same prediction twice and discarded one.
    y_pred = m.predict(listaTeste)
    acc = accuracy_score(classesTeste, y_pred)
    f1  = f1_score(classesTeste, y_pred, average='weighted')
    cm  = confusion_matrix(classesTeste, y_pred)
    return acc, f1, cm

#----------------------------------------------------------------------#

def utilizarClassificadores(listaTreino, classesTreino, listaTeste, classesTeste, namesClassifiers, classifiers):
    """Fit and score every classifier, plus XGBoost, on one train/test split.

    Args:
        listaTreino, classesTreino: training feature vectors and labels.
        listaTeste, classesTeste: test feature vectors and labels.
        namesClassifiers: display names, indexed in parallel with ``classifiers``.
        classifiers: estimators exposing ``fit`` and ``predict``.

    Returns:
        Three dicts keyed by classifier name (with an extra ``"XGBoost"``
        entry): accuracy, weighted F1 score and confusion matrix.
    """
    acuracias, f1s, matrizes = {}, {}, {}
    for posicao, classificador in enumerate(classifiers):
        previsto = classificador.fit(listaTreino, classesTreino).predict(listaTeste)
        nome = namesClassifiers[posicao]
        acuracias[nome] = accuracy_score(classesTeste, previsto)
        f1s[nome] = f1_score(classesTeste, previsto, average='weighted')
        matrizes[nome] = confusion_matrix(classesTeste, previsto)

    # XGBoost is evaluated through its own helper and receives numpy arrays.
    resultado_xgb = xgboost(np.asarray(listaTreino), np.asarray(listaTeste),
                            np.asarray(classesTreino), np.asarray(classesTeste))
    acuracias["XGBoost"], f1s["XGBoost"], matrizes["XGBoost"] = resultado_xgb
    return acuracias, f1s, matrizes


In [117]:
def classificarImagens(pontos, raio):
    """Evaluate all classifiers on the LBP histograms of ten colour planes.

    Relies on notebook globals set by the driver cell: ``listaRgbTreino``,
    ``listaHsvTreino``, ``listaLabTreino``, their ``*Teste`` counterparts,
    ``classes_treino``, ``classes_teste``, ``names`` and ``classifiers``.

    Args:
        pontos: number of LBP neighbour points.
        raio: LBP radius.

    Returns:
        ``(linha, listaF1Score, listaCM)``: three lists with one dict per
        plane (accuracy, weighted F1, confusion matrix), in this fixed order:
        gray, R, G, B, H, S, V, L, A, B (of LAB).
    """
    _lbp = LocalBinaryPatterns(pontos, raio)

    # The fourth (gray) histogram is only kept for the RGB set.
    treino_rgb_b, treino_rgb_g, treino_rgb_r, treino_rgb_gray = obtemHistogramaCanais(listaRgbTreino, _lbp)
    treino_hsv_h, treino_hsv_s, treino_hsv_v, _ = obtemHistogramaCanais(listaHsvTreino, _lbp)
    treino_lab_l, treino_lab_a, treino_lab_b, _ = obtemHistogramaCanais(listaLabTreino, _lbp)

    teste_rgb_b, teste_rgb_g, teste_rgb_r, teste_rgb_gray = obtemHistogramaCanais(listaRgbTeste, _lbp)
    teste_hsv_h, teste_hsv_s, teste_hsv_v, _ = obtemHistogramaCanais(listaHsvTeste, _lbp)
    teste_lab_l, teste_lab_a, teste_lab_b, _ = obtemHistogramaCanais(listaLabTeste, _lbp)

    # (train, test) histogram pairs; the order defines the row order of the
    # result tables and must not change.
    canais = [
        (treino_rgb_gray, teste_rgb_gray),  # RGB (gray)
        (treino_rgb_r, teste_rgb_r),        # RGB - R
        (treino_rgb_g, teste_rgb_g),        # RGB - G
        (treino_rgb_b, teste_rgb_b),        # RGB - B
        (treino_hsv_h, teste_hsv_h),        # HSV - H
        (treino_hsv_s, teste_hsv_s),        # HSV - S
        (treino_hsv_v, teste_hsv_v),        # HSV - V
        (treino_lab_l, teste_lab_l),        # LAB - L
        (treino_lab_a, teste_lab_a),        # LAB - A
        (treino_lab_b, teste_lab_b),        # LAB - B
    ]

    linha = []
    listaF1Score = []
    listaCM = []
    for treino, teste in canais:
        acc, f1, cm = utilizarClassificadores(treino, classes_treino, teste, classes_teste, names, classifiers)
        linha.append(acc)
        listaF1Score.append(f1)
        listaCM.append(cm)

    return linha, listaF1Score, listaCM

In [118]:
def obterResultados(pontos, raio):
    """Run ``classificarImagens`` and wrap each of its three result lists in a DataFrame.

    Returns:
        ``(accuracy_df, f1_df, confusion_matrix_df)``.
    """
    acuracias, f1s, matrizes = classificarImagens(pontos, raio)
    return tuple(pd.DataFrame(tabela) for tabela in (acuracias, f1s, matrizes))

In [119]:
# Dataset location. Set the RIM_ONE_DIR environment variable to run the
# notebook on another machine; the original author's path stays the default.
data_dir = os.environ.get(
    "RIM_ONE_DIR", r"C:\Users\Rosana\Documents\Mestrado\DataSets\RIM_ONE_v2")
# The position in this list is the class label: glaucoma -> 0, normal -> 1.
categorias = ["glaucoma","normal"]
# Every image is resized to img_size x img_size pixels.
img_size = 200
listaImagens = criarListaImagens(categorias, data_dir, img_size)
imagens, classes = criaListaTreinoTeste(listaImagens)

In [122]:
# Cross-validation driver: for every fold, rebuild the colour planes of the
# train/test split, evaluate each LBP configuration, save the per-fold tables
# as CSV and accumulate accuracy / F1 so the fold average can be displayed.
N_FOLDS = 5
LBP_CONFIGS = [(8, 1), (16, 3), (24, 3), (24, 8)]  # (points, radius)

c = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
         "Decision Tree", "Random Forest","AdaBoost", "Naive Bayes", "XGBoost"]

# Running sums per configuration: 10 rows (one per colour plane evaluated by
# classificarImagens) x 9 columns (one per classifier).
acc_total = {cfg: pd.DataFrame(np.zeros([10,9]), columns=c) for cfg in LBP_CONFIGS}
f1_total = {cfg: pd.DataFrame(np.zeros([10,9]), columns=c) for cfg in LBP_CONFIGS}

for i in range(N_FOLDS):
    count = i + 1  # fold files are numbered starting at 1

    indiceTreino = np.loadtxt('id_train_img_'+str(count)+'.out')
    indiceTeste = np.loadtxt('id_test_img_'+str(count)+'.out')
    listaTreino, listaTeste, classes_treino, classes_teste = obterListaTreino(indiceTreino, indiceTeste, imagens, classes)

    # These names are read as globals by classificarImagens.
    listaRgbTreino = obtemCanaisCoresRGB(listaTreino)
    listaHsvTreino = obterCanaisCores(listaTreino, cv2.COLOR_BGR2HSV)
    listaLabTreino = obterCanaisCores(listaTreino, cv2.COLOR_BGR2LAB)
    # NOTE(review): the LUV lists are not used by any code visible here;
    # kept in case a later cell reads them.
    listaLuvTreino = obterCanaisCores(listaTreino, cv2.COLOR_BGR2LUV)

    listaRgbTeste = obtemCanaisCoresRGB(listaTeste)
    listaHsvTeste = obterCanaisCores(listaTeste, cv2.COLOR_BGR2HSV)
    listaLabTeste = obterCanaisCores(listaTeste, cv2.COLOR_BGR2LAB)
    listaLuvTeste = obterCanaisCores(listaTeste, cv2.COLOR_BGR2LUV)

    for cfg in LBP_CONFIGS:
        pontos, raio = cfg
        sufixo = str(pontos) + '_' + str(raio) + '_' + str(count)

        pd_acc, pd_f1, pd_cm = obterResultados(pontos, raio)
        pd_acc.to_csv('df_acc_' + sufixo + '.csv')
        pd_f1.to_csv('df_f1_' + sufixo + '.csv')
        pd_cm.to_csv('df_cm_' + sufixo + '.csv')

        acc_total[cfg] += pd_acc
        f1_total[cfg] += pd_f1

# Keep the original per-configuration names available to later cells.
pdAcc_8_1, pdf1_8_1 = acc_total[(8, 1)], f1_total[(8, 1)]
pdAcc_16_3, pdf1_16_3 = acc_total[(16, 3)], f1_total[(16, 3)]
pdAcc_24_3, pdf1_24_3 = acc_total[(24, 3)], f1_total[(24, 3)]
pdAcc_24_8, pdf1_24_8 = acc_total[(24, 8)], f1_total[(24, 8)]

# Mean accuracy over the folds, one table per LBP configuration.
for cfg in LBP_CONFIGS:
    display(acc_total[cfg] / N_FOLDS)


Unnamed: 0,Nearest Neighbors,Linear SVM,RBF SVM,Gaussian Process,Decision Tree,Random Forest,AdaBoost,Naive Bayes,XGBoost
0,0.698901,0.716484,0.751648,0.720879,0.701099,0.720879,0.723077,0.694505,0.718681
1,0.67033,0.731868,0.727473,0.751648,0.727473,0.72967,0.727473,0.696703,0.725275
2,0.716484,0.727473,0.749451,0.76044,0.72967,0.718681,0.745055,0.69011,0.716484
3,0.718681,0.727473,0.725275,0.778022,0.740659,0.76044,0.747253,0.698901,0.751648
4,0.643956,0.643956,0.657143,0.646154,0.630769,0.654945,0.632967,0.624176,0.648352
5,0.723077,0.716484,0.756044,0.782418,0.767033,0.734066,0.751648,0.685714,0.727473
6,0.67033,0.731868,0.727473,0.751648,0.727473,0.723077,0.727473,0.696703,0.725275
7,0.727473,0.707692,0.751648,0.738462,0.725275,0.756044,0.714286,0.694505,0.718681
8,0.716484,0.692308,0.69011,0.712088,0.716484,0.705495,0.712088,0.698901,0.705495
9,0.661538,0.714286,0.70989,0.712088,0.698901,0.701099,0.687912,0.707692,0.701099


Unnamed: 0,Nearest Neighbors,Linear SVM,RBF SVM,Gaussian Process,Decision Tree,Random Forest,AdaBoost,Naive Bayes,XGBoost
0,0.72967,0.784615,0.802198,0.789011,0.692308,0.762637,0.756044,0.727473,0.78022
1,0.692308,0.747253,0.808791,0.793407,0.672527,0.738462,0.723077,0.69011,0.727473
2,0.753846,0.791209,0.797802,0.802198,0.738462,0.789011,0.762637,0.738462,0.793407
3,0.745055,0.751648,0.771429,0.791209,0.751648,0.771429,0.782418,0.705495,0.78022
4,0.668132,0.723077,0.751648,0.72967,0.652747,0.725275,0.685714,0.718681,0.712088
5,0.712088,0.681319,0.705495,0.767033,0.696703,0.769231,0.747253,0.661538,0.758242
6,0.692308,0.747253,0.808791,0.793407,0.672527,0.738462,0.718681,0.69011,0.727473
7,0.731868,0.795604,0.791209,0.793407,0.731868,0.778022,0.771429,0.720879,0.784615
8,0.703297,0.749451,0.769231,0.775824,0.703297,0.753846,0.70989,0.742857,0.736264
9,0.703297,0.727473,0.745055,0.756044,0.676923,0.753846,0.736264,0.687912,0.756044


Unnamed: 0,Nearest Neighbors,Linear SVM,RBF SVM,Gaussian Process,Decision Tree,Random Forest,AdaBoost,Naive Bayes,XGBoost
0,0.731868,0.791209,0.810989,0.802198,0.681319,0.8,0.78022,0.740659,0.817582
1,0.707692,0.731868,0.778022,0.804396,0.683516,0.747253,0.725275,0.681319,0.751648
2,0.758242,0.793407,0.8,0.815385,0.714286,0.769231,0.771429,0.745055,0.782418
3,0.745055,0.764835,0.806593,0.797802,0.740659,0.749451,0.758242,0.707692,0.764835
4,0.674725,0.723077,0.731868,0.727473,0.687912,0.738462,0.714286,0.714286,0.716484
5,0.740659,0.657143,0.701099,0.747253,0.725275,0.76044,0.740659,0.67033,0.749451
6,0.707692,0.731868,0.778022,0.804396,0.683516,0.753846,0.723077,0.681319,0.751648
7,0.745055,0.797802,0.797802,0.802198,0.685714,0.793407,0.740659,0.727473,0.778022
8,0.70989,0.756044,0.747253,0.786813,0.72967,0.769231,0.716484,0.738462,0.751648
9,0.712088,0.734066,0.747253,0.758242,0.698901,0.762637,0.712088,0.69011,0.756044


Unnamed: 0,Nearest Neighbors,Linear SVM,RBF SVM,Gaussian Process,Decision Tree,Random Forest,AdaBoost,Naive Bayes,XGBoost
0,0.718681,0.78022,0.784615,0.789011,0.753846,0.767033,0.747253,0.67033,0.778022
1,0.742857,0.821978,0.852747,0.854945,0.703297,0.81978,0.810989,0.70989,0.830769
2,0.718681,0.778022,0.784615,0.762637,0.672527,0.773626,0.771429,0.685714,0.786813
3,0.63956,0.738462,0.782418,0.786813,0.703297,0.734066,0.767033,0.635165,0.745055
4,0.679121,0.705495,0.705495,0.687912,0.663736,0.70989,0.663736,0.672527,0.701099
5,0.659341,0.696703,0.692308,0.705495,0.628571,0.703297,0.698901,0.613187,0.70989
6,0.742857,0.821978,0.852747,0.854945,0.703297,0.81978,0.810989,0.70989,0.832967
7,0.70989,0.795604,0.795604,0.795604,0.740659,0.795604,0.758242,0.694505,0.789011
8,0.727473,0.804396,0.817582,0.81978,0.736264,0.786813,0.797802,0.705495,0.815385
9,0.782418,0.786813,0.782418,0.791209,0.698901,0.793407,0.745055,0.696703,0.786813
