In [1]:
import os
import cv2
import random
import numpy as np
from skimage import feature
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [2]:
def criarListaImagens(listaCategorias):
  lista = []
  for categoria in listaCategorias:
    path = os.path.join(data_dir, categoria)
    class_num = listaCategorias.index(categoria)
    for img in os.listdir(path):
      try:
        img_array = cv2.imread(os.path.join(path, img))
        new_array = cv2.resize(img_array, (img_size, img_size))
        lista.append([new_array, class_num])
      except Exception as e:
        print(e)
        pass
  return lista

#----------------------------------------------------------------------#

def criaListaTreinoTeste(lista):
  classes = []
  imagens = []
  for imagem in lista:
    imagens.append(imagem[0])
    classes.append(imagem[1])
  return imagens, classes

#----------------------------------------------------------------------#

def obtemCanaisCoresRGB(listaOrigem):
  listaDestino = []
  for imagem in listaOrigem:
    aux = cv2.split(imagem)
    aux.append(cv2.cvtColor(imagem, cv2.COLOR_BGR2GRAY))
    listaDestino.append(aux)

  return listaDestino

#----------------------------------------------------------------------#

def obterCanaisCores(listaOrigem, cvt):
  listaDestino = []
  for imagem in listaOrigem:    
    i = cv2.cvtColor(imagem, cvt)
    aux = cv2.split(i)
    aux = cv2.split(i)
    aux.append(cv2.cvtColor(i, cv2.COLOR_BGR2GRAY))
    listaDestino.append(aux)

  return listaDestino

#----------------------------------------------------------------------#

class LocalBinaryPatterns:
	def __init__(self, numPoints, radius):
		self.numPoints = numPoints
		self.radius = radius
	def describe(self, image, eps=1e-7):
		lbp = feature.local_binary_pattern(image, self.numPoints,
			self.radius, method="uniform")
		(hist, _) = np.histogram(lbp.ravel(),
			bins=np.arange(0, self.numPoints + 3),
			range=(0, self.numPoints + 2))
		hist = hist.astype("float")
		hist /= (hist.sum() + eps)
		return hist

#----------------------------------------------------------------------#

def obtemHistogramaCanais(listaImagens, lbp):
  canal_1 = []
  canal_2 = []
  canal_3 = []
  canal_cinza = []
  for imagem in listaImagens:
    canal_1.append(lbp.describe(imagem[0]))
    canal_2.append(lbp.describe(imagem[1]))
    canal_3.append(lbp.describe(imagem[2]))
    canal_cinza.append(lbp.describe(imagem[3]))
  return canal_1, canal_2, canal_3, canal_cinza

#----------------------------------------------------------------------#


In [5]:
data_dir = r"C:\Users\Rosana\Documents\Mestrado\DataSets\RIM_ONE_v2"
categorias = ["glaucoma","normal"]
img_size = 200
listaImagens = criarListaImagens(categorias)
random.shuffle(listaImagens)

imagens, classes = criaListaTreinoTeste(listaImagens)
imagens_treino, imagens_teste, classes_treino, classes_teste = train_test_split(imagens, classes, test_size=0.33, random_state=42)

listaRgbTreino = obtemCanaisCoresRGB(imagens_treino)
listaHsvTreino = obterCanaisCores(imagens_treino, cv2.COLOR_BGR2HSV)
listaLabTreino = obterCanaisCores(imagens_treino, cv2.COLOR_BGR2LAB)
listaLuvTreino = obterCanaisCores(imagens_treino, cv2.COLOR_BGR2LUV)

listaRgbTeste = obtemCanaisCoresRGB(imagens_teste)
listaHsvTeste = obterCanaisCores(imagens_teste, cv2.COLOR_BGR2HSV)
listaLabTeste = obterCanaisCores(imagens_teste, cv2.COLOR_BGR2LAB)
listaLuvTeste = obterCanaisCores(imagens_teste, cv2.COLOR_BGR2LUV)

_lbp = LocalBinaryPatterns(24, 8)

lista_treino_rgb_b, lista_treino_rgb_g, lista_treino_rgb_r, lista_treino_rgb_gray = obtemHistogramaCanais(listaRgbTreino, _lbp)
lista_treino_hsv_h, lista_treino_hsv_s, lista_treino_hsv_v, _ = obtemHistogramaCanais(listaHsvTreino, _lbp)
lista_treino_lab_l, lista_treino_lab_a, lista_treino_lab_b, _ = obtemHistogramaCanais(listaLabTreino, _lbp)
lista_treino_luv_l, lista_treino_luv_u, lista_treino_luv_v, _ = obtemHistogramaCanais(listaLuvTreino, _lbp)

lista_teste_rgb_b, lista_teste_rgb_g, lista_teste_rgb_r, lista_teste_rgb_gray = obtemHistogramaCanais(listaRgbTeste, _lbp)
lista_teste_hsv_h, lista_teste_hsv_s, lista_teste_hsv_v, _ = obtemHistogramaCanais(listaHsvTeste, _lbp)
lista_teste_lab_l, lista_teste_lab_a, lista_teste_lab_b, _ = obtemHistogramaCanais(listaLabTeste, _lbp)
lista_teste_luv_l, lista_teste_luv_u, lista_teste_luv_v, _ = obtemHistogramaCanais(listaLuvTeste, _lbp)


In [6]:
print(len(classes_teste))

151


In [None]:
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from xgboost import XGBClassifier

def xgboost(listaTreino, listaTeste, classesTreino, classesTeste):    
    m = XGBClassifier(colsample_bytree= 0.7, 
    gamma= 0.2, learning_rate= 0.15, max_depth= 5, 
    min_child_weight= 1, eta= 0.3 ).fit(listaTreino, classesTreino)
    preds = m.predict(listaTeste)
    y_pred = m.predict(listaTeste)     
    acc = accuracy_score(classesTeste, y_pred)
    f1  = f1_score(classesTeste, y_pred, average=None)
    cm  = confusion_matrix(classesTeste, y_pred)
    return acc, f1, cm

xgboost(np.array(lista_treino_rgb_r), np.array(lista_teste_rgb_r), np.array(classes_treino), np.array(classes_teste))

In [9]:

parameters = {
        'min_child_weight': [1, 5, 10],
        'gamma': [0.2, 0.5, 1, 1.5, 2, 5],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.7, 0.8, 1.0],
        'max_depth': [3, 4, 5],
        'n_estimators': [100, 200, 300, 400, 500]
        }


estimator = XGBClassifier(
    objective= 'binary:logistic',
    nthread=4,
    seed = 42
)

grid_search = GridSearchCV(
    estimator=estimator,
    param_grid=parameters,
    scoring = 'accuracy',
    n_jobs = 10,
    cv = 10,
    verbose=False
)

In [134]:
# RGB - GRAY
X = np.concatenate((np.array(lista_treino_rgb_gray), np.array(lista_teste_rgb_gray)))
y = np.concatenate((np.array(classes_treino), np.array(classes_teste)))
best_model = grid_search.fit(X, y)
print(best_model.best_params_)
print(best_model.best_score_)



{'colsample_bytree': 0.7, 'gamma': 0.2, 'learning_rate': 0.15, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 500}
0.789010989010989


In [10]:
# RGB - R
X = np.concatenate((np.array(lista_treino_rgb_r), np.array(lista_teste_rgb_r)))
y = np.concatenate((np.array(classes_treino), np.array(classes_teste)))
best_model = grid_search.fit(X, y)
print(best_model.best_score_)
print(best_model.best_params_)



0.8505494505494505
{'colsample_bytree': 0.6, 'gamma': 5, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 100, 'subsample': 0.8}


In [11]:
# RGB - G
X = np.concatenate((np.array(lista_treino_rgb_g), np.array(lista_teste_rgb_g)))
y = np.concatenate((np.array(classes_treino), np.array(classes_teste)))
best_model = grid_search.fit(X, y)
print(best_model.best_score_)
print(best_model.best_params_)

0.8065934065934066
{'colsample_bytree': 0.7, 'gamma': 2, 'max_depth': 3, 'min_child_weight': 10, 'n_estimators': 100, 'subsample': 0.8}




In [12]:
# RGB - B
X = np.concatenate((np.array(lista_treino_rgb_b), np.array(lista_teste_rgb_b)))
y = np.concatenate((np.array(classes_treino), np.array(classes_teste)))
best_model = grid_search.fit(X, y)
print(best_model.best_score_)
print(best_model.best_params_)

0.789010989010989
{'colsample_bytree': 0.7, 'gamma': 2, 'max_depth': 4, 'min_child_weight': 1, 'n_estimators': 100, 'subsample': 0.8}




In [138]:
# HSV - H
X = np.concatenate((np.array(lista_treino_hsv_h), np.array(lista_teste_hsv_h)))
y = np.concatenate((np.array(classes_treino), np.array(classes_teste)))
best_model = grid_search.fit(X, y)
print(best_model.best_score_)
print(best_model.best_params_)

0.7076923076923077
{'colsample_bytree': 0.7, 'gamma': 0.2, 'learning_rate': 0.15, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 500}


In [139]:
# HSV - S
X = np.concatenate((np.array(lista_treino_hsv_s), np.array(lista_teste_hsv_s)))
y = np.concatenate((np.array(classes_treino), np.array(classes_teste)))
best_model = grid_search.fit(X, y)
print(best_model.best_score_)
print(best_model.best_params_)

0.7142857142857143
{'colsample_bytree': 0.7, 'gamma': 0.2, 'learning_rate': 0.15, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 500}


In [140]:
# HSV - V
X = np.concatenate((np.array(lista_treino_hsv_v), np.array(lista_teste_hsv_v)))
y = np.concatenate((np.array(classes_treino), np.array(classes_teste)))
best_model = grid_search.fit(X, y)
print(best_model.best_score_)
print(best_model.best_params_)



0.832967032967033
{'colsample_bytree': 0.7, 'gamma': 0.2, 'learning_rate': 0.15, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 500}


In [141]:
# LAB - L
X = np.concatenate((np.array(lista_treino_lab_l), np.array(lista_teste_lab_l)))
y = np.concatenate((np.array(classes_treino), np.array(classes_teste)))
best_model = grid_search.fit(X, y)
print(best_model.best_score_)
print(best_model.best_params_)



0.778021978021978
{'colsample_bytree': 0.7, 'gamma': 0.2, 'learning_rate': 0.15, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 500}


In [142]:
# LAB - A
X = np.concatenate((np.array(lista_treino_lab_a), np.array(lista_teste_lab_a)))
y = np.concatenate((np.array(classes_treino), np.array(classes_teste)))
best_model = grid_search.fit(X, y)
print(best_model.best_score_)
print(best_model.best_params_)

0.8241758241758241
{'colsample_bytree': 0.7, 'gamma': 0.2, 'learning_rate': 0.15, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 500}


In [143]:
# LAB - B
X = np.concatenate((np.array(lista_treino_lab_b), np.array(lista_teste_lab_b)))
y = np.concatenate((np.array(classes_treino), np.array(classes_teste)))
best_model = grid_search.fit(X, y)
print(best_model.best_score_)
print(best_model.best_params_)



0.8021978021978022
{'colsample_bytree': 0.7, 'gamma': 0.2, 'learning_rate': 0.15, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 500}


In [144]:
# LUV - L
X = np.concatenate((np.array(lista_treino_luv_l), np.array(lista_teste_luv_l)))
y = np.concatenate((np.array(classes_treino), np.array(classes_teste)))
best_model = grid_search.fit(X, y)
print(best_model.best_score_)
print(best_model.best_params_)



0.7692307692307693
{'colsample_bytree': 0.7, 'gamma': 0.2, 'learning_rate': 0.15, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 500}


In [145]:
# LUV - U
X = np.concatenate((np.array(lista_treino_luv_u), np.array(lista_teste_luv_u)))
y = np.concatenate((np.array(classes_treino), np.array(classes_teste)))
best_model = grid_search.fit(X, y)
print(best_model.best_score_)
print(best_model.best_params_)



0.7934065934065934
{'colsample_bytree': 0.7, 'gamma': 0.2, 'learning_rate': 0.15, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 500}


In [146]:
# LUV - V
X = np.concatenate((np.array(lista_treino_luv_v), np.array(lista_teste_luv_v)))
y = np.concatenate((np.array(classes_treino), np.array(classes_teste)))
best_model = grid_search.fit(X, y)
print(best_model.best_score_)
print(best_model.best_params_)

0.7802197802197802
{'colsample_bytree': 0.7, 'gamma': 0.2, 'learning_rate': 0.15, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 500}
