In [4]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from skimage.feature.texture import local_binary_pattern, greycomatrix, greycoprops
from sklearn.metrics import accuracy_score, f1_score
from tqdm import tqdm
import pickle
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Passo 1: Diretório de imagens

database = 'MED115'

#############################################################################################################################

directory = os.path.join(os.getcwd(), database, database + '_database')

# Passo 2: Ler o arquivo classes.txt para obter as informações sobre as classes de cada imagem
class_dict = {}
with open(database + '/classes.txt', 'rb') as file:
    class_dict = pickle.load(file, encoding='latin1')

# Passo 3: Definir uma função para extrair o descritor texture de uma imagem
def extract_texture(image):
    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    texture = local_binary_pattern(grey, 16, 2, method='uniform')
    hist, _ = np.histogram(texture.ravel())
    hist = hist.astype('float')
    hist /= (hist.sum() + 1e-7)  # Normalização
    
    # Calcular a matriz de coocorrência GLCM
    glcm = greycomatrix(grey, [2], [0, np.pi/4, np.pi/2, 3*np.pi/4], levels=256, symmetric=True, normed=True)
    
    # Calcular as propriedades do GLCM
    contrast = greycoprops(glcm, 'contrast').ravel()
    dissimilarity = greycoprops(glcm, 'dissimilarity').ravel()
    homogeneity = greycoprops(glcm, 'homogeneity').ravel()
    energy = greycoprops(glcm, 'energy').ravel()
    correlation = greycoprops(glcm, 'correlation').ravel()
    
    # Concatenar os descritores
    descritor_texture = np.hstack((hist.ravel(), contrast, dissimilarity, homogeneity, energy, correlation))
    return descritor_texture

# Passo 4: Percorrer o diretório de imagens, carregar cada imagem, extrair o descritor texture e armazenar o descritor e a classe correspondente em listas separadas

image_files = [filename for filename in os.listdir(directory)] 
total_images = len(image_files)

data_dict = {}
for filename in tqdm(image_files, desc='Processando imagens', unit='imagem', ncols=80):
    image = cv2.imread(os.path.join(directory, filename))
    if image is not None:
        texture_hist = extract_texture(image)
        label = class_dict.get(filename, None)
        if label is not None:
            data_dict[filename] = np.insert(texture_hist, 0, label)

# Extração das features e labels para escalonamento
data_labels = [sample[0] for sample in data_dict.values()] 
data_features = [sample[1:] for sample in data_dict.values()] 

# Aplicar o StandardScaler nas features
scaler = StandardScaler()
data_features = scaler.fit_transform(data_features)

# Reconstituir data_dict com as features escalonadas
for i, key in enumerate(data_dict.keys()):
    data_dict[key][1:] = data_features[i]

# Salvar o data_dict com as features escalonadas
directory = os.path.join(os.getcwd(), database)
arquivo = os.path.join(directory, 'texture.db')
with open(arquivo, 'wb') as arquivo:
    arquivo.write(pickle.dumps(data_dict))

# Passo 5: Dividir os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(data_features, data_labels, test_size=0.3, shuffle=True)

classificador_rf = RandomForestClassifier(n_estimators=300)
classificador_knn = KNeighborsClassifier(n_neighbors=5)

# Lista de classificadores
classificadores = [classificador_rf, classificador_knn]

# Passo 7: Realizar a validação cruzada k-fold para cada classificador
for classificador in classificadores:
    print(f"Classificador: {classificador.__class__.__name__}")
    
    kf = StratifiedKFold(n_splits=4, shuffle=True)  # Usar 4 divisões
    acuracias = []
    f1_scores = []

    for train_index, test_index in kf.split(data_features, data_labels):
        X_train, X_test = np.array(data_features)[train_index], np.array(data_features)[test_index]
        y_train, y_test = np.array(data_labels)[train_index], np.array(data_labels)[test_index]

        classificador.fit(X_train, y_train)
        y_pred = classificador.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')

        acuracias.append(accuracy)
        f1_scores.append(f1)

    # Calcular a acurácia e o F1-score médios em todas as divisões e o desvio-padrão
    acuracia_media = np.mean(acuracias)
    f1_score_media = np.mean(f1_scores)
    desvio_padrao_acuracia = np.std(acuracias)
    desvio_padrao_f1_score = np.std(f1_scores)

    # Exibir as acurácias, F1-scores, acurácia média e o desvio-padrão
    print("\n")
    for i, (acuracia, f1) in enumerate(zip(acuracias, f1_scores)):
        print("Fold {}: Acurácia: {:.2f}%, F1-score: {:.2f}%".format(i+1, acuracia * 100, f1 * 100))

    print("Acurácia Média: {:.2f}%".format(acuracia_media * 100))
    print("Desvio-Padrão Médio Acurácia: {:.2f}%".format(desvio_padrao_acuracia * 100))
    print("F1-score Médio: {:.2f}%".format(f1_score_media * 100))
    print("Desvio-Padrão Médio F1-score: {:.2f}%".format(desvio_padrao_f1_score * 100))
    print("\n")

Processando imagens: 100%|████████████| 8456/8456 [1:48:23<00:00,  1.30imagem/s]


Classificador: RandomForestClassifier


Fold 1: Acurácia: 97.35%, F1-score: 97.30%
Fold 2: Acurácia: 97.07%, F1-score: 97.03%
Fold 3: Acurácia: 97.63%, F1-score: 97.61%
Fold 4: Acurácia: 96.59%, F1-score: 96.56%
Acurácia Média: 97.16%
Desvio-Padrão Médio Acurácia: 0.38%
F1-score Médio: 97.13%
Desvio-Padrão Médio F1-score: 0.38%


Classificador: KNeighborsClassifier


Fold 1: Acurácia: 92.95%, F1-score: 92.75%
Fold 2: Acurácia: 93.42%, F1-score: 93.34%
Fold 3: Acurácia: 93.85%, F1-score: 93.73%
Fold 4: Acurácia: 93.19%, F1-score: 93.14%
Acurácia Média: 93.35%
Desvio-Padrão Médio Acurácia: 0.33%
F1-score Médio: 93.24%
Desvio-Padrão Médio F1-score: 0.35%


