In [1]:
import json
import numpy as np
from scipy.spatial.distance import euclidean, cosine
import cv2
import os
import yaml
from tqdm import tqdm
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern
from typing import Counter

In [2]:
def process_images(image_folder, label_folder, output_json, data_yaml=None):
    """Extract features for every labelled spore and save them to a JSON file.

    Images and YOLO label files are paired by file stem; stems that appear in
    only one of the two folders are ignored. Each bounding box is cropped from
    its image, passed to ``extract_features`` and stored under the key
    ``"<stem>_espora_<index>_class_<class_id>"``.

    Args:
        image_folder: Directory containing ``.jpg`` / ``.png`` / ``.jpeg`` images.
        label_folder: Directory containing the matching ``.txt`` label files.
        output_json: Path of the JSON file to write.
        data_yaml: Optional path to the dataset YAML holding the ``names``
            entry. When omitted, the two legacy relative locations are tried
            in order.

    Raises:
        FileNotFoundError: If no dataset YAML can be opened.
    """
    image_exts = ('.jpg', '.png', '.jpeg')
    image_files = {
        os.path.splitext(f)[0]: os.path.join(image_folder, f)
        for f in os.listdir(image_folder) if f.endswith(image_exts)
    }
    label_files = {
        os.path.splitext(f)[0]: os.path.join(label_folder, f)
        for f in os.listdir(label_folder) if f.endswith('.txt')
    }

    # Keep only stems that have both an image and a label. Sorting makes the
    # processing order (and therefore the JSON key order) reproducible.
    common_files = sorted(image_files.keys() & label_files.keys())

    # Load the dataset YAML from the first candidate that can be opened.
    if data_yaml is not None:
        yaml_candidates = [data_yaml]
    else:
        yaml_candidates = [".\\Imagenes\\dataset\\data.yaml", "../Imagenes/dataset/data.yaml"]
    for candidate in yaml_candidates:
        try:
            with open(candidate, "r") as file:
                data = yaml.safe_load(file) or {}
        except OSError:
            continue
        break
    else:
        raise FileNotFoundError(f"No dataset YAML found in: {yaml_candidates}")

    # Class names indexed by class id; empty when the YAML has no "names" entry.
    class_names = data.get("names", [])

    all_features = {}
    for file_name in tqdm(common_files):
        image_path = image_files[file_name]
        label_path = label_files[file_name]

        image = cv2.imread(image_path)
        if image is None:
            print(f"Error: No se pudo cargar la imagen {image_path}")
            continue

        img_h, img_w = image.shape[:2]
        bboxes = load_labels(label_path, image.shape)

        for i, (class_id, x_min, y_min, width, height) in enumerate(bboxes):
            # Clamp the box to the image. A negative start would otherwise be
            # read by NumPy as "count from the end" and yield the wrong crop.
            x0, y0 = max(x_min, 0), max(y_min, 0)
            x1, y1 = min(x_min + width, img_w), min(y_min + height, img_h)
            if x1 <= x0 or y1 <= y0:
                continue

            roi = image[y0:y1, x0:x1]
            if roi.size == 0:
                continue

            # Fall back to the numeric id when the YAML does not name this class.
            try:
                class_name = class_names[class_id]
            except (IndexError, KeyError):
                class_name = str(class_id)

            espora_id = f"{file_name}_espora_{i}_class_{class_id}"
            all_features[espora_id] = {
                "bounding_box": {
                    "class": class_name,
                    "x_min": x0,
                    "y_min": y0,
                    "width": x1 - x0,
                    "height": y1 - y0
                },
                "features": extract_features(roi)
            }

    # Persist the whole case base in one go.
    with open(output_json, "w") as json_file:
        json.dump(all_features, json_file, indent=4)


In [3]:
def load_database(json_path):
    """Load the spore case base from JSON, rebuilding it when it cannot be read.

    Args:
        json_path: Path of the JSON file written by ``process_images``.

    Returns:
        The decoded JSON content. If ``json_path`` is missing, unreadable or
        not valid JSON, the case base is regenerated from the training images
        with ``process_images`` and the freshly written file is returned.
    """
    try:
        with open(json_path, "r") as file:
            return json.load(file)
    except (OSError, ValueError):
        # Missing/unreadable file, or invalid JSON (JSONDecodeError is a
        # ValueError): fall through and rebuild the case base.
        pass

    # (image folder, label folder, output JSON) for the two supported layouts.
    # NOTE(review): other cells spell this folder "Imagenes" (no accent) -
    # confirm which spelling exists on disk.
    candidates = (
        (".\\Imágenes\\dataset\\train\\images",
         ".\\Imágenes\\dataset\\train\\labels",
         ".\\spore_features.json"),
        ("../Imágenes/dataset/train/images",
         "../Imágenes/dataset/train/labels",
         "../spore_features.json"),
    )
    # Use the first layout whose image folder exists; otherwise the last one,
    # so a missing dataset still surfaces as an error from process_images.
    image_folder, label_folder, output_json = next(
        (layout for layout in candidates if os.path.isdir(layout[0])),
        candidates[-1],
    )

    process_images(image_folder, label_folder, output_json)

    # process_images only writes the file, so read the result back for the caller.
    with open(output_json, "r") as file:
        return json.load(file)


# Load the case base: try the absolute project path first, then the relative
# path. `except Exception` (instead of a bare `except`) lets Ctrl-C interrupt
# a long rebuild instead of silently starting the fallback.
try:
    database = load_database("D:\\MatCom\\4toanno\\1er_Semestre\\Machine_Learning\\Proyecto\\CBR_algorithim\\spore_features.json")
except Exception:
    database = load_database("../spore_features.json")



In [4]:

def chi_square_distance(hist1, hist2):
    """Return the chi-square distance between two histograms.

    Computes ``0.5 * sum((h1 - h2)^2 / (h1 + h2 + 1e-7))``; the small constant
    avoids a division by zero for bins that are empty in both histograms.

    Args:
        hist1: First histogram (array or plain sequence of numbers).
        hist2: Second histogram, same length as ``hist1``.

    Returns:
        The distance as a NumPy float; 0 for identical histograms.
    """
    # Work in float64 so that lists are accepted and integer histograms
    # (e.g. uint8 counts) cannot wrap around in the subtraction, square or sum.
    h1 = np.asarray(hist1, dtype=np.float64)
    h2 = np.asarray(hist2, dtype=np.float64)
    return 0.5 * np.sum(((h1 - h2) ** 2) / (h1 + h2 + 1e-7))

def compare_cases(case1, case2):
    """Return a weighted dissimilarity between two spore cases (lower = more alike).

    Six partial distances are combined with fixed weights:
    gray-level statistics (0.2), HSV histograms (0.25), GLCM texture (0.2),
    Hu moments (0.2), LBP histogram (0.1) and bounding-box size (0.05).

    Args:
        case1: Case dict with ``"bounding_box"`` and ``"features"`` entries.
        case2: Second case dict with the same layout.

    Returns:
        The weighted sum of the partial distances.
    """
    # Size gap: boxes with very different width/height are penalised.
    box_a, box_b = case1["bounding_box"], case2["bounding_box"]
    size_gap = abs(box_a["width"] - box_b["width"]) + abs(box_a["height"] - box_b["height"])

    feats_a, feats_b = case1["features"], case2["features"]

    def as_vector(features, group, names):
        """Collect the named scalar features of one group into an array."""
        return np.array([features[group][name] for name in names])

    # Basic gray-level statistics, compared with the Euclidean distance.
    gray_keys = ("mean_gray", "std_gray", "min_gray", "max_gray")
    stats_gap = euclidean(as_vector(feats_a, "stats", gray_keys),
                          as_vector(feats_b, "stats", gray_keys))

    # HSV histograms: the three channels are joined and compared with the
    # cosine distance.
    hsv_gap = cosine(np.concatenate(feats_a["color_features"]["hist_hsv"]),
                     np.concatenate(feats_b["color_features"]["hist_hsv"]))

    # GLCM texture descriptors, compared with the Euclidean distance.
    glcm_keys = ("contrast", "homogeneity", "energy", "correlation")
    texture_gap = euclidean(as_vector(feats_a, "texture_features", glcm_keys),
                            as_vector(feats_b, "texture_features", glcm_keys))

    # Hu moments, compared with the Euclidean distance.
    hu_gap = euclidean(np.array(feats_a["stats"]["hu_moments"]),
                       np.array(feats_b["stats"]["hu_moments"]))

    # LBP histograms, compared with the chi-square distance.
    lbp_gap = chi_square_distance(np.array(feats_a["texture_features"]["lbp_histogram"]),
                                  np.array(feats_b["texture_features"]["lbp_histogram"]))

    # Weighted combination of all partial distances.
    return (
        (stats_gap * 0.2)
        + (hsv_gap * 0.25)
        + (texture_gap * 0.2)
        + (hu_gap * 0.2)
        + (lbp_gap * 0.1)
        + (size_gap * 0.05)
    )

def find_similar_cases(new_case, database, top_n=5, threshold=70):
    """Find the stored cases most similar to ``new_case``.

    Args:
        new_case: Case dict to classify (same layout as the database entries).
        database: Mapping of case id to case dict.
        top_n: Number of nearest cases to keep.
        threshold: Largest distance accepted for the best match; above it the
            case is deferred to manual classification.

    Returns:
        The string ``"Clasificación manual requerida"`` when the database is
        empty or the best match is farther than ``threshold``. Otherwise a list
        of ``((class_name, distance), count)`` tuples for the ``top_n`` nearest
        cases, closest first.
    """
    similarities = [
        (case["bounding_box"]["class"], compare_cases(new_case, case))
        for case in database.values()
    ]

    # Smaller distance means more similar.
    similarities.sort(key=lambda x: x[1])
    k_values = similarities[:top_n]

    # The list is sorted by distance, so the first entry is the best match.
    # (min() on the tuples would compare class names first, not distances.)
    if not k_values or k_values[0][1] > threshold:
        return "Clasificación manual requerida"

    # NOTE(review): the counter is keyed by (class, distance) pairs, so each
    # count is normally 1 and this is not a per-class vote. The return shape is
    # kept as-is because callers read it; counting class names alone would be
    # needed for a real majority vote.
    return Counter(k_values).most_common()
    
def calculate_dynamic_threshold(database):
    """Return the 90th percentile of all pairwise case distances.

    Args:
        database: Mapping of case id to case dict (as loaded from JSON), or a
            plain sequence of case dicts.

    Returns:
        The 90th percentile of ``compare_cases`` over every unordered pair.

    Raises:
        ValueError: If fewer than two cases are available.
    """
    # The case base is keyed by string ids, so it cannot be indexed with
    # integers; compare the stored cases themselves.
    cases = list(database.values()) if hasattr(database, "values") else list(database)
    if len(cases) < 2:
        raise ValueError("At least two cases are required to compute a threshold")

    # Every unordered pair is compared once (quadratic in the number of cases).
    similarity_scores = [
        compare_cases(cases[i], cases[j])
        for i in range(len(cases))
        for j in range(i + 1, len(cases))
    ]

    return float(np.percentile(similarity_scores, 90))


In [5]:
def load_labels(label_path, image_shape):
    """Load bounding boxes from a YOLO label file as pixel coordinates.

    Each non-blank line must hold ``class_id x_center y_center width height``
    with the four coordinates normalised to the image size.

    Args:
        label_path: Path of the ``.txt`` label file.
        image_shape: Shape of the matching image; only ``(height, width)`` from
            the first two entries is used.

    Returns:
        A list of ``(class_id, x_min, y_min, box_width, box_height)`` tuples in
        pixels, clamped to the image bounds.

    Raises:
        ValueError: If a non-blank line does not contain exactly five values.
    """
    h, w = image_shape[:2]
    bboxes = []

    with open(label_path, "r") as f:
        for line in f:
            values = line.split()
            if not values:
                # Blank lines (e.g. a trailing newline) carry no box.
                continue

            class_id = int(values[0])
            x_center, y_center, width, height = map(float, values[1:])

            # Convert normalised centre/size to pixel edges.
            left = int((x_center - width / 2) * w)
            top = int((y_center - height / 2) * h)
            right = left + int(width * w)
            bottom = top + int(height * h)

            # Clamp to the image so a box crossing the border never produces
            # negative coordinates (which would mis-slice a NumPy image).
            left, right = max(0, min(left, w)), max(0, min(right, w))
            top, bottom = max(0, min(top, h)), max(0, min(bottom, h))

            bboxes.append((class_id, left, top, right - left, bottom - top))

    return bboxes



In [6]:
def extract_features(image):
    """Extract colour, texture and gray-level statistics from an image crop.

    Args:
        image: Colour image array; it is converted with the BGR->GRAY and
            BGR->HSV OpenCV conversions, so BGR channel order is expected.

    Returns:
        A JSON-serialisable dict with three groups:
        ``"color_features"`` (per-channel HSV mean, standard deviation and
        normalised 256-bin histograms), ``"texture_features"`` (GLCM contrast,
        dissimilarity, homogeneity, energy, correlation and a normalised
        uniform-LBP histogram) and ``"stats"`` (gray mean, standard deviation,
        minimum, maximum and the seven Hu moments).
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Shape descriptor: Hu moments of the gray image, as plain floats.
    hu_moments = [float(v) for v in cv2.HuMoments(cv2.moments(gray)).flatten()]

    # HSV histograms, one per channel, each normalised to sum to 1.
    hsv_hists = []
    for channel in range(3):
        counts = cv2.calcHist([hsv], [channel], None, [256], [0, 256]).flatten()
        hsv_hists.append((counts / counts.sum()).tolist())

    # Texture: gray-level co-occurrence matrix (distance 1, angle 0).
    glcm = graycomatrix(gray, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
    glcm_props = {
        prop: float(graycoprops(glcm, prop)[0, 0])
        for prop in ("contrast", "dissimilarity", "homogeneity", "energy", "correlation")
    }

    # Texture: uniform local binary patterns (P=8, R=1) in a 10-bin histogram;
    # the small constant guards against division by zero.
    lbp = local_binary_pattern(gray, P=8, R=1, method='uniform')
    lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 11), range=(0, 10))
    lbp_hist = lbp_hist.astype("float")
    lbp_hist /= (lbp_hist.sum() + 1e-7)

    # The RGB mean/std/histogram values the previous version computed were
    # never returned, so they are no longer calculated.
    return {
        "color_features": {
            "mean_hsv": np.mean(hsv, axis=(0, 1)).tolist(),
            "std_hsv": np.std(hsv, axis=(0, 1)).tolist(),
            "hist_hsv": hsv_hists
        },
        "texture_features": {
            "contrast": glcm_props["contrast"],
            "dissimilarity": glcm_props["dissimilarity"],
            "homogeneity": glcm_props["homogeneity"],
            "energy": glcm_props["energy"],
            "correlation": glcm_props["correlation"],
            "lbp_histogram": lbp_hist.tolist()
        },
        "stats": {
            "mean_gray": float(np.mean(gray)),
            "std_gray": float(np.std(gray)),
            "min_gray": float(np.min(gray)),
            "max_gray": float(np.max(gray)),
            "hu_moments": hu_moments
        }
    }


In [7]:
def segment_image(image):
    """Segment an image and return one bounding box per external contour.

    The image is converted to gray, smoothed with a 5x5 Gaussian blur and
    binarised with Otsu's threshold before the contours are extracted.

    Args:
        image: Colour image array (converted with the BGR->GRAY conversion).

    Returns:
        A list with the ``cv2.boundingRect`` result of every external contour.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)

    # Otsu chooses the threshold itself, so the 0 passed here is ignored.
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _level, binary_mask = cv2.threshold(smoothed, 0, 255, otsu_flags)

    # Keep only the outermost contours and box each of them.
    outlines, _hierarchy = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    boxes = []
    for outline in outlines:
        boxes.append(cv2.boundingRect(outline))
    return boxes



In [8]:
def predict(image, case_database, return_all=False):
    """Predict the spore type(s) in an image using case-based retrieval.

    The image is segmented, features are extracted from every detected box and
    each box is matched against ``case_database`` with ``find_similar_cases``.

    Args:
        image: Colour image array to analyse.
        case_database: Mapping of case id to stored case dict.
        return_all: When True, return the retrieval result of every detected
            box (in detection order) instead of only the last one.

    Returns:
        With ``return_all=False`` (default, backward compatible): the result of
        ``find_similar_cases`` for the last detected box, or an empty list when
        nothing was detected. With ``return_all=True``: a list holding one such
        result per detected box.
    """
    results = []

    # 1. Segment the image into candidate boxes.
    for x, y, w, h in segment_image(image):
        # 2. Extract the features of the detected region.
        roi = image[y:y+h, x:x+w]
        if roi.size == 0:
            # Degenerate box: nothing to describe.
            continue

        candidate = {
            "bounding_box": {
                "class": '',
                "x_min": x,
                "y_min": y,
                "width": w,
                "height": h
            },
            "features": extract_features(roi)
        }

        # 3. Retrieve the most similar stored cases.
        results.append(find_similar_cases(candidate, case_database))

    if return_all:
        return results

    # NOTE(review): historically only the last box's answer is returned; pass
    # return_all=True to get every box. Returning [] when nothing is detected
    # avoids the UnboundLocalError the previous version raised.
    return results[-1] if results else []


In [9]:
# try:
#     image_folder = ".\\Imágenes\\dataset\\train\\images"
#     label_folder = ".\\Imágenes\\dataset\\train\\labels"
#     output_json = ".\\spore_features.json"
# except:
#     image_folder = "../Imágenes/dataset/train/images"
#     label_folder = "../Imágenes/dataset/train/labels"
#     output_json = "../spore_features.json"
# process_images(image_folder, label_folder, output_json)



# Locate the validation images: try the absolute project path first, then the
# relative one. The first folder that can be listed wins.
# NOTE(review): the relative path spells the folder "Imágenes" while the
# absolute one uses "Imagenes" - confirm which spelling exists on disk.
for valid_image_folder in (
    "D:\\MatCom\\4toanno\\1er_Semestre\\Machine_Learning\\Proyecto\\CBR_algorithim\\Imagenes\\dataset\\valid\\images",
    "../Imágenes/dataset/valid/images",
):
    try:
        valid_image_files = {
            os.path.splitext(f)[0]: os.path.join(valid_image_folder, f)
            for f in os.listdir(valid_image_folder)
            if f.endswith(('.jpg', '.png', '.jpeg'))
        }
    except OSError:
        continue
    break
else:
    raise FileNotFoundError("No validation image folder could be listed")


# Run the prediction on every validation image.
for image in tqdm(valid_image_files.keys()):
    print(valid_image_files[image])
    image1 = cv2.imread(valid_image_files[image])
    if image1 is None:
        # cv2.imread returns None for unreadable files instead of raising.
        print(f"Error: No se pudo cargar la imagen {valid_image_files[image]}")
        continue

    resultados = predict(image1, database)

    # Show the results. A plain string means the case was deferred to manual
    # classification; print it once instead of character by character.
    if isinstance(resultados, str):
        print(f"Espora detectada en {image} - Tipo: {resultados} ")
        continue
    for res in resultados:
        print(f"Espora detectada en {image} - Tipo: {res[0]} ")


  0%|          | 0/117 [00:00<?, ?it/s]

D:\MatCom\4toanno\1er_Semestre\Machine_Learning\Proyecto\CBR_algorithim\Imagenes\dataset\valid\images\11_1_jpg.rf.6d62d45ffe0decd5483c40ae58cbb24d.jpg


  1%|          | 1/117 [00:01<03:29,  1.81s/it]

Espora detectada en 11_1_jpg.rf.6d62d45ffe0decd5483c40ae58cbb24d - Tipo: ('curvularia', 63.79278384792397) 
Espora detectada en 11_1_jpg.rf.6d62d45ffe0decd5483c40ae58cbb24d - Tipo: ('curvularia', 64.67759677076519) 
Espora detectada en 11_1_jpg.rf.6d62d45ffe0decd5483c40ae58cbb24d - Tipo: ('curvularia', 65.43183872282592) 
Espora detectada en 11_1_jpg.rf.6d62d45ffe0decd5483c40ae58cbb24d - Tipo: ('curvularia', 65.65708644479831) 
Espora detectada en 11_1_jpg.rf.6d62d45ffe0decd5483c40ae58cbb24d - Tipo: ('curvularia', 65.96328161824286) 
D:\MatCom\4toanno\1er_Semestre\Machine_Learning\Proyecto\CBR_algorithim\Imagenes\dataset\valid\images\11_3_jpg.rf.449adc22ccdf26ad94b98e004d1be7f3.jpg


  2%|▏         | 2/117 [00:04<04:08,  2.17s/it]

Espora detectada en 11_3_jpg.rf.449adc22ccdf26ad94b98e004d1be7f3 - Tipo: ('curvularia', 64.71380421119741) 
Espora detectada en 11_3_jpg.rf.449adc22ccdf26ad94b98e004d1be7f3 - Tipo: ('curvularia', 65.81168646765775) 
Espora detectada en 11_3_jpg.rf.449adc22ccdf26ad94b98e004d1be7f3 - Tipo: ('curvularia', 66.66485877371353) 
Espora detectada en 11_3_jpg.rf.449adc22ccdf26ad94b98e004d1be7f3 - Tipo: ('curvularia', 66.85564706075618) 
Espora detectada en 11_3_jpg.rf.449adc22ccdf26ad94b98e004d1be7f3 - Tipo: ('curvularia', 67.54128923264929) 
D:\MatCom\4toanno\1er_Semestre\Machine_Learning\Proyecto\CBR_algorithim\Imagenes\dataset\valid\images\16_1_jpg.rf.1a7bd56ac1473510d3016f1a102879d7.jpg


  3%|▎         | 3/117 [00:07<04:48,  2.53s/it]

Espora detectada en 16_1_jpg.rf.1a7bd56ac1473510d3016f1a102879d7 - Tipo: ('curvularia', 66.10185762416332) 
Espora detectada en 16_1_jpg.rf.1a7bd56ac1473510d3016f1a102879d7 - Tipo: ('curvularia', 66.3695242992501) 
Espora detectada en 16_1_jpg.rf.1a7bd56ac1473510d3016f1a102879d7 - Tipo: ('cladosporium', 66.51341219683913) 
Espora detectada en 16_1_jpg.rf.1a7bd56ac1473510d3016f1a102879d7 - Tipo: ('cladosporium', 67.00494560780588) 
Espora detectada en 16_1_jpg.rf.1a7bd56ac1473510d3016f1a102879d7 - Tipo: ('curvularia', 67.05115875186107) 
D:\MatCom\4toanno\1er_Semestre\Machine_Learning\Proyecto\CBR_algorithim\Imagenes\dataset\valid\images\16_1_jpg.rf.c7bbcb931ed3ff67a07ca176de5063d5.jpg


  3%|▎         | 4/117 [00:09<04:16,  2.27s/it]

Espora detectada en 16_1_jpg.rf.c7bbcb931ed3ff67a07ca176de5063d5 - Tipo: ('curvularia', 62.33747228022654) 
Espora detectada en 16_1_jpg.rf.c7bbcb931ed3ff67a07ca176de5063d5 - Tipo: ('curvularia', 62.900643982487026) 
Espora detectada en 16_1_jpg.rf.c7bbcb931ed3ff67a07ca176de5063d5 - Tipo: ('curvularia', 63.65332287406624) 
Espora detectada en 16_1_jpg.rf.c7bbcb931ed3ff67a07ca176de5063d5 - Tipo: ('curvularia', 64.18875514361713) 
Espora detectada en 16_1_jpg.rf.c7bbcb931ed3ff67a07ca176de5063d5 - Tipo: ('curvularia', 64.79043879632961) 
D:\MatCom\4toanno\1er_Semestre\Machine_Learning\Proyecto\CBR_algorithim\Imagenes\dataset\valid\images\18_2_jpg.rf.9da0ae887c5930a714093651e5f97bd8.jpg


  3%|▎         | 4/117 [00:10<04:49,  2.56s/it]


KeyboardInterrupt: 