Cleaning avec un modèle TensorFlow

In [5]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

In [6]:
def preprocess_image_grayscale(img):
    """Return a 3-channel image built from the grayscale version of ``img``.

    ``img`` is first passed through ``tf.image.rgb_to_grayscale`` and the
    result is expanded again with ``tf.image.grayscale_to_rgb``.
    """
    gray = tf.image.rgb_to_grayscale(img)
    return tf.image.grayscale_to_rgb(gray)

In [7]:
import os
import shutil
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def identify_and_move_low_confidence_images(base_dir, model, threshold, low_conf_dest,
                                            target_size=(256, 256), batch_size=32):
    """Move images whose predicted score is close to 0.5 into a separate folder.

    Images are read from ``base_dir`` with ``ImageDataGenerator.flow_from_directory``
    (binary class mode, no shuffling, grayscale preprocessing) and scored with
    ``model.predict``. Every image whose score lies strictly between
    ``0.5 - threshold`` and ``0.5 + threshold`` is moved to
    ``low_conf_dest/<parent folder>/<file name>``.

    Args:
        base_dir: Root folder passed to ``flow_from_directory``.
        model: Object exposing ``predict(generator, steps=...)``.
        threshold: Half-width of the low-confidence band around 0.5.
        low_conf_dest: Root folder receiving the low-confidence images.
        target_size: Image size passed to ``flow_from_directory``.
        batch_size: Batch size used for reading and for computing ``steps``.

    Returns:
        None. Files are moved on disk and one line is printed per moved file.
    """
    datagen = ImageDataGenerator(preprocessing_function=preprocess_image_grayscale)
    generator = datagen.flow_from_directory(
        base_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='binary',  # or 'categorical' when there are more than 2 classes
        shuffle=False)

    # Nothing to score: avoid calling predict() with zero steps.
    if generator.samples == 0:
        return

    # `steps` must be an integer; the batch size comes from the argument
    # instead of an implicit notebook-level global.
    steps = int(np.ceil(generator.samples / batch_size))
    predictions = model.predict(generator, steps=steps)

    # squeeze() yields a 0-d array when there is a single image;
    # atleast_1d keeps the result indexable in that case.
    predicted_confidences = np.atleast_1d(np.squeeze(predictions))

    # Scores inside the open interval (0.5 - threshold, 0.5 + threshold).
    low_confidence_indices = np.where(
        (predicted_confidences > 0.5 - threshold) &
        (predicted_confidences < 0.5 + threshold)
    )[0]

    for idx in low_confidence_indices:
        file_path = generator.filenames[idx]
        source_path = os.path.join(base_dir, file_path)
        # Keep only the immediate parent folder so the class sub-folder is
        # preserved; os.path handles the platform's separator.
        class_name = os.path.basename(os.path.dirname(file_path))
        destination_path = os.path.join(low_conf_dest, class_name, os.path.basename(file_path))
        os.makedirs(os.path.dirname(destination_path), exist_ok=True)
        shutil.move(source_path, destination_path)
        print(f"Moved: {source_path} to {destination_path} - Confidence: {predicted_confidences[idx]:.4f}")

In [None]:
# Parameters for the TensorFlow-based cleaning run
target_size = (256, 256)
batch_size = 32
model_path = '/home/riccardo/Visual_Studio_Code/Grain-segmentation_prjt/Models/Main_classification_model/Class_pearl_mart_cleaned_V5_batch16_grey_365img_dataset.h5'
base_dir = '/home/riccardo/Visual_Studio_Code/Grain-segmentation_prjt/Dataset_Structures_modes/ImageDataGen_825_cleaned/test'
low_conf_dest = '/home/riccardo/Visual_Studio_Code/Grain-segmentation_prjt/Dataset_Structures_modes/ImageDataGen_825_cleaned_lowconf/test'
threshold = 0.1  # Low-confidence threshold, defined around 50%

# Load the model
model = tf.keras.models.load_model(model_path)

# Identify and move the low-confidence images
identify_and_move_low_confidence_images(base_dir, model, threshold, low_conf_dest)

Cleaning avec un modèle PyTorch

V1 — attention : si move_wrong_prediction = False, le CSV est faux ; sinon ça marche quand même !

In [45]:
import os
import shutil
import csv
from ultralytics import YOLO

def identify_and_move_low_confidence_images_yolo_to_csv(base_dir, model_path, threshold, low_conf_dest, csv_file_path, move_wrong_prediction=True):
    """Classify every image under ``base_dir`` with YOLO and set aside doubtful ones.

    Each ``.png`` / ``.jpg`` / ``.jpeg`` file found by ``os.walk(base_dir)`` is
    classified. The name of the folder containing the file is treated as its
    expected class and compared, case-insensitively, with the predicted label
    (``0 -> 'Martensite'``, ``1 -> 'Pearlite'``). An image is moved to
    ``low_conf_dest/<folder name>/<file name>`` when:

    * the predicted label differs from the folder name and
      ``move_wrong_prediction`` is true ("Incorrect Prediction"), or
    * otherwise, the top-1 confidence is below ``threshold`` ("Low Confidence").

    Every moved file is appended to ``csv_file_path`` and printed.

    Args:
        base_dir: Root folder walked for images.
        model_path: Weights path passed to ``YOLO``.
        threshold: Minimum top-1 confidence for an image to stay in place.
        low_conf_dest: Root folder receiving the moved images.
        csv_file_path: CSV log, opened in append mode.
        move_wrong_prediction: Whether label mismatches trigger a move.

    Returns:
        None.
    """
    # Load the YOLO model
    model = YOLO(model_path)

    # Class-id to label mapping, compared against the folder names
    labels = {0: 'Martensite', 1: 'Pearlite'}
    image_extensions = ('.png', '.jpg', '.jpeg')

    with open(csv_file_path, 'a', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        # The log is opened in append mode: write the header only when the
        # file is still empty, so repeated runs do not interleave header rows.
        if os.stat(csv_file_path).st_size == 0:
            csvwriter.writerow(['File Name', 'Reason', 'Confidence'])

        for root, dirs, files in os.walk(base_dir):
            class_dir = os.path.basename(root)
            for file in files:
                if not file.lower().endswith(image_extensions):
                    continue
                img_path = os.path.join(root, file)

                # YOLO prediction
                det = model.predict(img_path)
                predicted_label_id = det[0].probs.top1
                predicted_label = labels.get(predicted_label_id, None)
                # Plain float keeps the comparison and the formatting independent
                # of the numeric type returned by the model.
                top_confidence = float(det[0].probs.top1conf)

                if predicted_label and predicted_label.lower() != class_dir.lower() and move_wrong_prediction:
                    reason = "Incorrect Prediction"
                elif top_confidence < threshold:
                    reason = "Low Confidence"
                else:
                    continue  # confident and not flagged: leave the image in place

                # Mirror the class folder under the destination root
                destination_path = os.path.join(low_conf_dest, class_dir, file)
                os.makedirs(os.path.dirname(destination_path), exist_ok=True)
                shutil.move(img_path, destination_path)
                csvwriter.writerow([file, reason, f"{top_confidence:.4f}"])
                print(f"Moved: {file} - Reason: {reason} - Confidence: {top_confidence:.4f}")

In [46]:
# Parameters for the YOLO-based cleaning run (V1) on the train split
model_path = '/home/riccardo/Visual_Studio_Code/Grain-segmentation_prjt/dataset-full/100_img_manual_dataset/yolo_model/train200/weights/best.pt'
base_dir = '/home/riccardo/Visual_Studio_Code/Grain-segmentation_prjt/Dataset_Structures_modes/ImageDataGen_825_cleaned/train'
low_conf_dest = '/home/riccardo/Visual_Studio_Code/Grain-segmentation_prjt/Dataset_Structures_modes/ImageDataGen_825_cleaned_lowconf/train'
threshold = 0.85  # Adjust this threshold as needed
csv_file_path = 'test.csv'  # Path to the log file
move_wrong_prediction = True  # Change this value to enable/disable moving wrongly predicted images

In [None]:
# Call the function
identify_and_move_low_confidence_images_yolo_to_csv(base_dir, model_path, threshold, low_conf_dest, csv_file_path, move_wrong_prediction)

V2

In [50]:
import os
import shutil
import csv
from ultralytics import YOLO

def identify_and_move_low_confidence_images_yolo_to_csv(base_dir, model_path, threshold, low_conf_dest, csv_file_path, label_to_dir_mapping, move_wrong_prediction=True):
    """Classify every image under ``base_dir`` with YOLO and set aside doubtful ones.

    Each ``.png`` / ``.jpg`` / ``.jpeg`` file found by ``os.walk(base_dir)`` is
    classified. The predicted class id is translated to a folder name through
    ``label_to_dir_mapping`` (an unknown id maps to ``""``) and compared,
    case-insensitively, with the name of the folder containing the file. An
    image is moved to ``low_conf_dest/<folder name>/<file name>`` when:

    * the predicted folder differs from the actual one and
      ``move_wrong_prediction`` is true ("Incorrect Prediction"), or
    * otherwise, the top-1 confidence is below ``threshold`` ("Low Confidence").

    Every moved file is appended to ``csv_file_path`` and printed.

    Args:
        base_dir: Root folder walked for images.
        model_path: Weights path passed to ``YOLO``.
        threshold: Minimum top-1 confidence for an image to stay in place.
        low_conf_dest: Root folder receiving the moved images.
        csv_file_path: CSV log, opened in append mode; the header is written
            only when the file is empty.
        label_to_dir_mapping: Dict mapping predicted class ids to folder names.
        move_wrong_prediction: Whether label mismatches trigger a move.

    Returns:
        None.
    """
    # Load the YOLO model
    model = YOLO(model_path)
    image_extensions = ('.png', '.jpg', '.jpeg')

    with open(csv_file_path, 'a', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        if os.stat(csv_file_path).st_size == 0:
            csvwriter.writerow(['File Name', 'Class Directory', 'Predicted Label', 'Reason', 'Confidence'])

        for root, dirs, files in os.walk(base_dir):
            # Keep the folder name as it is on disk; lowercase only for comparing,
            # so the destination tree and the log mirror the real folder names.
            class_dir = os.path.basename(root)
            for file in files:
                if not file.lower().endswith(image_extensions):
                    continue
                img_path = os.path.join(root, file)
                det = model.predict(img_path)

                predicted_label_id = det[0].probs.top1
                predicted_label = label_to_dir_mapping.get(predicted_label_id, "")
                # Plain float keeps the comparison and the formatting independent
                # of the numeric type returned by the model.
                top_confidence = float(det[0].probs.top1conf)

                # Check whether the current folder matches the predicted label
                is_wrong_prediction = class_dir.lower() != predicted_label.lower()

                # Report the criterion that actually triggered the move: a mismatch
                # only counts as the reason when mismatch-moving is enabled.
                if is_wrong_prediction and move_wrong_prediction:
                    reason = "Incorrect Prediction"
                elif top_confidence < threshold:
                    reason = "Low Confidence"
                else:
                    continue  # nothing to do for this image

                destination_path = os.path.join(low_conf_dest, class_dir, file)
                os.makedirs(os.path.dirname(destination_path), exist_ok=True)
                shutil.move(img_path, destination_path)
                csvwriter.writerow([file, class_dir, predicted_label, reason, f"{top_confidence:.4f}"])
                print(f"Moved: {file} - Class Directory: {class_dir} - Predicted Label: {predicted_label} - Reason: {reason} - Confidence: {top_confidence:.4f}")

In [55]:
# Parameters for the YOLO-based cleaning run (V2) on the val split
model_path = '/home/riccardo/Visual_Studio_Code/Grain-segmentation_prjt/dataset-full/100_img_manual_dataset/yolo_model/train200/weights/best.pt'
base_dir = '/home/riccardo/Visual_Studio_Code/Grain-segmentation_prjt/Dataset_Structures_modes/ImageDataGen_825_cleaned/val'
low_conf_dest = '/home/riccardo/Visual_Studio_Code/Grain-segmentation_prjt/Dataset_Structures_modes/ImageDataGen_825_cleaned_lowconf/val'
threshold = 0.85  # Adjust this threshold as needed
csv_file_path = 'test.csv'  # Path to the log file
label_to_dir_mapping = {0: 'Martensite_microstructure_img_cropped_V3_manualy_cleaned_yolo', 1: 'pearlite_microstructure_img_cropped_V3_cleaned_yolo'}
move_wrong_prediction = True  # Change this value to enable/disable moving wrongly predicted images

In [None]:
# Run the V2 cleaning pass with the parameters defined in the previous cell
identify_and_move_low_confidence_images_yolo_to_csv(base_dir, model_path, threshold, low_conf_dest, csv_file_path, label_to_dir_mapping, move_wrong_prediction)