### XAI-Analyse

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import pathlib
import uuid
import shutil
from sklearn.model_selection import train_test_split, StratifiedGroupKFold
from tensorflow import keras
import matplotlib.cm as cm

# CONFIGURATION
EXPERIMENT_SEED = 42
IMG_SIZE = (224, 224)
PLANTDOC_DIR = '/mnt/c/Users/sandr/Desktop/Studium/thesis/praxis/data/plantdoc_cropped'
OUTPUT_DIR = "./blind_study"
MODELS_DIR = "./models"

# The 5 classes used for the analysis
TARGET_CLASSES = [
    "Tomato___Late_blight",
    "Tomato___Tomato_Yellow_Leaf_Curl_Virus",
    "Squash___Powdery_mildew",
    "Tomato___Bacterial_spot",
    "Tomato___healthy",
]

IMAGES_PER_CLASS = 10
MIN_PIXEL_SIZE = 300  # Preferred minimum edge length (undercut if necessary)
HEATMAP_ALPHA = 0.6   # Visibility of the heatmap overlay

# All 4 models: display key -> file name inside MODELS_DIR
model_files = {
    f"Model_{letter}": f"model_{letter}_seed_{EXPERIMENT_SEED}.keras"
    for letter in "ABCD"
}

# GRAD-CAM Implementierung [https://keras.io/examples/vision/grad_cam/]
def find_target_layer(model):
    """Return the name of the last top-level layer whose output is 4D.

    The layers of ``model`` are scanned back to front. Layers that do not
    expose an ``output_shape`` attribute are skipped, as are layers whose
    name contains "input" (case-insensitive). When ``output_shape`` is a
    list, only its first entry is inspected.

    Returns:
        The matching layer's name, or the literal ``'efficientnetb0'`` when
        no layer qualifies.
    """
    not_present = object()

    for candidate in reversed(model.layers):
        out_shape = getattr(candidate, 'output_shape', not_present)
        if out_shape is not_present:
            continue

        # Multi-output layers report a list of shapes; use the first one.
        if isinstance(out_shape, list):
            out_shape = out_shape[0]

        if len(out_shape) != 4 or 'input' in candidate.name.lower():
            continue

        print(f"Target Layer gefunden: {candidate.name} (Shape: {out_shape})")
        return candidate.name

    return 'efficientnetb0'

def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for ``img_array``.

    The model is split at the top-level layer named ``last_conv_layer_name``:
    one sub-model reproduces that layer (from its own inputs to its output),
    a second one chains every top-level layer that follows it. The gradient
    of the selected class score with respect to the first sub-model's output
    is averaged into per-channel weights, which are used to combine the
    feature maps of the first sample.

    Args:
        img_array: Input batch fed directly to the split-off layer.
        model: Keras model containing ``last_conv_layer_name`` at top level.
        last_conv_layer_name: Name of the layer to explain.
        pred_index: Class index to explain; defaults to the arg-max of the
            first sample's prediction.

    Returns:
        NumPy array: the rectified heatmap divided by (its pre-rectification
        maximum + epsilon).
    """
    feature_layer = model.get_layer(last_conv_layer_name)

    # Position of the target layer; everything after it is the classifier head.
    split_at = next(
        (pos for pos, lyr in enumerate(model.layers) if lyr.name == last_conv_layer_name),
        -1,
    )
    classifier_layers = model.layers[split_at + 1:]

    feature_extractor = keras.models.Model(
        inputs=feature_layer.inputs, outputs=feature_layer.output
    )

    # Rebuild the head on a fresh input shaped like the feature maps (no batch dim).
    classifier_input = keras.Input(shape=feature_layer.output_shape[1:])
    tensor = classifier_input
    for lyr in classifier_layers:
        tensor = lyr(tensor)
    classifier = keras.models.Model(inputs=classifier_input, outputs=tensor)

    with tf.GradientTape() as tape:
        feature_maps = feature_extractor(img_array)
        # The feature maps are not variables, so they must be watched explicitly.
        tape.watch(feature_maps)
        scores = classifier(feature_maps)
        if pred_index is None:
            pred_index = tf.argmax(scores[0])
        target_score = scores[:, pred_index]

    gradients = tape.gradient(target_score, feature_maps)
    channel_weights = tf.reduce_mean(gradients, axis=(0, 1, 2))

    # Weighted sum over channels for the first sample, then drop the size-1 axis.
    weighted_maps = feature_maps[0] @ channel_weights[..., tf.newaxis]
    cam = tf.squeeze(weighted_maps)

    # Rectify and scale; epsilon guards against a zero maximum.
    eps = tf.keras.backend.epsilon()
    cam = tf.maximum(cam, 0) / (tf.math.reduce_max(cam) + eps)

    return cam.numpy()

def save_and_display_gradcam(img_path, heatmap, save_path, alpha=0.4):
    """Overlay a heatmap on the original image and write the result to disk.

    Despite its name, this function only saves the image; nothing is
    displayed.

    Args:
        img_path: Path of the original image (loaded at its native size).
        heatmap: 2D array of heatmap values; scaled by 255 and cast to uint8
            to index the colormap.
        save_path: Destination file for the blended image.
        alpha: Factor applied to the coloured heatmap before it is added to
            the image.
    """
    # Function-scope import: the file only imports ``matplotlib.cm`` at top level.
    import matplotlib

    # Load the original image
    img = keras.preprocessing.image.load_img(img_path)
    img = keras.preprocessing.image.img_to_array(img)

    # Colourise the heatmap
    heatmap = np.uint8(255 * heatmap)
    try:
        # ``cm.get_cmap`` is deprecated and was removed in Matplotlib 3.9;
        # the colormap registry is the supported replacement.
        jet = matplotlib.colormaps["jet"]
    except AttributeError:
        # Older Matplotlib without the registry.
        jet = cm.get_cmap("jet")
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]

    # Scale the heatmap to the image size (PIL expects (width, height))
    jet_heatmap = keras.preprocessing.image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
    jet_heatmap = keras.preprocessing.image.img_to_array(jet_heatmap)

    # Blend and save
    superimposed_img = jet_heatmap * alpha + img
    superimposed_img = keras.preprocessing.image.array_to_img(superimposed_img)
    superimposed_img.save(save_path)

# --- HELPER: BILD-AUSWAHL MIT FALLBACK ---
def select_images_robust(candidates, target_count, min_size):
    """Select up to ``target_count`` images, preferring large ones.

    Images whose width and height are both at least ``min_size`` are
    shuffled with a ``RandomState`` seeded by ``EXPERIMENT_SEED`` and taken
    first. If that is not enough, the remainder is filled with the smaller
    images, largest pixel count first.

    Args:
        candidates: Iterable of image paths to consider.
        target_count: Desired number of images.
        min_size: Minimum width and height for an image to count as large.

    Returns:
        List of selected paths; may be shorter than ``target_count`` when
        too few readable images exist.
    """
    valid_large = []
    valid_small = []

    for img_path in candidates:
        try:
            img = keras.utils.load_img(img_path)
        except Exception as exc:
            # Best effort: skip unreadable files, but say so instead of
            # hiding the problem (and do not swallow KeyboardInterrupt).
            print(f"    -> WARNUNG: Bild übersprungen ({img_path}): {exc}")
            continue

        w, h = img.size
        if w >= min_size and h >= min_size:
            valid_large.append(img_path)
        else:
            # Remember small images together with their area for sorting.
            valid_small.append((img_path, w * h))

    # Random choice among the large images (to avoid selection bias)
    rng = np.random.RandomState(EXPERIMENT_SEED)
    rng.shuffle(valid_large)

    # 1. Take as many large images as possible; clamp so that a negative
    #    target cannot turn into a "all but the last k" slice.
    selected = list(valid_large[:max(target_count, 0)])

    # 2. Check whether more images are needed
    missing = target_count - len(selected)

    if missing > 0:
        if valid_small:
            fill = min(missing, len(valid_small))
            print(f"    -> Info: Nur {len(selected)} Bilder > {min_size}px gefunden. Fülle {fill} mit kleineren Bildern auf.")
            # Largest of the small images first (stable for equal areas).
            valid_small.sort(key=lambda x: x[1], reverse=True)
            selected.extend(path for path, _ in valid_small[:fill])

        # Still short, either because no small images exist or too few do.
        if len(selected) < target_count:
            print(f"    -> WARNUNG: Auch mit kleinen Bildern konnten nur {len(selected)} gefunden werden!")

    return selected

# --- HAUPTPROGRAMM ---

# 1. Load the data and reproduce the split
print("Starte Daten-Selektion mit GROUP-AWARE SPLIT...")
data_dir = pathlib.Path(PLANTDOC_DIR)

# Sort first so the ordering is deterministic (IMPORTANT!), then drop
# entries whose name starts with a dot.
image_paths = [
    str(entry)
    for entry in sorted(data_dir.glob('*/*'))
    if not entry.name.startswith('.')
]

class_names = sorted(item.name for item in data_dir.glob('*') if item.is_dir())
class_to_index = dict(zip(class_names, range(len(class_names))))

# Label: index of the parent directory's name.
all_labels = [class_to_index[pathlib.Path(p).parent.name] for p in image_paths]

# Group ID (for the group-aware split): the file name up to the last
# "_crop_" marker. Without the marker, rsplit returns the whole file name,
# so such a file forms its own group.
all_groups = [os.path.basename(p).rsplit("_crop_", 1)[0] for p in image_paths]

# Reproduce the split (group-aware!)
# Per the original author's note: same seed and same logic as in training.
cv = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=EXPERIMENT_SEED)

# The first fold is the test set
_, test_idx = next(cv.split(image_paths, all_labels, all_groups))

# Test paths and test labels
test_paths = []
test_labels = []
for i in test_idx:
    test_paths.append(image_paths[i])
    test_labels.append(all_labels[i])

print(f"Test-Set rekonstruiert: {len(test_paths)} Bilder verfügbar.")


# 2. Select the images
print(f"Filtere Test-Set nach den {len(TARGET_CLASSES)} Zielklassen (Ziel: {IMAGES_PER_CLASS} Bilder/Klasse)...")
selected_paths = []

for target_class in TARGET_CLASSES:
    if target_class not in class_names:
        print(f"WARNUNG: Klasse '{target_class}' nicht im Datensatz gefunden!")
        continue

    class_idx = class_to_index[target_class]

    # Candidates come from the reconstructed test set only.
    candidates = [
        candidate_path
        for candidate_path, label_idx in zip(test_paths, test_labels)
        if label_idx == class_idx
    ]

    # Delegate to the size-aware selection with fallback
    print(f"  Prüfe Klasse: {target_class} (Verfügbar im Testset: {len(candidates)})")
    chosen = select_images_robust(candidates, IMAGES_PER_CLASS, MIN_PIXEL_SIZE)

    selected_paths.extend(chosen)
    print(f"  -> {len(chosen)} Bilder final ausgewählt.")

print(f"Insgesamt ausgewählt: {len(selected_paths)} Bilder.")
# 1-based image ID per selected path
path_to_id = {path: image_id for image_id, path in enumerate(selected_paths, start=1)}

# 3. Prepare the output folder
# NOTE: an existing OUTPUT_DIR is deleted recursively, so results of a
# previous run are discarded before the folder is recreated empty.
if os.path.exists(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
os.makedirs(OUTPUT_DIR)

# One dict per generated heatmap; filled by the generation loop and exported
# to CSV afterwards.
blind_data = []

def get_img_array(img_path, size):
    """Load an image and return it as an array with a leading batch axis.

    Args:
        img_path: Path of the image file.
        size: Passed to ``load_img`` as ``target_size``.

    Returns:
        The image array with one extra axis inserted at position 0.
    """
    pil_img = keras.preprocessing.image.load_img(img_path, target_size=size)
    pixels = keras.preprocessing.image.img_to_array(pil_img)
    return pixels[np.newaxis, ...]

# 4. Generate the heatmaps
print("\nStarte Heatmap-Generierung...")

for model_key, filename in model_files.items():
    full_model_path = os.path.join(MODELS_DIR, filename)
    if not os.path.exists(full_model_path):
        # Missing model files are reported and skipped, not fatal.
        print(f"WARNUNG: {filename} fehlt.")
        continue

    print(f"Lade {model_key}...")
    model = keras.models.load_model(full_model_path)
    last_conv_layer = find_target_layer(model)

    for img_path in selected_paths:
        img_id = path_to_id[img_path]
        # The ground-truth class is the name of the image's parent folder.
        true_label_name = pathlib.Path(img_path).parent.name

        try:
            img_array = get_img_array(img_path, IMG_SIZE)

            # Get the prediction for the single image in the batch
            preds = model.predict(img_array, verbose=0)
            scores = preds[0]
            pred_idx = np.argmax(scores)
            pred_label_name = class_names[pred_idx]
            confidence = np.max(scores)

            # The heatmap explains the PREDICTED class, with one exception:
            # for Bacterial Spot (the "phantom class") the ground truth is forced.
            target_class_index = (
                class_to_index[true_label_name]
                if true_label_name == "Tomato___Bacterial_spot"
                else pred_idx
            )

            # Compute the heatmap
            heatmap = make_gradcam_heatmap(
                img_array, model, last_conv_layer, pred_index=target_class_index
            )

            # Save under a name that carries the image ID plus a random
            # 8-character suffix, so the file name does not reveal the model.
            random_suffix = uuid.uuid4().hex[:8]
            unique_filename = f"{img_id:03d}_{random_suffix}.jpg"
            save_path = os.path.join(OUTPUT_DIR, unique_filename)

            save_and_display_gradcam(img_path, heatmap, save_path, alpha=HEATMAP_ALPHA)

            record = {
                "Rating_ID (Filename)": unique_filename,
                "Image_ID": img_id,
                "Model (Hidden)": model_key,
                "True_Class": true_label_name,
                "Pred_Class": pred_label_name,  # only stored in the key file
                "Target_Class_Used_For_Heatmap": class_names[target_class_index],
                "Correct": (true_label_name == pred_label_name),
                "Confidence": confidence,
            }
            blind_data.append(record)
        except Exception as e:
            # One failing image must not abort the whole run; report and go on.
            print(f"Fehler bei Bild {img_id}: {e}")

# 5. Export
if not blind_data:
    print("Keine Daten generiert.")
else:
    df_full = pd.DataFrame(blind_data).sort_values(
        by=["Image_ID", "Rating_ID (Filename)"]
    )

    # KEY FILE (solution): every recorded column, including the hidden model.
    df_full.to_csv("blind_rating_KEY_FINAL.csv", index=False)

    # RATING SHEET (working file): only the columns a rater may see, plus an
    # empty column for the rating itself.
    rating_columns = ["Rating_ID (Filename)", "Image_ID", "True_Class"]
    df_rating = df_full[rating_columns].copy()
    df_rating["Rating (1=Noise, 3=Mix, 5=Precise)"] = ""

    df_rating.to_csv("BEWERTUNGSBOGEN_FINAL.csv", index=False)

    print(
        "\nFERTIG!",
        f"Bilder in: {OUTPUT_DIR}",
        "Dateien:",
        "1. blind_rating_KEY_FINAL.csv (LÖSUNG)",
        "2. BEWERTUNGSBOGEN_FINAL.csv (ARBEITSDATEI)",
        sep="\n",
    )

2026-01-05 15:16:55.226614: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2026-01-05 15:16:55.250310: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2026-01-05 15:16:55.250339: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2026-01-05 15:16:55.266552: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Starte Daten-Selektion mit GROUP-AWARE SPLIT...
Test-Set rekonstruiert: 1823 Bilder verfügbar.
Filtere Test-Set nach den 5 Zielklassen (Ziel: 10 Bilder/Klasse)...
  Prüfe Klasse: Tomato___Late_blight (Verfügbar im Testset: 27)
  -> 10 Bilder final ausgewählt.
  Prüfe Klasse: Tomato___Tomato_Yellow_Leaf_Curl_Virus (Verfügbar im Testset: 175)
    -> Info: Nur 3 Bilder > 300px gefunden. Fülle 7 mit kleineren Bildern auf.
  -> 10 Bilder final ausgewählt.
  Prüfe Klasse: Squash___Powdery_mildew (Verfügbar im Testset: 42)
  -> 10 Bilder final ausgewählt.
  Prüfe Klasse: Tomato___Bacterial_spot (Verfügbar im Testset: 50)
  -> 10 Bilder final ausgewählt.
  Prüfe Klasse: Tomato___healthy (Verfügbar im Testset: 70)
    -> Info: Nur 8 Bilder > 300px gefunden. Fülle 2 mit kleineren Bildern auf.
  -> 10 Bilder final ausgewählt.
Insgesamt ausgewählt: 50 Bilder.

Starte Heatmap-Generierung...
Lade Model_A...


2026-01-05 15:17:02.115677: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:08:00.0/numa_node
Your kernel may have been built without NUMA support.
2026-01-05 15:17:02.203379: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:08:00.0/numa_node
Your kernel may have been built without NUMA support.
2026-01-05 15:17:02.203460: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:08:00.0/numa_node
Your kernel may have been built without NUMA support.
2026-01-05 15:17:02.205745: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:08:00.0/numa_node
Your kernel may have been built without NUMA support.
2026-01-05 15:17:02.205809: I external/local_xla/xla/stream_executor

Target Layer gefunden: efficientnetb0 (Shape: (None, 7, 7, 1280))


2026-01-05 15:17:12.751979: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907
Expected: ['input_layer']
Received: inputs=Tensor(shape=(1, 224, 224, 3))
  jet = cm.get_cmap("jet")


Lade Model_B...
Target Layer gefunden: efficientnetb0 (Shape: (None, 7, 7, 1280))
Lade Model_C...


  saveable.load_own_variables(weights_store.get(inner_path))


Target Layer gefunden: efficientnetb0 (Shape: (None, 7, 7, 1280))
Lade Model_D...
Target Layer gefunden: efficientnetb0 (Shape: (None, 7, 7, 1280))

FERTIG!
Bilder in: ./blind_study
Dateien:
1. blind_rating_KEY_FINAL.csv (LÖSUNG)
2. BEWERTUNGSBOGEN_FINAL.csv (ARBEITSDATEI)
