In [25]:
import pandas as pd
import numpy as np
import os
from tensorflow import keras
from PIL import Image

# Load test metadata
test_data = pd.read_csv('/Users/benerugg/DataScience/deepLearning/zwischenspeicher/deep_learning/data/test_metadata.csv')
print(f"Loaded {len(test_data)} test samples")

# Base paths
base_path = '/Users/benerugg/DataScience/deepLearning/zwischenspeicher/deep_learning/data/rare_species 1'
models_base_path = '/Users/benerugg/DataScience/deepLearning/zwischenspeicher/deep_learning/models'

# Count correct predictions
correct = 0
total = 0

# Function to preprocess image
def preprocess_image(image_path):
    img = Image.open(image_path)
    img = img.resize((224, 224))
    img_array = np.array(img)
    
    # Handle grayscale or RGBA images
    if len(img_array.shape) == 2:
        img_array = np.stack((img_array,) * 3, axis=-1)
    if len(img_array.shape) > 2 and img_array.shape[2] == 4:
        img_array = img_array[:, :, :3]
    
    return img_array 

# Process each image
for _, row in test_data.iterrows():
    phylum = row['phylum']
    family = row['family']
    
    # Skip if no model for this phylum
    model_path = os.path.join(models_base_path, f'best_model_{phylum.lower()}.keras')
    if not os.path.exists(model_path):
        continue
    
    # Load model if not already loaded
    if not 'current_phylum' in locals() or current_phylum != phylum:
        # Clean up previous model if exists
        if 'model' in locals():
            del model
            import gc
            gc.collect()
        
        model = keras.models.load_model(model_path)
        current_phylum = phylum
        
        # Get families for this phylum
        families = sorted(test_data[test_data['phylum'] == phylum]['family'].unique())
    
    # Process image
    image_path = os.path.join(base_path, row['file_path'])
    if not os.path.exists(image_path):
        continue
    
    try:
        # Preprocess and predict
        img_array = preprocess_image(image_path)
        img_batch = np.expand_dims(img_array, axis=0)
        prediction = model.predict(img_batch, verbose=0)
        predicted_idx = np.argmax(prediction[0])
        
        # Check if prediction is correct
        if predicted_idx < len(families):
            predicted_family = families[predicted_idx]
            if predicted_family == family:
                correct += 1
            total += 1
    except Exception:
        continue

# Calculate and print overall accuracy
accuracy = correct / total if total > 0 else 0
print(f"Overall accuracy: {accuracy:.4f} ({correct}/{total})")

Loaded 1798 test samples
Overall accuracy: 0.7116 (1187/1668)
