# Reproducibility Experiment - Carl et al. (2020)

## Imports

In [1]:
from pathlib import Path
from PIL import Image
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.applications.inception_resnet_v2 import decode_predictions
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

2025-09-01 19:40:53.104867: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-09-01 19:40:53.166817: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-09-01 19:40:53.218281: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1756748453.263835  212988 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1756748453.276603  212988 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1756748453.377035  212988 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

## Setup
### Global variables

In [2]:
DATA_PATH = Path("data/kaggle-90-different-animals")

### Load the model

In [3]:
model = InceptionResNetV2(weights="imagenet")
target_size = model.input_shape[1:3] # the required image dimensions (299, 299)

2025-09-01 19:40:57.242116: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


### Load the images into a test data set
For the experiment, 90 common animals are chosen. They are sourced from Google Images and provided in a labeled format in a public [Kaggle dataset](https://www.kaggle.com/datasets/iamsouravbanerjee/animal-image-dataset-90-different-animals).

In [4]:
def load_normalized_image(path, target_size=target_size):
    image = Image.open(path).convert("RGB")
    image = image.resize(target_size)
    return np.array(image) / 255.0 # 1d array with floats from 0 to 1 as input for neural network

In [15]:
wildlife_images = []
labels = []

animal_species = sorted([d.name for d in DATA_PATH.iterdir() if d.is_dir()])

for species_name in animal_species:
    animal_image_folder = DATA_PATH / species_name # every species has its image folder
    for image_path in animal_image_folder.glob("*.jpg"):
        image_array = load_normalized_image(image_path)
        wildlife_images.append(image_array)
        labels.append(species_name)

X_test = np.stack(wildlife_images, axis=0) # there is no training phase, so all samples are test samples
y_true = labels

## Test

In [16]:
y_pred = model.predict(X_test)
y_pred = [pred[0][1] for pred in decode_predictions(y_pred, top=1)] # only take the top prediction label

2025-09-01 19:49:26.471946: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 965530800 exceeds 10% of free system memory.


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 4s/step


In [21]:
species_recognition_result = pd.DataFrame({
    "y_true": y_true,
    "y_pred": y_pred
})

Unnamed: 0,y_true,y_pred
0,antelope,gazelle
1,antelope,impala
2,antelope,impala
3,antelope,gazelle
4,antelope,gazelle
...,...,...
895,zebra,zebra
896,zebra,zebra
897,zebra,zebra
898,zebra,zebra


### Map the inferred labels

In [50]:
imagenet_to_kaggle = {
    "gazelle": "antelope",
    "impala": "antelope",
    "American_black_bear": "bear",
    "brown_bear": "bear",
    "ground_beetle": "beetle",
    "leaf_beetle": "beetle",
    "rhinoceros_beetle": "beetle",
    "dung_beetle": "beetle",
    "wild_boar": "boar",
    "ringlet": "butterfly",
    "monarch": "butterfly",
    "sulphur_butterfly": "butterfly",
    "lycaenid": "butterfly",
    "Egyptian_cat": "cat",
    "tabby": "cat",
    "Siamese_cat": "cat",
    "Persian_cat": "cat",
    "lynx": "cat",
    "ox": "cow",
    "water_buffalo": "cow",
    "Dungeness_crab": "crab",
    "red_deer": "deer",
    "elk": "deer",
    "Labrador_retriever": "dog",
    "Border_collie": "dog",
    "Chihuahua": "dog",
    "Bouvier_des_Flandres": "dog",
    "Brittany_spaniel": "dog",
    "English_setter": "dog",
    "Greater_Swiss_Mountain_dog": "dog",
    "Ibizan_hound": "dog",
    "Mexican_hairless": "dog",
    "Pekinese": "dog",
    "Pomeranian": "dog", 
    "golden_retriever": "dog",
    "pug": "dog",
    "ass": "donkey",
    "mallard": "duck",
    "bald_eagle": "eagle",
    "golden_eagle": "eagle",
    "African_elephant": "elephant",
    "Indian_elephant": "elephant",
    "Arctic_fox": "fox",
    "red_fox": "fox",
    "ibex": "goat",
    "mountain_goat": "goat",
    "Arabian_horse": "horse",
    "Appaloosa": "horse",
    "wallaby": "kangaroo",
    "agama": "lizard",
    "alligator_lizard": "lizard",
    "Komodo_dragon": "lizard",
    "American_lobster": "lobster",
    "house_mouse": "mouse",
}

species_recognition_result["y_pred_mapped"] = species_recognition_result["y_pred"].map(
    lambda l: imagenet_to_kaggle.get(l, l)
)

In [38]:
species_recognition_result

Unnamed: 0,y_true,y_pred,y_pred_mapped
0,antelope,gazelle,antelope
1,antelope,impala,antelope
2,antelope,impala,antelope
3,antelope,gazelle,antelope
4,antelope,gazelle,antelope
...,...,...,...
895,zebra,zebra,zebra
896,zebra,zebra,zebra
897,zebra,zebra,zebra
898,zebra,zebra,zebra


## Evaluate

In [51]:
accuracy_score(species_recognition_result["y_true"], species_recognition_result["y_pred_mapped"])

0.45111111111111113

In [52]:
species_recognition_result.assign(correct = species_recognition_result["y_true"] == species_recognition_result["y_pred_mapped"]).groupby("y_true")["correct"].mean().sort_values(ascending=False)

y_true
bison         1.0
bear          1.0
boar          1.0
eagle         1.0
cockroach     1.0
             ... 
turkey        0.0
whale         0.0
turtle        0.0
wolf          0.0
woodpecker    0.0
Name: correct, Length: 90, dtype: float64

In [53]:
species_recognition_result[species_recognition_result["y_pred_mapped"] != species_recognition_result["y_true"]]

Unnamed: 0,y_true,y_pred,y_pred_mapped
6,antelope,ibex,goat
14,badger,American_black_bear,bear
20,bat,hummingbird,hummingbird
21,bat,wood_rabbit,wood_rabbit
22,bat,hook,hook
...,...,...,...
885,woodpecker,hornbill,hornbill
886,woodpecker,bittern,bittern
887,woodpecker,bittern,bittern
888,woodpecker,hornbill,hornbill


In [54]:
species_recognition_result[species_recognition_result["y_pred_mapped"] != species_recognition_result["y_true"]].to_csv("species_recognition_result2.csv", index=False)
