In [1]:
import random
import tensorflow as tf
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.preprocessing import image
import json 
import pandas 

In [2]:
# Data path
BASE_DIR = Path("/Users/ma/Library/CloudStorage/GoogleDrive-mdhornstein@gmail.com/My Drive/cv_paleontology/")
FID_DATA_PATH = BASE_DIR / "data" / "reduced-FID"

# Model path
MODEL_PATH = Path("../models/fid_classification/tf1_from_community_allow_train/1")

In [3]:
print("Loading model...")
model = tf.saved_model.load(str(MODEL_PATH))   # <-- TF1-style load
print("Model loaded!")

Loading model...
Model loaded!


In [4]:
infer = model.signatures["serving_default"]
print("Signature keys:", list(model.signatures.keys()))
print("Inputs:", infer.structured_input_signature)
print("Outputs:", infer.structured_outputs)

Signature keys: ['serving_default']
Inputs: ((), {'input': TensorSpec(shape=(None, 299, 299, 3), dtype=tf.float32, name='input')})
Outputs: {'logits': <tf.Tensor 'InceptionResnetV2/Logits/Logits/BiasAdd:0' shape=(None, 50) dtype=float32>, 'predictions': <tf.Tensor 'predictions:0' shape=(None, 50) dtype=float32>}


In [5]:
# Test that the model is loaded by applying it to a dummy image 

# Create a dummy image (batch size 1, 299x299 RGB)
dummy = np.random.rand(1, 299, 299, 3).astype(np.float32)

# Run inference
outputs = infer(input=tf.convert_to_tensor(dummy))

print("Available outputs:", outputs.keys())
print("Logits shape:", outputs["logits"].shape)
print("Predictions shape:", outputs["predictions"].shape)

# Check top prediction
preds = outputs["predictions"].numpy()
top_class = np.argmax(preds, axis=1)[0]
print("Top predicted class index:", top_class)
print("Prob distribution (first 5 classes):", preds[0, :5])

Available outputs: dict_keys(['logits', 'predictions'])
Logits shape: (1, 50)
Predictions shape: (1, 50)
Top predicted class index: 47
Prob distribution (first 5 classes): [0.00080941 0.00347048 0.00939777 0.02229282 0.0122939 ]


In [6]:
# Get class names from directory structure
class_names = sorted([d.name for d in FID_DATA_PATH.iterdir() if d.is_dir()])
print("Classes:", class_names)

Classes: ['agnatha', 'ammonoid', 'amphibian', 'angiosperm', 'avialae', 'belemnite', 'bivalve', 'blastoid', 'bone_fragment', 'brachiopod', 'bryozoan', 'chelicerate', 'chondrichthyes', 'conodont', 'coral', 'crinoid', 'crocodylomorph', 'crustacean', 'echinoid', 'foraminifer', 'gastropod', 'graptolite', 'gymnosperm', 'insect', 'mammal', 'mammal_teeth', 'marine_reptile', 'myriapod', 'nautiloid', 'ophiuroid', 'ornithischian', 'osteichthyes', 'ostracod', 'petrified_wood', 'placoderms', 'pteridophyte', 'pterosaurs', 'radiolarian', 'reptile_teeth', 'sauropodomorph', 'shark_teeth', 'snake', 'sponge', 'spore_or_pollen', 'starfish', 'stromatolite', 'theropod', 'trace_fossil', 'trilobite', 'turtle']


In [7]:
# --- Preprocessing ---
IMG_SIZE = 299
def preprocess_image_keras_normalization(path):
    img = tf.io.read_file(str(path))                # streamed directly from Drive
    img = tf.image.decode_jpeg(img, channels=3)
    img_resized = tf.image.resize(img, (IMG_SIZE, IMG_SIZE))
    img_norm = tf.keras.applications.inception_resnet_v2.preprocess_input(img_resized)
    return img, img_norm

In [8]:
def predict(img_norm):
    batch = tf.expand_dims(img_norm, axis=0)
    output = infer(input=batch)
    return output["predictions"].numpy()[0]

In [14]:
# Collect predictions for each clade
all_predictions = []

for clade in class_names: 
    clade_folder = FID_DATA_PATH / clade 
    imgs = [p for p in clade_folder.iterdir() if p.suffix.lower() in [".jpg", ".jpeg", ".png"]]
    if not imgs:
        continue
    print(f"# images: {len(imgs)}")

    # Sample 3 random images 
    sample_size = min(3, len(imgs))
    rng = random.Random(42)
    sample_imgs = rng.sample(imgs, sample_size)

    for img_path in sample_imgs:
        _, img_norm = preprocess_image_keras_normalization(img_path)
        probs = predict(img_norm)
        pred_idx = np.argmax(probs)
        pred_conf = probs[pred_idx]

        prediction_result = {
            'true_clade': clade, 
            'pred_idx': pred_idx,
            'confidence': pred_conf,
            'img_name': img_path.name 
        }
        print('prediction result: ', prediction_result)
        all_predictions.append(prediction_result)

        




# images: 1206
prediction result:  {'true_clade': 'agnatha', 'pred_idx': 0, 'confidence': 0.99999964, 'img_name': '102.tully_monster.jpg'}
prediction result:  {'true_clade': 'agnatha', 'pred_idx': 0, 'confidence': 0.50946766, 'img_name': '15.s-l300.jpg'}
prediction result:  {'true_clade': 'agnatha', 'pred_idx': 0, 'confidence': 0.9999995, 'img_name': '╬▐Єв╙у_152.i10-30-fishes_3.jpg'}
# images: 1199
prediction result:  {'true_clade': 'ammonoid', 'pred_idx': 1, 'confidence': 0.99619883, 'img_name': '51Twx39QsVL.jpg'}
prediction result:  {'true_clade': 'ammonoid', 'pred_idx': 1, 'confidence': 0.99593544, 'img_name': '5mgAAOSwn9VaUsPKs-l1600.jpg'}
prediction result:  {'true_clade': 'ammonoid', 'pred_idx': 1, 'confidence': 0.9997197, 'img_name': '45kAAOSwkvFaV~f6s-l1600.jpg'}
# images: 1199
prediction result:  {'true_clade': 'amphibian', 'pred_idx': 39, 'confidence': 0.43233594, 'img_name': '657e16d441b61f_Nr_2D.JPG.3663f72c4acc.jpg'}
prediction result:  {'true_clade': 'amphibian', 'pred_id



prediction result:  {'true_clade': 'avialae', 'pred_idx': 5, 'confidence': 0.9992982, 'img_name': '94.jeholornis_prima.png'}
prediction result:  {'true_clade': 'avialae', 'pred_idx': 5, 'confidence': 0.99994206, 'img_name': 'W7A4696-studio-studio.jpg'}
prediction result:  {'true_clade': 'avialae', 'pred_idx': 5, 'confidence': 0.99486536, 'img_name': '366.s-l300.jpg'}
# images: 1199
prediction result:  {'true_clade': 'belemnite', 'pred_idx': 7, 'confidence': 0.64565283, 'img_name': 'BsQAAOSwEC9dxfYXs-l1600.jpg'}
prediction result:  {'true_clade': 'belemnite', 'pred_idx': 6, 'confidence': 0.9999727, 'img_name': 'belemnitella-mucronata-33.jpg'}
prediction result:  {'true_clade': 'belemnite', 'pred_idx': 6, 'confidence': 0.99939895, 'img_name': 'belemnite_121.l_p1010057_1.jpg'}
# images: 1200
prediction result:  {'true_clade': 'bivalve', 'pred_idx': 7, 'confidence': 0.9986565, 'img_name': '_16C7620.JPG.68e177eb7416d424ae2c4d7acd83bcad.jpg'}
prediction result:  {'true_clade': 'bivalve', 'pr

In [None]:
# Define paths for saving to JSON file 

RESULTS_DIR = Path("..") / "results"
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

SAVE_PATH = RESULTS_DIR / "index_to_clade_prediction_results.json"

In [None]:
# Save to JSON file 
def to_python(obj):
    if isinstance(obj, np.generic):  # catches np.int64, np.float32, etc.
        return obj.item()
    raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")

with SAVE_PATH.open("w") as f:
    json.dump(all_predictions, f, indent=2, default=to_python)

In [9]:
# Load JSON back into Python
with SAVE_PATH.open("r") as f:
    loaded_predictions = json.load(f)

# Now `loaded_predictions` is a dict/list structure
print(type(loaded_predictions))
print(list(loaded_predictions.items())[:3])  # peek at first few entries if it's a dict

NameError: name 'SAVE_PATH' is not defined

In [None]:
df_preds = pd.DataFrame(all_predictions)

vote_counts = df_preds.groupby(['true_clade', 'pred_idx']).size().reset_index(name='votes')

idx_max = vote_counts.groupby(['true_clade', ])