<a href="https://colab.research.google.com/github/mdhornstein/fossil-embeddings/blob/main/Fossil_Embeddings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
from IPython.display import display, Image, clear_output
import ipywidgets as widgets

In [None]:
# Install wget and unzip if they aren't already available
!apt-get install wget unzip

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
unzip is already the newest version (6.0-26ubuntu3.2).
wget is already the newest version (1.21.2-2ubuntu1.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [None]:
# Download the reduced-FID dataset (approx. 1.8 GB)
!wget https://zenodo.org/records/6333970/files/reduced-FID.zip

--2025-08-23 19:26:53--  https://zenodo.org/records/6333970/files/reduced-FID.zip
Resolving zenodo.org (zenodo.org)... 188.185.45.92, 188.185.43.25, 188.185.48.194, ...
Connecting to zenodo.org (zenodo.org)|188.185.45.92|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1819654497 (1.7G) [application/octet-stream]
Saving to: ‘reduced-FID.zip’


2025-08-23 19:27:58 (27.1 MB/s) - ‘reduced-FID.zip’ saved [1819654497/1819654497]



In [None]:
!unzip reduced-FID.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: reduced-FID/stromatolite/stromatolite-107.jpg  
  inflating: reduced-FID/stromatolite/stromatolite-109.jpg  
  inflating: reduced-FID/stromatolite/stromatolite-114562-400-300.jpg  
  inflating: reduced-FID/stromatolite/stromatolite-119.jpg  
  inflating: reduced-FID/stromatolite/stromatolite-120.jpg  
  inflating: reduced-FID/stromatolite/stromatolite-131.jpg  
  inflating: reduced-FID/stromatolite/stromatolite-139.jpg  
  inflating: reduced-FID/stromatolite/stromatolite-14.jpg  
  inflating: reduced-FID/stromatolite/stromatolite-144.jpg  
  inflating: reduced-FID/stromatolite/stromatolite-145.jpg  
  inflating: reduced-FID/stromatolite/stromatolite-147.jpg  
  inflating: reduced-FID/stromatolite/stromatolite-152.jpg  
  inflating: reduced-FID/stromatolite/stromatolite-155.jpg  
  inflating: reduced-FID/stromatolite/stromatolite-157.jpg  
  inflating: reduced-FID/stromatolite/stromatolite-16.jpg  
  inflating

In [None]:
# List the working directory: both the downloaded archive (reduced-FID.zip)
# and the extracted 'reduced-FID' folder should appear.
!ls

reduced-FID  reduced-FID.zip  sample_data


In [None]:
# Verify the folder structure. You should see a folder named 'reduced-FID'.
# The images are organized into lowercase subfolders, one per clade
# (e.g., reduced-FID/agnatha, reduced-FID/amphibian).
!ls -l reduced-FID

total 4020
drwxr-xr-x 2 root root  94208 Jan  4  2022  agnatha
drwxr-xr-x 2 root root  81920 Jan  4  2022  ammonoid
drwxr-xr-x 2 root root  81920 Jan  4  2022  amphibian
drwxr-xr-x 2 root root  69632 Jan  4  2022  angiosperm
drwxr-xr-x 2 root root  81920 Jan  4  2022  avialae
drwxr-xr-x 2 root root  94208 Jan  4  2022  belemnite
drwxr-xr-x 2 root root  69632 Jan  4  2022  bivalve
drwxr-xr-x 2 root root  86016 Jan  4  2022  blastoid
drwxr-xr-x 2 root root  69632 Jan  4  2022  bone_fragment
drwxr-xr-x 2 root root  69632 Jan  4  2022  brachiopod
drwxr-xr-x 2 root root  73728 Jan  4  2022  bryozoan
drwxr-xr-x 2 root root  77824 Jan  4  2022  chelicerate
drwxr-xr-x 2 root root  73728 Jan  4  2022  chondrichthyes
drwxr-xr-x 2 root root  86016 Jan  4  2022  conodont
drwxr-xr-x 2 root root  69632 Jan  4  2022  coral
drwxr-xr-x 2 root root  81920 Jan  4  2022  crinoid
drwxr-xr-x 2 root root  86016 Jan  4  2022  crocodylomorph
drwxr-xr-x 2 root root  73728 Jan  4  2022  crustacean
drwxr-xr-x 2 r

In [None]:
# Peek at the first 10 file names in one clade folder to see what the image
# files are called.
!ls reduced-FID/agnatha | head -10

045.jpg
061025.lamprey2-200.jpg
0D4AAOSwnBRgNsn~s-l1600.jpg
100.tumblr_msal0qg4oo1sh1ns2o2_500 (2).jpg
102.pict0155.jpg
102.tully_monster.jpg
102.tumblr_mn5i3ng5ss1spmwbxo1_400.jpg
103.48320371757_f14a613401_m.jpg
106.img_20200811_170136.jpg.ffaacadf2fa3b4a173b996acbbd58163.jpg
107.acd494bf0b97b6706cf53918be87cb2e.jpg


In [None]:
# --- Configuration ---
# Path to the extracted dataset, relative to the notebook's current working
# directory (the directory the archive was unzipped into).
DATASET_ROOT = './reduced-FID'

In [None]:
# --- 1. Get list of fossil clades ---
def get_fossil_clades(root_dir):
    """
    Return the sorted names of the immediate subfolders of ``root_dir``.

    Each subfolder is treated as one fossil clade.

    Args:
        root_dir: Path to the dataset root directory.

    Returns:
        Alphabetically sorted list of subfolder names. An empty list is
        returned (after printing an error message) when ``root_dir`` does not
        exist or is not a directory.
    """
    # isdir rather than exists: a path that points at a regular file (for
    # example the .zip archive itself) passes an existence check and would then
    # make the directory listing raise NotADirectoryError.
    if not os.path.isdir(root_dir):
        print(f"Error: Dataset root directory not found at {root_dir}")
        return []

    # Keep directories only; stray files next to the clade folders are ignored.
    with os.scandir(root_dir) as entries:
        return sorted(entry.name for entry in entries if entry.is_dir())

In [None]:
# --- 2. Get images for a selected clade ---
def get_clade_images(clade_name):
    """
    Return the sorted JPEG file names inside one fossil clade subfolder.

    The result holds bare file names, not full paths; join a name with
    ``DATASET_ROOT`` and ``clade_name`` to open the file.

    Args:
        clade_name: Name of a subfolder of ``DATASET_ROOT``.

    Returns:
        Alphabetically sorted list of file names ending in ``.jpg`` or
        ``.jpeg`` (case-insensitive). An empty list is returned (after
        printing an error message) when the clade folder does not exist or is
        not a directory.
    """
    clade_path = os.path.join(DATASET_ROOT, clade_name)
    # isdir rather than exists, so a regular file with the clade's name is
    # reported as missing instead of making the listing below raise.
    if not os.path.isdir(clade_path):
        print(f"Error: Clade path not found for {clade_name} at {clade_path}")
        return []

    # Regular files only: a nested folder whose name happens to end in .jpg
    # is not an image and must not be offered in the image selector.
    with os.scandir(clade_path) as entries:
        return sorted(
            entry.name
            for entry in entries
            if entry.is_file() and entry.name.lower().endswith(('.jpg', '.jpeg'))
        )

In [None]:
# --- Initialize data ---
# Discover the clade subfolders once and keep the resulting list.
fossil_clades = get_fossil_clades(DATASET_ROOT)

# Report how many clades were discovered, or how to fix an empty result.
if fossil_clades:
    print(f"Found {len(fossil_clades)} fossil clades.")
else:
    print(f"No fossil clades found in {DATASET_ROOT}. Please ensure the 'reduced-FID' folder is directly uploaded/present in your Colab environment.")

Found 50 fossil clades.


In [None]:
# --- Widgets ---

# Dropdown for selecting a fossil clade
clade_selector = widgets.Dropdown(
    options=fossil_clades,
    description='Fossil Clade:',
    disabled=False,
    style={'description_width': 'initial'}
)

# Dropdown for selecting an image within the chosen clade
image_selector = widgets.Dropdown(
    options=[], # Populated by on_clade_change
    description='Image File:',
    disabled=False,
    style={'description_width': 'initial'}
)

# Output widget to display the image
image_output = widgets.Output()

# --- Callbacks ---

def show_image(clade_name, image_name):
    """
    Renders one image file into image_output, replacing whatever was shown.

    Args:
        clade_name: Clade subfolder (under DATASET_ROOT) that holds the image.
        image_name: File name of the image inside that subfolder.
    """
    image_path = os.path.join(DATASET_ROOT, clade_name, image_name)
    with image_output:
        clear_output(wait=True)
        try:
            display(Image(filename=image_path, width=400)) # Adjust width as needed
        except Exception as e:
            # Best effort: report the problem in the output area instead of
            # raising inside a widget callback.
            print(f"Could not display image: {e}")
            print(f"Image path: {image_path}")

def on_clade_change(change):
    """
    Updates the image_selector options when a new clade is chosen and shows
    the first image of that clade.

    Args:
        change: Change dictionary; only change['new'] (the clade name) is read.
    """
    selected_clade = change['new']
    if not selected_clade:
        return

    images_in_clade = get_clade_images(selected_clade)
    image_selector.options = images_in_clade
    if images_in_clade:
        # Select the first image of the new clade
        image_selector.value = images_in_clade[0]
        # Render explicitly rather than relying on on_image_change: observers
        # are not notified when the new value equals the previous one, which
        # happens when two clades share a first file name and would leave the
        # previous clade's image on screen.
        show_image(selected_clade, images_in_clade[0])
    else:
        image_selector.value = None
        with image_output:
            clear_output(wait=True)
            print("No JPG images found in this clade.")

def on_image_change(change):
    """
    Displays the selected image.

    Args:
        change: Change dictionary; only change['new'] (the file name) is read.
    """
    selected_clade = clade_selector.value
    selected_image = change['new']
    if selected_clade and selected_image:
        show_image(selected_clade, selected_image)

# --- Link widgets and run initial update ---

clade_selector.observe(on_clade_change, names='value')
image_selector.observe(on_image_change, names='value')

# Display the widgets
print("\n--- Image Selector Widgets ---")
display(clade_selector, image_selector, image_output)

# Populate the image list and show the first image. The callback is invoked
# directly because the clade dropdown already selected its first option when
# it was constructed; assigning that same value again would not notify the
# observer, leaving the image dropdown empty.
if clade_selector.value:
    on_clade_change({'new': clade_selector.value})



--- Image Selector Widgets ---


Dropdown(description='Fossil Clade:', options=('agnatha', 'ammonoid', 'amphibian', 'angiosperm', 'avialae', 'b…

Dropdown(description='Image File:', options=(), style=DescriptionStyle(description_width='initial'), value=Non…

Output()