In [1]:
import os
import requests
import pandas as pd
from cryptovision.tools import image_directory_to_pandas

In [2]:
def fetch_inaturalist_observations(taxon_name, rank, per_page=30, page=1, timeout=30):
    """
    Fetch one page of observations for a taxon from the iNaturalist API.

    Args:
        taxon_name (str): Name of the taxon (family, genus, or species).
        rank (str): The rank of the taxon ('family', 'genus', 'species').
        per_page (int): Number of observations to fetch per page.
        page (int): Page number to fetch.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        dict | None: Parsed JSON response containing the observations, or
        None when the request fails, times out, returns a non-200 status,
        or the body is not valid JSON. Failures are reported with print().
    """
    url = "https://api.inaturalist.org/v1/observations"
    params = {
        "taxon_name": taxon_name,
        "rank": rank,
        "per_page": per_page,
        "page": page,
        # requests serialises Python True as "True"; send the lowercase form.
        # NOTE(review): the API reference documents booleans as true/false --
        # confirm whether "True" was silently ignored before this change.
        "photos": "true",
    }

    # Without a timeout a stalled connection would block the notebook forever.
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching data from iNaturalist: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching data from iNaturalist: {response.status_code}")
        return None

    # A 200 response with a broken body should not crash the calling loop.
    try:
        return response.json()
    except ValueError as exc:
        print(f"Error fetching data from iNaturalist: {exc}")
        return None

def download_image(url, save_path, timeout=30):
    """
    Download an image from a URL and save it to the specified path.

    The body is streamed into "<save_path>.part" and renamed onto save_path
    only after the whole download succeeded, so an interrupted transfer never
    leaves a truncated image behind or overwrites an existing good file.

    Args:
        url (str): URL of the image to download.
        save_path (str): Local path to save the image.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        bool: True when the image was written to save_path, False otherwise.
        Failures are reported with print() and never raised.
    """
    partial_path = f"{save_path}.part"
    try:
        # The context manager releases the streamed connection on every path,
        # including the non-200 branch where the body is never consumed.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download image from {url} (Status code: {response.status_code})")
                return False
            with open(partial_path, 'wb') as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
        os.replace(partial_path, save_path)
    except Exception as e:
        # Deliberately broad: one bad image must not abort a batch download.
        print(f"Error downloading image: {e}")
        try:
            if os.path.exists(partial_path):
                os.remove(partial_path)
        except OSError:
            # Best-effort cleanup; the original error was already reported.
            pass
        return False

    print(f"Image downloaded: {save_path}")
    return True

def download_taxon_images(taxon_name, rank, download_dir, max_images=10):
    """
    Download images of a specific taxon (family, genus, or species) from iNaturalist.

    Observations are fetched page by page until max_images files have been
    saved or the API returns no more results. Files are named
    "<taxon_name>_<photo id><extension>" inside download_dir.

    Args:
        taxon_name (str): Name of the taxon.
        rank (str): Rank of the taxon ('family', 'genus', 'species').
        download_dir (str): Directory to save the downloaded images
            (created if missing).
        max_images (int): Maximum number of images to download.
    """
    os.makedirs(download_dir, exist_ok=True)

    page = 1
    downloaded_count = 0
    while downloaded_count < max_images:
        observations = fetch_inaturalist_observations(taxon_name, rank, page=page)
        if not observations or not observations.get('results'):
            print("No more observations found.")
            break

        for result in observations['results']:
            for photo in result.get('photos') or []:
                # Checked per photo: a single observation can carry several
                # photos and must not push the total past max_images.
                if downloaded_count >= max_images:
                    break

                image_url = photo.get('url')
                if not image_url:
                    continue

                # Construct the URL for the original-sized image
                original_url = image_url.replace("square", "original")
                image_id = photo.get('id')
                # Drop any "?query" suffix so it cannot leak into the file name.
                extension = os.path.splitext(original_url.split('?', 1)[0])[1]
                save_path = os.path.join(download_dir, f"{taxon_name}_{image_id}{extension}")

                download_image(original_url, save_path)
                # download_image reports failures by printing rather than
                # raising, so count only files that are actually on disk.
                if os.path.isfile(save_path):
                    downloaded_count += 1

            if downloaded_count >= max_images:
                break

        page += 1

    print(f"Downloaded {downloaded_count} images for taxon '{taxon_name}' with rank '{rank}'.")
    


In [8]:
# Index every image source directory into its own frame.
# NOTE(review): image_directory_to_pandas is a project helper; the code below
# only shows that its results are concatenated and later read via the
# 'folder_label' column -- confirm the full schema against the helper.

# Sources stored under the local cloud-storage folder.
df_lab = image_directory_to_pandas(
    "/Users/leonardo/Library/CloudStorage/Box-Box/CryptoVision/Data/fish_functions/Species_v02"
)
df_web = image_directory_to_pandas(
    "/Users/leonardo/Library/CloudStorage/Box-Box/CryptoVision/Data/web/Species_v01"
)
df_inatlist = image_directory_to_pandas(
    "/Users/leonardo/Library/CloudStorage/Box-Box/CryptoVision/Data/inaturalist/Species_v02"
)

# Sources stored on the mounted /Volumes/T7_shield volume.
df_chris = image_directory_to_pandas(
    "/Volumes/T7_shield/CryptoVision/Data/others/hemingson_photos/others_organized/Species"
)
df_chris_lirs = image_directory_to_pandas(
    "/Volumes/T7_shield/CryptoVision/Data/others/hemingson_photos/LIRS23_organized/Species"
)
df_ll = image_directory_to_pandas(
    "/Volumes/T7_shield/CryptoVision/Data/others/jeannot_photos/cv_organized/03_Species"
)

# Stack all sources row-wise and renumber the index from zero.
df = pd.concat(
    [
        df_lab,
        df_web,
        df_inatlist,
        df_chris,
        df_chris_lirs,
        df_ll,
    ],
    axis=0,
    ignore_index=True,
)


In [15]:
# Distinct folder labels, kept in order of first appearance in df.
all_species = df['folder_label'].drop_duplicates().tolist()

# Report how many distinct labels the combined sources contain.
print(f"Amount Unique Species: {len(all_species)}")

Amount Unique Species: 192


In [14]:
# Root folder that receives one sub-folder per species label.
save_dir = '/Volumes/T7_shield/CryptoVision/Data/inaturalist/Species_v03'

for species in all_species:

    # A malformed label must not abort the whole (long-running) batch.
    if not isinstance(species, str):
        print(f"Skipping label {species!r}: not a string.")
        continue

    species_dir = os.path.join(save_dir, species)

    # Check if the folder already exists; if so the species is treated as
    # already processed and is not downloaded again.
    if os.path.exists(species_dir):
        continue

    # The second and third '_'-separated parts of the label form the
    # "<genus> <species>" name that is sent to iNaturalist.
    parts = species.split('_')
    if len(parts) < 3:
        print(f"Skipping label '{species}': expected at least three '_'-separated parts.")
        continue

    download_taxon_images(
        f"{parts[1]} {parts[2]}",
        'species',
        species_dir,
        max_images=200,
    )

Image downloaded: /Volumes/T7_shield/CryptoVision/Data/inaturalist/Species_v03/Tripterygiidae_Helcogramma_gymnauchen/Helcogramma gymnauchen_442107141.jpeg
Image downloaded: /Volumes/T7_shield/CryptoVision/Data/inaturalist/Species_v03/Tripterygiidae_Helcogramma_gymnauchen/Helcogramma gymnauchen_204044755.jpeg
No more observations found.
Downloaded 2 images for taxon 'Helcogramma gymnauchen' with rank 'species'.
Image downloaded: /Volumes/T7_shield/CryptoVision/Data/inaturalist/Species_v03/Gobiidae_Gobiodon_bilineatus/Gobiodon bilineatus_210526905.jpg
No more observations found.
Downloaded 1 images for taxon 'Gobiodon bilineatus' with rank 'species'.
Image downloaded: /Volumes/T7_shield/CryptoVision/Data/inaturalist/Species_v03/Gobiidae_Priolepis_cincta/Priolepis cincta_458450771.jpeg
Image downloaded: /Volumes/T7_shield/CryptoVision/Data/inaturalist/Species_v03/Gobiidae_Priolepis_cincta/Priolepis cincta_456439704.jpeg
Image downloaded: /Volumes/T7_shield/CryptoVision/Data/inaturalist/Sp