In [6]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

In [8]:

def fetch_observations(species_name, per_page=30, page=1, timeout=30):
    """
    Fetch one page of photo-bearing observations for a species from iNaturalist.

    Sends a GET request to the ``/v1/observations`` endpoint, passing the
    species name both as free-text query (``q``) and as ``taxon_name``, and
    restricting results to the ``Actinopterygii`` iconic taxon.

    Args:
        species_name: Species name to search for.
        per_page: Number of observations requested per page.
        page: Page number to request.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook kernel.

    Returns:
        The decoded JSON body on HTTP 200. ``None`` (after printing an
        ``Error: ...`` line) when the request fails, the status is not 200,
        or the body is not valid JSON.
    """
    url = "https://api.inaturalist.org/v1/observations"
    params = {
        "q": species_name,
        "per_page": per_page,
        "page": page,
        # requests serialises a Python bool as "True"; send the lowercase
        # literal instead. NOTE(review): presumably the API only recognises
        # the lowercase form - confirm against the iNaturalist API docs.
        "photos": "true",
        "taxon_name": species_name,
        "iconic_taxa": "Actinopterygii"  # Ray-finned fishes
    }
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Connection problems and timeouts follow the same "print and return
        # None" contract as a non-200 status.
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # A 200 response with a malformed body is reported like any other failure.
        print(f"Error: invalid JSON in response ({exc})")
        return None

def download_image(url, save_path, timeout=30):
    """
    Download an image from a URL and save it to the specified path.

    The body is streamed in 1024-byte chunks into ``<save_path>.part`` and
    only moved to ``save_path`` once the transfer has finished, so an
    interrupted download never leaves a truncated image at ``save_path``.

    Args:
        url: Address of the image to fetch.
        save_path: Destination file path; its directory must already exist.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``True`` when the image was written to ``save_path``; ``False`` when
        the request failed or the status was not 200 (a ``Failed to download
        image`` line is printed in that case).
    """
    partial_path = f"{save_path}.part"
    try:
        # The context manager closes the streamed response, releasing the
        # connection even when the body is not read to the end.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download image: {url}")
                return False
            with open(partial_path, 'wb') as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
    except requests.RequestException as exc:
        print(f"Failed to download image: {url} ({exc})")
        # Discard whatever was written before the transfer broke.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        return False

    os.replace(partial_path, save_path)
    return True
        
def download_fish_images(species_name, download_dir, max_images=10):
    """
    Download images of a specific fish species from iNaturalist.

    Walks the observation pages returned by ``fetch_observations`` one after
    another and saves each photo as ``<species_name>_<photo id>.<extension>``
    inside ``download_dir`` until ``max_images`` images have been saved or no
    further observations are available.

    Args:
        species_name: Species name passed to ``fetch_observations``; also
            used as the file-name prefix.
        download_dir: Directory to save into; created when missing.
        max_images: Upper bound on the number of images to save.

    Returns:
        None. Progress is reported with ``Downloaded: ...`` lines, and
        ``No observations found.`` is printed when the first page cannot be
        fetched or is empty of data.
    """
    os.makedirs(download_dir, exist_ok=True)

    count = 0          # images saved so far
    seen = 0           # observations processed so far, across all pages
    page = 1
    while count < max_images:
        observations = fetch_observations(species_name, page=page)
        if not observations:
            if page == 1:
                print("No observations found.")
            return

        results = observations.get('results', [])
        if not results:
            return

        for result in results:
            for photo in result.get('photos', []):
                if count >= max_images:
                    return
                image_url = photo.get('url')
                if not image_url:
                    continue
                # Construct the URL for the original-sized image
                original_url = image_url.replace("square", "original")
                image_id = photo.get('id')
                # Take the extension from the URL path only, so a query
                # string (e.g. "?1545345") never ends up in the file name.
                extension = os.path.splitext(urlparse(original_url).path)[1].lstrip('.')
                if not extension:
                    # NOTE(review): fallback for extension-less URLs; "jpg" is an assumption.
                    extension = "jpg"
                save_path = os.path.join(download_dir, f"{species_name}_{image_id}.{extension}")
                # Only an explicit False counts as a failure, so a
                # download_image that returns nothing is still treated as success.
                if download_image(original_url, save_path) is False:
                    continue
                print(f"Downloaded: {save_path}")
                count += 1

        seen += len(results)
        # Stop once every reported observation has been processed.
        # NOTE(review): assumes the response carries a 'total_results'
        # count; when it is absent, paging ends on the first empty or failed page.
        total = observations.get('total_results')
        if total is not None and seen >= total:
            return
        page += 1
                


In [17]:
species = "Aioliops megastigma"  # Replace with your target species name
# Build the destination under the current user's home directory instead of a
# machine-specific absolute path, so the cell runs for any user account.
download_directory = os.path.join(
    os.path.expanduser("~"),
    "Documents", "Projects", "cryptovision", "data", "raw", species,
)  # Replace with your desired local path
max_images_to_download = 1000  # Set the maximum number of images to download
download_fish_images(species, download_directory, max_images_to_download)

Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Aioliops megastigma/Aioliops megastigma_378303673.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Aioliops megastigma/Aioliops megastigma_375067369.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Aioliops megastigma/Aioliops megastigma_374002336.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Aioliops megastigma/Aioliops megastigma_369743489.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Aioliops megastigma/Aioliops megastigma_369571026.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Aioliops megastigma/Aioliops megastigma_367127662.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Aioliops megastigma/Aioliops megastigma_366405313.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Aioliops megastigma/Aioliops megastigma_366364738.jpeg
Download