In [1]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

In [8]:

def fetch_observations(species_name, per_page=30, page=1, timeout=30):
    """
    Fetch one page of photo-bearing observations for a species from iNaturalist.

    The species name is sent both as the free-text query (``q``) and as
    ``taxon_name``, and the search is restricted to the ``Actinopterygii``
    iconic taxon.

    Args:
        species_name: Name to search for.
        per_page: Number of observations requested per page.
        page: 1-based page number to request.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout unless one is passed, so without this a stalled
            connection would block the notebook cell indefinitely.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None`` (after
        printing an ``Error: ...`` line). Transport failures and undecodable
        bodies are reported the same way instead of raising.
    """
    url = "https://api.inaturalist.org/v1/observations"
    params = {
        "q": species_name,
        "per_page": per_page,
        "page": page,
        # requests would serialise Python's True as "True"; send the
        # JavaScript-style lowercase literal the API uses for booleans.
        "photos": "true",
        "taxon_name": species_name,
        "iconic_taxa": "Actinopterygii",  # Ray-finned fishes
    }
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # A 200 with a non-JSON body is still a failed fetch for callers.
        print(f"Error: {exc}")
        return None

def download_image(url, save_path, timeout=30):
    """
    Download an image from a URL and save it to the specified path.

    Args:
        url: Address of the image to fetch.
        save_path: Local file path the bytes are written to (overwritten if it
            already exists).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``True`` when the file was written completely, ``False`` when the
        server answered with a non-200 status or the transfer failed. A
        ``Failed to download image: ...`` line is printed on failure.
    """
    file_opened = False
    try:
        # The context manager releases the streamed connection even when the
        # body is never read (non-200) or the transfer is interrupted.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download image: {url}")
                return False
            with open(save_path, 'wb') as file:
                file_opened = True
                for chunk in response.iter_content(8192):
                    file.write(chunk)
    except requests.RequestException:
        print(f"Failed to download image: {url}")
        # Only remove what this call created: a truncated image is worse than
        # a missing one, but a failure before open() must not touch the path.
        if file_opened and os.path.exists(save_path):
            os.remove(save_path)
        return False
    return True
        
def download_fish_images(species_name, download_dir, max_images=10):
    """
    Download images of a specific fish species from iNaturalist.

    Requests successive result pages from ``fetch_observations`` until
    ``max_images`` photos have been saved or the results run out, so limits
    larger than a single page can actually be reached.

    Args:
        species_name: Species to search for; also used as the filename prefix.
        download_dir: Directory the images are written to (created if missing).
        max_images: Maximum number of images to save. Nothing is fetched when
            this is zero or negative.

    Returns:
        None. Progress is reported with ``print``.
    """
    os.makedirs(download_dir, exist_ok=True)

    count = 0
    page = 1
    while count < max_images:
        observations = fetch_observations(species_name, page=page)
        if not observations:
            if page == 1:
                print("No observations found.")
            return
        results = observations.get('results', [])
        if not results:
            return

        for result in results:
            for photo in result.get('photos', []):
                if count >= max_images:
                    return
                image_url = photo.get('url')
                if not image_url:
                    continue
                # Construct the URL for the original-sized image
                original_url = image_url.replace("square", "original")
                image_id = photo.get('id')
                # Take the extension from the URL path only, so a trailing
                # "?<query>" never ends up in the filename.
                extension = os.path.splitext(urlparse(original_url).path)[1].lstrip('.') or "jpg"
                save_path = os.path.join(download_dir, f"{species_name}_{image_id}.{extension}")
                # Only an explicit False counts as failure, so a download_image
                # that returns None is still treated as a success.
                if download_image(original_url, save_path) is False:
                    continue
                print(f"Downloaded: {save_path}")
                count += 1

        # Stop once the reported total has been covered, when the response
        # carries that information; otherwise an empty page ends the loop.
        total = observations.get('total_results')
        per_page = observations.get('per_page') or len(results)
        if isinstance(total, int) and isinstance(per_page, int) and page * per_page >= total:
            return
        page += 1
                


In [9]:
# Run configuration for the species-level download; edit the values below.
species = "Priolepis Dawsoni"  # target species name
max_images_to_download = 1000  # upper bound on the number of images saved
download_directory = f"/Users/leonardo/Documents/Projects/cryptovision/data/raw/{species}"  # local output folder

download_fish_images(
    species_name=species,
    download_dir=download_directory,
    max_images=max_images_to_download,
)

Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Priolepis Dawsoni/Priolepis Dawsoni_19504470.jpg


In [15]:
import requests

def fetch_observations(taxon_name, rank, per_page=30, page=1, timeout=30):
    """
    Fetch one page of photo-bearing observations for a taxon from iNaturalist.

    Args:
        taxon_name: Name of the taxon to search for.
        rank: Taxonomic rank sent alongside the name (the notebook uses
            'genus' or 'family').
        per_page: Number of observations requested per page.
        page: 1-based page number to request.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout unless one is passed, so without this a stalled
            connection would block the notebook cell indefinitely.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None`` (after
        printing an ``Error: ...`` line). Transport failures and undecodable
        bodies are reported the same way instead of raising.
    """
    url = "https://api.inaturalist.org/v1/observations"
    params = {
        "taxon_name": taxon_name,
        "rank": rank,
        "per_page": per_page,
        "page": page,
        # requests would serialise Python's True as "True"; send the
        # JavaScript-style lowercase literal the API uses for booleans.
        "photos": "true",
    }
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # A 200 with a non-JSON body is still a failed fetch for callers.
        print(f"Error: {exc}")
        return None
    
import os

def download_image(url, save_path, timeout=30):
    """
    Download an image from a URL and save it to the specified path.

    Args:
        url: Address of the image to fetch.
        save_path: Local file path the bytes are written to (overwritten if it
            already exists).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``True`` when the file was written completely, ``False`` when the
        server answered with a non-200 status or the transfer failed. A
        ``Failed to download image: ...`` line is printed on failure.
    """
    file_opened = False
    try:
        # The context manager releases the streamed connection even when the
        # body is never read (non-200) or the transfer is interrupted.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download image: {url}")
                return False
            with open(save_path, 'wb') as file:
                file_opened = True
                for chunk in response.iter_content(8192):
                    file.write(chunk)
    except requests.RequestException:
        print(f"Failed to download image: {url}")
        # Only remove what this call created: a truncated image is worse than
        # a missing one, but a failure before open() must not touch the path.
        if file_opened and os.path.exists(save_path):
            os.remove(save_path)
        return False
    return True
        
def download_taxon_images(taxon_name, rank, download_dir, max_images=10):
    """
    Download images of a specific taxon (genus or family) from iNaturalist.

    Requests successive result pages from ``fetch_observations`` until
    ``max_images`` photos have been saved or the results run out, so limits
    larger than a single page can actually be reached.

    Args:
        taxon_name: Taxon to search for; also used as the filename prefix.
        rank: Taxonomic rank passed through to ``fetch_observations``.
        download_dir: Directory the images are written to (created if missing).
        max_images: Maximum number of images to save. Nothing is fetched when
            this is zero or negative.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Local import keeps this cell runnable on its own.
    from urllib.parse import urlparse

    os.makedirs(download_dir, exist_ok=True)

    count = 0
    page = 1
    while count < max_images:
        observations = fetch_observations(taxon_name, rank, page=page)
        if not observations:
            if page == 1:
                print("No observations found.")
            return
        results = observations.get('results', [])
        if not results:
            return

        for result in results:
            for photo in result.get('photos', []):
                if count >= max_images:
                    return
                image_url = photo.get('url')
                if not image_url:
                    continue
                # Construct the URL for the original-sized image
                original_url = image_url.replace("square", "original")
                image_id = photo.get('id')
                # Take the extension from the URL path only, so a trailing
                # "?<query>" never ends up in the filename.
                extension = os.path.splitext(urlparse(original_url).path)[1].lstrip('.') or "jpg"
                save_path = os.path.join(download_dir, f"{taxon_name}_{image_id}.{extension}")
                # Only an explicit False counts as failure, so a download_image
                # that returns None is still treated as a success.
                if download_image(original_url, save_path) is False:
                    continue
                print(f"Downloaded: {save_path}")
                count += 1

        # Stop once the reported total has been covered, when the response
        # carries that information; otherwise an empty page ends the loop.
        total = observations.get('total_results')
        per_page = observations.get('per_page') or len(results)
        if isinstance(total, int) and isinstance(per_page, int) and page * per_page >= total:
            return
        page += 1

In [16]:
# Run configuration for the genus/family-level download; edit the values below.
taxon = "Apogonidae"  # target genus or family name
rank = "family"  # taxonomic rank of `taxon`: 'genus' or 'family'
max_images_to_download = 100  # upper bound on the number of images saved
download_directory = f"/Users/leonardo/Documents/Projects/cryptovision/data/raw/{taxon}"  # local output folder

download_taxon_images(
    taxon_name=taxon,
    rank=rank,
    download_dir=download_directory,
    max_images=max_images_to_download,
)

Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Apogonidae/Apogonidae_451856063.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Apogonidae/Apogonidae_451800259.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Apogonidae/Apogonidae_451728700.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Apogonidae/Apogonidae_451728732.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Apogonidae/Apogonidae_449358769.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Apogonidae/Apogonidae_449358783.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Apogonidae/Apogonidae_448583756.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Apogonidae/Apogonidae_448583321.jpeg
Downloaded: /Users/leonardo/Documents/Projects/cryptovision/data/raw/Apogonidae/Apogonidae_447484909.jpeg
Downloaded: /Users/leonardo/Documents/Projects

In [14]:
import requests

def fetch_species_by_genus(genus_name, timeout=30):
    """
    Fetch species data for a given genus from FishBase.

    Args:
        genus_name: Genus to look up; sent as the ``Genus`` query parameter.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout unless one is passed.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None`` (after
        printing an ``Error: ...`` line). Transport failures and undecodable
        bodies are reported the same way instead of raising.
    """
    url = "https://fishbase.ropensci.org/species"
    params = {
        "Genus": genus_name
    }
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # A 200 with a non-JSON body is still a failed fetch for callers.
        print(f"Error: {exc}")
        return None
    
import os

def download_image(url, save_path, timeout=30):
    """
    Download an image from a URL and save it to the specified path.

    Args:
        url: Address of the image to fetch.
        save_path: Local file path the bytes are written to (overwritten if it
            already exists).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``True`` when the file was written completely, ``False`` when the
        server answered with a non-200 status or the transfer failed. A
        ``Failed to download image: ...`` line is printed on failure.
    """
    file_opened = False
    try:
        # The context manager releases the streamed connection even when the
        # body is never read (non-200) or the transfer is interrupted.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download image: {url}")
                return False
            with open(save_path, 'wb') as file:
                file_opened = True
                for chunk in response.iter_content(8192):
                    file.write(chunk)
    except requests.RequestException:
        print(f"Failed to download image: {url}")
        # Only remove what this call created: a truncated image is worse than
        # a missing one, but a failure before open() must not touch the path.
        if file_opened and os.path.exists(save_path):
            os.remove(save_path)
        return False
    return True
        
def download_genus_images(genus_name, download_dir, max_images_per_species=5):
    """
    Download images of all species within a specific genus from FishBase.

    For every species returned by ``fetch_species_by_genus`` the function
    fetches the species summary page, collects ``<img>`` tags whose ``src``
    mentions thumbnails, rewrites each to a full-size picture URL and saves it
    under ``download_dir``.

    Args:
        genus_name: Genus to look up.
        download_dir: Directory the images are written to (created if missing).
        max_images_per_species: Maximum number of images saved per species.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    os.makedirs(download_dir, exist_ok=True)

    species_data = fetch_species_by_genus(genus_name)
    if not species_data:
        print("No species found for the given genus.")
        return

    # Imported once here rather than on every loop iteration; kept local so
    # this cell stays runnable on its own.
    from urllib.parse import urljoin, urlparse
    from bs4 import BeautifulSoup

    for species in species_data.get('data', []):
        species_name = species.get('Species')
        genus = species.get('Genus')
        if not (species_name and genus):
            continue

        full_species_name = f"{genus} {species_name}"
        print(f"Fetching images for {full_species_name}...")
        # Construct the URL to the species summary page
        species_url = f"https://www.fishbase.se/summary/{genus}-{species_name}.html"
        try:
            response = requests.get(species_url, timeout=30)
        except requests.RequestException:
            # One unreachable page must not abort the remaining species.
            print(f"Failed to fetch species page for {full_species_name}")
            continue
        if response.status_code != 200:
            print(f"Failed to fetch species page for {full_species_name}")
            continue

        # Parse the page to find image URLs
        soup = BeautifulSoup(response.content, 'html.parser')
        count = 0
        for img in soup.find_all('img'):
            if count >= max_images_per_species:
                break
            img_url = img.get('src')
            if not img_url or 'thumbnails' not in img_url.lower():
                continue
            # NOTE(review): the filter above is case-insensitive but this
            # replace is case-sensitive, so a lowercase "thumbnails" segment
            # passes through unchanged. Confirm FishBase's real URL layout
            # before tightening either side.
            img_url = img_url.replace('Thumbnails', 'Pictures')
            img_url = img_url.replace('tn_', '')
            # Resolve against the page URL so "../x", "/x" and "//host/x"
            # forms all become valid absolute URLs; absolute ones are kept.
            img_url = urljoin(species_url, img_url)
            # Name the file from the URL path so a query string is not
            # carried into the filename.
            img_name = os.path.basename(urlparse(img_url).path)
            if not img_name:
                continue
            save_path = os.path.join(download_dir, img_name)
            # Only an explicit False counts as failure, so a download_image
            # that returns None is still treated as a success.
            if download_image(img_url, save_path) is False:
                continue
            print(f"Downloaded: {save_path}")
            count += 1
                
# Run configuration for the FishBase genus download; edit the values below.
genus = "Eviota"  # target genus name
rank = "genus"  # taxonomic rank label: 'genus' or 'family'
max_images = 5  # upper bound on images saved per species
download_directory = f"/Users/leonardo/Documents/Projects/cryptovision/data/raw/fishbase/{genus}"  # local output folder

download_genus_images(
    genus_name=genus,
    download_dir=download_directory,
    max_images_per_species=max_images,
)

Error: 403
No species found for the given genus.
