In [1]:
import requests
import pandas as pd

def get_gbif_occurrences(species_name, limit=300, max_records=200000):
    """
    Descarga todas las ocurrencias de una especie desde la API de GBIF.
    Pagina automáticamente hasta conseguir max_records o que no haya más datos.
    """
    offset = 0
    all_records = []

    print("Descargando ocurrencias desde GBIF...")

    while True:
        url = (
            "https://api.gbif.org/v1/occurrence/search?"
            f"scientificName={species_name}&"
            "hasCoordinate=true&"
            "limit=300&"
            f"offset={offset}"
        )

        r = requests.get(url)
        data = r.json()

        batch = data.get("results", [])
        all_records.extend(batch)

        print(f"Descargados: {len(all_records)} registros", end="\r")

        # detener si no hay más datos
        if len(batch) < limit:
            break

        # detener si excedemos lo deseado
        if len(all_records) >= max_records:
            print("\n⚠️ Se alcanzó max_records.")
            break

        offset += limit

    print(f"\nTotal descargado: {len(all_records)} registros")
    return pd.DataFrame(all_records)

In [2]:
# -------------------------
# USO
# -------------------------

species = "Flammulina velutipes"
df_raw = get_gbif_occurrences(species)

print("\nColumnas disponibles:")
print(df_raw.columns)

print(f"\nFilas totales: {len(df_raw)}")

Descargando ocurrencias desde GBIF...
Descargados: 29965 registros
Total descargado: 29965 registros

Columnas disponibles:
Index(['key', 'datasetKey', 'publishingOrgKey', 'installationKey',
       'hostingOrganizationKey', 'publishingCountry', 'protocol',
       'lastCrawled', 'lastParsed', 'crawlId',
       ...
       'establishmentMeans', 'degreeOfEstablishment', 'pathway',
       'verbatimLabel', 'verbatimDepth', 'reproductiveCondition', 'typeStatus',
       'namePublishedInYear', 'parentNameUsage', 'island'],
      dtype='object', length=201)

Filas totales: 29965


In [4]:
df_raw.to_csv('df_enoki.csv')

In [9]:
df_raw.hasGeospatialIssue.value_counts()

AttributeError: 'DataFrame' object has no attribute 'hasGeospatialIssue'

In [5]:
df_raw.columns.tolist()

['key',
 'datasetKey',
 'publishingOrgKey',
 'installationKey',
 'hostingOrganizationKey',
 'publishingCountry',
 'protocol',
 'lastCrawled',
 'lastParsed',
 'crawlId',
 'extensions',
 'basisOfRecord',
 'occurrenceStatus',
 'classifications',
 'taxonKey',
 'kingdomKey',
 'phylumKey',
 'classKey',
 'orderKey',
 'familyKey',
 'genusKey',
 'speciesKey',
 'acceptedTaxonKey',
 'scientificName',
 'scientificNameAuthorship',
 'acceptedScientificName',
 'kingdom',
 'phylum',
 'order',
 'family',
 'genus',
 'species',
 'genericName',
 'specificEpithet',
 'taxonRank',
 'taxonomicStatus',
 'decimalLatitude',
 'decimalLongitude',
 'coordinateUncertaintyInMeters',
 'continent',
 'stateProvince',
 'gadm',
 'year',
 'month',
 'day',
 'eventDate',
 'startDayOfYear',
 'endDayOfYear',
 'issues',
 'modified',
 'lastInterpreted',
 'license',
 'isSequenced',
 'identifiers',
 'media',
 'facts',
 'relations',
 'isInCluster',
 'recordedBy',
 'dnaSequenceID',
 'geodeticDatum',
 'class',
 'countryCode',
 'recor

o por taxonid 