# German Mammals from GBIF
A clean, reproducible workflow using `pygbif`, pandas, and IUCN taxonomy.

In [None]:
import csv
import os
import time
import zipfile

import pandas as pd
from pygbif import occurrences as occ


## Set GBIF credentials

In [None]:
# Publish the GBIF account details as environment variables so that the
# download cell can read them back from os.environ.
# Replace the placeholder values with your own account before running.
os.environ.update(
    {
        "GBIF_USER": "YOUR_USERNAME",
        "GBIF_PWD": "YOUR_PASSWORD",
        "GBIF_EMAIL": "YOUR_EMAIL",
    }
)


Tip: GBIF also offers SQL downloads, which are worth checking out.

## Create Download

In [None]:
# Filters describing which occurrence records to request from GBIF.
gbif_filters = [
    "taxonKey = 359",  # presumably Mammalia, per the notebook title - confirm
    "country = DE",
    "hasCoordinate = true",
    "year > 1950",
    "basisOfRecord = HUMAN_OBSERVATION",
]

# Submit the request, authenticating with the credentials stored in the
# environment, and keep only the first element of the result: the key that
# the following cells use to poll for and fetch the download.
download_key = occ.download(
    gbif_filters,
    format="DWCA",  # or "SIMPLE_CSV", "SPECIES_LIST"
    user=os.environ["GBIF_USER"],
    pwd=os.environ["GBIF_PWD"],
    email=os.environ["GBIF_EMAIL"],
)[0]
download_key


## Check Download Status

In [None]:
# Statuses that end the polling loop.
finished_states = ("SUCCEEDED", "FAILED", "CANCELLED", "KILLED")
poll_seconds = 60  # check every 1 minute

# Ask GBIF for the download's metadata until it reports a finished status,
# printing the status after every request.
while True:
    meta = occ.download_meta(download_key)
    status = meta["status"]
    print(f"Current GBIF status: {status}")

    if status not in finished_states:
        time.sleep(poll_seconds)
        continue
    break

# Report the outcome; only SUCCEEDED counts as a usable download.
if status == "SUCCEEDED":
    outcome = "\n✅ GBIF download is finished!\n"
else:
    outcome = f"\n⚠️ GBIF download ended with status: {status}\n"
print(outcome)
print("\a")  # terminal bell / sound, rung on success and on failure alike


## Retrieve Archive

In [None]:
# Fetch the finished download as a zip file and unpack it.
#
# The target directory is created first: the zip is written to a file inside
# it, which fails on a fresh checkout where "outputs" does not exist yet.
download_dir = "outputs"
os.makedirs(download_dir, exist_ok=True)

download = occ.download_get(download_key, path=download_dir)
zip_path = download["path"]  # location of the zip on disk, as reported by pygbif

# Unpack the whole archive next to the zip.
outpath = "outputs/gbif_data"
with zipfile.ZipFile(zip_path, "r") as z:
    z.extractall(outpath)

# The occurrence table is expected at the top level of the archive. Fail here
# with a clear message, rather than in the later read_csv cell, if it is
# missing (e.g. because a different download format was requested).
occ_path = outpath + "/occurrence.txt"
if not os.path.isfile(occ_path):
    raise FileNotFoundError(
        f"{occ_path} not found after extracting {zip_path}; "
        "was the download requested with format='DWCA'?"
    )
occ_path


## Load Occurrence Data
See the Darwin Core term reference for the column names and their meanings:
https://dwc.tdwg.org/terms/

In [None]:
# Columns kept for the analysis (Darwin Core / GBIF term names).
occurrence_columns = [
    "order", "family", "genus", "specificEpithet", "speciesKey",
    "vernacularName", "decimalLatitude", "decimalLongitude",
    "countryCode", "year", "basisOfRecord", "occurrenceStatus",
]

# Read only the needed columns instead of loading the full, very wide
# occurrence table and discarding most of it afterwards.
# quoting=csv.QUOTE_NONE: GBIF's tab-delimited exports do not quote fields, so
# a literal '"' inside a value must not be interpreted as a quote character
# (with default quoting it can silently merge fields or whole rows).
# The trailing [occurrence_columns] restores the column order listed above,
# because usecols keeps the order found in the file.
df = pd.read_csv(
    occ_path,
    sep="\t",
    usecols=occurrence_columns,
    quoting=csv.QUOTE_NONE,
    low_memory=False,
)[occurrence_columns]

# Drop records without a species key, then store the key as an integer.
# .copy() makes df an independent frame so the column assignments below never
# act on a view of the raw table.
df = df.dropna(subset=["speciesKey"]).copy()
df["speciesKey"] = df["speciesKey"].astype(int)

# Binomial name "Genus epithet"; it is NaN when either part is missing.
df["species"] = df["genus"] + " " + df["specificEpithet"]

df.head()


## Filter Native Mammals Using IUCN
The GBIF documentation does not do a good job of providing the native range of mammals, and this list clearly includes animals from zoos etc., so another filter is needed. This time we use the IUCN Red List to filter: https://www.iucnredlist.org/search

In [None]:
# Load the IUCN taxonomy table and give it a "species" column built the same
# way as the one on df: genus and species name joined by a single space.
IUCN = pd.read_csv("inputs/IUCN_List/taxonomy.csv")
iucn_binomials = IUCN["genusName"] + " " + IUCN["speciesName"]
IUCN["species"] = iucn_binomials

# Keep only the occurrence rows whose species name appears in the IUCN table.
is_iucn_listed = df["species"].isin(IUCN["species"])
native_df = df.loc[is_iucn_listed]
native_df.head()


## Final Species List

In [None]:
# Collapse the filtered occurrences to one row per distinct
# (order, family, species) combination.
taxon_columns = ["order", "family", "species"]
list_native = native_df.loc[:, taxon_columns].drop_duplicates()

# Report the size of the list and save it without the index column.
print("Number of species:", list_native.shape[0])
list_native.to_csv("MammalsOfGermany.csv", index=False)
list_native.head()
