## Zoonosis Risk

<img src="../images/black-necked_stilts_ca_rice_commission.png" alt="Black-necked stilts forage in wet rice fields" width="760" height="500" longdesc="https://www.ucdavis.edu/sites/default/files/media/images/9127201758-024b4c82d0-o.jpg" />

#### Analysis

Imports

In [1]:
import warnings
warnings.filterwarnings('ignore')

import sys
import os
import time
import pathlib
import zipfile
from getpass import getpass
from glob import glob

import pandas as pd
import numpy as np
import geopandas as gpd
import rioxarray as rxr
from rioxarray.merge import merge_arrays
import xarray as xr
import xrspatial
import tqdm

import holoviews as hv
import hvplot.pandas
import hvplot.xarray
import cartopy.crs as ccrs
import geoviews as gv

import earthaccess
import pygbif.occurrences as occ

In [2]:
# Absolute location of the sibling ``scripts`` folder (one level up from here)
scripts_path = os.path.abspath(os.path.join(os.pardir, 'scripts'))

# Register it on the import path once, so re-running the cell adds no duplicates
if not (scripts_path in sys.path):
    sys.path += [scripts_path]

import utils

In [3]:
# Have GDAL retry HTTP requests so a momentary disruption does not abort a read
os.environ.update({
    "GDAL_HTTP_MAX_RETRY": "5",
    "GDAL_HTTP_RETRY_DELAY": "1",
})

In [4]:
# GBIF access

# Flip to True to drop any stored values and be prompted again
reset_credentials = False
# GBIF needs a username, password, and email. Each environment variable
# maps to (prompt function, prompt text); the password uses getpass so it
# is not echoed.
credentials = {
    'GBIF_USER': (input, ''),
    'GBIF_PWD': (getpass, ''),
    'GBIF_EMAIL': (input, ''),
}

for env_variable in credentials:
    prompt_func, prompt_text = credentials[env_variable]
    # Forget the stored value when a reset was requested
    if reset_credentials:
        os.environ.pop(env_variable, None)
    # Prompt only for values that are still missing and keep them in the environment
    if env_variable not in os.environ:
        os.environ[env_variable] = prompt_func(prompt_text)

Set Paths

In [5]:
# Everything lives under <home>/Projects/zoonosis-risk
project_dir = os.path.join(pathlib.Path.home(), 'Projects', 'zoonosis-risk')

# Top-level folders: plots, project data, and CA boundary data
plots_dir = os.path.join(project_dir, 'plots')
data_dir = os.path.join(project_dir, 'data')
boundary_dir = os.path.join(project_dir, 'boundary_data')

# Boundaries
county_dir = os.path.join(boundary_dir, 'ca-county-boundaries')
pub_land_dir = os.path.join(boundary_dir, 'ca-public-access-lands')

# Species: one folder per host under data/gbif
gbif_dir = os.path.join(data_dir, 'gbif')
gbif_snow_goose_dir = os.path.join(gbif_dir, 'snow-goose')
gbif_mallard_dir = os.path.join(gbif_dir, 'mallard')
gbif_red_winged_blackbird_dir = os.path.join(gbif_dir, 'red-winged-blackbird')
gbif_savannah_sparrow_dir = os.path.join(gbif_dir, 'savannah-sparrow')
gbif_house_sparrow_dir = os.path.join(gbif_dir, 'house-sparrow')
gbif_killdeer_dir = os.path.join(gbif_dir, 'killdeer')
gbif_rock_pigeon_dir = os.path.join(gbif_dir, 'rock-pigeon')

# Create every folder (parents included); existing folders are left alone
for directory in (
    plots_dir,
    data_dir,
    county_dir,
    pub_land_dir,
    gbif_snow_goose_dir,
    gbif_mallard_dir,
    gbif_red_winged_blackbird_dir,
    gbif_savannah_sparrow_dir,
    gbif_house_sparrow_dir,
    gbif_killdeer_dir,
    gbif_rock_pigeon_dir,
):
    os.makedirs(directory, exist_ok=True)

Load site boundaries

In [6]:
# California County Boundaries - Yolo (2024): GeoJSON kept in the county folder
yolo_path = os.path.join(
    county_dir,
    'ca_county.geojson',
)

# Read the county boundary into a GeoDataFrame and show it
yolo_county_gdf = gpd.read_file(yolo_path)
yolo_county_gdf

Unnamed: 0,OBJECTID,COUNTY_NAME,COUNTY_ABBREV,COUNTY_NUM,COUNTY_CODE,COUNTY_FIPS,ISLAND,Shape__Area,Shape__Length,GlobalID,geometry
0,57,Yolo,YOL,57,57,113,,4346600000.0,419556.872361,33cd46b3-a884-45b7-9638-80f30b53983f,"POLYGON ((-122.28398 38.92439, -122.2511 38.92..."


In [8]:
# Site plot: county outline over Esri imagery, reprojected to Mercator
mercator = ccrs.Mercator()
yolo_boundary_plt = yolo_county_gdf.to_crs(mercator).hvplot(
    title='Yolo County',
    line_color='#ffb403',
    fill_color=None,
    line_width=3,
    crs=mercator,
    tiles='EsriImagery',
    frame_width=600,
    frame_height=650,
)

yolo_boundary_plt

In [7]:
# California Department of Fish and Wildlife (CDFW) Public Access Lands:
# GeoJSON kept in the public-land folder
pub_land_path = os.path.join(
    pub_land_dir,
    'ca_public_access_land.geojson',
)

# Read the Yolo Bypass Wildlife Area boundary into a GeoDataFrame and show it
yolo_bypass_gdf = gpd.read_file(pub_land_path)
yolo_bypass_gdf

Unnamed: 0,OBJECTID,PROP_TYPE,PROP_NAME,LINK,ACCESS,Shape__Area,Shape__Length,GlobalID,geometry
0,298,Wildlife Area,Yolo Bypass Wildlife Area,http://www.wildlife.ca.gov/Lands/Places-to-Vis...,Open; Refer to regulations or contact CDFW reg...,109852500.0,96683.900937,2a0e562e-895f-42aa-8bf4-efbbf17a1091,"MULTIPOLYGON (((-121.6715 38.55766, -121.67157..."


In [12]:
# Habitat plot: wildlife-area outline over Esri imagery, reprojected to Mercator
mercator = ccrs.Mercator()
yolo_bypass_plt = yolo_bypass_gdf.to_crs(mercator).hvplot(
    title='Yolo Bypass Wildlife Area',
    line_color='#35baf6',
    fill_color=None,
    line_width=3,
    crs=mercator,
    tiles='EsriImagery',
    frame_width=600,
    frame_height=650,
)

yolo_bypass_plt

In [13]:
# Nested habitat: overlay the wildlife-area outline on the county outline
nested_habitat_plt = yolo_boundary_plt * yolo_bypass_plt

nested_habitat_plt.opts(
    title='Yolo County and Bypass Wildlife Area',
    width=600,
    height=650,
)

Surface Water Temperature (Habitat Inlet)

In [189]:
# USGS Instantaneous Values Service (the /nwis/iv/ endpoint; readings arrive
# at sub-daily resolution and are aggregated to daily values further down)
# Station: Cache C Outflow From Settling Basin NR Woodland CA - 11452900
# About: Inlet to Yolo Bypass Wildlife Area
# Time period: 10-20-2024 to 01-31-2025 (inclusive)
# Metric: Temperature, water, degrees Celsius (parameter code 00010)

# The end of the window is the last millisecond of 2025-01-31 so that the
# water-temperature record covers the same final day as the period stated
# above; the offsets differ (-07:00 / -08:00) because the window starts in
# daylight time and ends in standard time.
USGS_DAILY_URL = (
    'https://nwis.waterservices.usgs.gov/nwis/iv/?'
    'sites=11452900&agencyCd=USGS&'
    'startDT=2024-10-20T00:00:00.000-07:00&'
    'endDT=2025-01-31T23:59:59.999-08:00&'
    'parameterCd=00010&format=json'
)

In [190]:
# Request the USGS JSON response and parse it directly into a DataFrame
# (network call; the nested payload is unpacked in the next cell)
usgs_daily_water = pd.read_json(USGS_DAILY_URL)

In [191]:
# Second row of the parsed response -- presumably the time-series entry
# (TODO confirm against the raw JSON)
usgs_daily_water_time_series = usgs_daily_water.iloc[1]

# Drill into the first series and its first set of values to reach the readings
first_series = usgs_daily_water_time_series['value'][0]
dwts_values = first_series['values'][0]['value']

# One row per reading
dwts_df = pd.DataFrame(dwts_values)
dwts_df.head()

Unnamed: 0,value,qualifiers,dateTime
0,7.5,[P],2024-10-20T00:00:00.000-07:00
1,7.4,[P],2024-10-20T00:15:00.000-07:00
2,7.3,[P],2024-10-20T00:30:00.000-07:00
3,7.2,[P],2024-10-20T00:45:00.000-07:00
4,7.2,[P],2024-10-20T01:00:00.000-07:00


In [192]:
# Parse the ISO-8601 timestamps into a timezone-aware column.
# The requested window starts at UTC-07:00 and ends at UTC-08:00, so the
# returned strings are expected to carry mixed UTC offsets. Parsing mixed
# offsets without ``utc=True`` yields an object column (and is an error in
# newer pandas), so parse to UTC first and then convert back to the
# station's local time zone, which reproduces the original wall-clock times.
dwts_df = dwts_df.assign(
    datetime_date=lambda x: (
        pd.to_datetime(x['dateTime'], utc=True)
        .dt.tz_convert('America/Los_Angeles')
    )
)

dwts_df.head(1)

Unnamed: 0,value,qualifiers,dateTime,datetime_date
0,7.5,[P],2024-10-20T00:00:00.000-07:00,2024-10-20 00:00:00-07:00


In [193]:
# Calendar date of each reading as a timezone-naive timestamp: format every
# timestamp as YYYY-MM-DD, then parse the strings back in one pass
date_strings = dwts_df['datetime_date'].map(
    lambda timestamp: timestamp.strftime('%Y-%m-%d')
)
dwts_df['date'] = pd.to_datetime(date_strings)
dwts_df.head(1)

Unnamed: 0,value,qualifiers,dateTime,datetime_date,date
0,7.5,[P],2024-10-20T00:00:00.000-07:00,2024-10-20 00:00:00-07:00,2024-10-20


In [194]:
# Keep only the calendar date and the reading cast to float. Building the
# frame in a single chain produces an independent DataFrame, instead of
# assigning into (and dropping from) a slice of ``dwts_df`` in place.
dwts_df_subset = (
    dwts_df
    .assign(water_temperature=lambda df: df['value'].astype(float))
    .loc[:, ['date', 'water_temperature']]
)

# Daily maximum on its own (not used by the summary below; kept in case a
# later cell refers to it)
dwts_daily_max_temp = dwts_df_subset.groupby('date').max()

# Daily minimum, maximum, and mean water temperature
daily_water_temps = dwts_df_subset.groupby('date').agg(
    Minimum=('water_temperature', 'min'),
    Maximum=('water_temperature', 'max'),
    Average=('water_temperature', 'mean')
)

daily_water_temps.head()

Unnamed: 0_level_0,Minimum,Maximum,Average
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-10-20,3.3,26.8,13.696875
2024-10-21,5.2,26.2,15.121875
2024-10-22,6.4,25.4,13.636458
2024-10-23,3.5,26.8,14.680208
2024-10-24,5.7,24.8,15.026042


In [195]:
# Line plot of the daily minimum / maximum / average series
water_temp_plot_opts = dict(
    label='Daily Surface Water Temperature (Yolo Bypass Wildlife Area Inlet)',
    xlabel='Date',
    ylabel='Temperature',
    height=500,
    width=900,
    legend='top_right',
    group_label='Temperature (°C)',
)

daily_water_temps.hvplot.line(**water_temp_plot_opts)

Hosts

In [8]:
# (common name, data directory, GBIF species identifier) for each host
_host_specs = [
    ('Snow Goose', gbif_snow_goose_dir, '2498167'),
    ('Mallard', gbif_mallard_dir, '9761484'),
    ('Red-winged Blackbird', gbif_red_winged_blackbird_dir, '9409198'),
    ('Savannah Sparrow', gbif_savannah_sparrow_dir, '5231142'),
    ('House Sparrow', gbif_house_sparrow_dir, '5231190'),
    ('Killdeer', gbif_killdeer_dir, '2480320'),
    ('Rock Pigeon', gbif_rock_pigeon_dir, '2495414'),
]

# Per-host settings keyed by common name. The 'GBIF DOWNLOAD' value is the
# species identifier prefixed with GBIF_DOWNLOAD_KEY_ (presumably the name
# under which the helper stores the download key -- verify in utils).
HOSTS = {
    common_name: {
        'DATA PATH': data_path,
        'GBIF SPECIES': species_id,
        'GBIF DOWNLOAD': f'GBIF_DOWNLOAD_KEY_{species_id}',
    }
    for common_name, data_path, species_id in _host_specs
}

Snow Goose

In [10]:
# Snow Goose: hand the host name, credentials, host settings, date range,
# and the wildlife area's total bounds to the project download helper
snow_goose_request = (
    'Snow Goose', credentials, HOSTS,
    "2024-10-20,2025-01-31",
    yolo_bypass_gdf.total_bounds,
)
gbif_snow_goose_csv = utils.download_gbif(*snow_goose_request)

# Load the downloaded records and preview the first two
gbif_snow_goose_df = utils.load_gbif(gbif_snow_goose_csv)
gbif_snow_goose_df.head(2)

INFO:Your download key is 0008757-250415084134356
INFO:Download file size: 2512 bytes
INFO:On disk at /Users/laurenalexandra/Projects/zoonosis-risk/data/gbif/snow-goose/0008757-250415084134356.zip


Unnamed: 0_level_0,order,species,decimalLatitude,decimalLongitude,day,month,year
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5063496790,Anseriformes,Anser caerulescens,38.556212,-121.632715,21,1,2025
5063478810,Anseriformes,Anser caerulescens,38.563935,-121.633123,21,1,2025


Mallard

In [11]:
# Mallard: hand the host name, credentials, host settings, date range,
# and the wildlife area's total bounds to the project download helper
mallard_request = (
    'Mallard', credentials, HOSTS,
    "2024-10-20,2025-01-31",
    yolo_bypass_gdf.total_bounds,
)
gbif_mallard_csv = utils.download_gbif(*mallard_request)

# Load the downloaded records and preview the first two
gbif_mallard_df = utils.load_gbif(gbif_mallard_csv)
gbif_mallard_df.head(2)

INFO:Your download key is 0008759-250415084134356
INFO:Download file size: 1008 bytes
INFO:On disk at /Users/laurenalexandra/Projects/zoonosis-risk/data/gbif/mallard/0008759-250415084134356.zip


Unnamed: 0_level_0,order,species,decimalLatitude,decimalLongitude,day,month,year
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
4994589481,Anseriformes,Anas platyrhynchos,38.558212,-121.628433,21,11,2024
4994234652,Anseriformes,Anas platyrhynchos,38.552374,-121.625864,6,12,2024


Red-winged Blackbird

In [12]:
# Red-winged Blackbird: hand the host name, credentials, host settings, date
# range, and the wildlife area's total bounds to the project download helper
red_winged_blackbird_request = (
    'Red-winged Blackbird', credentials, HOSTS,
    "2024-10-20,2025-01-31",
    yolo_bypass_gdf.total_bounds,
)
gbif_red_winged_blackbird_csv = utils.download_gbif(
    *red_winged_blackbird_request
)

# Load the downloaded records and preview the first two
gbif_red_winged_blackbird_df = utils.load_gbif(gbif_red_winged_blackbird_csv)
gbif_red_winged_blackbird_df.head(2)

INFO:Your download key is 0008762-250415084134356
INFO:Download file size: 2035 bytes
INFO:On disk at /Users/laurenalexandra/Projects/zoonosis-risk/data/gbif/red-winged-blackbird/0008762-250415084134356.zip


Unnamed: 0_level_0,order,species,decimalLatitude,decimalLongitude,day,month,year
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5087202563,Passeriformes,Agelaius phoeniceus,38.562026,-121.629851,23,11,2024
5087173613,Passeriformes,Agelaius phoeniceus,38.561236,-121.630447,23,11,2024


Savannah Sparrow

In [14]:
# Savannah Sparrow: hand the host name, credentials, host settings, date
# range, and the wildlife area's total bounds to the project download helper
savannah_sparrow_request = (
    'Savannah Sparrow', credentials, HOSTS,
    "2024-10-20,2025-01-31",
    yolo_bypass_gdf.total_bounds,
)
gbif_savannah_sparrow_csv = utils.download_gbif(*savannah_sparrow_request)

# Load the downloaded records and preview the first two
gbif_savannah_sparrow_df = utils.load_gbif(gbif_savannah_sparrow_csv)
gbif_savannah_sparrow_df.head(2)

INFO:Your download key is 0008772-250415084134356
INFO:Download file size: 3487 bytes
INFO:On disk at /Users/laurenalexandra/Projects/zoonosis-risk/data/gbif/savannah-sparrow/0008772-250415084134356.zip


Unnamed: 0_level_0,order,species,decimalLatitude,decimalLongitude,day,month,year
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5063420061,Passeriformes,Passerculus sandwichensis,38.561486,-121.629034,4,1,2025
5063325212,Passeriformes,Passerculus sandwichensis,38.558748,-121.635183,22,1,2025


House Sparrow

In [15]:
# House Sparrow: hand the host name, credentials, host settings, date range,
# and the wildlife area's total bounds to the project download helper
house_sparrow_request = (
    'House Sparrow', credentials, HOSTS,
    "2024-10-20,2025-01-31",
    yolo_bypass_gdf.total_bounds,
)
gbif_house_sparrow_csv = utils.download_gbif(*house_sparrow_request)

# Load the downloaded records and preview the first two
gbif_house_sparrow_df = utils.load_gbif(gbif_house_sparrow_csv)
gbif_house_sparrow_df.head(2)

INFO:Your download key is 0008776-250415084134356
INFO:Download file size: 1007 bytes
INFO:On disk at /Users/laurenalexandra/Projects/zoonosis-risk/data/gbif/house-sparrow/0008776-250415084134356.zip


Unnamed: 0_level_0,order,species,decimalLatitude,decimalLongitude,day,month,year
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5063400354,Passeriformes,Passer domesticus,38.563145,-121.635772,22,1,2025
4993955876,Passeriformes,Passer domesticus,38.560249,-121.631003,24,11,2024


Killdeer

In [16]:
# Killdeer: hand the host name, credentials, host settings, date range,
# and the wildlife area's total bounds to the project download helper
killdeer_request = (
    'Killdeer', credentials, HOSTS,
    "2024-10-20,2025-01-31",
    yolo_bypass_gdf.total_bounds,
)
gbif_killdeer_csv = utils.download_gbif(*killdeer_request)

# Load the downloaded records and preview the first two
gbif_killdeer_df = utils.load_gbif(gbif_killdeer_csv)
gbif_killdeer_df.head(2)

INFO:Your download key is 0008779-250415084134356
INFO:Download file size: 1611 bytes
INFO:On disk at /Users/laurenalexandra/Projects/zoonosis-risk/data/gbif/killdeer/0008779-250415084134356.zip


Unnamed: 0_level_0,order,species,decimalLatitude,decimalLongitude,day,month,year
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5063465801,Charadriiformes,Charadrius vociferus,38.552232,-121.620172,25,1,2025
5063089654,Charadriiformes,Charadrius vociferus,38.560178,-121.623771,17,1,2025


Rock Pigeon

In [17]:
# Rock Pigeon: hand the host name, credentials, host settings, date range,
# and the wildlife area's total bounds to the project download helper
rock_pigeon_request = (
    'Rock Pigeon', credentials, HOSTS,
    "2024-10-20,2025-01-31",
    yolo_bypass_gdf.total_bounds,
)
gbif_rock_pigeon_csv = utils.download_gbif(*rock_pigeon_request)

# Load the downloaded records and preview the first two
gbif_rock_pigeon_df = utils.load_gbif(gbif_rock_pigeon_csv)
gbif_rock_pigeon_df.head(2)

INFO:Your download key is 0008782-250415084134356
INFO:Download file size: 921 bytes
INFO:On disk at /Users/laurenalexandra/Projects/zoonosis-risk/data/gbif/rock-pigeon/0008782-250415084134356.zip


Unnamed: 0_level_0,order,species,decimalLatitude,decimalLongitude,day,month,year
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
4994159101,Columbiformes,Columba livia,38.56393,-121.63309,18,11,2024


Normalize occurrences by month to account for sampling effort

In [13]:
def normalize_occurrences(occ_df):
    """
    Normalize occurrence counts by the mean count of their month.

    Each count is divided by the mean of all counts that share the same
    month, so a value above 1 means more occurrences than is typical for
    that month and a value below 1 means fewer.

    Args:
    occ_df (pandas.DataFrame): Occurrence counts. Must hold exactly one
        count column (any name) and a 'month' key, given either as an
        index level or as a column. A 'norm_occurrences' column left over
        from an earlier call is ignored and overwritten, so the function
        can safely be re-run on its own output.

    Returns:
    norm_occ_df (pandas.DataFrame): The same DataFrame object that was
        passed in, with a 'norm_occurrences' column added (the input is
        modified in place).

    Raises:
    KeyError: If 'month' is neither a column nor an index level.
    ValueError: If the count column cannot be identified unambiguously.
    """

    month_is_column = 'month' in occ_df.columns
    if not month_is_column and 'month' not in occ_df.index.names:
        raise KeyError('month')

    # Everything except the grouping key and this function's own output is
    # a candidate count column; exactly one must remain
    count_columns = [
        column for column in occ_df.columns
        if column not in ('month', 'norm_occurrences')
    ]
    if len(count_columns) != 1:
        raise ValueError(
            "occ_df must contain exactly one occurrence-count column "
            f"besides 'month' and 'norm_occurrences'; found {count_columns}"
        )
    counts = occ_df[count_columns[0]]

    # Mean count per month, broadcast back onto every row of that month.
    # Working on the single count Series (rather than dividing whole
    # frames) keeps the result aligned row-for-row with the input.
    if month_is_column:
        counts_by_month = counts.groupby(occ_df['month'])
    else:
        counts_by_month = counts.groupby(level='month')
    mean_occurrences_by_month = counts_by_month.transform('mean')

    occ_df['norm_occurrences'] = counts / mean_occurrences_by_month

    return occ_df