In [60]:
#Import Libraries
import io
import math 
import os
import pathlib
from glob import glob

import earthpy as et
import earthpy.appeears as etapp
import earthpy.earthexplorer as etee
import geopandas as gpd
import matplotlib.pyplot as plt
import rioxarray as rxr
import rioxarray.merge as rxrmerge
import requests
import xarray as xr
import zipfile
from shapely.geometry import box
from xrspatial import slope

In [61]:
# Create data directory
data_dir = os.path.join(et.io.HOME, et.io.DATA_NAME, 'final')
print(data_dir)

if not os.path.exists(data_dir):
        os.makedirs(data_dir)

# Create directory for soil datafiles
soil_path=os.path.join(data_dir, 'soil')
if not os.path.exists(soil_path):
        os.makedirs(soil_path)

C:\Users\ptham\earth-analytics\data\final


In [62]:
# Code adapted from: https://medium.com/@loldja/reading-shapefile-zips-from-a-url-in-python-3-93ea8d727856
# Create directory
grassland_url = ('https://data.fs.usda.gov/geodata/edw/'
                 'edw_resources/shp/S_USA.NationalGrassland.zip'
)
print('Downloading shapefile...')

# Request data from url
grassland_request = requests.get(grassland_url)
grassland_zip = zipfile.ZipFile(io.BytesIO(grassland_request.content))
print("Done")

# Extract files from Zip to 
grassland_zip.extractall(
    path=os.path.join(data_dir, 'national-grassland')
    )

Downloading shapefile...
Done


In [63]:
# Import shapefile
grassland_gdf = gpd.read_file(os.path.join(
    data_dir, 'national-grassland', 'S_USA.NationalGrassland.shp')
    )

select_grassland_gdf = (
    grassland_gdf
    .set_index('GRASSLANDN')
    .loc[['Comanche National Grassland', 'Pawnee National Grassland']]
)
select_grassland_gdf

Unnamed: 0_level_0,NATIONALGR,GIS_ACRES,SHAPE_AREA,SHAPE_LEN,geometry
GRASSLANDN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Comanche National Grassland,295522010328,444413.904,0.183064,26.658022,"MULTIPOLYGON (((-104.02263 37.69224, -104.0225..."
Pawnee National Grassland,295523010328,208424.885,0.089972,15.341594,"MULTIPOLYGON (((-104.58106 40.82664, -104.5810..."


In [64]:
bounds = (select_grassland_gdf.total_bounds)

# Define extent and round to whole numbers for defining soil path file names
min_lon = math.floor(bounds[0])
max_lon = math.ceil(bounds[2])
min_lat = math.floor(bounds[1])
max_lat = math.ceil(bounds[3])

min_lon, max_lon,  min_lat, max_lat

(-105, -102, 36, 42)

In [65]:
# # https://stackoverflow.com/questions/68198525/geopandas-obtain-min-max-lat-and-long-for-all-geometries

# # Get boundaries
# select_grassland_bnd_gdf = select_grassland_gdf.bounds

# # Define extent and round to whole 
# min_lon = math.floor(float(select_grassland_bnd_gdf.minx.min()))
# max_lon = math.ceil(float(select_grassland_bnd_gdf.maxx.max()))
# min_lat = math.floor(float(select_grassland_bnd_gdf.miny.min()))
# max_lat = math.ceil(float(select_grassland_bnd_gdf.maxy.max()))

# min_lon, max_lon, max_lat, min_lat

In [66]:
# Download Polaris tif files
# Code adapted from: 
        # https://www.tutorialspoint.com/downloading-files-from-web-using-python
        # https://github.com/njadid/Polaris_Processor/blob/master/download_polaris.py

# # Set working directory to soil directory
os.chdir(soil_path)

# Define geographical extent
domain_extent = {}
domain_extent['lon'] = [min_lon, max_lon]
domain_extent['lat'] = [min_lat, max_lat]

# url_path_lst = []
def downloadPolaris(soil_directory, geo_extent):
      template_url = 'http://hydrology.cee.duke.edu/POLARIS/PROPERTIES/v1.0/' \
                   '{0}/{1}/{2}/lat{3}{4}_lon{5}{6}.tif'
      template_name = 'mean_ph_lat{0}{1}_lon{2}{3}.tif'
      lat_range = range(geo_extent['lat'][0],geo_extent['lat'][1])
      lon_range = range(geo_extent['lon'][0],geo_extent['lon'][1])
      for lat in lat_range:
        for lon in lon_range:
            url = template_url.format('ph',
                                      'mean',
                                      '30_60',
                                      str(lat),
                                      str(lat+1),
                                      str(lon),
                                      str(lon+1))
            file_name = template_name.format(str(lat),
                                      str(lat+1),
                                      str(lon),
                                      str(lon+1))
            # url_path_lst.append(url)
            if not os.path.exists(os.path.join(soil_directory, file_name)): 
                print(file_name, "does not exist. Downloading file")
                r = requests.get(url, allow_redirects=True)
                open(file_name, 'wb').write(r.content)
            else:
                print(file_name, "is already downloaded")

downloadPolaris(soil_path, domain_extent)

# Reset working directory
os.chdir(data_dir)

mean_ph_lat3637_lon-105-104.tif is already downloaded
mean_ph_lat3637_lon-104-103.tif is already downloaded
mean_ph_lat3637_lon-103-102.tif is already downloaded
mean_ph_lat3738_lon-105-104.tif is already downloaded
mean_ph_lat3738_lon-104-103.tif is already downloaded
mean_ph_lat3738_lon-103-102.tif is already downloaded
mean_ph_lat3839_lon-105-104.tif is already downloaded
mean_ph_lat3839_lon-104-103.tif is already downloaded
mean_ph_lat3839_lon-103-102.tif is already downloaded
mean_ph_lat3940_lon-105-104.tif is already downloaded
mean_ph_lat3940_lon-104-103.tif is already downloaded
mean_ph_lat3940_lon-103-102.tif is already downloaded
mean_ph_lat4041_lon-105-104.tif is already downloaded
mean_ph_lat4041_lon-104-103.tif is already downloaded
mean_ph_lat4041_lon-103-102.tif is already downloaded
mean_ph_lat4142_lon-105-104.tif is already downloaded
mean_ph_lat4142_lon-104-103.tif is already downloaded
mean_ph_lat4142_lon-103-102.tif is already downloaded


In [68]:
# https://www.geeksforgeeks.org/python-list-files-in-a-directory/
# Adapted from Multispectral Analysis assignment

# Download and Merge Soil tif data
tif_paths = glob(os.path.join(soil_path, '*.tif'))
tif_paths
das = [rxr.open_rasterio(tif, masked=True) for tif in tif_paths]
merged_soil_da = rxrmerge.merge_arrays(das)

merged_soil_da

MemoryError: Unable to allocate 890. MiB for an array with shape (1, 21600, 10800) and data type float32

In [22]:
# select_grassland_gdf

# select_grassland_gdf = select_grassland_gdf.to_crs(4326)

In [69]:
# https://lpdaac.usgs.gov/products/srtmgl1v003/
# https://appeears.earthdatacloud.nasa.gov/api/#temporary-s3-credentials
# https://appeears.earthdatacloud.nasa.gov/products
# https://lpdaac.usgs.gov/resources/e-learning/getting-started-with-the-a%CF%81%CF%81eears-api-submitting-and-downloading-an-area-request/

# Download Elevation Data Using Appeears
elevation_downloader = etapp.AppeearsDownloader(
    download_key="SRTM_DEM",
    ea_dir=data_dir,
    product="SRTMGL1_NC.003",
    layer="SRTMGL1_DEM",
    start_date="02-11",
    end_date="02-21",
    recurring=True,
    year_range=[2000, 2000],
    polygon=select_grassland_gdf,
)

# Download files if the download directory does not exist
if not os.path.exists(elevation_downloader.data_dir):
    elevation_downloader.download_files()

In [70]:
# Read elevation data.
elevation_tif_path = (
    os.path.join(data_dir, 'SRTM_DEM',
                'SRTMGL1_NC.003_2000001_to_2023342',
                'SRTMGL1_NC.003_SRTMGL1_DEM_doy2000042_aid0001.tif')
)

# elevation_da = rxr.open_rasterio(elevation_tif_path, masked=True)
# elevation_da

# New version of reading in elevation data array
elev_da = rxr.open_rasterio(elevation_tif_path, masked=True).squeeze()
# Prepare to concatenate: Add date dimension and clean up metadata
elev_da.name = 'Elevation'

In [71]:
# elev_da.plot()

In [72]:
# Calculate slope for area for interest
# Projection based on UTM 13: https://epsg.io/32613
elev_da_proj = elev_da.rio.reproject("EPSG:32613")
aoi_slope = slope(elev_da_proj)
aoi_slope

MemoryError: Unable to allocate 245. MiB for an array with shape (1, 14427, 8921) and data type int16

In [28]:
# Clip elevation data to selected grasslands
elev_clip_da = elev_da.rio.clip(select_grassland_gdf.geometry)
# elev_clip_da.plot()

<matplotlib.collections.QuadMesh at 0x14f84afce80>

MemoryError: Unable to allocate 3.83 GiB for an array with shape (14426, 8920, 4) and data type float64

<Figure size 640x480 with 2 Axes>

In [29]:
aoi_clip_slope = aoi_slope.rio.clip(select_grassland_gdf.geometry)
# aoi_clip_slope.plot()

In [None]:
# # Climate data
# clim_rcp45_url = (
#     "http://thredds.northwestknowledge.net:8080/thredds/catalog/"
#     "NWCSC_INTEGRATED_SCENARIOS_ALL_CLIMATE/macav2livneh/GFDL-ESM2M/"
#     "catalog.html?dataset=NWCSC_IS_ALL_SCAN/macav2livneh/GFDL-ESM2M/"
#     "macav2livneh_pr_GFDL-ESM2M_r1i1p1_rcp45_2026_2045_CONUS_monthly.nc"
# )

# r = requests.get(clim_rcp45_url, allow_redirects=True)
# open("clim_rcp45_2026_2045.nc", 'wb').write(r.content)


In [42]:
import xarray as xr
# Open the data from the thredds server
clim_rcp85_url = ("http://thredds.northwestknowledge.net:8080/thredds/"
                  "dodsC/agg_macav2metdata_pr_CCSM4_r6i1p1_rcp45"
                  "_2006_2099_CONUS_monthly.nc"
                 )
# ccsm_rcp45 = xr.open_dataset(ccsm_rcp45_url)
clim_rcp85 = xr.open_dataset(clim_rcp85_url)

# clim_rcp45_url = ("http://thredds.northwestknowledge.net:8080/thredds/dodsC/agg_macav2metdata_tasmax_CCSM4_r6i1p1_rcp45_2006_2099_CONUS_monthly.nc")

# ccsm_rcp45_url = ("http://thredds.northwestknowledge.net:8080/thredds/"
#                   "dsC/agg_macav2metdata_pr_CCSM4_r6i1p1_rcp4"
#                   "006_2099_CONUS_monthly.nc"
# )

# ccsm_rcp85_url = ("http://thredds.northwestknowledge.net:8080/thredds/"
#                   "dsC/agg_macav2metdata_pr_CCSM4_r6i1p1_rcp85"
#                   "006_2099_CONUS_monthly.nc"
# )

# ccsm_rcp45_url = ("http://thredds.northwestknowledge.net:8080/thredds/"
#                   "dsC/agg_macav2metdata_pr_CCSM4_r6i1p1_rcp4
#                   "006_2099_CONUS_monthly.nc"
# )

# ccsm_rcp85_url = ("http://thredds.northwestknowledge.net:8080/thredds/"
#                   "dsC/agg_macav2metdata_pr_CCSM4_r6i1p1_rcp85"
#                   "006_2099_CONUS_monthly.nc"
# )

# ccsm_rcp45 = xr.open_dataset(ccsm_rcp45_url)
ccsm_rcp85 = xr.open_dataset(ccsm_rcp45_url)

# clip_ccsm_rcp45_da = clim_rcp45.rio.clip_box(select_grassland_gdf.geometry)
clip_ccsm_rcp85_da = clim_rcp85.rio.clip_box(select_grassland_gdf.geometry)
#clim_rcp85.rio.clip_box(miny = -105, minx= 36, maxx=43, maxy=-102)
clip_clim_rcp85_da
# # Select the latitude, longitude, and timeframe to subset the data to

# # Ensure your latitude value is between 25 and 50, and your longitude value is between 235 and 292
# # latitude = 35
# # longitude = 270
# start_date = '2036-01'
# end_date = '2040-12'

# # Select a lat / lon location that you wish to use to extract the data
# latitude = clim_rcp45.lat.values[300]
# longitude = clim_rcp45.lon.values[150]
# print("You selected the following x,y location:", longitude, latitude)

# clim_rcp45.rio.clip_box(
#     minx=36,
#     miny=-105,
#     maxx=43,
#     maxy=-102,
# )

OSError: [Errno -90] NetCDF: file not found: 'http://thredds.northwestknowledge.net:8080/thredds/dsC/agg_macav2metdata_pr_CCSM4_r6i1p1_rcp4006_2099_CONUS_monthly.nc'

In [44]:
# Import packages
import numpy as np
import netCDF4
import matplotlib.pyplot as plt
import xarray as xr
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import seaborn as sns

# Plotting options
sns.set(font_scale=1.3)
sns.set_style("white")

# Models to chose from
model_name = ('bcc-csm1-1',
              'bcc-csm1-1-m',
              'BNU-ESM',
              'CanESM2',
              'CCSM4',
              'CNRM-CM5',
              'CSIRO-Mk3-6-0',
              'GFDL-ESM2G',
              'GFDL-ESM2M',
              'HadGEM2-CC365',
              'HadGEM2-ES365',
              'inmcm4',
              'IPSL-CM5A-MR',
              'IPSL-CM5A-LR',
              'IPSL-CM5B-LR',
              'MIROC5',
              'MIROC-ESM',
              'MIROC-ESM-CHEM',
              'MRI-CGCM3',
              'NorESM1-M')

# These are the variable options for the met data
variable_name = ('tasmax',
                 'tasmin',
                 'rhsmax',
                 'rhsmin',
                 'pr',
                 'rsds',
                 'uas',
                 'vas',
                 'huss')

# These are var options in long form
var_long_name = ('air_temperature',
                 'air_temperature',
                 'relative_humidity',
                 'relative_humidity',
                 'precipitation',
                 'surface_downwelling_shortwave_flux_in_air',
                 'eastward_wind',
                 'northward_wind',
                 'specific_humidity')

# This is the base url required to download data from the thredds server.
dir_path = 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/'

# These are the variable options for the met data
variable_name = ('tasmax',
                 'tasmin',
                 'rhsmax',
                 'rhsmin',
                 'pr',
                 'rsds',
                 'uas',
                 'vas',
                 'huss')

# These are var options in long form
var_long_name = ('air_temperature',
                 'air_temperature',
                 'relative_humidity',
                 'relative_humidity',
                 'precipitation',
                 'surface_downwelling_shortwave_flux_in_air',
                 'eastward_wind',
                 'northward_wind',
                 'specific_humidity')

# Models to chose from
model_name = ('bcc-csm1-1',
              'bcc-csm1-1-m',
              'BNU-ESM',
              'CanESM2',
              'CCSM4',
              'CNRM-CM5',
              'CSIRO-Mk3-6-0',
              'GFDL-ESM2G',
              'GFDL-ESM2M',
              'HadGEM2-CC365',
              'HadGEM2-ES365',
              'inmcm4',
              'IPSL-CM5A-MR',
              'IPSL-CM5A-LR',
              'IPSL-CM5B-LR',
              'MIROC5',
              'MIROC-ESM',
              'MIROC-ESM-CHEM',
              'MRI-CGCM3',
              'NorESM1-M')

# Scenarios
scenario_type = ('historical', 'rcp45', 'rcp85')

# Year start and ends (historical vs projected)
year_start = ('1950', '2006', '2006')
year_end = ('2005', '2099', '2099')
run_num = [1] * 20
run_num[4] = 6  # setting CCSM4 with run 6
domain = 'CONUS'

# Model options between 0-19
model = 4
# Options 0-8 will work for var. Var maps to the variable name below
var = 4
# Options range from 0-2
scenario = 1

try: 
    print("Great! You have selected: \n \u2705 Variable: {} \n \u2705 Model: {}, "
      "\n \u2705 Scenario: {}".format(variable_name[var], 
                                      model_name[model],
                                      scenario_type[scenario]))
except IndexError as e:
    raise IndexError("Oops, it looks like you selected value that is "
                     "not within the range of values which is 0-2. please look"
                     "closely at your selected values.")
    
try:
    time = year_start[scenario]+'_' + year_end[scenario]
    print("\u2705 Your selected time period is:", time)
except IndexError as e:
    raise IndexError("Oops, it looks like you selected a scenario value that is \
                     not within the range of values which is 0-2")
    
# This code creates a path to the monthly MACA v2 data
file_name = ('agg_macav2metdata_' +
             str(variable_name[var]) +
             '_' +
             str(model_name[model]) +
             '_r' +
             str(run_num[model])+'i1p1_' +
             str(scenario_type[scenario]) +
             '_' +
             time + '_' +
             domain + '_monthly.nc')

print("\u2705 You are accessing:\n", file_name, "\n data in netcdf format")

full_file_path = dir_path + file_name
print("The full path to your data is: \n", full_file_path)

# Open the data from the thredds server
try:
    max_temp_xr = xr.open_dataset(full_file_path)
except OSError as oe:
    print("Oops, it looks like the file that you are trying to connect to, "
          "{}, doesn't exist. Try to revisit your model options to ensure "
          "the data exist on the server.  ".format(full_file_path))
    
max_temp_xr

Great! You have selected: 
 ✅ Variable: pr 
 ✅ Model: CCSM4, 
 ✅ Scenario: rcp45
✅ Your selected time period is: 2006_2099
✅ You are accessing:
 agg_macav2metdata_pr_CCSM4_r6i1p1_rcp45_2006_2099_CONUS_monthly.nc 
 data in netcdf format
The full path to your data is: 
 http://thredds.northwestknowledge.net:8080/thredds/dodsC/agg_macav2metdata_pr_CCSM4_r6i1p1_rcp45_2006_2099_CONUS_monthly.nc


In [48]:

# Define urls for Climate Data
CCSM4_rcp45_pr_url = (
    "http://thredds.northwestknowledge.net:8080/thredds/dodsC/"
    "agg_macav2metdata_pr_CCSM4_r6i1p1_rcp45_2006_2099_CONUS_monthly.nc"
)
MIROC5_rcp45_pr_url = (
    "http://thredds.northwestknowledge.net:8080/thredds/dodsC/"
    "agg_macav2metdata_pr_MIROC5_r1i1p1_rcp45_2006_2099_CONUS_monthly.nc"
)

try:
    CCSM4_rcp45_pr_da = xr.open_dataset(CCSM4_rcp45_pr_url)
    MIROC5_rcp45_pr_da = xr.open_dataset(MIROC5_rcp45_pr_url)
except OSError as oe:
    print("Oops, it looks like the file that you are trying to connect to, "
          "{}, doesn't exist. Try to revisit your model options to ensure "
          "the data exist on the server.  ".format(full_file_path))
    
MIROC5_rcp45_pr_da

In [54]:
MIROC5_rcp45_pr_clip_da = MIROC5_rcp45_pr_da.rio.clip_box(

NoDataInBounds: No data found in bounds. Data variable: precipitation

### Data Citations
NASA JPL (2013). NASA Shuttle Radar Topography Mission Global 1 arc second [Data set]. NASA EOSDIS Land Processes Distributed Active Archive Center. Accessed 2023-12-05 from https://doi.org/10.5067/MEaSUREs/SRTM/SRTMGL1.003

Hegewisch, K.C. and Abatzoglou, J.T.. 'Data Download' web tool. Climate Toolbox (https://climatetoolbox.org/), version 2022-04-24.
