In [None]:
#Import Libraries
import io
import math 
import os
import pathlib
from glob import glob

import earthpy as et
import earthpy.appeears as etapp
import earthpy.earthexplorer as etee
import geopandas as gpd
import hvplot.xarray  
import hvplot.pandas
import matplotlib.pyplot as plt
import pandas as pd
import rioxarray as rxr
import rioxarray.merge as rxrmerge
import requests
import xarray as xr
import zipfile
from shapely.geometry import box
from xrspatial import slope

In [None]:
# Create the project data directory. exist_ok=True makes this safe to
# re-run and avoids the check-then-create race of a separate exists() test.
data_dir = os.path.join(et.io.HOME, et.io.DATA_NAME, 'final')
os.makedirs(data_dir, exist_ok=True)

# Make data directory the working directory
os.chdir(data_dir)

# EPSG code of the projected CRS used for the analysis
# (EPSG:32613 is WGS 84 / UTM zone 13N)
utm = 32613

In [None]:
# Code adapted from: https://medium.com/@loldja/reading-shapefile-zips-from-a-url-in-python-3-93ea8d727856
# URL of the zipped USFS National Grassland shapefile
grassland_url = ('https://data.fs.usda.gov/geodata/edw/'
                 'edw_resources/shp/S_USA.NationalGrassland.zip'
)
print('Downloading shapefile...')

# Request data from url. The timeout stops the cell from hanging forever
# on a dead connection, and raise_for_status() surfaces HTTP errors here
# instead of as a confusing BadZipFile when the error page is unzipped.
grassland_request = requests.get(grassland_url, timeout=60)
grassland_request.raise_for_status()
grassland_zip = zipfile.ZipFile(io.BytesIO(grassland_request.content))
print("Done")

# Extract all files from the zip archive into the
# 'national-grassland' sub-directory of the data directory
grassland_zip.extractall(
    path=os.path.join(data_dir, 'national-grassland')
    )

In [None]:
# Read the extracted National Grassland shapefile into a GeoDataFrame
grassland_shp_path = os.path.join(
    data_dir, 'national-grassland', 'S_USA.NationalGrassland.shp'
)
grassland_gdf = gpd.read_file(grassland_shp_path)

# Index by grassland name and keep only the two grasslands of interest
# (rows come back in the order the names are listed here)
grassland_names = ['Comanche National Grassland', 'Pawnee National Grassland']
select_grassland_gdf = grassland_gdf.set_index('GRASSLANDN').loc[grassland_names]

select_grassland_gdf

In [None]:
def _polaris_tile_edges(lower, upper):
    """Return the integer lower edges of the 1-degree tiles spanning [lower, upper]."""
    first = math.floor(lower)
    # Guarantee at least one tile even when the extent collapses to a
    # single coordinate that falls exactly on a whole degree.
    last = max(math.ceil(upper), first + 1)
    return range(first, last)


def _download_polaris_tile(url, destination):
    """Download one POLARIS tile to ``destination``, raising on HTTP errors."""
    response = requests.get(url, allow_redirects=True, timeout=120)
    # Fail before anything is written: otherwise an HTTP error page would
    # be saved under a .tif name and treated as "already downloaded" on
    # every later run.
    response.raise_for_status()
    with open(destination, 'wb') as tile_file:
        tile_file.write(response.content)


def get_polaris_data(data_directory, input_gdf, index_col_name):
    """
    Download POLARIS soil pH tiles covering each row of a geodataframe
    and merge them into one raster per row.

    The tiles requested are the 'ph' / 'mean' / '60_100' product of
    POLARIS v1.0 (in POLARIS naming: mean soil pH for the 60-100 cm
    depth layer). Downloaded tiles are saved in an individual folder
    for each row under "soil_data". The merged raster for each row is
    saved in the "soil_data/Merged_tifs" subfolder. Tiles and merged
    rasters that already exist on disk are reused, not re-created.

    Parameters
    ----------
    data_directory : path
        The path to the data directory. A "soil_data" sub-directory will
        be created within this directory for the soil data downloads.

    input_gdf: geopandas.GeoDataFrame
        A geodataframe that contains the areas of interest. Polaris soil
        data will be downloaded according to the bounds of each row in
        the dataframe. The bounds are used directly as degrees of
        longitude/latitude to pick 1-degree tiles, so the geodataframe
        is expected to be in a geographic (lon/lat) CRS.

    index_col_name: string
        A string containing the name of the geoDataFrame column that
        should be used as the index. This index will be used to name
        output files (spaces are replaced by underscores).

    Returns
    ----------
    output_lst: list of data arrays
        Returns a list of data arrays representing the merged Polaris
        tif data for each row in the input geodataframe, in row order.

    Raises
    ----------
    requests.HTTPError
        If a tile download returns an HTTP error status. No file is
        written for the failed tile.
    """

    # Set index to specified index column
    input_gdf = input_gdf.reset_index().set_index(index_col_name)

    # Bounds (minx, miny, maxx, maxy) of each gdf row
    bound_pd = input_gdf.bounds

    # Template for polaris url path
    polaris_template_url = 'http://hydrology.cee.duke.edu/POLARIS/PROPERTIES/v1.0/' \
                '{0}/{1}/{2}/lat{3}{4}_lon{5}{6}.tif'
    # Template for tile file names
    polaris_template_name = 'mean_ph_lat{0}{1}_lon{2}{3}.tif'
    # Template for merged raster file names
    merge_template_name = '{0}_merged_da.tif'

    # Create sub-directory for soil data and, inside it, the sub-folder
    # for merged rasters (makedirs creates both levels)
    soil_dir = os.path.join(data_directory, "soil_data")
    soil_merged_dir = os.path.join(soil_dir, "Merged_tifs")
    os.makedirs(soil_merged_dir, exist_ok=True)

    file_list = []

    # Loop through each row in the boundary dataframe
    for row in bound_pd.itertuples():
        ind = row.Index
        print("\n", ind, ":")
        row_label = str(ind).replace(" ", "_")

        # Create sub-folder for this row's tiles
        soil_row_dir = os.path.join(soil_dir, row_label)
        os.makedirs(soil_row_dir, exist_ok=True)

        # For each 1-degree tile in the extent, download file if missing
        for lat in _polaris_tile_edges(row.miny, row.maxy):
            for lon in _polaris_tile_edges(row.minx, row.maxx):
                # Define url for each data file in extent
                url = polaris_template_url.format(
                    'ph', 'mean', '60_100',
                    str(lat), str(lat + 1), str(lon), str(lon + 1)
                )
                # Define file name for each data file in extent
                file_name = polaris_template_name.format(
                    str(lat), str(lat + 1), str(lon), str(lon + 1)
                )
                # Check if tif file is in directory. Download if not.
                file_name_path = os.path.join(soil_row_dir, file_name)
                if not os.path.exists(file_name_path):
                    print(file_name, "does not exist. Downloading file")
                    _download_polaris_tile(url, file_name_path)
                else:
                    print(file_name, "is already downloaded")

        # Merge arrays if the merged file does not exist
        merged_path = os.path.join(
            soil_merged_dir, merge_template_name.format(row_label)
        )
        if not os.path.exists(merged_path):
            print("Soil data is being merged.")
            # Sorted so the merge order does not depend on filesystem order
            tif_paths = sorted(glob(os.path.join(soil_row_dir, '*.tif')))
            das = [rxr.open_rasterio(tif, masked=True) for tif in tif_paths]
            try:
                merged_da = rxrmerge.merge_arrays(das)
                merged_da.rio.to_raster(merged_path)
            finally:
                # Release the tile file handles once the merge is written
                for tile_da in das:
                    tile_da.close()
        else:
            print("A merged soil data array already exists.")

        # Add file to list of tif files to return
        file_list.append(merged_path)

    # Open each merged raster and drop the length-1 dimensions
    output_lst = [rxr.open_rasterio(tif, masked=True).squeeze() for tif in file_list]
    return output_lst

# One merged pH raster is returned per row of select_grassland_gdf, in row
# order, so the unpacking below relies on Comanche being listed before Pawnee.
comanche_pH_da, pawnee_pH_da = get_polaris_data(
    data_directory=data_dir,
    input_gdf=select_grassland_gdf,
    index_col_name="GRASSLANDN",
)

In [None]:
# Reproject the grassland boundaries to the UTM CRS used for analysis
select_grassland_utm_gdf = select_grassland_gdf.to_crs(utm)


def _reproject_and_clip(soil_da, grassland_name):
    """Reproject a soil raster to UTM and clip it to one grassland's bounding box.

    The box is the total bounds (minx, miny, maxx, maxy) of every row of
    select_grassland_utm_gdf indexed by grassland_name, so it also works
    when a name matches more than one row.
    """
    clip_bounds = select_grassland_utm_gdf.loc[[grassland_name]].total_bounds
    return soil_da.rio.reproject(utm).rio.clip_box(*clip_bounds)


# Clip files to bounds
# NOTE: these assignments overwrite their inputs, so re-running this cell
# reprojects and clips the already-processed rasters again.
pawnee_pH_da = _reproject_and_clip(pawnee_pH_da, 'Pawnee National Grassland')
comanche_pH_da = _reproject_and_clip(comanche_pH_da, 'Comanche National Grassland')