In [1]:
from rasterstats import zonal_stats
import fiona
import geopandas as gpd
import os
import pandas as pd
import numpy as np

import itertools
import multiprocess

# Project root that every data path in this notebook is built from.
# The default is the original Google Drive mount; set the CHAPTER2_HOME
# environment variable to run the notebook on another machine without
# editing this cell. An unset or empty variable falls back to the default.
home = os.environ.get("CHAPTER2_HOME") or "/Volumes/GoogleDrive/My Drive/Chapter2_mechanisms_forest_water_cycling"

In [2]:
# Path to the shapefile with all headwater catchments (nothing is read here).
headwater_dir = os.path.join(home, "Data", "Catchments", "Headwater")
shp = os.path.join(headwater_dir, "headwater_catchments_sbr_shp.shp")

In [3]:
# NLCD permanent forest: keep headwater catchments that are 100% forested and
# whose AreaSqKm_x is at least 0.025 (an area >= 0.025 gives a headwater
# catchment at least 5 ecostress pixels).
shape_gdf = gpd.read_file(shp)
nlcd_csv = os.path.join(home, "Data", "nlcd_permanent_forest", "headwater_pct_forest.csv")
perm_forest_pd = pd.read_csv(nlcd_csv)
nlcd_pf = shape_gdf.merge(
    perm_forest_pd[['percent_forest', 'NHDPlusID']],
    on="NHDPlusID",
    how='left',
)
# Catchments with no match in the CSV get NaN from the left merge; NaN fails
# the equality test, so those rows are dropped by the same mask.
nlcd_fully_forested = nlcd_pf['percent_forest'] == 1
nlcd_large_enough = nlcd_pf['AreaSqKm_x'] >= 0.025
nlcd_pf = nlcd_pf[nlcd_fully_forested & nlcd_large_enough]
nlcd_pf.shape

(21271, 6)

In [4]:
# Same selection as for NLCD, but based on the LCMAP permanent-forest table,
# so the two products can be compared.
shape_gdf = gpd.read_file(shp)
lcmap_csv = os.path.join(home, "Data", "lcmap_permanent_forest", "lcmap_headwater_pct_forest.csv")
lcmap = pd.read_csv(lcmap_csv)

# Denominator: row-wise sum over every column except the first and the last.
# NOTE(review): presumably these are the per-class counts and '36.0' is the
# permanent-forest class - confirm against the CSV layout.
lcmap_row_total = lcmap.iloc[:, 1:-1].sum(axis=1)
lcmap['percent_forest'] = lcmap['36.0'] / lcmap_row_total

# Unfiltered merge result, kept under its own name.
lcmap_all = shape_gdf.merge(
    lcmap[['percent_forest', 'NHDPlusID']],
    on="NHDPlusID",
    how='left',
)

# A looser cut-off (percent_forest >= 0.98) was tried and left disabled;
# the strict 100% criterion is the one in use.
lcmap_fully_forested = lcmap_all['percent_forest'] == 1
lcmap_large_enough = lcmap_all['AreaSqKm_x'] >= 0.025
lcmap_pf = lcmap_all[lcmap_fully_forested & lcmap_large_enough]
lcmap_pf.shape

(30045, 6)

In [6]:
# Write the LCMAP-merged headwater catchments (lcmap_all) to a shapefile.
lcmap_all_path = os.path.join(home, "Data", "Catchments", "Headwater", "headwater_catchments_lcmap.shp")
lcmap_all.to_file(lcmap_all_path)

  pd.Int64Index,
  lcmap_all.to_file(os.path.join(home, "Data", "Catchments", "Headwater", "headwater_catchments_lcmap.shp"))


In [7]:
# LCMAP is the preferred source: we trust it more, it is available for more
# dates, and it leaves more catchments to work with. Compare the two
# selections by catchment ID to see how much they overlap.
nlcd_ids = nlcd_pf['NHDPlusID'].to_numpy()
lcmap_ids = lcmap_pf['NHDPlusID'].to_numpy()

# Sets give constant-time membership checks.
lcmap_id_set = set(lcmap_ids)
nlcd_id_set = set(nlcd_ids)

# NLCD-selected IDs, split by whether LCMAP selected them as well.
nlcd_found_in_lcmap = np.array([nhd_id in lcmap_id_set for nhd_id in nlcd_ids], dtype=bool)

nlcd_in_lcmap = nlcd_ids[nlcd_found_in_lcmap]
print('nlcd in lcmap = ', len(nlcd_in_lcmap))

nlcd_notin_lcmap = nlcd_ids[~nlcd_found_in_lcmap]
print('nlcd NOT in lcmap = ', len(nlcd_notin_lcmap))

# LCMAP-selected IDs that the NLCD selection does not contain.
lcmap_absent_from_nlcd = np.array([nhd_id not in nlcd_id_set for nhd_id in lcmap_ids], dtype=bool)
lcmap_notin_nlcd = lcmap_ids[lcmap_absent_from_nlcd]
print('lcmap NOT in nlcd = ', len(lcmap_notin_nlcd))


nlcd in lcmap =  16777
nlcd NOT in lcmap =  4494
lcmap NOT in nlcd =  13268


In [9]:
# Final selection used from here on: the LCMAP 100% forested headwater
# catchments that are >= 0.025 km2. Save them as a shapefile.
perm_forest_path = os.path.join(home, "Data", "Catchments", "Headwater", "headwater_catchments_perm_forest.shp")
lcmap_pf.to_file(perm_forest_path)

  pd.Int64Index,
  lcmap_pf.to_file(os.path.join(home, "Data", "Catchments", "Headwater", "headwater_catchments_perm_forest.shp"))


In [10]:
# Reproject the selected catchments to EPSG:32617 and save the result under
# a file name that carries the EPSG code.
lcmap_pf_32617 = lcmap_pf.to_crs("EPSG:32617")
perm_forest_32617_path = os.path.join(
    home, "Data", "Catchments", "Headwater", "headwater_catchments_perm_forest_32617.shp"
)
lcmap_pf_32617.to_file(perm_forest_32617_path)

  pd.Int64Index,
  lcmap_pf_32617.to_file(os.path.join(home, "Data", "Catchments", "Headwater", "headwater_catchments_perm_forest_32617.shp"))


In [5]:
# Read the EPSG:32617 permanent-forest catchment shapefile back from disk.
perm_forest_32617_shp = os.path.join(
    home, "Data", "Catchments", "Headwater", "headwater_catchments_perm_forest_32617.shp"
)
ugh = gpd.read_file(perm_forest_32617_shp)

In [9]:
# Mean of the AreaSqKm_x column over the catchments held in `ugh`.
np.mean(ugh["AreaSqKm_x"])

0.1301430787458807

### Assess permanent forest cover in the reference watersheds

In [8]:
# Compute a per-gage permanent-forest fraction for the reference watersheds
# and save it as a two-column CSV (GAGE_ID, percent_forest).

# Paths to the reference-watershed shapefile and the LCMAP permanent-forest
# raster; neither file is read in this cell.
shp = os.path.join(home, "Data", "Catchments", "Reference", "gages_ii",  "reference_keep_4326.shp")
tif = os.path.join(home, "Data", "lcmap_permanent_forest", "lcmap_permanent_forest.tif")
# The CSV is read through geopandas rather than pandas.
# NOTE(review): the next two steps suggest the header line arrives as the
# first data row and that extra leading/trailing columns are present -
# confirm, and consider whether pd.read_csv would load this file directly.
lcmap_rw = gpd.read_file(os.path.join(home, "Data", "lcmap_permanent_forest", "lcmap_reference_watersheds.csv"))
# Use the values of the first row as the column names.
lcmap_rw = lcmap_rw.rename(columns=lcmap_rw.iloc[0])
# Drop that first row, and drop the first and last columns.
lcmap_rw = lcmap_rw.iloc[1:,1:lcmap_rw.shape[1]-1]
# Convert every object-dtype column to numbers; unparseable values become NaN.
# NOTE(review): if GAGE_ID is among these columns, any leading zeros in the
# gage identifiers are lost by this conversion - verify this is acceptable.
cols = lcmap_rw.columns[lcmap_rw.dtypes.eq('object')]
lcmap_rw[cols] = lcmap_rw[cols].apply(pd.to_numeric, errors='coerce')
# Treat missing values as 0.
lcmap_rw = lcmap_rw.replace(np.nan, 0)
# percent_forest = the '36.0' column divided by the row sum of every column
# except the last one.
# NOTE(review): presumably the last column at this point is GAGE_ID, so the
# sum covers only the count columns - confirm the column order.
lcmap_rw['percent_forest'] = lcmap_rw['36.0'] / lcmap_rw.iloc[:,0:(lcmap_rw.shape[1]-1)].sum(axis=1)
# Write only the gage ID and forest fraction, without the DataFrame index.
lcmap_rw[['GAGE_ID', 'percent_forest']].to_csv(os.path.join(home, "Data", "lcmap_permanent_forest", "lcmap_perm_forest_reference_watersheds.csv"), index=False)

In [14]:
# Display the reference-watershed table, including the percent_forest column.
lcmap_rw

Unnamed: 0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,29.0,30.0,31.0,32.0,33.0,34.0,35.0,36.0,GAGE_ID,percent_forest
1,65127,74.0,57.0,28.0,70.0,82.0,70.0,132.0,24.0,76.0,...,297.0,429,638.0,875.0,726,1123,1122.0,320925,2053800,0.812382
2,141181,515.0,224.0,253.0,250.0,143.0,139.0,255.0,163.0,399.0,...,1927.0,1968,2363.0,2258.0,2803,2875,2342.0,249723,2056900,0.596422
3,65016,278.0,122.0,139.0,348.0,239.0,418.0,767.0,301.0,345.0,...,1051.0,1479,1508.0,2336.0,2415,2251,1890.0,220474,2069700,0.713395
4,65800,305.0,310.0,276.0,606.0,337.0,301.0,449.0,271.0,687.0,...,4217.0,4671,7420.0,8530.0,7571,6489,6110.0,247535,2070000,0.65487
5,8742,30.0,16.0,19.0,12.0,51.0,46.0,54.0,72.0,29.0,...,258.0,412,597.0,636.0,846,831,921.0,166214,2111180,0.907054
6,41019,172.0,96.0,163.0,149.0,230.0,334.0,445.0,208.0,263.0,...,1966.0,2970,3226.0,3298.0,3774,3368,3434.0,250344,2111500,0.770043
7,63992,224.0,264.0,341.0,266.0,541.0,666.0,806.0,318.0,368.0,...,3271.0,3617,4201.0,3991.0,3726,4083,4113.0,339273,2112120,0.757504
8,51235,224.0,147.0,215.0,540.0,228.0,252.0,725.0,256.0,183.0,...,2029.0,2430,3001.0,3006.0,2466,2933,2497.0,205339,2112360,0.717813
9,202160,1220.0,671.0,714.0,802.0,939.0,1642.0,1793.0,798.0,1360.0,...,5613.0,4985,5796.0,5990.0,5399,6286,5576.0,279573,2118500,0.503558
10,51667,168.0,175.0,216.0,109.0,164.0,136.0,260.0,236.0,209.0,...,1192.0,1543,2129.0,2419.0,2153,2227,2702.0,378831,2137727,0.834897
