# River bifurcation in CONUS workflow
This notebook contains the workflow necessary to extract data from a HUC4 and join it to NABD for bifurcation analysis.

## 1. Install packages

In [1]:
# import sys
# !{sys.executable} -m pip install geofeather
# !{sys.executable} -m pip install nhdnet  #see Setup info document 

## 2. Load modules

In [1]:
#basic analysis 
from pathlib import Path
import os
from time import time
import geopandas as gp
import geofeather
import numpy as np
import pandas as pd


#modules from SARP analysis
from geofeather import to_geofeather
from nhdnet.nhd.extract import extract_flowlines 
# from nhdnet.nhd.extract_test import extract_flowlines_R  # this function was created by Rachel to extract info from other NHD
# from nhdnet.nhd.extract import extract_waterbodies
from nhdnet.io import serialize_df, serialize_sindex, to_shp 

#Getting the other NHD 
# from nhdnet.nhd.download import download_huc4
# nhd_dir = Path("data/nhd/source/huc4")


# #pull data from Google Drive
# from pydrive.auth import GoogleAuth
# from pydrive.drive import GoogleDrive

# gauth = GoogleAuth()
# gauth.LocalWebserverAuth() # client_secrets.json need to be in the same directory as the script
# drive = GoogleDrive(gauth)

## 3. Initial setup and constants

In [2]:
#Select HUC of interest
HUC2 = 10
i = 19
HUC4 = "{0}{1:02d}".format(HUC2, i)  # this formats the HUC4 name how we want it. ':02d' is string formatting
print(HUC4)
print(type(HUC4))
huc_id = int(HUC4) * 1000000   # the full HUC4 ID
print(huc_id)

data_dir = Path("data/nhd/source/huc4")  # point to where GDBs are
# data_dir = Path("/Volumes/GoogleDrive/My Drive/Condon_Research_Group/Research_Projects/Rachel/Research/GIS/Layers/NHDPlusNationalData")  # point to where GDBs are

#Setting projections
CRS = {           # Using USGS CONUS Albers (EPSG:102003): https://epsg.io/102003  WHY?
    "proj": "aea",
    "lat_1": 29.5,
    "lat_2": 45.5,
    "lat_0": 37.5,
    "lon_0": -96,
    "x_0": 0,
    "y_0": 0,
    "datum": "NAD83",
    "units": "m",
    "no_defs": True,
}
print(data_dir)

1019
<class 'str'>
1019000000
data/nhd/source/huc4


## 4. Read in the geodatabase

In [3]:
# sys.path.append('/Users/rachelspinti/Documents/River_bifurcation/data/nhd/source/huc4/1019') #call where these scripts are located

gdb = data_dir/HUC4/ "NHDPLUS_H_{HUC4}_HU4_GDB.gdb".format(HUC4=HUC4)
print(gdb)
read_start = time()
flowlines, joins = extract_flowlines(gdb, target_crs=CRS)
print("Read {:,} flowlines in  {:.0f} seconds".format(len(flowlines), time() - read_start))

# gdb = data_dir/"NHDPlusV21_National_Seamless_Flattened_Lower48.gdb"
# print(gdb)
# read_start = time()
# flowlines = extract_flowlines_R(gdb, target_crs=CRS)
# print("Read {:,} flowlines in  {:.0f} seconds".format(len(flowlines), time() - read_start))

# gdb = data_dir/ "NHDPlusV21_National_Seamless_Flattened_Lower48.gdb".format(HUC4=HUC4)
# print(gdb)
# read_start = time()
# flowlines, joins = extract_flowlines(gdb, target_crs=CRS)
# print("Read {:,} flowlines in  {:.0f} seconds".format(len(flowlines), time() - read_start))

flowlines.head(3)

data/nhd/source/huc4/1019/NHDPLUS_H_1019_HU4_GDB.gdb
Reading flowlines
Columns= <bound method NDFrame.head of            NHDPlusID  FlowDir  FType   GNIS_ID             GNIS_Name  \
0       2.300190e+13        1    460      None                  None   
1       2.300190e+13        1    460      None                  None   
2       2.300190e+13        1    460      None                  None   
3       2.300190e+13        1    460      None                  None   
4       2.300190e+13        1    460      None                  None   
...              ...      ...    ...       ...                   ...   
232506  2.300190e+13        1    460      None                  None   
232507  2.300190e+13        1    460  00184959  West Toll Gate Creek   
232508  2.300190e+13        1    334      None                  None   
232509  2.300190e+13        1    558      None                  None   
232510  2.300190e+13        1    336  00203030      Burlington Ditch   

                         

Unnamed: 0_level_0,NHDPlusID,FType,GNIS_ID,GNIS_Name,geometry,ReachCode,lineID,streamorder,TotDASqKm,sizeclass,length,sinuosity
NHDPlusID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
23001900008851,23001900008851,460,,,"LINESTRING (-800081.762 215515.782, -800082.87...",10190001010972,1,1,0.1164,1a,486.965546,1.082492
23001900008863,23001900008863,460,,,"LINESTRING (-799823.525 221309.940, -799829.56...",10190002021576,2,1,0.0245,1a,81.604347,1.00827
23001900008864,23001900008864,460,,,"LINESTRING (-800259.206 220461.613, -800252.86...",10190002022431,3,1,0.103,1a,480.92453,1.016657


### 4.1 Read in the HUC4 shapefile (1019) I made 

In [4]:
huc_test = (gp.read_file('/Users/rachelspinti/Desktop/HUC_test/Test1029.shp')) # this is actually HUC 1019
huc_test = huc_test.rename(columns={"REACHCODE": "ReachCode"})
huc_test.ReachCode = huc_test.ReachCode.astype("uint64")
# print(huc_test)
type(huc_test.ReachCode)

pandas.core.series.Series

In [5]:
huc_test.head(3)
flowlines.head(3)

Unnamed: 0_level_0,NHDPlusID,FType,GNIS_ID,GNIS_Name,geometry,ReachCode,lineID,streamorder,TotDASqKm,sizeclass,length,sinuosity
NHDPlusID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
23001900008851,23001900008851,460,,,"LINESTRING (-800081.762 215515.782, -800082.87...",10190001010972,1,1,0.1164,1a,486.965546,1.082492
23001900008863,23001900008863,460,,,"LINESTRING (-799823.525 221309.940, -799829.56...",10190002021576,2,1,0.0245,1a,81.604347,1.00827
23001900008864,23001900008864,460,,,"LINESTRING (-800259.206 220461.613, -800252.86...",10190002022431,3,1,0.103,1a,480.92453,1.016657


## 5. Join the two datasets
Check this link out for help: https://www.earthdatascience.org/courses/use-data-open-source-python/intro-vector-data-python/vector-data-processing/spatial-joins-in-python-geopandas-shapely/

See also: https://geopandas.org/mergingdata.html

https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html

In [None]:
# Attribute join with geopandas
flowlines = flowlines.merge(huc_test, on='ReachCode')
flowlines.head(4)  #check the join
# print(flowlines.COMID)
flowlines[flowlines.streamorder>4].plot()

<matplotlib.axes._subplots.AxesSubplot at 0x124e48250>

In [8]:
len(flowlines)

81434

## 6. Join NHD and NABD

In [7]:
# Read in NABD shapefile and join with NHD
nabd = gp.read_file('/Users/rachelspinti/Documents/River_bifurcation/data/nabd/nabd_fish_barriers_2012.shp')
nabd = nabd.merge(flowlines, on='COMID')   #attribute join with geopandas
nabd.head(4)  #check the join

Unnamed: 0,COMMENT,NIDID,COMID,UNIQUE_STR,newX,newY,RecordID,Dam_name,Dam_former,STATEID,...,VC_12,QE_12,VE_12,LakeFract,SurfArea,RAreaHLoad,RPUID,VPUID,Enabled,geometry_y
0,1,CO00307,188119.0,3670,-105.113836,39.685764,5615.0,SMITH,,75311,...,0.54933,0.046,0.54933,,,,10c,10L,1,"LINESTRING Z (-105.11396 39.68577 0.00000, -10..."
1,1,CO00297,188127.0,3671,-105.104427,39.683964,5608.0,KENDRICK,,95223,...,0.59469,0.068,0.59469,,,,10c,10L,1,"LINESTRING Z (-105.10460 39.68396 0.00000, -10..."
2,1,CO00297,188127.0,3671,-105.104427,39.683964,5608.0,KENDRICK,,95223,...,0.59469,0.068,0.59469,,,,10c,10L,1,"LINESTRING Z (-105.10460 39.68396 0.00000, -10..."
3,1,CO00297,188127.0,3671,-105.104427,39.683964,5608.0,KENDRICK,,95223,...,0.59469,0.068,0.59469,,,,10c,10L,1,"LINESTRING Z (-105.10460 39.68396 0.00000, -10..."


In [9]:
len(nabd)

2073

## Getting information about what came out of this

First for the flowlines -- this is a geodataframe with the flowline geometry. Comes from *flowlines.py*

In [None]:
type(flowlines)
flowlines.head(3)
# print(flowlines.describe)
# flowlines.plot()
# print(flowlines.shape)
# print(list(flowlines.columns))
# flowlines[flowlines.streamorder>6]
# flowlines[flowlines.streamorder>6].plot()