In [None]:
from datetime import datetime
import logging
import math
import os
from pathlib import Path
from zipfile import ZipFile

import arcpy
from arcgis.gis import GIS
from arcgis.features import FeatureSet
from dotenv import find_dotenv, load_dotenv
import requests
from tqdm import tqdm

# location of the zipped national high-resolution NHD file geodatabase
url_zip = 'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHD/National/HighResolution/GDB/NHD_H_National_GDB.zip'

# pull environment variables in from the nearest .env file
load_dotenv(find_dotenv())

# project directory layout (alternative root: Path(os.path.abspath('./')).parent)
dir_root = Path(r'Z:\projects\water-reach-tools')
dir_data = dir_root.joinpath('data')
dir_raw_data = dir_data.joinpath('raw')
dir_int_data = dir_data.joinpath('interim')
gdb_int_data = dir_int_data.joinpath('interim.gdb')

# resources derived from the archive name (the last segment of the url)
name_zip = url_zip.rsplit('/', 1)[-1]
zip_file = dir_raw_data.joinpath(name_zip)
# the geodatabase is named after the archive, minus everything from the first dot on
nhd_gdb = dir_int_data.joinpath(name_zip.partition('.')[0] + '.gdb')
nhd_flowline = nhd_gdb.joinpath('NHDFlowline')

# configure logging: everything at DEBUG and above goes to both the notebook
# output (stream handler) and a log file in the interim data directory
log_lvl = logging.DEBUG
log_fmt = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')

logger = logging.getLogger('nhd_download')
logger.setLevel(log_lvl)

# getLogger returns the same object every time, so re-running this cell would
# stack a new pair of handlers on it and duplicate every message; start clean
for _old_handler in list(logger.handlers):
    logger.removeHandler(_old_handler)
    _old_handler.close()

log_ch = logging.StreamHandler()
log_ch.setFormatter(log_fmt)
log_ch.setLevel(log_lvl)
logger.addHandler(log_ch)

# FileHandler opens the file immediately and fails if the directory is missing
dir_int_data.mkdir(parents=True, exist_ok=True)
log_fl = logging.FileHandler(dir_int_data/'nhd_download.log')
log_fl.setFormatter(log_fmt)
log_fl.setLevel(log_lvl)
logger.addHandler(log_fl)


def _download_file(url, output_filename, chunk_size=10240):  # 10 KiB default
    """
    Helper function to stream a large file to disk while showing a tqdm progress bar.

    :param url: Address of the file to download.
    :param output_filename: Path the downloaded bytes are written to (opened in 'wb' mode).
    :param chunk_size: Number of bytes requested per streamed chunk.
    :return: The output_filename that was passed in.
    :raises requests.HTTPError: If the server answers with an error status.
    """

    # create a request get instance to use for streaming
    with requests.get(url, stream=True) as get:

        # throw any errors before touching the output file
        get.raise_for_status()

        # size the progress bar from the response actually being streamed; a separate
        # HEAD request does not follow redirects and may not report a length at all
        content_length = get.headers.get('Content-Length')
        if content_length:
            iteration_count = math.ceil(int(content_length) / chunk_size)
        else:
            # unknown size - tqdm still counts chunks, just without a total
            iteration_count = None

        # create a place to write the file
        with open(output_filename, 'wb') as file:

            # iteratively download the chunks to get the whole file
            for chunk in tqdm(get.iter_content(chunk_size=chunk_size), leave=False, total=iteration_count):

                # filter out keep-alive new chunks and write the good stuff to the output file
                if chunk:
                    file.write(chunk)

    return output_filename

In [2]:
# connect to the GIS using the url and credentials held in environment variables
gis = GIS(
    os.environ.get('GIS_URL'),
    username=os.environ.get('GIS_USERNAME'),
    password=os.environ.get('GIS_PASSWORD'),
)
gis

In [None]:
# download the archive and save it to the raw data directory
logger.info(f'Starting to download {name_zip}')
# make sure the destination directory exists before the file is opened for writing
zip_file.parent.mkdir(parents=True, exist_ok=True)
_download_file(url_zip, zip_file)
logger.info(f'Saved to {zip_file}.')

In [8]:
# rebuild the location of the downloaded archive from the url
name_zip = url_zip.rsplit('/', 1)[-1]
zip_file = dir_raw_data.joinpath(name_zip)

# unpack every member of the archive into the interim data directory
logger.debug(f'Starting to unzip {name_zip}.')
with ZipFile(zip_file) as archive:
    archive.extractall(dir_int_data)
logger.info('Completed unzipping.')

In [None]:
# load the flowline feature class into an arcpy FeatureSet
fs_arcpy = arcpy.FeatureSet()
fs_arcpy.load(str(nhd_flowline))

# convert to an ArcGIS API for Python FeatureSet by way of the JSON representation;
# the FeatureSet constructor expects a list of features, so the JSON text has to go
# through from_json rather than being passed to the constructor directly
flowline_fs = FeatureSet.from_json(fs_arcpy.JSON)