In [1]:
%run set_up.py 

import boto3
import py7zr
import re
import requests
import shutil
from urllib.parse import urlparse
from zipfile import ZipFile

In [15]:
def download_and_extract_zip(in_url, out_dir, out_zip_name=None):
    """
    Download a zip or 7z archive (unless it is already on disk) and extract it.

    Parameters:
    in_url (str): URL to download the archive from
    out_dir (str): Directory to save the archive in and extract it to
    out_zip_name (str, optional): File name to save the archive under.
        If None, the last path component of in_url is used and must end
        in .zip or .7z.

    Returns:
    str: out_dir joined with the parent directory of the first archive
        member (joined with '' if the archive has no members)

    Raises:
    ValueError: if out_zip_name is None and no .zip/.7z file name can be
        taken from the URL
    requests.HTTPError: if the server answers with an error status code
    """

    # Create output directory if it doesn't exist
    if not os.path.exists(out_dir):
        print(f'Creating {out_dir}')
        os.makedirs(out_dir, exist_ok=True)

    # Get filename from URL if not provided
    if out_zip_name is None:
        print('Did not provide output zip file name, extracting from URL')
        out_zip_name = os.path.basename(urlparse(in_url).path)

        # Check if file has zip or 7z extension
        if not re.match(r'.*\.(zip|7z)$', out_zip_name):
            raise ValueError("Could not extract file name with zip or 7z extension from URL")

    out_zip_path = os.path.join(out_dir, out_zip_name)

    # Only touch the network when the archive is not already on disk
    if os.path.exists(out_zip_path):
        print(f"{out_zip_path} already exists, skipping download")
    else:
        print(f"Downloading from {in_url}...")
        # Stream into a temporary name first so that an interrupted download
        # never leaves a truncated file that a later run would mistake for a
        # complete archive.
        partial_path = out_zip_path + '.part'
        try:
            with requests.get(in_url, stream=True) as response:
                response.raise_for_status()  # Raise an error for bad status codes
                with open(partial_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        f.write(chunk)
            os.replace(partial_path, out_zip_path)
        finally:
            # Still present only if the download failed part-way
            if os.path.exists(partial_path):
                os.remove(partial_path)

    # Extract the archive with the reader that matches its extension
    print("Extracting zip file...")
    if out_zip_path.lower().endswith('.7z'):
        with py7zr.SevenZipFile(out_zip_path, mode='r') as archive:
            member_names = archive.getnames()
            archive.extractall(path=out_dir)
    else:
        with ZipFile(out_zip_path, 'r') as zip_ref:
            member_names = zip_ref.namelist()
            zip_ref.extractall(out_dir)

    # Parent directory of the first member; an empty archive has none
    unzipped_dir = os.path.dirname(member_names[0]) if member_names else ''

    return os.path.join(out_dir, unzipped_dir)

In [None]:
# Download the national WBD geodatabase archive into <datdir>/nhd
wbd_url = "https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD/National/GDB/WBD_National_GDB.zip"
nhd_dir = os.path.join(datdir, "nhd")  # Adjust this path as needed

# Best effort: a failure is reported on stdout rather than raised
try:
    extracted_path = download_and_extract_zip(wbd_url, nhd_dir)
    print(f"Files extracted to: {extracted_path}")
except Exception as err:
    print(f"An error occurred: {err!s}")

Did not provide output zip file name, extracting from URL
Downloading from https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD/National/GDB/WBD_National_GDB.zip...


In [None]:
# Download GEOGLOWS (for now)
def download_s3_bucket_contents(bucket_name="geoglows-v2", prefix="streams/",
                                download_dir="downloads",
                                region_name="us-west-2"):
    """
    Download every object under a prefix of an S3 bucket.

    Requests are sent unsigned, so no AWS credentials are used. Each object
    is saved under download_dir at a path mirroring its S3 key. Errors
    raised while listing or downloading are printed, not re-raised.

    Parameters:
    bucket_name (str): Name of the S3 bucket
    prefix (str): Only keys starting with this prefix are downloaded
    download_dir (str): Local directory to store the downloads in
    region_name (str): AWS region used to create the S3 client

    Returns:
    None
    """
    # boto3 exposes neither Config nor UNSIGNED at package level; both are
    # provided by botocore, the library boto3 is built on.
    from botocore import UNSIGNED
    from botocore.config import Config

    # Create an S3 client that sends unsigned requests
    s3 = boto3.client('s3',
                      region_name=region_name,
                      config=Config(signature_version=UNSIGNED))

    # Create a directory to store downloads
    os.makedirs(download_dir, exist_ok=True)

    try:
        # List all objects under the prefix, one page of results at a time
        paginator = s3.get_paginator('list_objects_v2')

        for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
            # A page carries no 'Contents' entry when it lists no objects
            for obj in page.get('Contents', []):
                key = obj['Key']

                # A key ending in '/' has no file name: its local path would
                # be a directory, which download_file cannot write to.
                if key.endswith('/'):
                    continue

                # Create local directory structure if needed
                local_path = os.path.join(download_dir, key)
                os.makedirs(os.path.dirname(local_path), exist_ok=True)

                # Download the file
                print(f"Downloading: {key}")
                s3.download_file(bucket_name, key, local_path)

        print("Download completed successfully!")

    except Exception as e:
        print(f"An error occurred: {str(e)}")

# Execute the function
if __name__ == "__main__":
    download_s3_bucket_contents()



In [None]:
#Download GADM
