In [1]:
# make the functions from this and fadem from dataops

In [4]:
import os
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import List

# Configuration
DOWNLOAD_DIR = "downloads"
MAX_WORKERS = 10
TIMEOUT = 2000  # seconds

def read_urls(file_path: str) -> List[str]:
    """Return the non-blank lines of a text file, each stripped of whitespace.

    Args:
        file_path: Path to a text file containing one URL per line.

    Returns:
        The stripped lines in file order; lines that are empty or contain
        only whitespace are skipped.
    """
    urls: List[str] = []
    with open(file_path, "r") as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                urls.append(cleaned)
    return urls

def download_file(url: str, download_dir: str = DOWNLOAD_DIR) -> str:
    """Download a single file from a URL into ``download_dir``.

    The target file name is the last component of the URL *path* (query
    string and fragment are ignored, percent-escapes are decoded). Data is
    streamed to a temporary ``<name>.part`` file and only moved to its final
    name once the transfer completed, so an interrupted download never leaves
    a truncated file under the final name.

    Args:
        url: Address of the file to fetch.
        download_dir: Directory the file is written to; created if missing.

    Returns:
        A human-readable status line: ``"✅ Downloaded: <url>"`` on success or
        ``"❌ Failed: <url> | Reason: <error>"`` on any failure. Errors are
        reported through the return value rather than raised so that one bad
        URL does not abort a batch.
    """
    # Local import: keeps the block self-contained without touching the
    # notebook's import cell.
    from urllib.parse import unquote, urlsplit

    try:
        os.makedirs(download_dir, exist_ok=True)

        # basename() after unquote() so an encoded "/" cannot escape download_dir.
        filename = os.path.basename(unquote(urlsplit(url).path))
        if not filename:
            raise ValueError("URL does not contain a file name")
        local_filename = os.path.join(download_dir, filename)
        partial_filename = local_filename + ".part"

        try:
            # The context manager releases the connection even if streaming fails.
            with requests.get(url, stream=True, timeout=TIMEOUT) as response:
                response.raise_for_status()
                with open(partial_filename, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # skip keep-alive chunks
                            f.write(chunk)
            # Atomic on the same filesystem: the final name only ever holds a full file.
            os.replace(partial_filename, local_filename)
        except BaseException:
            # Remove the incomplete temporary file, then let the error propagate.
            try:
                os.remove(partial_filename)
            except OSError:
                pass
            raise

        return f"✅ Downloaded: {url}"
    except Exception as e:
        return f"❌ Failed: {url} | Reason: {e}"

def download_all(urls: List[str], download_dir, workers: int = MAX_WORKERS):
    """Fetch every URL concurrently and print each status line as it completes.

    Args:
        urls: URLs to download.
        download_dir: Directory passed through to ``download_file``.
        workers: Maximum number of threads in the pool.

    Results are printed in completion order, which may differ from the order
    of ``urls``.
    """
    with ThreadPoolExecutor(max_workers=workers) as pool:
        pending = [pool.submit(download_file, link, download_dir) for link in urls]
        for finished in as_completed(pending):
            print(finished.result())

def download_urls_inpar(txt_path, download_dir, workers):
    """Read URLs from a text file and download them in parallel.

    Args:
        txt_path: Path to a text file with one URL per line.
        download_dir: Directory the files are written to.
        workers: Maximum number of concurrent download threads.

    Prints "No URLs found." and does nothing else when the file yields no URLs.
    """
    url_list = read_urls(txt_path)
    if url_list:
        print(f"Starting download of {len(url_list)} files...\n")
        download_all(url_list, download_dir, workers)
    else:
        print("No URLs found.")

In [7]:
# Destination directory for the downloaded tiles (absolute, machine-specific path).
data_dir = "/media/ljp238/12TBWolf/ARCHIEVE/ANADEM/data"
# Text file listing one URL per line; relative to the current working directory.
txt_path = "urls.txt"
# Download every listed URL using up to 10 worker threads.
download_urls_inpar(txt_path=txt_path,download_dir=data_dir,workers=10)

Starting download of 52 files...

✅ Downloaded: https://metadados.snirh.gov.br/files/anadem_v1_tiles/anadem_v1_17L.tif
✅ Downloaded: https://metadados.snirh.gov.br/files/anadem_v1_tiles/anadem_v1_18K.tif
✅ Downloaded: https://metadados.snirh.gov.br/files/anadem_v1_tiles/anadem_v1_18H.tif
✅ Downloaded: https://metadados.snirh.gov.br/files/anadem_v1_tiles/anadem_v1_17M.tif
✅ Downloaded: https://metadados.snirh.gov.br/files/anadem_v1_tiles/anadem_v1_18P.tif
✅ Downloaded: https://metadados.snirh.gov.br/files/anadem_v1_tiles/anadem_v1_17N.tif
✅ Downloaded: https://metadados.snirh.gov.br/files/anadem_v1_tiles/anadem_v1_18F.tif
✅ Downloaded: https://metadados.snirh.gov.br/files/anadem_v1_tiles/anadem_v1_18G.tif
✅ Downloaded: https://metadados.snirh.gov.br/files/anadem_v1_tiles/anadem_v1_18M.tif
✅ Downloaded: https://metadados.snirh.gov.br/files/anadem_v1_tiles/anadem_v1_19F.tif
✅ Downloaded: https://metadados.snirh.gov.br/files/anadem_v1_tiles/anadem_v1_18L.tif
✅ Downloaded: https://metadados

In [9]:
# Create the VRT and the tile index: output GeoPackage path and the
# directory containing the .tif tiles.
gpkg_fn = "/media/ljp238/12TBWolf/ARCHIEVE/ANADEM/ANADEM_tiles.gpkg"
tif_dir = "/media/ljp238/12TBWolf/ARCHIEVE/ANADEM/data"

In [10]:
# Build the tile index with the gdaltindex command-line tool. The command is
# run through a shell, which is what expands the *.tif glob; the paths are
# interpolated unquoted, so they must not contain spaces or shell metacharacters.
cmd = f"gdaltindex -t_srs EPSG:4326 -f GPKG {gpkg_fn} {tif_dir}/*.tif"
# os.system returns the command's exit status, displayed as the cell output
# (0 means success); a non-zero status does not raise.
os.system(cmd)

0

In [11]:
import os
import subprocess

def build_vrt_from_tifs(data_dir, output_vrt, recursive=False):
    """
    Create a VRT file from all .tif files in a given directory using gdalbuildvrt.

    The input files are passed to gdalbuildvrt in sorted path order so that
    repeated runs produce the same VRT regardless of the order in which the
    filesystem lists the directory (source order decides which file wins
    where tiles overlap).

    Args:
        data_dir (str): Path to the directory containing .tif files.
        output_vrt (str): Path to the output .vrt file.
        recursive (bool): If True, search subdirectories recursively.

    Raises:
        ValueError: If ``data_dir`` is not an existing directory.
        RuntimeError: If no .tif files are found, or if gdalbuildvrt exits
            with a non-zero status.
    """
    if not os.path.isdir(data_dir):
        raise ValueError(f"Directory does not exist: {data_dir}")

    # Gather all .tif files (extension match is case-insensitive)
    tif_files = []
    for root, _, files in os.walk(data_dir):
        tif_files.extend(
            os.path.join(root, name)
            for name in files
            if name.lower().endswith('.tif')
        )
        if not recursive:
            # The first os.walk entry is data_dir itself; stop before descending.
            break

    if not tif_files:
        raise RuntimeError(f"No .tif files found in {data_dir}")

    # os.walk yields entries in arbitrary order; sort for a reproducible VRT.
    tif_files.sort()

    # Build gdalbuildvrt command (argument list, so no shell quoting is needed)
    cmd = ["gdalbuildvrt", output_vrt] + tif_files

    # Run command
    try:
        subprocess.run(cmd, check=True)
        print(f"VRT created: {output_vrt}")
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"gdalbuildvrt failed: {e}") from e


In [12]:
# Output path of the VRT mosaic.
vrt_fn = "/media/ljp238/12TBWolf/ARCHIEVE/ANADEM/ANADEM_tiles.vrt"
# Directory containing the .tif tiles (same value as used for the tile index).
tif_dir = "/media/ljp238/12TBWolf/ARCHIEVE/ANADEM/data"

In [13]:
# Build the VRT from every .tif under tif_dir, including subdirectories.
build_vrt_from_tifs(tif_dir, vrt_fn, recursive=True)


0...10...20...30...40...50...60...70...80...90...100 - done.
VRT created: /media/ljp238/12TBWolf/ARCHIEVE/ANADEM/ANADEM_tiles.vrt
