In [1]:
import os 
import subprocess
from glob import glob
import time 
import yaml 
from os.path import join,isfile, exists

# Root of the source archive, and the folder under it that holds the
# per-variable file lists and VRT files.
archieve_dpath = "/media/ljp238/12TBWolf/ARCHIEVE"
outdir = archieve_dpath + "/ARCHIVE_VRT"

# wflowb: gentiles
# Root of the BRCHIEVE tree and its TILES12 / TILES30 / TILES90 sub-folders.
brchieve_dpath = "/media/ljp238/12TBWolf/BRCHIEVE"
brchieve_dpath12, brchieve_dpath30, brchieve_dpath90 = (
    f"{brchieve_dpath}/TILES{suffix}" for suffix in (12, 30, 90)
)

In [2]:
def create_text_file(key, files, output_dir, overwrite=False):
    """Write the raster paths in ``files`` to ``<output_dir>/<key>.txt``, one per line.

    Args:
        key: Base name (without extension) of the text file.
        files: Iterable of raster file paths to list.
        output_dir: Directory that receives the text file; created if missing.
        overwrite: When falsy, an existing text file is left untouched.

    Returns:
        The path of the text file, whether it was written or skipped.
    """
    txt_path = join(output_dir, f"{key}.txt")

    # Truthiness test (not ``is False``) so 0/None also mean "do not overwrite".
    if isfile(txt_path) and not overwrite:
        print(f"[SKIP] Text file already exists: {txt_path}")
        return txt_path

    # Create the target directory up front instead of failing inside open().
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(txt_path, "w") as txt_file:
        # One path per line; an empty list yields an empty file rather than
        # a file containing a single blank line.
        txt_file.writelines(f"{path}\n" for path in files)

    print(f"[INFO] Created text file: {txt_path}")
    return txt_path

def create_vrt_file(key, txt_path, output_dir, epsg="4749", overwrite=False):
    """Build ``<output_dir>/<key>.vrt`` with ``gdalbuildvrt`` from a file list.

    Args:
        key: Base name (without extension) of the VRT file.
        txt_path: Text file listing the input rasters, one path per line.
        output_dir: Directory that receives the VRT file.
        epsg: EPSG code passed as ``-a_srs EPSG:<epsg>``; ``None`` omits
            the option entirely.
        overwrite: When falsy, an existing VRT is kept and nothing is run.
            When truthy, ``-overwrite`` is passed so an existing VRT is replaced.

    Returns:
        The VRT path. It is returned even when ``gdalbuildvrt`` exits with a
        non-zero status; that failure is only reported on stdout.
    """
    # NOTE(review): the default EPSG code 4749 looks unusual for these datasets —
    # confirm it is intended and not a typo (e.g. for 4326 or 4979).
    vrt_path = join(output_dir, f"{key}.vrt")

    # Truthiness test (not ``is False``) so 0/None also mean "do not overwrite".
    if isfile(vrt_path) and not overwrite:
        print(f"[SKIP] VRT file already exists: {vrt_path}")
        return vrt_path

    cmd = ["gdalbuildvrt"]
    if overwrite:
        # gdalbuildvrt refuses to replace an existing VRT unless told to.
        cmd.append("-overwrite")
    if epsg is not None:
        cmd += ["-a_srs", f"EPSG:{epsg}"]
    cmd += ["-input_file_list", txt_path, vrt_path]

    try:
        subprocess.run(cmd, check=True)
        print(f"[INFO] Created VRT file: {vrt_path}")
    except subprocess.CalledProcessError as e:
        print(f"Error: gdalbuildvrt failed with error code {e.returncode}")
    return vrt_path

def save_yaml(data, file_path, overwrite=True):
    """Save dictionary data to a YAML file.

    Args:
        data: Object to serialise with ``yaml.dump`` (block style).
        file_path: Destination path of the YAML file.
        overwrite: When falsy and ``file_path`` already exists, the file is
            left untouched. Defaults to True, so existing calls keep
            replacing the file.
    """
    if exists(file_path) and not overwrite:
        print(f"[SKIP] YAML file already exists: {file_path}")
        return

    # allow_unicode=True writes raw non-ASCII characters, so pin the encoding
    # instead of relying on the platform default.
    with open(file_path, "w", encoding="utf-8") as file:
        yaml.dump(data, file, default_flow_style=False, allow_unicode=True)

def print_file_length(dt, key):
    """Print ``key`` followed by the number of entries stored under it in ``dt``."""
    count = len(dt[key])
    print(f"{key} {count} files")

In [None]:
#yaml_filename = join(outdir,"format_variable_and_files.yaml")

def loadfiles_byvariable(archieve_dpath, outdir):
    """Collect the raster files of every variable and save the listing as YAML.

    Each variable key maps to a list of ``.tif`` paths, either found with
    ``glob`` under ``archieve_dpath`` or given as a single fixed path. The
    number of files per key is printed, and the whole mapping is written to
    ``<outdir>/loadfiles_byvariable.yaml``.

    Args:
        archieve_dpath: Root directory of the archive to scan.
        outdir: Directory that receives ``loadfiles_byvariable.yaml``.

    Returns:
        Path of the YAML file that was written. The mapping itself is not
        returned.
    """
    ti = time.perf_counter()
    ds = {}

    # TDEMX tiles: the DEM plus its auxiliary layers.
    ds["tdem_dem"] = glob(f"{archieve_dpath}/TDEMX/*/DEM/*_DEM.tif")
    ds["tdem_wam"] = glob(f"{archieve_dpath}/TDEMX/*/AUXFILES/*WAM.tif")
    ds["tdem_lsm"] = glob(f"{archieve_dpath}/TDEMX/*/AUXFILES/*LSM.tif")
    ds["tdem_hem"] = glob(f"{archieve_dpath}/TDEMX/*/AUXFILES/*HEM.tif")
    ds["tdem_cov"] = glob(f"{archieve_dpath}/TDEMX/*/AUXFILES/*COV.tif")
    ds["tdem_com"] = glob(f"{archieve_dpath}/TDEMX/*/AUXFILES/*COM.tif")
    print_file_length(ds, "tdem_dem")
    print_file_length(ds, "tdem_wam")
    print_file_length(ds, "tdem_lsm")  # was collected but never reported
    print_file_length(ds, "tdem_hem")
    print_file_length(ds, "tdem_cov")
    print_file_length(ds, "tdem_com")

    # EDEMx tiles: the two EDEM variants plus auxiliary layers.
    ds["edem_wgs"] = glob(f"{archieve_dpath}/EDEMx/TILES/comprexn/*/EDEM/*_EDEM_W84.tif")
    ds["edem_egm"] = glob(f"{archieve_dpath}/EDEMx/TILES/comprexn/*/EDEM/*_EDEM_EGM.tif")
    ds["edem_lcm"] = glob(f"{archieve_dpath}/EDEMx/TILES/comprexn/*/EDEM_AUXFILES/*LCM.tif")
    ds["edem_hem"] = glob(f"{archieve_dpath}/EDEMx/TILES/comprexn/*/EDEM_AUXFILES/*HEM.tif")
    ds["edem_edm"] = glob(f"{archieve_dpath}/EDEMx/TILES/comprexn/*/EDEM_AUXFILES/*EDM.tif")
    print_file_length(ds, "edem_wgs")
    print_file_length(ds, "edem_egm")
    print_file_length(ds, "edem_lcm")
    print_file_length(ds, "edem_hem")
    print_file_length(ds, "edem_edm")

    # Single-file layers (fixed paths, not checked for existence) and tiled products.
    ds["wsfbh"] = [f"{archieve_dpath}/WSF3D/data/WSFBH/WSF3D_V02_BuildingHeight.tif"]
    ds["pdem"] = [f"{archieve_dpath}/PBAND_DTM/RNG/NegroAOITDX08.tif"]
    ds["egm08"] = [f"{archieve_dpath}/GEOID/GLOBAL/us_nga_egm2008_1.tif"]
    ds["fbchm"] = glob(f"{archieve_dpath}/FB_CHM/RESAMPLE/sorted_files/*/*.tif")
    ds["etchm"] = glob(f"{archieve_dpath}/ETH_CHM/data/*/*/*.tif")
    ds["esawc"] = glob(f"{archieve_dpath}/ESAWC/data/v200/2021/map_tiled/*/*.tif")
    print_file_length(ds, "wsfbh")
    print_file_length(ds, "pdem")
    print_file_length(ds, "egm08")
    print_file_length(ds, "esawc")
    print_file_length(ds, "etchm")
    print_file_length(ds, "fbchm")

    ds["gedi_dtm"] = [f"{archieve_dpath}/GEDI/GRID/comprexn/GEDI_L3_be/GEDI03_elev_lowestmode_mean_2019108_2022019_002_03_EPSG4326.tif"]
    ds["gedi_dsm"] = [f"{archieve_dpath}/GEDI/GRID/comprexn/GEDI_L3_vh/GEDI03_rh100_mean_2019108_2022019_002_03_EPSG4326.tif"]
    ds["cdem_wbm"] = glob(f"{archieve_dpath}/CDEM/WBM/wbm_auto/*/*/*WBM.tif")
    print_file_length(ds, "gedi_dtm")
    print_file_length(ds, "gedi_dsm")
    print_file_length(ds, "cdem_wbm")

    ds["ldem"] = glob(f"{archieve_dpath}/LIDAR_DTM/*/*.tif")  # TODO: fix heterogeneous stuff AMZ
    ds["s2"] = glob(f"{archieve_dpath}/S2/comprexn/*/*.tif")
    ds["s1"] = glob(f"{archieve_dpath}/S1/comprexn/*/*.tif")
    print_file_length(ds, "ldem")
    print_file_length(ds, "s2")
    print_file_length(ds, "s1")

    yaml_filename = join(outdir, "loadfiles_byvariable.yaml")
    save_yaml(data=ds, file_path=yaml_filename)

    # Report the elapsed time; the original referenced an undefined name here
    # and raised NameError after all the work had been done.
    elapsed = time.perf_counter() - ti
    print(f'loadfiles_byvariable @{elapsed/60} min(s)')
    return yaml_filename


tdem_dem 17 files
tdem_wam 17 files
tdem_hem 17 files
tdem_cov 17 files
tdem_com 17 files
edem_wgs 17 files
edem_egm 17 files
edem_lcm 17 files
edem_hem 17 files
edem_edm 17 files
wsfbh 1 files
pdem 1 files
egm08 1 files
esawc 2651 files
etchm 5302 files
fbchm 84 files
gedi_dtm 1 files
gedi_dsm 1 files
cdem_wbm 26450 files
ldem 9 files
s2 22032 files
s1 21888 files


In [73]:
# loadfiles_byvariable() keeps its key -> file-list mapping local and only
# writes it to YAML, so reload it here instead of relying on a leftover kernel
# variable. Run loadfiles_byvariable(archieve_dpath, outdir) first if the
# YAML file does not exist yet.
yaml_filename = join(outdir, "loadfiles_byvariable.yaml")
with open(yaml_filename) as fh:
    ds = yaml.safe_load(fh)

# d1: key -> text file listing the rasters; d2: key -> VRT built from that list.
d1, d2 = {}, {}
for key, files in ds.items():
    outdpath = os.path.join(outdir, key)
    os.makedirs(outdpath, exist_ok=True)
    print(f"{key} {len(files)} tif files")
    # The file list is always refreshed; an existing VRT is kept as is.
    txt_path = create_text_file(key, files, outdpath, overwrite=True)
    vrt_path = create_vrt_file(key, txt_path, outdpath, epsg="4749", overwrite=False)
    d1[key] = txt_path
    d2[key] = vrt_path

vars_vrts_yaml = join(outdir, "vars_vrts.yaml")
vars_txts_yaml = join(outdir, "vars_txts.yaml")

# Text-file paths go to vars_txts.yaml and VRT paths to vars_vrts.yaml
# (the two targets were swapped before).
save_yaml(data=d1, file_path=vars_txts_yaml)
save_yaml(data=d2, file_path=vars_vrts_yaml)

tdem_dem 17 tif files
[INFO] Created text file: /media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_dem/tdem_dem.txt
[SKIP] VRT file already exists: /media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_dem/tdem_dem.vrt
tdem_wam 17 tif files
[INFO] Created text file: /media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_wam/tdem_wam.txt
[SKIP] VRT file already exists: /media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_wam/tdem_wam.vrt
tdem_lsm 17 tif files
[INFO] Created text file: /media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_lsm/tdem_lsm.txt
[SKIP] VRT file already exists: /media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_lsm/tdem_lsm.vrt
tdem_hem 17 tif files
[INFO] Created text file: /media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_hem/tdem_hem.txt
[SKIP] VRT file already exists: /media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_hem/tdem_hem.vrt
tdem_cov 17 tif files
[INFO] Created text file: /media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_cov/tdem_cov.txt
[SKIP] VRT file already exists: /media



...10...20...30...40...50...60...70...80...90...100 - done.
[INFO] Created VRT file: /media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/s1/s1.vrt


In [56]:
# Display the key -> VRT path mapping collected in d2.
d2

{'tdem_dem': '/media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_dem/tdem_dem.vrt',
 'tdem_wam': '/media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_wam/tdem_wam.vrt',
 'tdem_lsm': '/media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_lsm/tdem_lsm.vrt',
 'tdem_hem': '/media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_hem/tdem_hem.vrt',
 'tdem_cov': '/media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_cov/tdem_cov.vrt',
 'tdem_com': '/media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/tdem_com/tdem_com.vrt',
 'edem_wgs': '/media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/edem_wgs/edem_wgs.vrt',
 'edem_egm': '/media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/edem_egm/edem_egm.vrt',
 'edem_lcm': '/media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/edem_lcm/edem_lcm.vrt',
 'edem_hem': '/media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/edem_hem/edem_hem.vrt',
 'edem_edm': '/media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/edem_edm/edem_edm.vrt',
 'wsfbh': '/media/ljp238/12TBWolf/ARCHIEVE/ARCHIVE_VRT/wsfbh/wsfbh.vrt',
 'pdem': '/media/ljp238/1

In [59]:
# Persist the mappings again: d1 holds the text-file paths and d2 the VRT
# paths, so each goes to its matching YAML file (the targets were swapped).
save_yaml(data=d1, file_path=vars_txts_yaml)
save_yaml(data=d2, file_path=vars_vrts_yaml)

# yaml_filename is only a local inside loadfiles_byvariable(); rebuild the
# same path here so this cell does not depend on a leftover kernel variable.
yaml_filename = join(outdir, "loadfiles_byvariable.yaml")
save_yaml(data=ds, file_path=yaml_filename)