In [3]:
import os
from uvars import dataops_dir
import sys 
sys.path.append(dataops_dir)
from dataops import tileops
from glob import glob

In [4]:
# --- Run configuration (machine-specific absolute paths; adjust before re-running) ---
# Vector file handed to tileops.get_tilenames_from_global_grid_vector as `vpath`.
vpath = "/home/ljp238/Downloads/BLOCKsV/SA.gpkg"
# Global tile-grid vector handed to the same call as `gridpath`.
gridpath = "/home/ljp238/Downloads/FABDEM_v1-2_tiles.geojson"
# Root output folder; the pipeline creates "SA/GPKG" and "SA/TIF" underneath it.
gee_download_path = "/media/ljp238/12TBWolf/ARCHIEVE/GEEDownload/"
# Forwarded positionally to tileops.patchify_workflow(tif, ps, st, ...).
# NOTE(review): presumably patch size and stride in pixels - confirm against tileops.
ps = 256
st = 256
# Worker count forwarded as max_workers / num_workers / cpus to the tileops helpers.
cpus = 20

In [5]:
# utilenames, utilebboxs, xgdf = tileops.get_tilenames_from_global_grid_vector(vpath, gridpath)
# print("Tile names:", len(utilenames))

In [6]:
def create_patches_from_vblock_pipeline(gee_download_path,vpath,gridpath,ps=256,st=256,cpus=20):
    """Run the vector block -> per-tile GPKG -> dummy TIF -> patchify workflow.

    NOTE: a later cell in this notebook defines a function with this same name
    (it swaps the per-TIF print for a tqdm progress bar). On a top-to-bottom
    run that later definition replaces this one.

    Args:
        gee_download_path: Root folder; "SA/GPKG" and "SA/TIF" are created under it.
        vpath: Vector path forwarded to tileops.get_tilenames_from_global_grid_vector.
        gridpath: Grid vector path forwarded to the same call.
        ps, st: Forwarded positionally to tileops.patchify_workflow
            (presumably patch size and stride - confirm against tileops).
        cpus: Worker count forwarded to every tileops stage.

    Returns:
        None. Results are the files written by the tileops helpers.
    """
    tile_names, _tile_bboxes, grid_gdf = tileops.get_tilenames_from_global_grid_vector(vpath, gridpath)
    print("Tile names:", len(tile_names))

    # Both output trees live side by side under <gee_download_path>/SA/.
    gpkg_root, tif_root = (os.path.join(gee_download_path, "SA", leaf) for leaf in ("GPKG", "TIF"))
    for folder in (gpkg_root, tif_root):
        os.makedirs(folder, exist_ok=True)

    # The return value is not needed here: the per-tile files are rediscovered
    # on disk by the glob below (one sub-folder per tile).
    tileops.processing_inpar(grid_gdf, tile_names, gpkg_root, max_workers=cpus)
    tile_gpkgs = glob(f"{gpkg_root}/*/*.gpkg")
    print("Total GPKG files created:", len(tile_gpkgs))

    placeholder_tifs = tileops.create_dummy_rasters_inpar(tile_gpkgs, tif_root, num_workers=cpus)

    for tif_path in placeholder_tifs:
        print("Processing TIF:", tif_path)
        tileops.patchify_workflow(tif_path,ps,st,cpus=cpus)




In [11]:
from tqdm import tqdm

In [12]:
def create_patches_from_vblock_pipeline(gee_download_path, vpath, gridpath, ps=256, st=256, cpus=20, region="SA"):
    """Run the vector block -> per-tile GPKG -> dummy TIF -> patchify workflow.

    Args:
        gee_download_path: Root output folder.
        vpath: Vector path forwarded to tileops.get_tilenames_from_global_grid_vector.
        gridpath: Grid vector path forwarded to the same call.
        ps, st: Forwarded positionally to tileops.patchify_workflow
            (presumably patch size and stride - confirm against tileops).
        cpus: Worker count forwarded to every tileops stage.
        region: Name of the sub-folder created under ``gee_download_path`` that
            holds the "GPKG" and "TIF" trees. Defaults to "SA", the value that
            used to be hard-coded, so existing calls behave as before.

    Returns:
        None. Results are the files written by the tileops helpers.
    """
    # The bounding boxes returned alongside the names are not used by this workflow.
    tile_names, _tile_bboxes, grid_gdf = tileops.get_tilenames_from_global_grid_vector(vpath, gridpath)
    print("Tile names:", len(tile_names))

    gpkg_block_dir = os.path.join(gee_download_path, region, "GPKG")
    tif_block_dir = os.path.join(gee_download_path, region, "TIF")
    os.makedirs(gpkg_block_dir, exist_ok=True)
    os.makedirs(tif_block_dir, exist_ok=True)

    # Step 1: Create GPKGs (one sub-folder per tile). The return value is not
    # needed because the files are rediscovered on disk right below.
    tileops.processing_inpar(grid_gdf, tile_names, gpkg_block_dir, max_workers=cpus)
    # glob returns matches in arbitrary order; sort so the next stage always
    # receives the same input list for the same directory contents.
    gpkg_files = sorted(glob(f"{gpkg_block_dir}/*/*.gpkg"))
    print("Total GPKG files created:", len(gpkg_files))

    # Step 2: Create Dummy TIFs
    dummy_tifs = tileops.create_dummy_rasters_inpar(gpkg_files, tif_block_dir, num_workers=cpus)

    # Step 3: Patchify each TIF with a progress bar
    for tifi in tqdm(dummy_tifs, desc="Processing TIFs with patchify_workflow"):
        tileops.patchify_workflow(tifi, ps, st, cpus=cpus)

In [13]:
# Run the whole workflow with the values from the configuration cell.
# The output recorded below shows the patchify stage taking about 2h16m for 1855 TIFs.
create_patches_from_vblock_pipeline(
    gee_download_path=gee_download_path,
    vpath=vpath,
    gridpath=gridpath,
    ps=ps,
    st=st,
    cpus=cpus,
)

Tile names: 1855


Processing tiles (Parallel): 100%|██████████| 1855/1855 [00:00<00:00, 5961.26it/s]


Total GPKG files created: 1855


Creating dummy rasters (Parallel): 100%|██████████| 1855/1855 [00:00<00:00, 7112.60it/s]
Processing TIFs with patchify_workflow: 100%|██████████| 1855/1855 [2:16:13<00:00,  4.41s/it]  


In [21]:
import os
import geopandas as gpd
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

# --- Base function to process a single tile ---
def process_single_tile(xgdf, tilename, gpkg_block_dir):
    """Write the rows of one tile to ``<gpkg_block_dir>/<tilename>/<tilename>.gpkg``.

    Args:
        xgdf: Frame with at least 'tile_name' and 'geometry' columns.
        tilename: Value matched against the 'tile_name' column; also used for
            the output folder and file name.
        gpkg_block_dir: Parent folder; the per-tile folder is created if missing.

    Returns:
        ("written", path) when the file was created by this call, or
        ("skipped", path) when a file already existed at that path.
    """
    tile_dir = os.path.join(gpkg_block_dir, tilename)
    os.makedirs(tile_dir, exist_ok=True)
    gpkg_path = os.path.join(tile_dir, f"{tilename}.gpkg")

    if not os.path.isfile(gpkg_path):
        # Keep only this tile's rows, and only the two columns that get exported.
        rows_for_tile = xgdf[xgdf['tile_name'] == tilename]
        rows_for_tile[['tile_name', 'geometry']].to_file(gpkg_path, driver="GPKG")
        return "written", gpkg_path

    # An existing file is left untouched so repeated runs can resume.
    return "skipped", gpkg_path

# --- Sequential processing with progress bar ---
def processing_inseq(xgdf, utilenames, gpkg_block_dir):
    """Call process_single_tile for every tile name, one after another.

    Args:
        xgdf: Frame forwarded unchanged to process_single_tile.
        utilenames: Iterable of tile names to process, in order.
        gpkg_block_dir: Output parent folder forwarded to process_single_tile.

    Returns:
        List of (status, path) tuples in the same order as ``utilenames``.
    """
    progress = tqdm(utilenames, desc="Processing tiles (Sequential)")
    per_tile = (process_single_tile(xgdf, name, gpkg_block_dir) for name in progress)
    return [(status, path) for status, path in per_tile]

# --- Parallel processing with progress bar ---
def processing_inpar(xgdf, utilenames, gpkg_block_dir, max_workers=4):
    """Call process_single_tile for every tile name on a thread pool.

    Args:
        xgdf: Frame forwarded unchanged to process_single_tile.
        utilenames: Iterable of tile names to process.
        gpkg_block_dir: Output parent folder forwarded to process_single_tile.
        max_workers: Size of the ThreadPoolExecutor.

    Returns:
        List of (status, path) tuples in completion order, which need not match
        the order of ``utilenames``. An exception raised by any task propagates
        when its result is collected.
    """
    results = []
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = [
            pool.submit(process_single_tile, xgdf, name, gpkg_block_dir)
            for name in utilenames
        ]
        progress = tqdm(as_completed(pending), total=len(pending), desc="Processing tiles (Parallel)")
        for finished in progress:
            status, output = finished.result()
            results.append((status, output))
    return results


In [33]:
import os
import numpy as np
import geopandas as gpd
import rasterio
from rasterio.transform import from_origin
from multiprocessing import Pool
from tqdm import tqdm
from typing import List, Tuple

# --- Helper: Get raster properties from vector ---
def vector2raster_properties(vpath: str, epsg: int = 4326, res: float = 1/3600) -> Tuple[int, int, rasterio.transform.Affine]:
    """Derive raster width, height and affine transform from a vector's extent.

    Args:
        vpath: Path of the vector file to read with geopandas.
        epsg: EPSG code the extent is expressed in. If the file declares a
            different CRS it is reprojected first, so the bounds, ``res`` and
            the CRS later stamped on the raster all agree. A file with no
            declared CRS is used as-is.
        res: Pixel size, in the units of ``epsg``.

    Returns:
        (width, height, transform) where the transform places the origin at the
        top-left corner (minx, maxy) of the extent.

    Raises:
        ValueError: If the vector file contains no features.
    """
    vector = gpd.read_file(vpath)
    if vector.empty:
        # Without this check the NaN bounds fail further down with an opaque
        # "cannot convert float NaN to integer" ValueError.
        raise ValueError(f"No features found in {vpath!r}; cannot derive raster properties.")

    # Bounds must be in the target CRS; previously `epsg` was accepted but ignored.
    if vector.crs is not None and vector.crs.to_epsg() != epsg:
        vector = vector.to_crs(epsg=epsg)

    minx, miny, maxx, maxy = vector.total_bounds
    # NOTE(review): int(...) + 1 yields one extra pixel when the extent is an
    # exact multiple of `res`; kept as-is because downstream sizes depend on it.
    width = int((maxx - minx) / res) + 1
    height = int((maxy - miny) / res) + 1
    transform = from_origin(minx, maxy, res, res)
    return width, height, transform

# --- Helper: Create a dummy raster ---
def make_dummy_raster(vpath: str, rpath: str, epsg: int = 4326, res: float = 1/3600) -> None:
    """Write a single-band uint8 GeoTIFF of random 0/1 values covering a vector's extent.

    Args:
        vpath: Vector file whose extent defines the raster grid.
        rpath: Output GeoTIFF path (opened in write mode).
        epsg: EPSG code written as the raster CRS and forwarded to
            vector2raster_properties.
        res: Pixel size forwarded to vector2raster_properties.

    Returns:
        None. The raster is written to ``rpath`` with LZW compression.
    """
    width, height, transform = vector2raster_properties(vpath, epsg=epsg, res=res)

    # Draw the 0/1 values directly as uint8. np.random.choice would first build
    # an int64 array (8 bytes per pixel) and then copy it down to uint8, which
    # adds up quickly when many workers do this at once. The data is generated
    # before the file is opened so a failure here leaves no empty file behind.
    random_data = np.random.randint(0, 2, size=(height, width), dtype=np.uint8)

    with rasterio.open(
        rpath, 'w', driver='GTiff', height=height, width=width,
        count=1, dtype='uint8', crs=f"EPSG:{epsg}", transform=transform,
        compress='lzw'
    ) as dst:
        dst.write(random_data, 1)

# --- Helper: Process one vector file ---
def process_single_vector(args: Tuple[str, str, int, float]) -> str:
    """Ensure the dummy raster for one vector file exists and return its path.

    Args:
        args: ``(vpath, geepathes_dir, epsg, res)`` packed into one tuple so the
            function can be mapped over by a worker pool.

    Returns:
        ``<geepathes_dir>/<stem>/<stem>.tif`` where ``stem`` is the vector file
        name without its extension. The raster is only generated when no file
        exists at that path yet.
    """
    vpath, geepathes_dir, epsg, res = args
    stem, _ext = os.path.splitext(os.path.basename(vpath))
    out_dir = os.path.join(geepathes_dir, stem)
    os.makedirs(out_dir, exist_ok=True)
    tif_path = os.path.join(out_dir, f"{stem}.tif")

    # An existing raster is reused so repeated runs can resume.
    if os.path.isfile(tif_path):
        return tif_path

    make_dummy_raster(vpath, tif_path, epsg=epsg, res=res)
    return tif_path

# --- Sequential Processing ---
def create_dummy_rasters_inseq(vfiles: List[str], geepathes_dir: str, epsg: int = 4326, res: float = 1/3600) -> List[str]:
    """Create (or reuse) one dummy raster per vector file, sequentially.

    Args:
        vfiles: Vector file paths to process, in order.
        geepathes_dir: Output parent folder forwarded to process_single_vector.
        epsg: EPSG code forwarded to process_single_vector.
        res: Pixel size forwarded to process_single_vector.

    Returns:
        Raster paths in the same order as ``vfiles``.
    """
    progress = tqdm(vfiles, desc="Creating dummy rasters (Sequential)")
    return [
        process_single_vector((vector_path, geepathes_dir, epsg, res))
        for vector_path in progress
    ]

# --- Parallel Processing (with Pool) ---
def create_dummy_rasters_inpar(vfiles: List[str], geepathes_dir: str, epsg: int = 4326, res: float = 1/3600, num_workers: int = 4) -> List[str]:
    """Create (or reuse) one dummy raster per vector file on a process pool.

    Args:
        vfiles: Vector file paths to process.
        geepathes_dir: Output parent folder forwarded to process_single_vector.
        epsg: EPSG code forwarded to process_single_vector.
        res: Pixel size forwarded to process_single_vector.
        num_workers: Number of worker processes in the pool.

    Returns:
        Raster paths in the order the workers finish them (imap_unordered),
        which need not match the order of ``vfiles``.
    """
    jobs = [(vector_path, geepathes_dir, epsg, res) for vector_path in vfiles]

    with Pool(processes=num_workers) as pool:
        finished = pool.imap_unordered(process_single_vector, jobs)
        progress = tqdm(finished, total=len(vfiles), desc="Creating dummy rasters (Parallel)")
        # Drain the iterator while the pool is still alive.
        return list(progress)


Creating dummy rasters (Parallel): 100%|██████████| 1855/1855 [01:14<00:00, 24.96it/s]


Make all of this into one end-to-end pipeline.