In [31]:
import os, glob
import subprocess
import tempfile
from tqdm import tqdm
import copy
import time
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import xarray as xr
import rioxarray
from rioxarray import merge
from osgeo import gdal
from joblib import Parallel, delayed

#os.environ['GDAL_NUM_THREADS'] = 'ALL_CPUS' # probably not necessary as does not trigger anything in gdal_translate or gdal2tiles

In [35]:
# --- Run configuration: everything a reader might tune lives in this cell ---
rgi = 17  # region number, used to build the rgi{rgi} input/output directory names
version = '62'  # used to build the RGI{version} directory name
MAX_VAL_MAPBOX = 800  # rasters are clipped to [0, MAX_VAL_MAPBOX] before color mapping
cmap = plt.get_cmap('turbo', 256)
PATH_TIFFS_IN = f"/media/maffe/nvme/iceboost_global_deploy/iceboost_20250212/RGI{version}/rgi{rgi}"
# glob returns entries in arbitrary (filesystem-dependent) order; sorting makes the
# processing order, and therefore the order of sources in the VRT, reproducible.
tif_filenames = sorted(os.path.basename(file) for file in glob.glob(f'{PATH_TIFFS_IN}/*.tif'))
output_directory = f"/media/maffe/nvme/iceboost_global_deploy/iceboost_20250212/RGI{version}/mapbox_rgi{rgi}/"
print(f"We will process {len(tif_filenames)} files from {PATH_TIFFS_IN}")
n_jobs = 8  # Number of parallel processes

We will process 15908 files from /media/maffe/nvme/iceboost_global_deploy/iceboost_20250212/RGI62/rgi17


In [36]:
# Process individual TIF files and save them as .tif in the temporary /tmp folder (we will remove them in the end)

def process_tif(tif_name, path_tiffs_in, cmap, max_val_mapbox):
    """Colorize a single GeoTIFF and write it as a temporary RGBA GeoTIFF.

    Band 1 of the input raster is reprojected to EPSG:3857, clipped to
    ``[0, max_val_mapbox]`` (NaNs become 0), mapped through ``cmap`` and
    written to a deflate-compressed 4-band uint8 GeoTIFF in the temporary
    directory.

    Parameters
    ----------
    tif_name : str
        File name of the input raster, relative to ``path_tiffs_in``.
    path_tiffs_in : str
        Directory containing the input raster.
    cmap : callable
        Colormap mapping an array of values in [0, 1] to RGBA floats in [0, 1]
        (e.g. a matplotlib colormap).
    max_val_mapbox : float
        Upper clipping bound; also the value mapped to the top of the colormap.

    Returns
    -------
    str or None
        Path of the temporary GeoTIFF, or ``None`` when the reprojected raster
        is only one pixel wide or high (such rasters are skipped). The caller
        is responsible for deleting the returned file.
    """
    src = rioxarray.open_rasterio(f"{path_tiffs_in}/{tif_name}")  # EPSG:4326
    try:
        tif = (
            src.sel(band=1)
            .squeeze()
            .rio.reproject(dst_crs="EPSG:3857")
            .clip(0, max_val_mapbox)
            .fillna(0)
        )
    finally:
        # The reprojected array is held in memory, so the source file handle
        # can be released right away instead of lingering in the worker.
        src.close()

    values = tif.values

    # Skip 1-pixel glaciers before doing any colorization work
    if values.shape[0] == 1 or values.shape[1] == 1:
        return None

    rgba_data = cmap(values / max_val_mapbox) * 255
    rgba_data[:, :, 3] = 127  # Set alpha to half transparency
    rgba_data[values == 0] = 0  # zero-valued pixels: all four channels (incl. alpha) set to 0
    rgba_data = rgba_data.astype(np.uint8)

    rgb_data_array = xr.DataArray(
        rgba_data, dims=('y', 'x', 'band'),
        coords={'x': tif.coords['x'], 'y': tif.coords['y']}
    ).transpose('band', 'y', 'x')

    rgb_data_array.rio.write_crs("EPSG:3857", inplace=True)
    rgb_data_array.rio.write_nodata(0, inplace=True)

    # Reserve a unique temporary path; close the descriptor immediately because
    # the raster writer opens the path on its own.
    fd, temp_file = tempfile.mkstemp(suffix=".tif")
    os.close(fd)
    try:
        rgb_data_array.rio.to_raster(temp_file, compress="deflate")
    except Exception:
        # Do not leave an orphan file behind when writing fails.
        if os.path.exists(temp_file):
            os.remove(temp_file)
        raise
    return temp_file

# Convert every input raster in parallel (n_jobs workers) and keep only the
# temporary paths that were actually written: process_tif returns None for
# the 1-pixel glaciers it skips.
processed_tifs = [
    tmp_path
    for tmp_path in Parallel(n_jobs=n_jobs)(
        delayed(process_tif)(name, PATH_TIFFS_IN, cmap, MAX_VAL_MAPBOX)
        for name in tqdm(tif_filenames, total=len(tif_filenames))
    )
    if tmp_path is not None
]

100%|████████████████████████████████████| 15908/15908 [01:01<00:00, 257.66it/s]


In [37]:
vrt_path = "temp.vrt"  # Temporary VRT file
# Defined before the try block so that the cleanup in `finally` never raises a
# NameError (masking the real error) when a step fails before Step 2 assigns it.
temp_tif_path = None

try:
    # Step 1. Virtual Raster (VRT) mosaicking all per-glacier temporary GeoTIFFs
    print('Begin VRT creation')
    t1_0 = time.time()
    gdal.BuildVRT(vrt_path, processed_tifs)
    if os.path.exists(vrt_path):
        print(f"{vrt_path} successfully created.")
        vrt_size = os.path.getsize(vrt_path)
        print(f"Size of {vrt_path}: {vrt_size / 1024:.2f} KB")
    else:
        raise FileNotFoundError(f"Failed to create {vrt_path}.")
    print(f"End VRT creation in {time.time()-t1_0}")

    # Step 2. Create a temporary GeoTIFF with the mask applied
    # gdal.Warp could be a slightly faster option than gdal.Translate
    # Only the unique path is needed here: the handle is closed on leaving the
    # `with` block and gdal_translate then writes to that path.
    with tempfile.NamedTemporaryFile(suffix=".tif", delete=False) as temp_tif:
        temp_tif_path = temp_tif.name
    # '-mask 4' uses band 4 (the alpha channel of the RGBA rasters) as the mask band
    translate_command = ['gdal_translate', '-mask', '4', '-co', 'COMPRESS=DEFLATE', vrt_path, temp_tif_path]

    print('Begin to translate command')
    t2_0 = time.time()
    subprocess.run(translate_command, check=True)
    file_size = os.path.getsize(temp_tif_path)  # File size in bytes
    file_size_mb = file_size / (1024 * 1024)  # Convert to MB
    print(f"Temporary masked GeoTIFF created at: {temp_tif_path} ({file_size_mb:.2f} MB) in {time.time()-t2_0:.2f} seconds")

    # Step 3. Run gdal2tiles on the result of gdal_translate
    tiles_command = ['gdal2tiles.py',
                     '-z', '5-10',
                     '-x',  # Exclude transparent tiles from result tileset
                     '-r', 'bilinear',
                     temp_tif_path, output_directory]
    subprocess.run(tiles_command, check=True)
    print(f"gdal2tiles completed successfully. Tiles saved to: {output_directory}")

finally:
    # Step 4. Remove temporary files (runs on success and on failure)
    n_deleted = 0
    for file_path in processed_tifs + [temp_tif_path, vrt_path]:
        if file_path is None or not os.path.exists(file_path):
            continue
        try:
            os.remove(file_path)
        except OSError as err:
            # Keep going: one undeletable file must not abort the whole cleanup.
            print(f"Could not delete {file_path}: {err}")
        else:
            n_deleted += 1
    # One summary line instead of one line per file (thousands of lines otherwise).
    print(f"Deleted {n_deleted} temporary files.")

Begin VRT creation
temp.vrt successfully created.
Size of temp.vrt: 28446.94 KB
End VRT creation in 13.806955575942993
Begin to translate command
Input file size is 11455, 60990
0...10...20...30...40...50...60...70...80...90...100 - done.
Temporary masked GeoTIFF created at: /tmp/tmps3xebd4k.tif (37.41 MB) in 20.57 seconds
0

Generating Base Tiles:


...10...20...30...40...50...60...70...80...90...

Generating Overview Tiles:


100 - done.
0...10...20...30...40...50...60...70...80...90...100 - done.
gdal2tiles completed successfully. Tiles saved to: /media/maffe/nvme/iceboost_global_deploy/iceboost_20250212/RGI62/mapbox_rgi17/
Deleted /tmp/tmpwp1gz4b_.tif.
Deleted /tmp/tmp7_r6s2x6.tif.
Deleted /tmp/tmprainc58c.tif.
Deleted /tmp/tmp5mifbe0z.tif.
Deleted /tmp/tmpsn7ynz2f.tif.
Deleted /tmp/tmpdvve9j85.tif.
Deleted /tmp/tmpk_h_kttc.tif.
Deleted /tmp/tmpjb6qcfgy.tif.
Deleted /tmp/tmpi3wwiult.tif.
Deleted /tmp/tmp5w2ox2ls.tif.
Deleted /tmp/tmpkp7lrgvo.tif.
Deleted /tmp/tmpcrmylo_f.tif.
Deleted /tmp/tmpq9npuc_2.tif.
Deleted /tmp/tmpt4yju4ta.tif.
Deleted /tmp/tmpon2ndoaf.tif.
Deleted /tmp/tmpyfpurb4m.tif.
Deleted /tmp/tmpvgdxw0ij.tif.
Deleted /tmp/tmpiaqpe7vf.tif.
Deleted /tmp/tmp_yyxs3kd.tif.
Deleted /tmp/tmpth0k0bkn.tif.
Deleted /tmp/tmpgxii00ja.tif.
Deleted /tmp/tmpmwv0ywoi.tif.
Deleted /tmp/tmppsda4cfa.tif.
Deleted /tmp/tmpxdj62508.tif.
Deleted /tmp/tmp4th70r3y.tif.
Deleted /tmp/tmpq1doeh9t.tif.
Deleted /tmp/tmps

In [38]:
# If something is left in the temporary directory, remove the leftover .tif files
# (it should not be necessary)

# Only match files that follow tempfile's naming scheme (e.g. /tmp/tmpXXXXXXXX.tif) inside
# the directory tempfile actually uses, so unrelated .tif files of other programs are kept.
junk_pattern = os.path.join(tempfile.gettempdir(), f"{tempfile.gettempprefix()}*.tif")
junk_tif_files = [f for f in glob.glob(junk_pattern) if os.path.isfile(f)]

# Delete each file, counting only what was really removed
total_size_bytes = 0
num_files = 0
for file_path in junk_tif_files:
    try:
        junk_size = os.path.getsize(file_path)
        os.remove(file_path)
    except OSError as err:
        # E.g. the file vanished meanwhile or belongs to another user: report and continue.
        print(f"Could not delete {file_path}: {err}")
        continue
    total_size_bytes += junk_size
    num_files += 1

total_size_mb = total_size_bytes / (1024 ** 2)
print(f"Deleted {num_files} .tif files, freeing {total_size_mb:.2f} MB.")

Deleted 0 .tif files, freeing 0.00 MB.
