In [17]:
import os
import glob
import rasterio
from rasterio.merge import merge
import numpy as np
from tqdm import tqdm

In [18]:
def _group_tif_files_by_time(tile_path, start_time, end_time):
    """Group the ``*.TIF`` files of one tile folder by the time encoded in their name.

    The file name (without extension) is split on ``_``; field 3 is taken as
    the date string and field 4 as the integer time.

    Args:
        tile_path: Folder that is searched (non-recursively) for ``*.TIF`` files.
        start_time: Lowest time (inclusive) to keep.
        end_time: Highest time (inclusive) to keep.

    Returns:
        dict mapping ``time`` -> ``(date_str, [file paths])``. ``date_str`` is
        the date field of the first file found for that time.
    """
    groups = {}
    for tif_file in glob.glob(os.path.join(tile_path, '*.TIF')):
        file_name = os.path.basename(tif_file)
        # Drop the extension first so a time field that happens to be the
        # last field ("..._900.TIF") still parses as an integer.
        parts = os.path.splitext(file_name)[0].split('_')

        # skip the agglomerated file (it has fewer name fields)
        if len(parts) < 5:
            continue

        try:
            time = int(parts[4])
        except ValueError:
            # Not a time-stamped raster; skip it instead of aborting the run.
            print(f"Warning: cannot parse a time from {file_name}, skipping.")
            continue

        if start_time <= time <= end_time:
            # Remember the date together with the group so the output name
            # does not depend on whichever file was scanned last.
            groups.setdefault(time, (parts[3], []))[1].append(tif_file)
    return groups


def _blend_minimum(band, data, row_start, col_start, fill):
    """Write ``data`` into ``band`` at the given offset, keeping the lowest valid value.

    A mosaic pixel is overwritten when the incoming pixel is not ``fill`` and
    the mosaic pixel is either still ``fill`` or larger than the incoming one.
    ``band`` is modified in place; ``data`` is cropped if it overruns ``band``.

    Returns:
        False when the offset lies outside ``band`` (nothing written), else True.
    """
    if not (0 <= row_start < band.shape[0] and 0 <= col_start < band.shape[1]):
        return False

    end_row = min(row_start + data.shape[0], band.shape[0])
    end_col = min(col_start + data.shape[1], band.shape[1])

    incoming = data[:end_row - row_start, :end_col - col_start]
    target = band[row_start:end_row, col_start:end_col]  # view into the mosaic

    take = (incoming != fill) & ((target == fill) | (incoming < target))
    target[take] = incoming[take]
    return True


def merge_tif_files_by_time(main_folder, output_folder, nodata_value=-9999, start_time=900, end_time=2000):
    """Mosaic the rasters of every tile folder, one output GeoTIFF per time value.

    Each sub-folder of ``main_folder`` is treated as a tile. Its ``*.TIF``
    files are grouped by the time field of their name, and for every time in
    ``[start_time, end_time]`` band 1 of all files in the group is merged into
    a single float32 band. Where files overlap, the lowest valid value wins.
    The result is written to
    ``<output_folder>/<tile_folder>_<date>_<time>.TIF``.

    Args:
        main_folder: Folder containing one sub-folder per tile.
        output_folder: Destination folder; created if missing.
        nodata_value: Value used for empty pixels in the mosaic and declared
            as nodata in the output file.
        start_time: Lowest time (inclusive) to process.
        end_time: Highest time (inclusive) to process.

    Notes:
        * The pixel size of the first file in a group is used for the whole
          mosaic (pixels are assumed square and identical across the group).
        * NaN pixels and pixels equal to a source file's own nodata value are
          treated as empty, so they never mask valid data from another file.
        * Files are grouped by time only; if one folder holds several dates
          for the same time they end up in the same mosaic.
    """
    os.makedirs(output_folder, exist_ok=True)
    # Compare against the value exactly as it is stored in the float32 mosaic.
    fill = np.float32(nodata_value)

    for tile_folder in tqdm(os.listdir(main_folder), desc="Processing Tile Folders"):
        tile_path = os.path.join(main_folder, tile_folder)
        if not os.path.isdir(tile_path):
            continue

        groups = _group_tif_files_by_time(tile_path, start_time, end_time)

        for time, (date_str, files) in tqdm(groups.items(), desc=f"Merging by time for {tile_folder}", leave=False):
            sources = []
            try:
                for file in files:
                    sources.append(rasterio.open(file))

                # Union of all file extents.
                min_x = min(src.bounds.left for src in sources)
                min_y = min(src.bounds.bottom for src in sources)
                max_x = max(src.bounds.right for src in sources)
                max_y = max(src.bounds.top for src in sources)

                # round() rather than int(): floating-point error in the
                # division must not drop a whole pixel row/column.
                pixel_size = sources[0].res[0]  # pixels should be square
                height = int(round((max_y - min_y) / pixel_size))
                width = int(round((max_x - min_x) / pixel_size))
                out_transform = rasterio.transform.from_bounds(min_x, min_y, max_x, max_y, width, height)

                # init mosaic, shape is (bands, height, width)
                mosaic = np.full((1, height, width), fill, dtype=np.float32)
                print(f" moasic {mosaic[0].shape}")

                for src in sources:
                    data = src.read(1).astype(np.float32)  # astype() copies, safe to edit

                    # Normalise "empty" pixels to the mosaic fill value.
                    invalid = np.isnan(data)
                    if src.nodata is not None and not np.isnan(src.nodata):
                        invalid |= data == np.float32(src.nodata)
                    data[invalid] = fill

                    # Pixel offset of this file's upper-left corner in the mosaic.
                    col_start, row_start = ~out_transform * (src.bounds.left, src.bounds.top)
                    row_start, col_start = int(round(row_start)), int(round(col_start))

                    if not _blend_minimum(mosaic[0], data, row_start, col_start, fill):
                        print(f"Warning: Data for {src.name} is outside the bounds of the mosaic, skipping.")

                out_meta = sources[0].meta.copy()
            finally:
                # Always release the file handles, even if merging failed.
                for src in sources:
                    src.close()

            # Safety net: no NaN may reach the file (this also clamps +/-inf).
            mosaic = np.nan_to_num(mosaic, copy=False, nan=fill)

            out_meta.update({
                "driver": "GTiff",
                "height": mosaic.shape[1],
                "width": mosaic.shape[2],
                "transform": out_transform,
                "count": mosaic.shape[0],  # Band count
                "dtype": "float32",  # the mosaic is float32 whatever the sources were
                "nodata": nodata_value
            })

            # Create output
            output_file = os.path.join(output_folder, f"{tile_folder}_{date_str}_{time}.TIF")
            with rasterio.open(output_file, "w", **out_meta) as dest:
                dest.write(mosaic)

            print(f"Merged {len(files)} TIF files for tile {tile_folder} at time {time} into {output_file}")

# Execute the function

In [19]:
# Folder holding one sub-folder per tile.
main_folder = "D:/Geomatics/test_merging"
# Raw string: "\G" is not a valid escape sequence and triggers a SyntaxWarning
# on recent Python versions; the r-prefix keeps the path value unchanged.
output_folder = r"D:\Geomatics/test_merging_output"
merge_tif_files_by_time(main_folder, output_folder)

Processing Tile Folders:   0%|          | 0/1 [00:00<?, ?it/s]
Merging by time for 25BZ1:   0%|          | 0/23 [00:00<?, ?it/s][A

 moasic (12538, 10038)
Data slice: [[0.1 0.1 0.1 ... 1.  1.  1. ]
 [1.  1.  1.  ... 1.  1.  1. ]
 [1.  1.  1.  ... 1.  1.  1. ]
 ...
 [1.  1.  1.  ... 1.  1.  1. ]
 [1.  1.  1.  ... 1.  1.  1. ]
 [1.  1.  1.  ... 1.  1.  1. ]]
Existing mosaic slice: [[-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 ...
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]]
New mosaic slice: [[0.1 0.1 0.1 ... 1.  1.  1. ]
 [1.  1.  1.  ... 1.  1.  1. ]
 [1.  1.  1.  ... 1.  1.  1. ]
 ...
 [1.  1.  1.  ... 1.  1.  1. ]
 [1.  1.  1.  ... 1.  1.  1. ]
 [1.  1.  1.  ... 1.  1.  1. ]]
Data slice: [[0.1 0.1 0.1 ... 1.  1.  1. ]
 [0.1 0.1 0.1 ... 1.  1.  1. ]
 [0.1 0.1 0.1 ... 1.  1.  1. ]
 ...
 [1.  1.  1.  ... 1.  1.  1. ]
 [1.  1.  1.  ... 1.  1.  1. ]
 [1.  1.  1.  ... 1.  1.  1. ]]
Existing mosaic slice: [[ 1.000e-01 


Merging by time for 25BZ1:   4%|▍         | 1/23 [00:07<02:41,  7.34s/it][A

Merged 23 TIF files for tile 25BZ1 at time 900 into D:\Geomatics/test_merging_output\25BZ1_20150701_900.TIF
 moasic (12538, 10038)
Data slice: [[1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 ...
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]]
Existing mosaic slice: [[-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 ...
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]]
New mosaic slice: [[1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 ...
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]]
Data slice: [[0.1 0.1 0.1 ... 1.  1.  1. ]
 [0.1 0.1 0.1 ... 1.  1.  1. ]
 [0.1 0.1 0.1 ... 1.  1.  1. ]
 ...
 [1.  1.  1.  ... 1.  1.  1. ]
 [1.  1.  1.  ... 1.  1.  1. ]
 [1.  1.  1.  ... 1.  1.  1. ]]



Processing Tile Folders:   0%|          | 0/1 [00:08<?, ?it/s]           [A

New mosaic slice: [[1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 0. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 ...
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]]





KeyboardInterrupt: 