In [11]:
# rename files in directory "filename_SNAP_SNAP.tif" to "filename_SNAP.tif"

import os
import sys

def rename_files(directory):
    """Rename ``*_SNAP_SNAP.tif`` files in ``directory`` to ``*_SNAP.tif``.

    Only the trailing ``_SNAP_SNAP.tif`` suffix is rewritten; any other
    ``_SNAP_SNAP`` occurrence earlier in the file name is left untouched.
    A file is skipped (and reported) when its target name already exists,
    so an existing ``*_SNAP.tif`` is never overwritten.

    Args:
        directory: Path of the directory whose entries are scanned
            (non-recursive, via ``os.listdir``).

    Returns:
        None. One line is printed per renamed or skipped file.
    """
    doubled_suffix = "_SNAP_SNAP.tif"
    single_suffix = "_SNAP.tif"

    for filename in os.listdir(directory):
        if not filename.endswith(doubled_suffix):
            continue

        # Rewrite the suffix only; str.replace() would also alter matches
        # in the middle of the name.
        new_filename = filename[:-len(doubled_suffix)] + single_suffix
        old_path = os.path.join(directory, filename)
        new_path = os.path.join(directory, new_filename)

        # os.rename() silently replaces an existing destination on POSIX.
        if os.path.exists(new_path):
            print(f'Skipped: {filename} ({new_filename} already exists)')
            continue

        os.rename(old_path, new_path)
        print(f'Renamed: {filename} to {new_filename}')

# Directory whose "*_SNAP_SNAP.tif" files are renamed.
# Named `snap_dir` rather than `dir` so the built-in dir() stays usable
# in the rest of the kernel session.
snap_dir = "/mnt/d/data_2/output/pituffik_20_21/"

rename_files(snap_dir)

Renamed: S1B_EW_GRDM_1SDH_20200101T115256_20200101T115356_019623_025165_5B02_SNAP_SNAP.tif to S1B_EW_GRDM_1SDH_20200101T115256_20200101T115356_019623_025165_5B02_SNAP.tif
Renamed: S1B_EW_GRDM_1SDH_20200425T114444_20200425T114544_021300_0286E6_EDC0_SNAP_SNAP.tif to S1B_EW_GRDM_1SDH_20200425T114444_20200425T114544_021300_0286E6_EDC0_SNAP.tif
Renamed: S1B_EW_GRDM_1SDH_20200427T112821_20200427T112921_021329_0287CC_BAF9_SNAP_SNAP.tif to S1B_EW_GRDM_1SDH_20200427T112821_20200427T112921_021329_0287CC_BAF9_SNAP.tif
Renamed: S1B_EW_GRDM_1SDH_20200428T120919_20200428T121019_021344_02883D_9DF8_SNAP_SNAP.tif to S1B_EW_GRDM_1SDH_20200428T120919_20200428T121019_021344_02883D_9DF8_SNAP.tif
Renamed: S1B_EW_GRDM_1SDH_20200429T205718_20200429T205828_021364_0288D8_86C0_SNAP_SNAP.tif to S1B_EW_GRDM_1SDH_20200429T205718_20200429T205828_021364_0288D8_86C0_SNAP.tif
Renamed: S1B_EW_GRDM_1SDH_20200430T115256_20200430T115356_021373_028920_21E2_SNAP_SNAP.tif to S1B_EW_GRDM_1SDH_20200430T115256_20200430T115356_02

## Pad and subset pipeline outputs

In [2]:
import os
from collections import Counter
import rasterio

def get_size_counts(input_dir):
    """Tally how many ``.tif`` files in ``input_dir`` share each raster size.

    Args:
        input_dir: Directory to scan (non-recursive). Entries are selected
            by a case-insensitive ``.tif`` extension check.

    Returns:
        A ``collections.Counter`` mapping ``(width, height)`` tuples, as
        reported by the opened rasterio dataset, to the number of files
        with that size.
    """
    tif_names = [
        name for name in os.listdir(input_dir)
        if name.lower().endswith('.tif')
    ]

    tally = Counter()
    for name in tif_names:
        with rasterio.open(os.path.join(input_dir, name)) as dataset:
            tally.update([(dataset.width, dataset.height)])
    return tally

if __name__ == "__main__":
    # NOTE(review): this directory ("pituffik_20_21_EW") differs from the
    # "pituffik_20_21" directory used by the other cells — confirm it is intended.
    INPUT_DIR   = "/mnt/d/data_2/output/pituffik_20_21_EW/geotiffs/"
    size_counts = get_size_counts(INPUT_DIR)

    # Report one line per distinct (width, height), in first-seen order.
    for size in size_counts:
        cnt = size_counts[size]
        print(f"Size {size[0]}×{size[1]} : {cnt} files")

Size 3709×2015 : 86 files
Size 3708×2015 : 758 files
Size 3708×2016 : 8 files
Size 3041×2041 : 2 files
Size 3042×2041 : 1 files
Size 3043×2041 : 2 files
Size 3040×2041 : 2 files
Size 3041×2040 : 1 files
Size 3387×2029 : 1 files
Size 3118×2038 : 1 files
Size 3121×2039 : 2 files
Size 2745×2048 : 2 files
Size 2742×2048 : 1 files
Size 3134×2038 : 1 files
Size 3135×2037 : 1 files
Size 3132×2037 : 1 files
Size 3080×2040 : 3 files
Size 3082×2040 : 7 files
Size 3084×2039 : 4 files
Size 3078×2040 : 3 files
Size 3077×2040 : 3 files
Size 3074×2040 : 3 files
Size 3078×2039 : 6 files
Size 3079×2040 : 1 files
Size 3075×2039 : 3 files
Size 3076×2039 : 1 files
Size 3077×2039 : 1 files
Size 3079×2039 : 3 files
Size 3083×2039 : 2 files
Size 3082×2039 : 2 files
Size 3084×2040 : 2 files
Size 3087×2039 : 1 files
Size 3085×2039 : 1 files
Size 3177×2036 : 1 files
Size 3086×2039 : 1 files
Size 3088×2040 : 1 files
Size 3081×2039 : 1 files
Size 3080×2039 : 1 files
Size 3097×2039 : 1 files
Size 3128×2038 : 1 fil

In [None]:
# 1) Directory holding the GeoTIFFs to inspect — update if needed:
INPUT_DIR = "/mnt/d/data_2/output/pituffik_20_21/geotiffs/"

# 2) Target raster size, compared against each file's (width, height):
TARGET_WIDTH = 3708
TARGET_HEIGHT = 2015

def get_reference(input_dir, target_size):
    """Find the first ``.tif`` in ``input_dir`` whose size equals ``target_size``.

    Files are visited in ``os.listdir`` order, so which matching file is
    returned depends on that order.

    Args:
        input_dir: Directory to scan (non-recursive, case-insensitive
            ``.tif`` extension check).
        target_size: Value compared with ``==`` against each file's
            ``(width, height)`` tuple.

    Returns:
        ``(file_name, transform, crs, nodata)`` taken from the first
        matching dataset.

    Raises:
        RuntimeError: If no ``.tif`` file has the requested size.
    """
    for name in os.listdir(input_dir):
        if name.lower().endswith('.tif'):
            with rasterio.open(os.path.join(input_dir, name)) as dataset:
                dims = (dataset.width, dataset.height)
                if dims == target_size:
                    return name, dataset.transform, dataset.crs, dataset.nodata
    raise RuntimeError(f"No file of size {target_size} found")

if __name__ == "__main__":
    ref_fn, transform, crs, nodata = get_reference(
        INPUT_DIR,
        (TARGET_WIDTH, TARGET_HEIGHT),
    )

    # One tuple of print() arguments per report line.
    for print_args in (
        ("Reference file:   ", ref_fn),
        ("Size (WxH):       ", TARGET_WIDTH, "×", TARGET_HEIGHT),
        ("Affine transform: ", transform),
        ("CRS:              ", crs),
        ("NoData value:     ", nodata),
    ):
        print(*print_args)

Reference file:    S1A_EW_GRDM_1SDH_20200104T112805_20200104T112909_030650_03833E_67E0_SNAP.tif
Size (WxH):        3708 × 2015
Affine transform:  | 40.00, 0.00, 416586.24|
| 0.00,-40.00, 8534472.43|
| 0.00, 0.00, 1.00|
CRS:               EPSG:32619
NoData value:      nan


In [6]:
# S1A_EW_GRDM_1SDH_20200102T114437_20200102T114541_030621_038228_07F4_SNAP.tif
import rasterio


# Print size, transform, CRS and nodata of two rasters so they can be compared.
# Each entry is (label printed in front of the dataset name, file path).
files_to_compare = (
    ("File 1:             ",
     '/mnt/d/data_2/output/pituffik_20_21/geotiffs/S1B_EW_GRDM_1SDH_20200106T214542_20200106T214642_019702_0253F6_C602_SNAP.tif'),
    ("File 2:             ",
     '/mnt/d/data_2/output/pituffik_20_21/geotiffs/S1A_EW_GRDM_1SDH_20200104T112805_20200104T112909_030650_03833E_67E0_SNAP.tif'),
)

for position, (label, path) in enumerate(files_to_compare):
    # Separator between consecutive files (not before the first one).
    if position:
        print("\n" + "="*50 + "\n")

    with rasterio.open(path) as src:
        print(label, src.name)
        print("Size (WxH):       ", src.width, "×", src.height)
        print("Affine transform: ", src.transform)
        print("CRS:              ", src.crs)
        print("NoData value:     ", src.nodata)


File 1:              /mnt/d/data_2/output/pituffik_20_21/geotiffs/S1B_EW_GRDM_1SDH_20200106T214542_20200106T214642_019702_0253F6_C602_SNAP.tif
Size (WxH):        3708 × 2015
Affine transform:  | 40.00, 0.00, 416586.19|
| 0.00,-40.00, 8534502.78|
| 0.00, 0.00, 1.00|
CRS:               EPSG:32619
NoData value:      nan


File 2:              /mnt/d/data_2/output/pituffik_20_21/geotiffs/S1A_EW_GRDM_1SDH_20200104T112805_20200104T112909_030650_03833E_67E0_SNAP.tif
Size (WxH):        3708 × 2015
Affine transform:  | 40.00, 0.00, 416586.24|
| 0.00,-40.00, 8534472.43|
| 0.00, 0.00, 1.00|
CRS:               EPSG:32619
NoData value:      nan


In [None]:
import os
import random
import numpy as np
import rasterio
from affine import Affine

# — User settings —
INPUT_DIR = "/mnt/d/data_2/output/pituffik_20_21/geotiffs/"
TEST_DIR = "/mnt/d/data_2/output/pituffik_20_21/geotiffs_padded/"
# Affine coefficients (a, b, c, d, e, f) of the reference grid.
REF_TRANSFORM = Affine(40.00, 0.00, 416586.24,
                       0.00,-40.00, 8534472.43)
TARGET_SIZE = (3708, 2015)
NODATA_VALUE = np.nan  # as printed

os.makedirs(TEST_DIR, exist_ok=True)

# 1) list outliers: every .tif whose (width, height) differs from TARGET_SIZE
outliers = []
for fn in os.listdir(INPUT_DIR):
    if fn.lower().endswith('.tif'):
        with rasterio.open(os.path.join(INPUT_DIR, fn)) as src:
            raster_size = (src.width, src.height)
        if raster_size != TARGET_SIZE:
            outliers.append(fn)

print(f"Found {len(outliers)} outlier files.")

# 2) sample up to five (unseeded, so the selection changes between runs)
sample_size = min(len(outliers), 5)
sample = random.sample(outliers, sample_size)
print("Testing on:", sample)

# 3) padding function
def pad_to_size_safe(src_path, dst_path, ref_transform, target_size, nodata):
    """Place a raster onto a fixed-size reference grid, padding and/or cropping.

    The source raster is read in full, its origin is snapped to the nearest
    pixel of the reference grid, and the overlapping part is copied into an
    array of ``target_size`` pre-filled with ``nodata``. Parts of the source
    that fall outside the reference grid are dropped. If the source does not
    overlap the reference grid at all, a warning is issued and an all-nodata
    raster is written.

    No resampling or reprojection is done: the pixel size is taken from the
    source (``src.res``) and the source is assumed to share the reference
    grid's CRS and pixel size — this is not checked here.

    Args:
        src_path: Path of the raster to read.
        dst_path: Path of the raster to write.
        ref_transform: Object exposing ``.c`` and ``.f`` (used as the x of
            the reference grid's left edge and the y of its top edge); it is
            also written as the output ``transform``.
        target_size: ``(width, height)`` of the output.
        nodata: Fill value for cells not covered by the source; also written
            as the output ``nodata``.

    Returns:
        None. The result is written to ``dst_path``.
    """
    # Imported locally so the function does not rely on cell-level imports.
    import warnings

    import numpy as np
    import rasterio

    with rasterio.open(src_path) as src:
        data = src.read()
        px, py = src.res[0], abs(src.res[1])
        ref_left, ref_top = ref_transform.c, ref_transform.f
        tw, th = target_size

        # Offset of the source origin in the reference grid, snapped to the
        # NEAREST pixel. Flooring would move a source that is off by a tiny
        # negative fraction of a pixel a whole pixel too far.
        col_off_i = int(np.round((src.bounds.left - ref_left) / px))
        row_off_i = int(np.round((ref_top - src.bounds.top) / py))

        # Intersection of the source footprint with the destination array.
        dst_col_start = max(col_off_i, 0)
        dst_row_start = max(row_off_i, 0)
        dst_col_end = min(col_off_i + src.width, tw)
        dst_row_end = min(row_off_i + src.height, th)
        n_cols = dst_col_end - dst_col_start
        n_rows = dst_row_end - dst_row_start

        out = np.full((src.count, th, tw), nodata, dtype=data.dtype)

        if n_cols > 0 and n_rows > 0:
            # Matching window in the source array.
            src_col_start = max(0, -col_off_i)
            src_row_start = max(0, -row_off_i)
            out[:, dst_row_start:dst_row_end,
                   dst_col_start:dst_col_end] = data[:, src_row_start:src_row_start + n_rows,
                                                    src_col_start:src_col_start + n_cols]
        else:
            # Without this guard the negative extents turn into mismatched
            # slices and the assignment fails with a shape error.
            warnings.warn(
                f"{src_path} does not overlap the reference grid; "
                "writing an all-nodata raster"
            )

        profile = src.profile.copy()
        profile.update({
            'width':     tw,
            'height':    th,
            'transform': ref_transform,
            'nodata':    nodata
        })

        with rasterio.open(dst_path, 'w', **profile) as dst:
            dst.write(out)

# 4) pad the outliers into TEST_DIR
# NOTE(review): this loop processes every entry in `outliers`, not only the
# up-to-five `sample` reported by "Testing on:" — confirm which is intended.
for fn in outliers:
    src_path, dst_path = (os.path.join(folder, fn) for folder in (INPUT_DIR, TEST_DIR))
    pad_to_size_safe(
        src_path=src_path,
        dst_path=dst_path,
        ref_transform=REF_TRANSFORM,
        target_size=TARGET_SIZE,
        nodata=NODATA_VALUE,
    )
    print("Padded:", fn)


Found 172 outlier files.
Testing on: ['S1A_EW_GRDM_1SDH_20210824T114449_20210824T114554_039371_04A67E_52E9_SNAP.tif', 'S1B_EW_GRDM_1SDH_20201111T121738_20201111T121838_024217_02E097_F1F4_SNAP.tif', 'S1A_EW_GRDM_1SDH_20200729T115248_20200729T115352_033669_03E6F7_2B3A_SNAP.tif', 'S1B_EW_GRDM_1SDH_20210627T121739_20210627T121839_027542_0349AB_B328_SNAP.tif', 'S1A_EW_GRDM_1SDH_20200114T114436_20200114T114541_030796_038848_72FA_SNAP.tif']
Padded: S1A_EW_GRDM_1SDH_20200102T114437_20200102T114541_030621_038228_07F4_SNAP.tif
Padded: S1A_EW_GRDM_1SDH_20200110T121713_20200110T121818_030738_038634_EDE3_SNAP.tif
Padded: S1A_EW_GRDM_1SDH_20200111T112002_20200111T112106_030752_0386B1_4F76_SNAP.tif
Padded: S1A_EW_GRDM_1SDH_20200114T114436_20200114T114541_030796_038848_72FA_SNAP.tif
Padded: S1A_EW_GRDM_1SDH_20200119T115242_20200119T115346_030869_038AD8_3E93_SNAP.tif
Padded: S1A_EW_GRDM_1SDH_20200123T112002_20200123T112106_030927_038CD5_FF43_SNAP.tif
Padded: S1A_EW_GRDM_1SDH_20200221T112803_20200221T11