# AOIs

In [9]:
# Scott Trough
# - year round from 2015/07/03 to 2023/12/31
# - start of data period determined by compatibility with sentinel1_routines (Murashkin's routine)
# - 1792 scenes total
# Directories holding the processed scenes for the small AOI and the enlarged ("big") AOI.
scott_data_path = '/mnt/raid01/SAR/Sentinel-1/Arctic/ScottTrough/asdf/snap/'
scott_data_path_big = '/mnt/raid01/SAR/Sentinel-1/Arctic/ScottTrough/asdf/snap_big/'
# AOI footprints as WKT polygons (closed rings: first vertex == last vertex).
# NOTE(review): vertices look like (lon lat) pairs in degrees — confirm.
scott_polygon_aoi = 'POLYGON((-70.75 71.19,-69.55 71.19,-69.55 71.59,-70.75 71.59,-70.75 71.19))'
scott_polygon_big = 'POLYGON((-71.395 71.032, -68.645 71.032, -68.645 71.912, -71.395 71.912, -71.395 71.032))'
# Presumably (UTM zone number, central meridian in degrees east) — TODO confirm.
scott_utm_zone_meridian = (19, -69)
# Sample scenes used as input for create_landmask(); None where no sample has been set.
scott_aoi_landmask_sample = None
scott_big_landmask_sample = '/mnt/raid01/SAR/Sentinel-1/Arctic/ScottTrough/asdf/snap_big/S1A_EW_GRDM_1SDH_20150717T112921_20150717T113021_006850_0093C2_03B7_BN_TN_CAL_SPCK_TC_Subset_dB_LSMask.tif'


# Western Svalbard
# - year round from 2017/01/01 to 2021/12/31
# - data period determined by availability of high temporal density from S1A + S1B
# - 2753 scenes total
# - !!! 2 MISSING !!! (presumably 2 scenes not yet available — TODO confirm)
# NOTE(review): the variable name is spelled 'svalard' (sic); it is left unchanged here
# because other cells may reference it under this name.
svalard_data_path = '/mnt/raid01/SAR/Sentinel-1/Arctic/WesternSvalbard/pipeline_snap/'
# AOI footprint as a WKT polygon (closed ring).
# NOTE(review): vertices look like (lon lat) pairs in degrees — confirm.
svalbard_polygon_aoi = 'POLYGON((9.74 78.3,11.45 78.3,11.45 78.7,9.74 78.7,9.74 78.3))'
# Presumably (UTM zone number, central meridian in degrees east) — TODO confirm.
# NOTE(review): the other AOIs call this '<aoi>_utm_zone_meridian'.
svalbard_utm_zone = (33, 15)
# Sample scene intended as input for create_landmask().
svalbard_aoi_landmask_sample = '/mnt/raid01/SAR/Sentinel-1/Arctic/WesternSvalbard/pipeline_snap/S1A_EW_GRDM_1SDH_20170215T065537_20170215T065637_015291_0190FC_2B8E_SNAP.tif'



# Nussaq
# - year round, but only from 2017/01/01 to 2021/12/31
# - 1346 scenes total
nussaq_data_path = '/mnt/raid01/SAR/Sentinel-1/Arctic/DiscoBay/Nussaq/pipeline_snap/'
# AOI footprint as a WKT polygon (closed ring).
# NOTE(review): vertices look like (lon lat) pairs in degrees — confirm.
nussaq_polygon_aoi = 'POLYGON ((-56.14 70.20, -56.14 70.85, -53.66 70.85, -53.66 70.20, -56.14 70.20))'
# Presumably (UTM zone number, central meridian in degrees east) — TODO confirm.
nussaq_utm_zone_meridian = (21,-57) 
# Sample scene intended as input for create_landmask().
nussaq_aoi_landmask_sample = '/mnt/raid01/SAR/Sentinel-1/Arctic/DiscoBay/Nussaq/pipeline_snap/S1A_EW_GRDM_1SDH_20180309T104041_20180309T104141_020937_023ED7_BC36_SNAP.tif'


# Attu
# - year round from 2015/07/03 to 2024/12/31
# - 952 scenes total
attu_data_path = '/mnt/raid01/SAR/Sentinel-1/Arctic/DiscoBay/Attu/pipeline_snap/'
# AOI footprint as a WKT polygon (closed ring).
# NOTE(review): vertices look like (lon lat) pairs in degrees — confirm.
attu_polygon_aoi = 'POLYGON((-55.46 67.30, -53.36 67.30, -53.36 68.08, -55.46 68.08, -55.46 67.30))'
# Presumably (UTM zone number, central meridian in degrees east) — TODO confirm.
attu_utm_zone_meridian = (21,-57) 
# No landmask sample scene has been set for this AOI.
attu_aoi_landmask_sample = None

# Kangaat
# - year round from 2015/07/03 to 2024/09/17
# - 895 scenes so far (!!! count not final yet !!!)
kangaat_data_path = '/mnt/raid01/SAR/Sentinel-1/Arctic/DiscoBay/Kangaat/asdf/snap/'
# AOI footprint as a WKT polygon (closed ring); unlike the other AOIs this one is
# not an axis-aligned rectangle.
# NOTE(review): vertices look like (lon lat) pairs in degrees — confirm.
kangaat_polygon_aoi = 'POLYGON((-55.10979969474843 68.44536073505672,-53.90696642393627 68.42745327139977,-53.96309992205016 68.00813587214579,-55.14414440975267 68.02566791055716,-55.10979969474843 68.44536073505672))'
# Presumably (UTM zone number, central meridian in degrees east) — TODO confirm.
kangaat_utm_zone_meridian = (21,-57)
# No landmask sample scene has been set for this AOI.
# NOTE(review): the other AOIs call this '<aoi>_aoi_landmask_sample'.
kangaat_landmask_sample = None


# Kugmallit
# Barrow strait
# Sentralbanken High
# Primolizsa platform

### Create a binary landmask based on a "fit" input image for the specific AOI

In [None]:
from osgeo import gdal
import numpy as np
import os

def create_landmask(input_filename, output_filename):
    """
    Derive a binary mask GeoTIFF from the NaN pattern of a reference GeoTIFF.

    Band 1 of the input is read; every NaN pixel becomes 1 and every other
    pixel becomes 0. The mask is written as a single-band Byte raster using the
    input's driver, raster size, geotransform and projection.

    Parameters:
    - input_filename: str, path to the input geotiff file.
    - output_filename: str, path where the output landmask geotiff will be saved.

    Returns:
    - None. Progress and failures are reported via print(); if the input cannot
      be opened or the output cannot be created the function returns early
      without raising.
    """
    source = gdal.Open(input_filename)
    if source is None:
        print('Unable to open input geotiff.')
        return
    print(f'Input geotiff "{input_filename}" opened successfully.')

    # Only the first band is inspected (single-band input assumed).
    pixels = source.GetRasterBand(1).ReadAsArray()

    # NaN -> 1, anything else -> 0; uint8 matches the Byte band created below.
    mask = np.isnan(pixels).astype(np.uint8)

    # Reuse the input's driver so the output has the same file format.
    target = source.GetDriver().Create(
        output_filename, source.RasterXSize, source.RasterYSize, 1, gdal.GDT_Byte
    )
    if target is None:
        print('Unable to create output geotiff.')
        return
    print(f'Output geotiff "{output_filename}" created successfully.')

    # Copy the georeferencing so the mask aligns pixel-for-pixel with the input.
    target.SetGeoTransform(source.GetGeoTransform())
    target.SetProjection(source.GetProjection())

    target_band = target.GetRasterBand(1)
    target_band.WriteArray(mask)
    target_band.FlushCache()

    # Drop the references to the GDAL datasets now that writing is done.
    source = None
    target = None

    print('Landmask creation completed.')


# Folder that receives the generated landmask GeoTIFFs.
output_directory = '/mnt/raid01/SAR/Sentinel-1/Arctic/ArcticDeepSeepsData/AOIs_timeseries/landmasks/'

# Build the landmask for the enlarged Scott Trough AOI from its sample scene.
landmask_path = os.path.join(output_directory, 'scott_big_landmask.tif')
input_filename = scott_big_landmask_sample
create_landmask(input_filename=input_filename, output_filename=landmask_path)


### Create pngs with two colormaps: one for data, one for landmask

In [37]:
import os
import numpy as np
from osgeo import gdal, ogr, osr
from glob import glob
from tqdm import tqdm
from PIL import Image
from multiprocessing import Pool

def wkt_polygon_to_pixel_coords(wkt_polygon, dataset):
    """
    Convert the outer ring of a WKT polygon (EPSG:4326) into pixel coordinates
    of an open GDAL dataset.

    The polygon is reprojected from EPSG:4326 into the dataset's own projection,
    read from ``dataset.GetProjection()``. EPSG:32619 (WGS 84 / UTM zone 19N) is
    used only as a fallback when the dataset reports no projection, so rasters
    in other UTM zones are handled correctly as well.

    Parameters:
    - wkt_polygon: str, WKT polygon in EPSG:4326. No axis-mapping strategy is
      set on the source CRS, so the vertex order must be the one the installed
      GDAL assigns to EPSG:4326.
      NOTE(review): with GDAL >= 3 that is presumably (lat lon) — confirm
      against the callers before changing it.
    - dataset: open GDAL dataset; only its projection and geotransform are read.

    Returns:
    - list of (col, row) integer tuples, one per vertex of the outer ring.
      For a simple closed rectangle this yields 5 points (first == last).
      Points are NOT clipped to the raster extent.

    Raises:
    - ValueError: if the WKT cannot be parsed, the geometry has no outer ring,
      or the dataset's projection string cannot be interpreted.
    """
    geom = ogr.CreateGeometryFromWkt(wkt_polygon)
    if geom is None:
        raise ValueError("Invalid WKT polygon.")

    # Source CRS: geographic WGS84.
    sref_src = osr.SpatialReference()
    sref_src.ImportFromEPSG(4326)

    # Target CRS: the CRS the raster is georeferenced in, so that the
    # geotransform below is applied to coordinates of the matching system.
    sref_tgt = osr.SpatialReference()
    projection_wkt = dataset.GetProjection()
    if projection_wkt:
        if sref_tgt.ImportFromWkt(projection_wkt) != 0:
            raise ValueError("Unable to interpret the dataset projection.")
    else:
        # Legacy behaviour for rasters without projection metadata.
        sref_tgt.ImportFromEPSG(32619)

    geom.Transform(osr.CoordinateTransformation(sref_src, sref_tgt))

    # GeoTransform layout: (originX, pixelSizeX, rotX, originY, rotY, pixelSizeY).
    # The rotation terms are ignored, i.e. a north-up raster is assumed.
    origin_x, pixel_size_x, _, origin_y, _, pixel_size_y = dataset.GetGeoTransform()

    ring = geom.GetGeometryRef(0)  # outer ring of the (transformed) polygon
    if ring is None:
        raise ValueError("WKT geometry has no outer ring (not a polygon?).")

    pixel_coords = []
    for i in range(ring.GetPointCount()):
        x_geo, y_geo = ring.GetPoint(i)[:2]
        # Projected coordinate -> nearest integer pixel index.
        col = int(round((x_geo - origin_x) / pixel_size_x))
        row = int(round((y_geo - origin_y) / pixel_size_y))
        pixel_coords.append((col, row))
    return pixel_coords


def _draw_polygon_on_array(img_array, pixel_coords, color=255, thickness=3):
    """
    Draws the polygon boundary on a 2D numpy array (grayscale),
    making the line 'thickness' pixels wide.
    """

    def draw_line(arr, x0, y0, x1, y1, val=color):
        dx = abs(x1 - x0)
        dy = abs(y1 - y0)
        sx = 1 if x0 < x1 else -1
        sy = 1 if y0 < y1 else -1
        err = dx - dy

        x, y = x0, y0
        while True:
            # For each pixel on the line, fill a neighborhood of 'thickness'
            fill_thickness(arr, x, y, thickness, val)
            
            if x == x1 and y == y1:
                break
            e2 = 2 * err
            if e2 > -dy:
                err -= dy
                x += sx
            if e2 < dx:
                err += dx
                y += sy

    def fill_thickness(arr, cx, cy, t, val):
        """
        Fill a (2t+1)x(2t+1) region around (cx, cy).
        For thickness=3, that's a 7x7 region (center +- 3 pixels).
        """
        radius = t // 2 if t > 2 else t
        for rx in range(cx - radius, cx + radius + 1):
            for ry in range(cy - radius, cy + radius + 1):
                if 0 <= rx < arr.shape[1] and 0 <= ry < arr.shape[0]:
                    arr[ry, rx] = val

    # Now draw edges between consecutive points
    for i in range(len(pixel_coords) - 1):
        (x0, y0) = pixel_coords[i]
        (x1, y1) = pixel_coords[i+1]
        draw_line(img_array, x0, y0, x1, y1, color)

def _fill_polygon_on_array(arr, pixel_coords, color=255):
    """
    Rudimentary polygon fill (scan-line or use shapely rasterize).
    But for a rectangle, you can just fill bounding box
    if you trust corners are correct.
    """
    cols = [pt[0] for pt in pixel_coords]
    rows = [pt[1] for pt in pixel_coords]
    min_c, max_c = min(cols), max(cols)
    min_r, max_r = min(rows), max(rows)
    # Clip to array bounds
    min_c = max(min_c, 0)
    min_r = max(min_r, 0)
    max_c = min(max_c, arr.shape[1]-1)
    max_r = min(max_r, arr.shape[0]-1)
    arr[min_r:max_r+1, min_c:max_c+1] = color


def _process_geotiff_file(args):
    """
    Pool worker: render one band of a GeoTIFF as an 8-bit grayscale PNG.

    ``args`` is one tuple (so the function can be mapped over a Pool) holding,
    in this order: file_path, landmask, desired_height, desired_width, vmin,
    vmax, channel_num, output_dir, polygon_wkt.

    Pixel classes in the output image:
    - landmask == 1 ("land"): left at 0.
    - landmask != 1 and not NaN ("ocean"): clipped to [vmin, vmax] and scaled
      linearly to 0..255.
    - landmask != 1 and NaN ("border"): painted with the scaled median of the
      ocean pixels of the same file.
    If ``polygon_wkt`` is not None its outline is drawn on top with value 255.

    Returns:
    - True if the PNG was written; False if the file could not be opened, its
      shape does not match the landmask, it has no ocean pixels, or any
      exception occurred (the exception is printed, not raised).
    """
    (file_path, landmask, desired_height, desired_width,
     vmin, vmax, channel_num, output_dir, polygon_wkt) = args

    def to_gray(values):
        # Linear stretch of [vmin, vmax] onto [0, 255]; out-of-range values saturate.
        return (np.clip(values, vmin, vmax) - vmin) / (vmax - vmin) * 255

    try:
        png_name = os.path.splitext(os.path.basename(file_path))[0] + ".png"
        output_path = os.path.join(output_dir, png_name)

        dataset = gdal.Open(file_path)
        if dataset is None:
            print(f"[Warning] Unable to open geotiff file: {file_path}")
            return False

        # Read the requested band and crop it to the common frame size.
        data = dataset.GetRasterBand(channel_num).ReadAsArray()
        data = data[:desired_height, :desired_width]

        if data.shape != landmask.shape:
            print(f"[Warning] Dimension mismatch (data vs landmask) for file: {file_path}")
            dataset = None
            return False

        # landmask == 1 marks land; everything else is ocean or border.
        not_land = ~(landmask == 1)
        missing = np.isnan(data)
        border_pixels = missing & not_land
        ocean_pixels = ~missing & not_land

        ocean_values = data[ocean_pixels]
        if ocean_values.size == 0:
            print(f"[Warning] No valid ocean pixels found in {file_path}")
            dataset = None
            return False

        output_array = np.zeros_like(data, dtype=np.uint8)
        output_array[ocean_pixels] = to_gray(ocean_values).astype(np.uint8)

        # Border pixels get one neutral gray: the median ocean value of this file.
        output_array[border_pixels] = int(to_gray(np.median(ocean_values)))

        if polygon_wkt is not None:
            outline = wkt_polygon_to_pixel_coords(polygon_wkt, dataset)
            _draw_polygon_on_array(output_array, outline, color=255, thickness=5)

        Image.fromarray(output_array, mode="L").save(output_path)

        dataset = None
        return True

    except Exception as e:
        print(f"[Error] Unexpected exception with file {file_path}:\n{e}")
        return False


def convert_geotiffs_to_pngs(
    input_dir,
    landmask_path,
    output_dir,
    desired_width,
    desired_height,
    vmin,
    vmax,
    channel_num=1,
    flag_calc_vmin_vmax=False,
    num_processes=8,
    polygon_wkt=None
):
    """
    Convert every ``*.tif`` in ``input_dir`` to a grayscale PNG in ``output_dir``
    using a pool of worker processes (one ``_process_geotiff_file`` call per file).

    Parameters:
    - input_dir: directory searched (non-recursively) for ``*.tif`` files.
    - landmask_path: GeoTIFF whose band 1 is the landmask. Pixels with value 1
      are treated as land by ``_process_geotiff_file`` and are excluded from
      scaling and from the statistics computed here.
    - output_dir: destination directory; created if missing.
    - desired_width, desired_height: data and landmask are cropped to
      ``[:desired_height, :desired_width]`` before use.
    - vmin, vmax: intensity range mapped to 0..255. Ignored (recomputed) when
      ``flag_calc_vmin_vmax`` is True.
    - channel_num: 1-based raster band to render.
    - flag_calc_vmin_vmax: if True, derive vmin/vmax as the 0.135th / 99.865th
      percentiles of all non-NaN, non-land pixels of all input files.
    - num_processes: number of worker processes.
    - polygon_wkt: optional WKT polygon handed to each worker, which draws its
      outline on the image.

    Returns:
    - None. Problems are reported via print(); the function returns early when
      no input files exist, the landmask cannot be opened, no pixels are
      available for the statistics, or the intensity range is invalid.
    """
    os.makedirs(output_dir, exist_ok=True)

    geotiff_files = glob(os.path.join(input_dir, "*.tif"))
    if not geotiff_files:
        print("[Info] No geotiff files found in input directory.")
        return

    # Load landmask
    landmask_dataset = gdal.Open(landmask_path)
    if landmask_dataset is None:
        print("[Error] Unable to open landmask geotiff.")
        return
    landmask = landmask_dataset.GetRasterBand(1).ReadAsArray()
    landmask_dataset = None

    # Crop the landmask to the same frame the data will be cropped to.
    landmask = landmask[:desired_height, :desired_width]

    # If requested, compute vmin/vmax from data
    if flag_calc_vmin_vmax:
        print("[Info] Computing global vmin/vmax from percentiles...")
        # Same pixel selection as the worker: landmask == 1 is land, so the
        # statistics must come from the remaining (ocean) pixels.
        ocean_mask = landmask != 1
        chunks = []
        for file_path in tqdm(geotiff_files, desc="Gathering data stats"):
            ds = gdal.Open(file_path)
            if ds is None:
                continue
            data = ds.GetRasterBand(channel_num).ReadAsArray()
            ds = None

            data = data[:desired_height, :desired_width]
            if data.shape != ocean_mask.shape:
                # The worker skips such files as well, so they must not
                # influence the scaling.
                continue

            # Keep numpy arrays (not Python lists of scalars) to stay fast
            # and memory-efficient.
            chunks.append(data[ocean_mask & ~np.isnan(data)])

        data_values = np.concatenate(chunks) if chunks else np.array([])
        if data_values.size == 0:
            print("[Error] No data pixels found for percentile computation.")
            return

        # 0.135 % / 99.865 % is the two-sided coverage of +-3 sigma for a
        # normal distribution.
        lower = 0.135
        upper = 99.865
        vmin = np.percentile(data_values, lower)
        vmax = np.percentile(data_values, upper)
        print(f"[Info] Computed vmin={vmin}, vmax={vmax}")

    # The workers divide by (vmax - vmin); reject a degenerate range up front.
    if vmin is None or vmax is None or not vmax > vmin:
        print(f"[Error] Invalid intensity range: vmin={vmin}, vmax={vmax}.")
        return

    # One argument tuple per file (the worker takes a single tuple).
    args_list = [
        (
            file_path,
            landmask,
            desired_height,
            desired_width,
            vmin,
            vmax,
            channel_num,
            output_dir,
            polygon_wkt,
        )
        for file_path in geotiff_files
    ]

    print("[Info] Starting parallel conversion...")

    n_converted = 0
    with Pool(processes=num_processes) as p:
        results_iter = p.imap_unordered(_process_geotiff_file, args_list)
        for ok in tqdm(results_iter, total=len(args_list), desc="Converting"):
            n_converted += bool(ok)

    n_failed = len(args_list) - n_converted
    if n_failed:
        print(f"[Warning] {n_failed} of {len(args_list)} files were not converted.")

    print("[Info] Conversion completed.")


In [None]:
# Run configuration for convert_geotiffs_to_pngs().
# Exactly one input/landmask/output triple should be active; the commented-out
# triples below are kept as presets for the other AOIs.

# Preset: Western Svalbard, HH
# input_directory  = "/mnt/raid01/SAR/Sentinel-1/Arctic/WesternSvalbard/pipeline_snap/"
# landmask_path    = "/mnt/raid01/SAR/Sentinel-1/Arctic/ArcticDeepSeepsData/AOIs_timeseries/landmasks/svalbard_landmask.tif"
# output_directory = "/mnt/raid01/SAR/Sentinel-1/Arctic/ArcticDeepSeepsData/AOIs_timeseries/western_svalbard_png/HH/"


# Preset: Nussaq, HV
# input_directory  = "/mnt/raid01/SAR/Sentinel-1/Arctic/DiscoBay/Nussaq/pipeline_snap/"
# landmask_path    = "/mnt/raid01/SAR/Sentinel-1/Arctic/ArcticDeepSeepsData/AOIs_timeseries/landmasks/nussaq_landmask.tif"
# output_directory = "/mnt/raid01/SAR/Sentinel-1/Arctic/ArcticDeepSeepsData/AOIs_timeseries/nussaq_png/HV/"

# Active: Scott Trough (big AOI), output folder HH_polygon
input_directory  = '/mnt/raid01/SAR/Sentinel-1/Arctic/ScottTrough/asdf/snap_big/'
landmask_path    = "/mnt/raid01/SAR/Sentinel-1/Arctic/ArcticDeepSeepsData/AOIs_timeseries/landmasks/scott_big_landmask.tif"
output_directory = "/mnt/raid01/SAR/Sentinel-1/Arctic/ArcticDeepSeepsData/AOIs_timeseries/scott_big_png/HH_polygon/"


# Per-AOI / per-polarisation values for the keyword arguments of the call below.
# Where a preset lists no width/height, presumably the values of the same AOI
# apply — TODO confirm.

# svalbard_HH_HV
    # desired_width=1024,
    # desired_height=1024,
    # vmin = -48.9,
    # vmax = 0.1,

# nussaq_HH
    # desired_width=2369,
    # desired_height=1891,
    # vmin = -41.9,
    # vmax = -0.3,

# nussaq_HV
    # vmin = -48.6,
    # vmax = -19.5,

# scott_big_HH
    # desired_width=2494,
    # desired_height=2495,
    # vmin = -49.3,
    # vmax = -1.3,
    
# scott_big_HV
    # vmin = -47.4,
    # vmax = -16.2,


# The values used here are the scott_big_HH preset.
# polygon_wkt holds the same vertices as scott_polygon_aoi, but written as
# (lat lon) pairs instead of (lon lat).
# NOTE(review): presumably required by the EPSG:4326 axis order of the
# installed GDAL — confirm before "fixing" the order.
convert_geotiffs_to_pngs(
    input_dir=input_directory,
    landmask_path=landmask_path,
    output_dir=output_directory,
    desired_width=2494,
    desired_height=2495,
    vmin = -49.3,
    vmax = -1.3,
    channel_num=1,             # raster band to render (1-based); presumably HH here — TODO confirm
    flag_calc_vmin_vmax=False, # True: ignore vmin/vmax above and compute them from the data
    num_processes=8,
    polygon_wkt='POLYGON(( 71.19 -70.75,  71.19 -69.55,  71.59 -69.55,  71.59 -70.75,  71.19 -70.75))'
    
)

[Info] Starting parallel conversion...


Converting:   0%|          | 0/1792 [00:00<?, ?it/s]



Converting:   0%|          | 1/1792 [00:00<10:27,  2.85it/s]



Converting:   0%|          | 3/1792 [00:00<04:28,  6.67it/s]



Converting:   0%|          | 6/1792 [00:00<02:22, 12.57it/s]




Converting:   1%|          | 12/1792 [00:00<01:12, 24.61it/s]



Converting:   1%|          | 16/1792 [00:00<01:02, 28.57it/s]




Converting:   1%|          | 20/1792 [00:00<01:02, 28.46it/s]



Converting:   1%|▏         | 24/1792 [00:01<01:02, 28.14it/s]



Converting:   2%|▏         | 28/1792 [00:01<01:02, 28.25it/s]



Converting:   2%|▏         | 32/1792 [00:01<01:16, 23.07it/s]



Converting:   2%|▏         | 36/1792 [00:01<01:08, 25.71it/s]



Converting:   2%|▏         | 39/1792 [00:01<01:13, 23.97it/s]



Converting:   2%|▏         | 42/1792 [00:02<01:42, 17.07it/s]




Converting:   3%|▎         | 50/1792 [00:02<01:32, 18.81it/s]



Converting:   3%|▎         | 53/1792 [00:02<01:52, 15.50it/s]



Converting:   3%|▎         | 55/1792 [00:03<02:09, 13.44it/s]




Converting:   4%|▎         | 63/1792 [00:03<01:20, 21.53it/s]




Converting:   4%|▍         | 68/1792 [00:03<01:12, 23.75it/s]



Converting:   4%|▍         | 72/1792 [00:03<01:10, 24.41it/s]



Converting:   4%|▍         | 76/1792 [00:03<01:05, 26.37it/s]



Converting:   4%|▍         | 79/1792 [00:03<01:06, 25.65it/s]



Converting:   5%|▍         | 82/1792 [00:03<01:11, 23.80it/s]



Converting:   5%|▍         | 86/1792 [00:04<01:05, 26.10it/s]



Converting:   5%|▍         | 89/1792 [00:04<01:07, 25.27it/s]



Converting:   5%|▌         | 92/1792 [00:04<01:08, 24.87it/s]



Converting:   5%|▌         | 95/1792 [00:04<01:12, 23.40it/s]




Converting:   6%|▌         | 100/1792 [00:04<01:01, 27.53it/s]



Converting:   6%|▌         | 103/1792 [00:04<01:00, 27.95it/s]



Converting:   6%|▌         | 106/1792 [00:04<01:05, 25.82it/s]



Converting:   6%|▌         | 109/1792 [00:04<01:03, 26.48it/s]





Converting:   6%|▋         | 113/1792 [00:05<01:06, 25.14it/s]



Converting:   7%|▋         | 118/1792 [00:05<00:58, 28.62it/s]



Converting:   7%|▋         | 121/1792 [00:05<00:58, 28.76it/s]



Converting:   7%|▋         | 124/1792 [00:05<01:13, 22.78it/s]




Converting:   7%|▋         | 128/1792 [00:05<01:12, 22.82it/s]



Converting:  88%|████████▊ | 1585/1792 [04:41<00:41,  5.01it/s]



Converting:  89%|████████▉ | 1592/1792 [04:42<00:34,  5.82it/s]



Converting:  89%|████████▉ | 1595/1792 [04:42<00:27,  7.19it/s]



Converting:  89%|████████▉ | 1598/1792 [04:43<00:34,  5.54it/s]



Converting:  89%|████████▉ | 1603/1792 [04:44<00:32,  5.84it/s]



Converting:  90%|████████▉ | 1605/1792 [04:44<00:25,  7.39it/s]



Converting:  90%|████████▉ | 1612/1792 [04:45<00:25,  7.14it/s]



Converting:  90%|█████████ | 1618/1792 [04:46<00:34,  5.09it/s]



Converting:  90%|█████████ | 1620/1792 [04:46<00:26,  6.48it/s]



Converting:  91%|█████████ | 1625/1792 [04:47<00:27,  6.13it/s]



Converting:  91%|█████████ | 1630/1792 [04:48<00:25,  6.23it/s]



Converting:  91%|█████████▏| 1639/1792 [04:49<00:27,  5.54it/s]



Converting:  92%|█████████▏| 1647/1792 [04:51<00:24,  5.99it/s]



Converting:  93%|█████████▎| 1664/1792 [04:54<00:28,  4.43it/s]



Converting:  93%|█████████▎| 1668/1792 [04:54<00:18,  6.80it/s]



Converting:  94%|█████████▍| 1684/1792 [04:57<00:21,  4.97it/s]



Converting:  94%|█████████▍| 1687/1792 [04:57<00:14,  7.34it/s]



Converting:  95%|█████████▍| 1702/1792 [05:00<00:21,  4.24it/s]



Converting:  96%|█████████▋| 1726/1792 [05:04<00:14,  4.66it/s]



Converting:  97%|█████████▋| 1742/1792 [05:07<00:08,  6.19it/s]



Converting:  98%|█████████▊| 1748/1792 [05:08<00:05,  7.46it/s]



Converting:  98%|█████████▊| 1756/1792 [05:09<00:05,  6.25it/s]



Converting:  98%|█████████▊| 1764/1792 [05:11<00:04,  6.18it/s]



Converting:  99%|█████████▊| 1766/1792 [05:11<00:03,  7.29it/s]



Converting:  99%|█████████▉| 1772/1792 [05:12<00:04,  4.99it/s]



Converting:  99%|█████████▉| 1774/1792 [05:12<00:02,  6.75it/s]



Converting:  99%|█████████▉| 1779/1792 [05:13<00:01,  8.67it/s]



Converting: 100%|██████████| 1792/1792 [05:16<00:00,  5.67it/s]

[Info] Conversion completed.





### PNG to MP4 steady scenes per second

In [12]:
import os
import cv2
import numpy as np
from datetime import datetime, date # Import date explicitly

def add_text_to_image(image,
                      date_text,
                      time_text,
                      identifier_text,
                      position=(60, 90),
                      font_scale=3,
                      font_color=(0, 0, 0),
                      thickness=5,
                      line_spacing=90):
    """
    Draw three lines of text (date, time, identifier) onto an image, in place.

    :param image: Image array accepted by cv2.putText; it is modified in place.
    :param date_text: Text of the first line.
    :param time_text: Text of the second line.
    :param identifier_text: Text of the third line (e.g. the satellite name).
    :param position: (x, y) of the first line's text origin, in pixels.
    :param font_scale: Font scale passed to cv2.putText.
    :param font_color: Text colour passed to cv2.putText. Defaults to black,
        which is what this function always drew before: the argument used to
        be overwritten with (0, 0, 0), so a caller-supplied colour was ignored.
    :param thickness: Stroke thickness passed to cv2.putText.
    :param line_spacing: Vertical distance in pixels between consecutive lines.
    :return: The same ``image`` object, for call chaining.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    x, y = position[0], position[1]

    # Date on the first line, time below it, identifier below the time.
    for line_index, text in enumerate((date_text, time_text, identifier_text)):
        origin = (x, y + line_index * line_spacing)
        cv2.putText(image, text, origin, font, font_scale, font_color, thickness, cv2.LINE_AA)

    return image

def parse_filename_for_datetime(filename):
    """
    Extract the acquisition datetime and the satellite identifier from a
    Sentinel-1 style filename using fixed character positions.

    Layout expected (0-based offsets): satellite at [0:3], date ``YYYYMMDD`` at
    [17:25], one separator character at [25], time ``HHMMSS`` at [26:32].

    Example filename: "S1A_..._20170101T072022_..._B670_SNAP.png"

    :param filename: File name (without directory).
    :return: Tuple ``(file_datetime, sat)``. If the name cannot be parsed a
        warning is printed and ``(datetime.min, "SAT")`` is returned, so that
        sorting by the datetime still works; use ``get_date_from_filename`` to
        detect the failure.
    """
    try:
        # Extract date and time components based on the filename structure
        year = filename[17:21]
        month = filename[21:23]
        day = filename[23:25]
        hour = filename[26:28]
        minute = filename[28:30]
        second = filename[30:32]
        sat = filename[0:3]

        file_datetime = datetime(int(year), int(month), int(day), int(hour), int(minute), int(second))
    except (ValueError, TypeError) as e:
        # ValueError: non-numeric or out-of-range fields; TypeError: not a string.
        print(f"Warning: Error parsing filename '{filename}': {e}. Assigning default date.")
        file_datetime = datetime.min
        sat = "SAT"

    return file_datetime, sat

# Helper function to get just the date part for comparison
def get_date_from_filename(filename):
    """
    Return the acquisition date encoded in ``filename``, or None if the name
    cannot be parsed.

    ``parse_filename_for_datetime`` does not raise on bad names; it returns the
    sentinel ``datetime.min``. That sentinel is translated to None here so that
    callers can skip such files instead of treating them as year-1 images.
    """
    file_datetime = parse_filename_for_datetime(filename)[0]
    if file_datetime == datetime.min:
        return None
    return file_datetime.date()

def _parse_filter_date(date_str, label):
    """Parse an optional 'DDMMYYYY' string; return a date, or None if absent/invalid (an error is printed)."""
    if not date_str:
        return None
    try:
        return datetime.strptime(date_str, "%d%m%Y").date()
    except ValueError:
        print(f"Error: Invalid {label}_date format '{date_str}'. Please use DDMMYYYY. {label.capitalize()} date filter ignored.")
        return None


def _list_png_names(directory):
    """Return the names of all files in ``directory`` ending in .png (case-insensitive)."""
    return [f for f in os.listdir(directory) if f.lower().endswith('.png')]


def create_video(dir1,
                 output_video,
                 frame_rate=3,
                 side_by_side=False,
                 dir2=None,
                 start_date_str=None,
                 end_date_str=None):
    """
    Creates a timelapse video from PNG images sorted by date and time extracted from filenames.

    Every frame is stamped with date, time and satellite identifier via
    ``add_text_to_image``. Frames whose size differs from the first image of
    ``dir1`` are resized to it. In side-by-side mode only file names present in
    both directories are used and the two images are joined horizontally.

    :param dir1: Path to the primary directory containing PNG images.
    :param output_video: Output video file path (e.g., .mp4).
    :param frame_rate: Frames per second in the output video.
    :param side_by_side: If True, images from dir1 and dir2 will be combined horizontally.
    :param dir2: Path to the secondary directory (used only if side_by_side=True).
    :param start_date_str: Optional. Start date in 'DDMMYYYY' format. Images before this date will be excluded.
    :param end_date_str: Optional. End date in 'DDMMYYYY' format. Images after this date will be excluded.
    :raises ValueError: If side_by_side is True and dir2 is not given.
    :return: None. Progress and problems are reported via print().
    """
    start_date_filter = _parse_filter_date(start_date_str, "start")
    end_date_filter = _parse_filter_date(end_date_str, "end")
    date_filter_active = start_date_filter is not None or end_date_filter is not None

    # Print filter status
    if date_filter_active:
        start_msg = f"from {start_date_filter.strftime('%d/%m/%Y')}" if start_date_filter else "from beginning"
        end_msg = f"until {end_date_filter.strftime('%d/%m/%Y')}" if end_date_filter else "until end"
        print(f"Filtering images {start_msg} {end_msg}.")
    else:
        print("No date filters applied.")

    # parse_filename_for_datetime() signals an unparsable name with datetime.min.
    unparsable_date = datetime.min.date()

    def apply_date_filter(filenames):
        if not date_filter_active:
            return filenames  # No filtering needed
        kept = []
        for f in filenames:
            img_date = get_date_from_filename(f)
            # Skip files with parsing errors; without the sentinel check they
            # would slip through when only an end date is given.
            if img_date is None or img_date == unparsable_date:
                continue
            if start_date_filter and img_date < start_date_filter:
                continue
            if end_date_filter and img_date > end_date_filter:
                continue
            kept.append(f)
        return kept

    def chronological_key(fname):
        # The name breaks ties so the order is deterministic.
        return (parse_filename_for_datetime(fname)[0], fname)

    # ------------------------------------------------------------------
    # 1) Select and sort the file names for the requested mode
    # ------------------------------------------------------------------
    if side_by_side:
        if not dir2:
            raise ValueError("When side_by_side=True, you must provide 'dir2' as well.")

        images_1_all = _list_png_names(dir1)
        images_2_all = _list_png_names(dir2)
        images_1_filtered = apply_date_filter(images_1_all)
        images_2_filtered = apply_date_filter(images_2_all)

        if date_filter_active:
            print(f"Dir1: Kept {len(images_1_filtered)} out of {len(images_1_all)} images after date filter.")
            print(f"Dir2: Kept {len(images_2_filtered)} out of {len(images_2_all)} images after date filter.")

        # Only names available in both directories can be paired.
        names = sorted(set(images_1_filtered) & set(images_2_filtered), key=chronological_key)
        if not names:
            print("No common PNG files found in the two directories within the specified date range.")
            return

        source_dirs = (dir1, dir2)
    else:
        images_all = _list_png_names(dir1)
        if not images_all:
            print(f"No PNG images found in {dir1}")
            return

        images_filtered = apply_date_filter(images_all)
        if date_filter_active:
            print(f"Dir1: Kept {len(images_filtered)} out of {len(images_all)} images after date filter.")
        if not images_filtered:
            print(f"No PNG images remaining in {dir1} within the specified date range.")
            return

        names = sorted(images_filtered, key=chronological_key)
        source_dirs = (dir1,)

    # ------------------------------------------------------------------
    # 2) Panel size comes from the first image of dir1
    # ------------------------------------------------------------------
    first_panels = [cv2.imread(os.path.join(d, names[0])) for d in source_dirs]
    if any(panel is None for panel in first_panels):
        if side_by_side:
            print(f"Failed to read the first pair of images ({names[0]}) for size determination.")
        else:
            print(f"Failed to read the first image ({names[0]}) for size determination.")
        return
    height, width = first_panels[0].shape[:2]
    frame_size = (width * len(source_dirs), height)

    # ------------------------------------------------------------------
    # 3) Write the frames
    # ------------------------------------------------------------------
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(output_video, fourcc, frame_rate, frame_size)
    if not video_writer.isOpened():
        # Without this check a failed writer drops every frame silently.
        print(f"Failed to open video writer for {output_video}")
        video_writer.release()
        return

    try:
        for fname in names:
            panels = [cv2.imread(os.path.join(d, fname)) for d in source_dirs]
            if any(panel is None for panel in panels):
                if side_by_side:
                    print(f"Skipping {fname}, could not read one of the images.")
                else:
                    print(f"Skipping {fname}, could not read image.")
                continue

            # Ensure consistent frame size
            for index, panel in enumerate(panels):
                h_curr, w_curr = panel.shape[:2]
                if (h_curr, w_curr) != (height, width):
                    if not side_by_side:
                        print(f"Warning: Resizing image {fname} from {w_curr}x{h_curr} to {width}x{height}")
                    panels[index] = cv2.resize(panel, (width, height), interpolation=cv2.INTER_AREA)

            frame = np.hstack(panels) if side_by_side else panels[0]

            # Parse filename for datetime and satellite, then stamp the frame
            file_datetime, sat_str = parse_filename_for_datetime(fname)
            date_str = file_datetime.strftime("%d/%m/%Y")
            time_str = file_datetime.strftime("%H:%M:%S")
            video_writer.write(add_text_to_image(frame, date_str, time_str, sat_str))
    finally:
        # Always finalise the container, even if a frame raised.
        video_writer.release()

    if side_by_side:
        print(f"Side-by-side video saved to {output_video}")
    else:
        print(f"Single-directory timelapse video saved to {output_video}")

if __name__ == "__main__":
    # --- Example usage of create_video ---
    # Date filter strings below are written as DDMMYYYY
    # (e.g. "31122019" is 31 Dec 2019). Only Scenario 4 is active; the other
    # scenarios are kept commented out as ready-to-use templates.

    # Input directories and output path prefix (adjust paths as needed).
    # dir_first is passed as the first positional argument and dir_second as
    # dir2 of create_video (presumably HH / HV renderings of the same scenes,
    # judging by the folder names -- confirm).
    # NOTE(review): inputs live under "scott_big_png" while the output prefix
    # says "scott_polygon_dual" -- confirm the output name is intended.
    dir_first = '/mnt/raid01/SAR/Sentinel-1/Arctic/ArcticDeepSeepsData/AOIs_timeseries/scott_big_png/HH_polygon/'
    dir_second = '/mnt/raid01/SAR/Sentinel-1/Arctic/ArcticDeepSeepsData/AOIs_timeseries/scott_big_png/HV_polygon/'
    output_video_base = "/mnt/raid01/SAR/Sentinel-1/Arctic/ArcticDeepSeepsData/AOIs_timeseries/scott_polygon_dual"

    # # --- Scenario 1: Side-by-side, full date range ---
    # print("\n--- Running Scenario 1: Side-by-side, full range ---")
    # create_video(dir_first,
    #              output_video_base + "_all_dates.mp4",
    #              frame_rate=9,
    #              side_by_side=True,
    #              dir2=dir_second,
    #              start_date_str=None, # Explicitly None or omit
    #              end_date_str=None)   # Explicitly None or omit

    # # --- Scenario 2: Side-by-side, start date filter only ---
    # print("\n--- Running Scenario 2: Side-by-side, filtered from 01 Jan 2020 ---")
    # start_date = "01012020"
    # create_video(dir_first,
    #              output_video_base + f"_from_{start_date}.mp4",
    #              frame_rate=9,
    #              side_by_side=True,
    #              dir2=dir_second,
    #              start_date_str=start_date,
    #              end_date_str=None) # Only start date

    # # --- Scenario 3: Side-by-side, end date filter only ---
    # print("\n--- Running Scenario 3: Side-by-side, filtered until 31 Dec 2021 ---")
    # end_date = "31122021"
    # create_video(dir_first,
    #              output_video_base + f"_until_{end_date}.mp4",
    #              frame_rate=9,
    #              side_by_side=True,
    #              dir2=dir_second,
    #              start_date_str=None, # Only end date
    #              end_date_str=end_date)

    # --- Scenario 4 (active): Side-by-side, both start and end date filter ---
    # The date range is embedded in the output filename so runs with
    # different ranges do not overwrite each other.
    start_date_range = "01012018"
    end_date_range = "31122019"
    create_video(dir_first,
                 output_video_base + f"_between_{start_date_range}_and_{end_date_range}.mp4",
                 frame_rate=6,
                 side_by_side=True,
                 dir2=dir_second,
                 start_date_str=start_date_range, # Lower bound of the date filter
                 end_date_str=end_date_range)

    # --- Scenario 5: Single directory, date range filter (Example) ---
    # The output path below is a placeholder and must be replaced before use.
    # print("\n--- Running Scenario 5: Single directory, filtered range ---")
    # output_video_single_filtered = '/path/to/output_single_filtered_range.mp4'
    # start_date_single = "15062019"
    # end_date_single = "14072019"
    # create_video(dir_first, # Using dir_first as example
    #              output_video_single_filtered,
    #              frame_rate=3,
    #              side_by_side=False,
    #              start_date_str=start_date_single,
    #              end_date_str=end_date_single)

Filtering images from 01/01/2018 until 31/12/2019.
Dir1: Kept 456 out of 1633 images after date filter.
Dir2: Kept 456 out of 1633 images after date filter.
Side-by-side video saved to /mnt/raid01/SAR/Sentinel-1/Arctic/ArcticDeepSeepsData/AOIs_timeseries/scott_polygon_dual_between_01012018_and_31122019.mp4


### PNG to MP4 Linear time

In [None]:
import os
import cv2
from datetime import datetime

def add_text_to_image(
    image,
    date_text,
    time_text,
    identifier_text,
    position=(40, 70),
    font_scale=1,
    font_color=(0, 0, 0)
):
    """
    Draw three stacked text lines (date, time, identifier) onto an image.

    The text is drawn directly onto ``image`` (no copy is made) and the same
    object is returned for convenience.

    :param image: image array to annotate, as accepted by ``cv2.putText``
    :param date_text: text of the first line
    :param time_text: text of the second line, drawn 40 px below the first
    :param identifier_text: text of the third line, drawn 80 px below the first
    :param position: (x, y) anchor handed to ``cv2.putText`` for the first line
    :param font_scale: font scale factor handed to ``cv2.putText``
    :param font_color: colour tuple handed to ``cv2.putText``. Defaults to
        black for contrast; an explicitly passed colour is honoured.
    :return: the annotated ``image``
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    line_spacing = 40  # vertical distance in pixels between consecutive lines
    thickness = 2

    lines = (date_text, time_text, identifier_text)
    for row, text in enumerate(lines):
        origin = (position[0], position[1] + row * line_spacing)
        cv2.putText(image, text, origin, font, font_scale, font_color,
                    thickness, cv2.LINE_AA)

    return image

def create_timelapse_video_linear_time(
    image_folder,
    video_name,
    nominal_fps=30,
    real_seconds_per_video_second=86400
):
    """
    Create a video whose frame spacing is proportional to the real time intervals
    between images. The final video has a nominal frame rate = nominal_fps,
    and each image is repeated for as many frames as the time elapsed since
    the previous image requires (always at least one frame per image).

    Frame counts are derived from the time elapsed since the first image, not
    from each gap on its own, so rounding losses do not accumulate over a
    long series.

    Images that cannot be read are skipped with a message. Images whose size
    differs from the first readable image are resized to match it.

    :param image_folder: path to folder with .png images
    :param video_name: output .mp4 video filepath
    :param nominal_fps: the nominal FPS to use in the video container
    :param real_seconds_per_video_second: how many real-world seconds
        correspond to 1 second of video. For example:
        - 86400 = 24 hours real time -> 1 second of video
        - 3600  =  1 hour  real time -> 1 second of video
        - etc.
    :raises ValueError: if nominal_fps or real_seconds_per_video_second is
        not positive
    """
    if nominal_fps <= 0 or real_seconds_per_video_second <= 0:
        raise ValueError(
            "nominal_fps and real_seconds_per_video_second must both be positive."
        )

    # 1. Gather PNG files and parse their timestamps.
    # Fixed character offsets are used; adapt these slices to your actual
    # filename pattern. Example: S1A_..._20170101T072022_...
    #   date = [17:25] (YYYYMMDD), time = [26:32] (HHMMSS), satellite = [0:3]
    image_info = []
    for fname in os.listdir(image_folder):
        if not fname.endswith(".png"):
            continue
        try:
            timestamp = datetime(
                int(fname[17:21]), int(fname[21:23]), int(fname[23:25]),
                int(fname[26:28]), int(fname[28:30]), int(fname[30:32]),
            )
        except ValueError:
            # Name does not match the expected pattern; skip this file
            continue
        image_info.append((fname, timestamp, fname[0:3]))

    if not image_info:
        print("No valid images found in folder.")
        return

    # 2. Sort images by datetime
    image_info.sort(key=lambda item: item[1])

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = None        # created lazily from the first readable image
    frame_size = None          # (width, height) the writer was opened with
    start_time = None          # timestamp of the first written image
    frames_after_first = 0     # frames written after the initial frame

    try:
        for fname, timestamp, sat in image_info:
            img = cv2.imread(os.path.join(image_folder, fname))
            if img is None:
                print(f"Skipping {fname}, could not read image.")
                continue

            if video_writer is None:
                # 3. First readable image fixes the frame size and the time origin
                height, width = img.shape[:2]
                frame_size = (width, height)
                video_writer = cv2.VideoWriter(
                    video_name, fourcc, nominal_fps, frame_size
                )
                if not video_writer.isOpened():
                    print(f"Could not open video writer for {video_name}")
                    return
                start_time = timestamp
                copies = 1
            else:
                # The writer expects every frame to match the size it was opened with
                h_curr, w_curr = img.shape[:2]
                if (w_curr, h_curr) != frame_size:
                    print(
                        f"Warning: Resizing image {fname} from {w_curr}x{h_curr} "
                        f"to {frame_size[0]}x{frame_size[1]}"
                    )
                    img = cv2.resize(img, frame_size, interpolation=cv2.INTER_AREA)

                # 4. Elapsed real time -> video seconds -> frame index on the
                # video timeline; write whatever is missing to reach it.
                elapsed_seconds = (timestamp - start_time).total_seconds()
                target_frames = int(
                    (elapsed_seconds / real_seconds_per_video_second) * nominal_fps
                )
                # At least one frame per image so that every image appears
                copies = max(1, target_frames - frames_after_first)
                frames_after_first += copies

            frame = add_text_to_image(
                img,
                timestamp.strftime("%d/%m/%Y"),
                timestamp.strftime("%H:%M:%S"),
                sat,
            )
            for _ in range(copies):
                video_writer.write(frame)
    finally:
        # Always finalise the file, even if reading or drawing fails midway
        if video_writer is not None:
            video_writer.release()

    if video_writer is None:
        print("No readable images found in folder.")
        return

    print(f"Video saved to: {video_name}")

# -------------------------------------------------------------------------
# Example usage:
# You can tune real_seconds_per_video_second as desired:
#   86400  => 1 day of real time corresponds to 1 second of video
#   3600   => 1 hour of real time => 1 second of video
#   etc.
# Also tune nominal_fps if you want the final container to be e.g. 25, 30, ...
# -------------------------------------------------------------------------

if __name__ == "__main__":
    # Example run: build a linear-time timelapse from the PNGs in png_folder
    # and write it to video_destination (adjust both paths as needed).
    png_folder = "/mnt/raid01/SAR/Sentinel-1/Arctic/ArcticDeepSeepsData/AOIs_timeseries_mp4/western_svalbard_png_2/"
    video_destination = "/mnt/raid01/SAR/Sentinel-1/Arctic/ArcticDeepSeepsData/AOIs_timeseries_mp4/Western_Svalbard_linear_time.mp4"
    
    # nominal_fps=3 sets the video container to 3 frames per second.
    # With 86,400 real seconds (1 day) mapped to 1 second of video, one video
    # frame therefore stands for 8 hours of real time.
    create_timelapse_video_linear_time(
        image_folder=png_folder,
        video_name=video_destination,
        nominal_fps=3,
        real_seconds_per_video_second=86400  # 1 day of real time -> 1s of video
    )
