In [30]:
import glob
import os
import numpy as np
import glob
import rasterio
from rasterio import features
from shapely.geometry import shape, box, mapping
from shapely.geometry.polygon import orient
import geopandas as gpd

# Root folder of the processed "pr" data. Later cells read rasters from its
# "02_clipped" subfolder and write GeoJSON files to its "03_geojson" subfolder.
output_folder = "./processed/pr"
# Folder of the raw "pr" inputs; not referenced by any cell visible in this notebook section.
input_folder = "./raw/pr"

In [5]:
def tif_to_geojson(tif_folder, geojson_folder, years=None, min_area=1e-6):
    """Polygonize selected bands of every ``*.tif`` in a folder and save them as GeoJSON.

    For each raster, every band gets a label ("timeband"): the band description
    for multi-band files (falling back to the 1-based band number), or the
    ``timeband`` dataset tag for single-band files (falling back to
    ``"unknown"``). A band is processed when its label contains the string form
    of any entry of ``years``. NOTE(review): this is a plain substring match,
    so ``50`` also matches labels such as ``"1950"`` or ``"150"`` - confirm
    that this is acceptable for the label format in use.

    Each selected band is polygonized with ``rasterio.features.shapes``
    (NaN and nodata cells masked out) and written to
    ``<geojson_folder>/<tif basename>_<timeband>.geojson`` in the raster's CRS.

    Parameters
    ----------
    tif_folder : str
        Folder searched (non-recursively) for ``*.tif`` files.
    geojson_folder : str
        Output folder; created if it does not exist.
    years : iterable, optional
        Values whose string form selects bands by label. Defaults to
        ``[50, 60]``, the values that used to be hard-coded.
    min_area : float, optional
        Polygons with an area below this threshold (in squared CRS units) are
        dropped. Defaults to ``1e-6``, the value that used to be hard-coded.

    Returns
    -------
    None
        Results are written to disk; progress is printed.
    """
    if years is None:
        years = [50, 60]
    # Stringify once instead of once per band label.
    year_tokens = [str(y) for y in years]

    os.makedirs(geojson_folder, exist_ok=True)
    tif_files = glob.glob(os.path.join(tif_folder, "*.tif"))
    if not tif_files:
        print(f"Warning: no .tif files found in {tif_folder}")

    for tif_file in tif_files:
        with rasterio.open(tif_file) as src:
            print(f"\nProcessing {tif_file}")
            print(f"  CRS: {src.crs}, Nodata: {src.nodata}, Bands: {src.count}")

            # Get timebands
            if src.count > 1:
                timebands = [src.descriptions[i] if src.descriptions[i] else f"{i+1}" for i in range(src.count)]
            else:
                timeband = src.tags().get('timeband', None)
                timebands = [timeband if timeband else "unknown"]

            selected_indices = [
                i for i, tb in enumerate(timebands)
                if any(token in str(tb) for token in year_tokens)
            ]
            if not selected_indices:
                # Previously such files were skipped without any message.
                print(f"  Warning: no band label matches {year_tokens} (labels: {timebands})")

            for band_idx in selected_indices:
                band_label = timebands[band_idx]
                image = src.read(band_idx + 1)
                # Note: these statistics are taken before masking, so they include nodata cells.
                print(f"  Band {band_idx + 1} ({band_label}) - dtype: {image.dtype}, "
                      f"min: {np.nanmin(image)}, max: {np.nanmax(image)}")

                # Polygonize on float32 data (presumably because features.shapes
                # supports only a limited set of dtypes - see the rasterio docs).
                if image.dtype != 'float32':
                    image = image.astype('float32')

                # True where a cell carries real data.
                if src.nodata is not None:
                    mask = ~np.isnan(image) & (image != src.nodata)
                else:
                    mask = ~np.isnan(image)

                shapes_gen = features.shapes(image, mask=mask, transform=src.transform)
                geoms = []
                for geom, value in shapes_gen:
                    # Defensive: the mask should already exclude these values.
                    if np.isnan(value) or (src.nodata is not None and value == src.nodata):
                        continue
                    shapely_geom = shape(geom)
                    if shapely_geom.is_empty or shapely_geom.area < min_area:
                        continue
                    # sign=1.0 requests a counter-clockwise exterior ring.
                    oriented_geom = orient(shapely_geom, sign=1.0)
                    geoms.append({
                        'geometry': oriented_geom,
                        'properties': {'value': float(value), 'timeband': band_label}
                    })
                print(f"    Extracted {len(geoms)} geometries")

                if not geoms:
                    print(f"    Warning: No geometries found for band {band_idx + 1} ({band_label})")
                    continue

                gdf = gpd.GeoDataFrame.from_features(geoms, crs=src.crs)
                # Optional: comment out if reprojection is causing issues
                # gdf = gdf.to_crs("EPSG:4326")

                geojson_path = os.path.join(
                    geojson_folder,
                    f"{os.path.splitext(os.path.basename(tif_file))[0]}_{band_label}.geojson"
                )
                gdf.to_file(geojson_path, driver="GeoJSON")
                print(f"    Saved to {geojson_path}")


In [None]:
# Convert the rasters in <output_folder>/02_clipped to GeoJSON files under <output_folder>/03_geojson.
tif_to_geojson(os.path.join(output_folder, "02_clipped"), os.path.join(output_folder, "03_geojson"))

In [None]:
# Collect every GeoTIFF in the "02_clipped" subfolder; the loop further down iterates over this list.
tif_files = glob.glob(os.path.join(output_folder, "02_clipped", "*.tif"))


def raster_cells_to_features(image, transform, mask=None, nodata=None):
    """Turn every valid cell of a 2-D raster band into a rectangular feature.

    Parameters
    ----------
    image : array-like, 2-D
        Band values indexed as ``image[row, col]``.
    transform : affine transform
        Pixel-to-coordinate transform, passed to ``rasterio.transform.xy``.
    mask : array-like of bool, optional
        Same shape as ``image``; cells where it is False are skipped.
    nodata : scalar, optional
        Cells equal to this value are skipped.

    Returns
    -------
    list of dict
        One entry per valid cell, in row-major order, of the form
        ``{'geometry': <GeoJSON-like mapping of the cell rectangle>,
        'properties': {'value': float}}``. NaN cells are always skipped.

    Raises
    ------
    ValueError
        If ``image`` is not 2-D or ``mask`` has a different shape.
    """
    image = np.asarray(image)
    if image.ndim != 2:
        raise ValueError(f"image must be 2-D, got {image.ndim} dimension(s)")

    # Build the validity mask in one vectorised pass instead of testing cell by cell.
    valid = ~np.isnan(image)
    if nodata is not None:
        valid &= image != nodata
    if mask is not None:
        mask = np.asarray(mask, dtype=bool)
        if mask.shape != image.shape:
            raise ValueError(f"mask shape {mask.shape} does not match image shape {image.shape}")
        valid &= mask

    # Named cell_features so the imported rasterio ``features`` module is not shadowed.
    cell_features = []
    # np.nonzero yields indices in row-major order, matching a nested row/col loop.
    for row, col in zip(*np.nonzero(valid)):
        row, col = int(row), int(col)
        # Get the bounds of the cell
        x0, y0 = rasterio.transform.xy(transform, row, col, offset='ul')
        x1, y1 = rasterio.transform.xy(transform, row, col, offset='lr')
        # box(minx, miny, maxx, maxy) - assumes y decreases with the row index
        # (north-up raster); TODO confirm for other orientations.
        geom = box(x0, y1, x1, y0)
        cell_features.append({
            'geometry': mapping(geom),
            'properties': {'value': float(image[row, col])}
        })
    return cell_features

# Values whose string form selects bands by label; constant across files, so set once.
years = [50, 60]

# For every clipped raster, count the per-cell features of each selected band.
for tif_file in tif_files:
    with rasterio.open(tif_file) as src:
        print(f"\nProcessing {tif_file}")

        # Label each band: description (or 1-based number) for multi-band files,
        # the 'timeband' tag (or "unknown") for single-band files.
        if src.count > 1:
            timebands = [src.descriptions[i] if src.descriptions[i] else f"{i+1}" for i in range(src.count)]
        else:
            timeband = src.tags().get('timeband', None)
            timebands = [timeband if timeband else "unknown"]

        # Substring match of each year against the band label.
        selected_indices = [i for i, tb in enumerate(timebands) if any(str(y) in str(tb) for y in years)]

        for band_idx in selected_indices:
            image = src.read(band_idx + 1)

            # True where a cell carries real data.
            if src.nodata is not None:
                mask = ~np.isnan(image) & (image != src.nodata)
            else:
                mask = ~np.isnan(image)
            # The mask used to be computed but never applied, so nodata cells were
            # counted as features. Blank them to NaN, which the helper skips.
            valid_image = np.where(mask, image, np.nan)
            geoms = raster_cells_to_features(valid_image, src.transform)
            print(len(geoms))



Processing ./processed/pr\02_clipped\PR_med_maxPR.tif
10397
10397

Processing ./processed/pr\02_clipped\PR_med_spi.tif
10397
10397

Processing ./processed/pr\02_clipped\PR_spi test.tif
