In [2]:
import os
import sys
import glob
import sqlite3
from pathlib import Path
import ee
import geemap
import geopandas as gpd
import pandas as pd
import xee
import xarray as xr

# Make the project package (`app.*`) importable from this notebook.
# The location can be overridden with the DROUGHT_MONITORING_ROOT environment
# variable; the default is the original developer's checkout path.
PROJECT_ROOT = os.environ.get(
    "DROUGHT_MONITORING_ROOT",
    r'C:\Users\Pooya\w\GitHub\ShiraziPooya\DroughtMonitoringIran',
)
# Guard against stacking duplicate entries when the cell is re-run.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from app.utils.gee import run_with_adaptive_buffer, extract_points_to_csv


# Authenticate, then initialise the Earth Engine client for this project
# against the high-volume endpoint.
ee.Authenticate()
ee.Initialize(
    project = 'drought-monitoring-iran',
    opt_url = 'https://earthengine-highvolume.googleapis.com'
)

In [3]:
DATABASE_PATH = "../database/database.db"

# sqlite3.connect() silently creates an empty database when the file is missing,
# which would surface later as a confusing "no such table" error. Fail early instead.
if not Path(DATABASE_PATH).is_file():
    raise FileNotFoundError(f"SQLite database not found: {DATABASE_PATH}")

# Load the station geo-information table used later to enrich the GEE results.
conn = sqlite3.connect(DATABASE_PATH)
try:
    geoinfo = pd.read_sql(sql='SELECT * FROM ground_data_geoinfo', con=conn)
finally:
    # Release the connection even if the query fails.
    conn.close()

In [4]:
# Per-product extraction settings, keyed by product name.
# The whole inner dict is handed to the helpers in `app.utils.gee`, so the key
# names (including the capitalised "Cadence") must stay exactly as they are.
DATASETS = {
    "MOD16A2GF": {
        "image_collection_id": "MODIS/061/MOD16A2GF",
        "start_date": "2000-01-01",
        "end_date": "2026-01-01",
        "parameter": "PET",
        # Linear conversion of the raw band value: presumably value * multiply + add
        # (applied inside the helper — TODO confirm).
        "multiply": 0.1,
        "add": 0.0,
        # None: presumably lets the helper fall back to the product's nominal scale — TODO confirm.
        "scale": None,
        "unit": "kg/m^2/8day",
        "Cadence": "8 Days",
        "name": "MOD16A2GF",
        "output_path": "../output/GEE/PET/MOD16A2GF.csv",
        "points_geojson": "../assets/geo_data/MazandaranStationsIRIMO.geojson"
    },
}

In [5]:
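# NOTE: disabled alternative to the adaptive-buffer extraction. If this is re-enabled,
# drop the explicit `points_geojson=` argument: `config` already carries that key, so
# passing both raises "TypeError: got multiple values for keyword argument 'points_geojson'".
# for name, config in DATASETS.items():
#     print(f"Product: {name}")
#     extract_points_to_csv(
#         **config,
#         points_geojson="../assets/geo_data/MazandaranStationsIRIMO.geojson", 
#     )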

In [6]:
# Buffer radii (in metres) handed to the helper, smallest first.
BUFFER_RADII_M = [0, 1000, 1500, 2000, 2500, 5000, 10000]

# Extract every configured product and write one CSV per product.
for product, product_cfg in DATASETS.items():
    print(f"Product: {product}")

    extracted = run_with_adaptive_buffer(
        config=product_cfg,
        base_points_geojson=product_cfg["points_geojson"],
        # Fresh list per call, exactly as a literal would be.
        buffer_list_m=list(BUFFER_RADII_M),
    )

    # Create the destination folder on demand before writing.
    csv_path = Path(product_cfg["output_path"])
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    extracted.to_csv(csv_path, index=False)
    print(f"Final merged result saved to {csv_path}")

Product: MOD16A2GF
Nominal scale for MOD16A2GF: 463.31271652791656

=== Try 1 with buffer_m = 0 m ===
Using scale: 463.31271652791656, buffer_m: 0
Stations still all-NaN after buffer 0 m: {'40788', '99306', '40736'}

=== Try 2 with buffer_m = 1000 m ===
Using scale: 463.31271652791656, buffer_m: 1000
Stations still all-NaN after buffer 1000 m: set()
All stations have at least some non-NaN values. Stopping.
Final merged result saved to ..\output\GEE\PET\MOD16A2GF.csv


# Concatenate Data

In [None]:
folder = "../output/GEE/PET/"

# Sorted so the concatenation order does not depend on filesystem listing order.
csv_paths = sorted(glob.glob(os.path.join(folder, "*.csv")))
if not csv_paths:
    # pd.concat([]) would otherwise fail with an opaque "No objects to concatenate".
    raise FileNotFoundError(
        f"No CSV files found in {folder!r}; run the extraction cell first."
    )

all_dfs = []

for filepath in csv_paths:
    df = pd.read_csv(filepath)

    # The model name is the file stem up to the first "-" (e.g. "MOD16A2GF.csv" -> "MOD16A2GF").
    filename = os.path.basename(filepath)
    model_name, _ = os.path.splitext(filename)
    model_name = model_name.split("-")[0]

    # Long format: the product's value column becomes "PET", tagged with its model name.
    # NOTE(review): if the CSV has no column named after the model, rename() is a silent
    # no-op and "PET" will be missing for that file — confirm the CSV schema.
    df = df.rename(columns={model_name: "PET"})
    df["model"] = model_name

    all_dfs.append(df)

result = pd.concat(all_dfs, ignore_index=True)
# errors="coerce" turns unparseable dates into NaT instead of raising.
result["date"] = pd.to_datetime(result["date"], format="mixed", dayfirst=True, errors="coerce")
result = result.sort_values(by=["Region", "St_Name", "model", "date"]).reset_index(drop=True)

result

# Convert Long to Wide

In [None]:
# One row per station and date, one column per model holding its PET value.
# pivot_table aggregates with its default (mean) if a station/date/model
# combination occurs more than once.
# NOTE(review): rows with a missing value in any of the index columns are
# presumably dropped by the underlying grouping — confirm none are lost.
pivot_index = ["St_ID", "St_Name", "St_Lat", "St_Lon", "St_Ele", "date", "region_id"]

wide = result.pivot_table(index=pivot_index, columns="model", values="PET")
result = wide.reset_index()

result

# Merge geoinfo with results

In [None]:
# Station attributes carried over from the CSVs; replaced by the geoinfo columns.
csv_station_columns = ["St_ID", "St_Name", "St_Lat", "St_Lon", "St_Ele"]

# Left join keeps every extracted row; stations are matched on region id + station name.
merged = result.merge(
    geoinfo,
    how="left",
    left_on=["region_id", "St_Name"],
    right_on=["region_id", "station_name"],
)
merged = merged.drop(columns=csv_station_columns)

# Put the identifying columns first, then everything else in its existing order.
leading_cols = ["region_id", "region_name", "station_id", "station_name", "lat", "lon", "station_elevation"]
trailing_cols = [col for col in merged.columns if col not in leading_cols]

result = merged[leading_cols + trailing_cols].sort_values(
    by=["region_id", "region_name", "station_id", "station_name", "date"]
)

result.info()

# Convert to Daily and Monthly

In [None]:
# Length (in days) of one composite window; the composite total is spread evenly over it.
COMPOSITE_DAYS = 8

station_cols = ["region_id", "region_name", "station_id", "station_name", "lat", "lon", "station_elevation"]

# Vectorised replacement for a row-by-row iterrows() expansion: a cross join repeats
# every composite row once per day offset (0..COMPOSITE_DAYS-1), keeping the same
# row order (all offsets of row 0, then row 1, ...).
# NOTE(review): days are generated backwards from `date` (date, date-1, ..., date-7),
# i.e. `date` is treated as the LAST day of the window, and every window is assumed
# to span exactly 8 days — confirm both against how the extraction stamps dates.
day_offsets = pd.DataFrame({"day_offset": range(COMPOSITE_DAYS)})
expanded = result[station_cols + ["date", "MOD16A2GF"]].merge(day_offsets, how="cross")

daily_dataset = expanded.assign(
    date=pd.to_datetime(expanded["date"] - pd.to_timedelta(expanded["day_offset"], unit="D")),
    MOD16A2GF=expanded["MOD16A2GF"] / COMPOSITE_DAYS,
).drop(columns="day_offset")

daily_dataset = daily_dataset.sort_values(
    by=["region_id", "region_name", "station_id", "station_name", "date"]
).reset_index(drop=True)

daily_dataset.info()

In [None]:
station_keys = ["region_id", "region_name", "station_id", "station_name"]

# Label every daily row with its calendar month ("YYYY-MM") and sum per station and month.
month_labels = daily_dataset["date"].dt.to_period("M").astype(str)

monthly_dataset = (
    daily_dataset
    .assign(date=month_labels)
    .groupby(station_keys + ["date"])["MOD16A2GF"]
    # A month needs at least 25 non-missing daily values, otherwise its total is NaN.
    .sum(min_count=25)
    .reset_index()
)

# Turn the month label back into a timestamp anchored on the last day of that month.
monthly_dataset["date"] = pd.to_datetime(monthly_dataset["date"]) + pd.offsets.MonthEnd(0)

monthly_dataset.info()

In [None]:
# Re-attach the station attributes (lat, lon, elevation, ...) that the monthly
# aggregation did not carry along; both frames share the same four key columns.
station_keys = ["region_id", "region_name", "station_id", "station_name"]
monthly_merged = monthly_dataset.merge(geoinfo, how="left", on=station_keys)

# Identifying columns first, remaining columns in their existing order.
leading_cols = ["region_id", "region_name", "station_id", "station_name", "lat", "lon", "station_elevation"]
trailing_cols = [col for col in monthly_merged.columns if col not in leading_cols]

monthly_dataset = monthly_merged[leading_cols + trailing_cols].sort_values(
    by=station_keys + ["date"]
)

monthly_dataset.info()

In [None]:
# Display the merged monthly table for a visual check.
monthly_dataset

In [None]:
# Persist the monthly PET table, replacing any previous version of it.
# Writes `monthly_dataset` (the monthly aggregate), not `result`, which still
# holds the 8-day composites and does not belong in a "*_monthly" table.
conn = sqlite3.connect(DATABASE_PATH)
try:
    monthly_dataset.to_sql('gee_pet_monthly', conn, if_exists='replace', index=False)
    conn.commit()
finally:
    # Release the connection even if the write fails.
    conn.close()