In [1]:
import os
import sys
import glob
import sqlite3
from pathlib import Path
import ee
import geemap
import geopandas as gpd
import pandas as pd
import xee
import xarray as xr

# Make the project package (`app`) importable.
# Walk upward from the working directory and take the nearest folder that
# contains `app/utils`; fall back to the original developer checkout if none
# is found, so the notebook keeps working where it was written.
REPO_ROOT = next(
    (
        candidate
        for candidate in (Path.cwd(), *Path.cwd().parents)
        if (candidate / "app" / "utils").is_dir()
    ),
    Path(r'C:\Users\Pooya\w\GitHub\ShiraziPooya\DroughtMonitoringIran'),
)

# Guard against piling up duplicate entries when the cell is re-run.
if str(REPO_ROOT) not in sys.path:
    sys.path.append(str(REPO_ROOT))

from app.utils.gee import extract_points_to_csv, run_with_adaptive_buffer


# Earth Engine session settings: the cloud project to bill/authorize against
# and the API endpoint (the high-volume host, per the URL below).
EE_PROJECT = 'drought-monitoring-iran'
EE_API_URL = 'https://earthengine-highvolume.googleapis.com'

# Authenticate first, then initialize the client with the settings above.
ee.Authenticate()
ee.Initialize(project=EE_PROJECT, opt_url=EE_API_URL)

In [2]:
# SQLite file holding the ground-station tables (path is relative to the
# notebook's working directory).
DATABASE_PATH = "../database/database.db"

# Load the station metadata table. try/finally guarantees the connection is
# released even when the query raises (e.g. the table does not exist).
conn = sqlite3.connect(DATABASE_PATH)
try:
    geoinfo = pd.read_sql(sql='SELECT * FROM ground_data_geoinfo', con=conn)
finally:
    conn.close()

In [3]:
# Every product is sampled at the same station points and written to the same
# output folder, one CSV per registry key.
PRECIP_OUTPUT_DIR = "../output/GEE/Precipitation"
STATIONS_GEOJSON = "../assets/geo_data/MazandaranStationsIRIMO.geojson"


def _dataset_config(key, *, name, image_collection_id, parameter,
                    start_date, end_date, multiply, unit, cadence):
    """Return the extraction settings for one product / time window.

    Parameters
    ----------
    key : str
        Registry key. It is also used as the CSV file stem, so the output
        file is ``<PRECIP_OUTPUT_DIR>/<key>.csv``.
    name : str
        Product name stored under ``"name"``.
    image_collection_id : str
        Earth Engine image collection id.
    parameter : str
        Band to extract from the collection.
    start_date, end_date : str
        ``YYYY-MM-DD`` bounds of the extraction window.
    multiply : int | float
        Stored under ``"multiply"`` (presumably a scale factor applied by the
        extraction helper -- confirm in ``app.utils.gee``).
    unit : str
        Unit label stored with the config.
    cadence : str
        Native temporal resolution label (stored under ``"Cadence"``).

    Returns
    -------
    dict
        Config dict with the keys the extraction helpers receive. ``"add"``
        and ``"scale"`` are fixed to ``0.0`` and ``None`` for every product
        in this notebook.
    """
    return {
        "image_collection_id": image_collection_id,
        "start_date": start_date,
        "end_date": end_date,
        "parameter": parameter,
        "multiply": multiply,
        "add": 0.0,
        "scale": None,
        "unit": unit,
        "Cadence": cadence,
        "name": name,
        "output_path": f"{PRECIP_OUTPUT_DIR}/{key}.csv",
        "points_geojson": STATIONS_GEOJSON,
    }


def _full_period_config(name, **product):
    """Return ``{name: config}`` for a product extracted in one 1980-2026 window."""
    return {
        name: _dataset_config(
            name,
            name=name,
            start_date="1980-01-01",
            end_date="2026-01-01",
            **product,
        )
    }


def _windowed_configs(name, year_bounds, **product):
    """Return one config per consecutive pair of ``year_bounds``.

    Each window runs from 1 January of the first year to 1 January of the
    next bound and is keyed ``"<name>-<first>_<last>"``, where ``<last>`` is
    the year before the next bound (e.g. bounds 1980, 1985 ->
    ``"<name>-1980_1984"``).
    """
    configs = {}
    for first_year, next_year in zip(year_bounds, year_bounds[1:]):
        key = f"{name}-{first_year}_{next_year - 1}"
        configs[key] = _dataset_config(
            key,
            name=name,
            start_date=f"{first_year}-01-01",
            end_date=f"{next_year}-01-01",
            **product,
        )
    return configs


# Registry of every product this notebook knows how to extract. Products with
# cadence "1 Month" use a single window; the "1 Day" collections are split
# into multi-year windows, each written to its own CSV.
ALL_DATASETS = {
    **_full_period_config(
        "GPM",
        image_collection_id="NASA/GPM_L3/IMERG_MONTHLY_V07",
        parameter="precipitation",
        multiply=1.0,
        unit="mm/hr",
        cadence="1 Month",
    ),
    **_full_period_config(
        "TRMM",
        image_collection_id="TRMM/3B43V7",
        parameter="precipitation",
        multiply=1.0,
        unit="mm/hr",
        cadence="1 Month",
    ),
    **_full_period_config(
        "ERA5",
        image_collection_id="ECMWF/ERA5_LAND/MONTHLY_AGGR",
        parameter="total_precipitation_sum",
        multiply=1000,
        unit="mm/month",
        cadence="1 Month",
    ),
    **_full_period_config(
        "TerraClimate",
        image_collection_id="IDAHO_EPSCOR/TERRACLIMATE",
        parameter="pr",
        multiply=1,
        unit="mm/month",
        cadence="1 Month",
    ),
    **_windowed_configs(
        "PERSIANN_CDR",
        (1980, 1990, 2000, 2010, 2020, 2026),
        image_collection_id="NOAA/PERSIANN-CDR",
        parameter="precipitation",
        multiply=1,
        unit="mm/month",
        cadence="1 Day",
    ),
    **_windowed_configs(
        "CHIRPS",
        (1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020, 2026),
        image_collection_id="UCSB-CHG/CHIRPS/DAILY",
        parameter="precipitation",
        multiply=1,
        unit="mm/month",
        cadence="1 Day",
    ),
}

# Products to (re)extract in this run. Add product names here instead of
# commenting registry entries in and out.
ENABLED_PRODUCTS = {"CHIRPS"}

# The configs actually processed by the extraction loop, in registry order.
DATASETS = {
    key: config
    for key, config in ALL_DATASETS.items()
    if config["name"] in ENABLED_PRODUCTS
}

In [4]:
# for name, config in DATASETS.items():
#     print(f"Product: {name}")
#     extract_points_to_csv(
#         **config,
#         # points_geojson="../assets/geo_data/MazandaranStationsIRIMO.geojson",
#     )

In [5]:
# Buffer radii in metres handed to the extraction helper. The helper's log
# shows it trying them in order and stopping once no station is all-NaN.
BUFFER_RADII_M = (0, 2500, 20000)

for name, config in DATASETS.items():
    print(f"Product: {name}")

    # A fresh list per product, exactly as a list literal in the call would be.
    df = run_with_adaptive_buffer(
        config=config,
        base_points_geojson=config["points_geojson"],
        buffer_list_m=list(BUFFER_RADII_M),
    )

    # Create the destination folder on demand, then write the merged table.
    out_path = Path(config["output_path"])
    target_dir = out_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_path, index=False)

    print(f"Final merged result saved to {out_path}")

Product: CHIRPS_1980_1984
Nominal scale for CHIRPS: 5565.974539663679

=== Try 1 with buffer_m = 0 m ===
Using scale: 5565.974539663679, buffer_m: 0
Stations still all-NaN after buffer 0 m: {'99306', '40736', '40734'}

=== Try 2 with buffer_m = 2500 m ===
Using scale: 5565.974539663679, buffer_m: 2500
Stations still all-NaN after buffer 2500 m: {'99306', '40736'}

=== Try 3 with buffer_m = 20000 m ===
Using scale: 5565.974539663679, buffer_m: 20000
Stations still all-NaN after buffer 20000 m: set()
All stations have at least some non-NaN values. Stopping.
Final merged result saved to ..\output\GEE\Precipitation\CHIRPS-1980_1984.csv
Product: CHIRPS_1985_1989
Nominal scale for CHIRPS: 5565.974539663679

=== Try 1 with buffer_m = 0 m ===
Using scale: 5565.974539663679, buffer_m: 0
Stations still all-NaN after buffer 0 m: {'99306', '40736', '40734'}

=== Try 2 with buffer_m = 2500 m ===
Using scale: 5565.974539663679, buffer_m: 2500
Stations still all-NaN after buffer 2500 m: {'99306', '40

# Concatenate the per-product CSV files

In [6]:
folder = "../output/GEE/Precipitation/"

# glob returns paths in arbitrary order; sort them so the concatenation order
# is reproducible across machines and runs.
csv_paths = sorted(glob.glob(os.path.join(folder, "*.csv")))
if not csv_paths:
    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate".
    raise FileNotFoundError(
        f"No CSV files found in {folder!r}; run the extraction step first."
    )

all_dfs = []

for filepath in csv_paths:
    df = pd.read_csv(filepath)

    # File stems are "<model>" or "<model>-<start>_<end>"; the part before the
    # first "-" is the model name and also the name of the value column.
    model_name = Path(filepath).stem.split("-")[0]

    # rename() silently ignores a missing column, which would leave this
    # file's rows without any precipitation value -- surface that instead.
    if model_name not in df.columns:
        raise KeyError(
            f"{filepath}: expected a value column named {model_name!r}, "
            f"found columns {list(df.columns)}"
        )

    df = df.rename(columns={model_name: "Precipitation"})
    df["model"] = model_name

    all_dfs.append(df)

# Long format: one row per station / date / model.
result = pd.concat(all_dfs, ignore_index=True)
# errors="coerce" turns unparseable dates into NaT rather than raising.
result["date"] = pd.to_datetime(result["date"], format="mixed", dayfirst=True, errors="coerce")
result = result.sort_values(by=["Region", "St_Name", "model", "date"]).reset_index(drop=True)

result

Unnamed: 0,date,St_ID,Region,St_Ele,St_Lat,St_Lon,St_Name,region_id,Precipitation,model
0,1981-01-01,99361,Mazandaran,1805.0,36.07,52.84,Alasht,MASA,82.694200,CHIRPS
1,1981-02-01,99361,Mazandaran,1805.0,36.07,52.84,Alasht,MASA,104.255558,CHIRPS
2,1981-03-01,99361,Mazandaran,1805.0,36.07,52.84,Alasht,MASA,177.580194,CHIRPS
3,1981-04-01,99361,Mazandaran,1805.0,36.07,52.84,Alasht,MASA,102.832222,CHIRPS
4,1981-05-01,99361,Mazandaran,1805.0,36.07,52.84,Alasht,MASA,20.287688,CHIRPS
...,...,...,...,...,...,...,...,...,...,...
40855,2024-08-01,40735,Mazandaran,1855.4,36.23,51.30,Siahbisheh,MASA,3.000000,TerraClimate
40856,2024-09-01,40735,Mazandaran,1855.4,36.23,51.30,Siahbisheh,MASA,8.000000,TerraClimate
40857,2024-10-01,40735,Mazandaran,1855.4,36.23,51.30,Siahbisheh,MASA,18.000000,TerraClimate
40858,2024-11-01,40735,Mazandaran,1855.4,36.23,51.30,Siahbisheh,MASA,28.000000,TerraClimate


# Convert Long to Wide

In [7]:
# Columns that identify one station on one date; every other output column
# is a model's precipitation value.
station_date_keys = ["St_ID", "St_Name", "St_Lat", "St_Lon", "St_Ele", "date", "region_id"]

# Spread the "model" column into one column per model. pivot_table's default
# aggregation (mean) applies if a station/date/model combination repeats.
result = pd.pivot_table(
    result,
    values="Precipitation",
    index=station_date_keys,
    columns="model",
).reset_index()

result

model,St_ID,St_Name,St_Lat,St_Lon,St_Ele,date,region_id,CHIRPS,ERA5,GPM,PERSIANN_CDR,TRMM,TerraClimate
0,40732,Ramsar,36.90,50.68,-20.0,1980-01-01,MASA,,282.246388,,,,103.0
1,40732,Ramsar,36.90,50.68,-20.0,1980-02-01,MASA,,162.290688,,,,86.0
2,40732,Ramsar,36.90,50.68,-20.0,1980-03-01,MASA,,151.580624,,,,89.0
3,40732,Ramsar,36.90,50.68,-20.0,1980-04-01,MASA,,84.181468,,,,33.0
4,40732,Ramsar,36.90,50.68,-20.0,1980-05-01,MASA,,84.903626,,,,29.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8245,99361,Alasht,36.07,52.84,1805.0,2025-06-01,MASA,19.601683,84.377884,7.920001,,,
8246,99361,Alasht,36.07,52.84,1805.0,2025-07-01,MASA,19.137444,21.478749,13.392001,,,
8247,99361,Alasht,36.07,52.84,1805.0,2025-08-01,MASA,36.370131,33.082195,,,,
8248,99361,Alasht,36.07,52.84,1805.0,2025-09-01,MASA,34.380717,150.438410,,,,


# Merge station geoinfo into the results

In [8]:
# Attach the database's station metadata to every precipitation row, then
# drop the CSV-side station columns (St_*) in favour of the geoinfo ones.
# validate="many_to_one" makes pandas raise if geoinfo holds the same
# (region_id, station_name) more than once, which would otherwise silently
# duplicate precipitation rows before they are written to the database.
result = pd.merge(
    left=result,
    right=geoinfo,
    left_on=["region_id", "St_Name"],
    right_on=["region_id", "station_name"],
    how="left",
    validate="many_to_one",
).drop(columns=["St_ID", "St_Name", "St_Lat", "St_Lon", "St_Ele"])

# Station identity columns first, then everything else in its existing order.
cols = ["region_id", "region_name", "station_id", "station_name", "lat", "lon", "station_elevation"]
result = result[cols + [c for c in result.columns if c not in cols]]

# Reassign instead of sorting in place, so the cell contains no hidden mutation.
result = result.sort_values(by=["region_id", "region_name", "station_id", "station_name", "date"])

result

Unnamed: 0,region_id,region_name,station_id,station_name,lat,lon,station_elevation,date,CHIRPS,ERA5,GPM,PERSIANN_CDR,TRMM,TerraClimate
0,MASA,Mazandaran,40732,Ramsar,36.90,50.68,-20.0,1980-01-01,,282.246388,,,,103.0
1,MASA,Mazandaran,40732,Ramsar,36.90,50.68,-20.0,1980-02-01,,162.290688,,,,86.0
2,MASA,Mazandaran,40732,Ramsar,36.90,50.68,-20.0,1980-03-01,,151.580624,,,,89.0
3,MASA,Mazandaran,40732,Ramsar,36.90,50.68,-20.0,1980-04-01,,84.181468,,,,33.0
4,MASA,Mazandaran,40732,Ramsar,36.90,50.68,-20.0,1980-05-01,,84.903626,,,,29.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8245,MASA,Mazandaran,99361,Alasht,36.07,52.84,1805.0,2025-06-01,19.601683,84.377884,7.920001,,,
8246,MASA,Mazandaran,99361,Alasht,36.07,52.84,1805.0,2025-07-01,19.137444,21.478749,13.392001,,,
8247,MASA,Mazandaran,99361,Alasht,36.07,52.84,1805.0,2025-08-01,36.370131,33.082195,,,,
8248,MASA,Mazandaran,99361,Alasht,36.07,52.84,1805.0,2025-09-01,34.380717,150.438410,,,,


In [9]:
# Persist the wide table, replacing any previous 'gee_precip_monthly' table.
# try/finally guarantees the connection is released even if the write fails.
conn = sqlite3.connect(DATABASE_PATH)
try:
    result.to_sql('gee_precip_monthly', conn, if_exists='replace', index=False)
    conn.commit()
finally:
    conn.close()