In [1]:
import os
import sys
import glob
import sqlite3
from pathlib import Path
import ee
import geemap
import geopandas as gpd
import pandas as pd
import xee
import xarray as xr

sys.path.append(r'C:\Users\Pooya\w\GitHub\ShiraziPooya\DroughtMonitoringIran')

from app.utils.gee import run_with_adaptive_buffer, extract_points_to_csv


ee.Authenticate()
ee.Initialize(
    project = 'drought-monitoring-iran',
    opt_url = 'https://earthengine-highvolume.googleapis.com'
)

In [2]:
DATABASE_PATH = "../database/database.db"

conn = sqlite3.connect(DATABASE_PATH)

geoinfo = pd.read_sql(sql='SELECT * FROM ground_data_geoinfo', con=conn)

conn.close()

In [4]:
DATASETS = {
    "MOD16A2GF": {
        "image_collection_id": "MODIS/061/MOD16A2GF",
        "start_date": "2000-01-01",
        "end_date": "2026-01-01",
        "parameter": "PET",
        "multiply": 0.1,
        "add": 0.0,
        "scale": None,
        "unit": "kg/m^2/8day",
        "Cadence": "8 Days",
        "name": "MOD16A2GF",
        "output_path": f"../output/GEE/PET/MOD16A2GF.csv",
        "points_geojson": "../assets/geo_data/MazandaranStationsIRIMO.geojson"
    },
}

In [5]:
# for name, config in DATASETS.items():
#     print(f"Product: {name}")
#     extract_points_to_csv(
#         **config,
#         points_geojson="../assets/geo_data/MazandaranStationsIRIMO.geojson", 
#     )

In [6]:
for name, config in DATASETS.items():
    print(f"Product: {name}")
    df = run_with_adaptive_buffer(
        config=config,
        base_points_geojson=config["points_geojson"],
        buffer_list_m=[0, 1000, 1500, 2000, 2500, 5000, 10000],
    )  
    out_path = Path(config["output_path"])
    out_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_path, index=False)
    print(f"Final merged result saved to {out_path}")

Product: MOD16A2GF
Nominal scale for MOD16A2GF: 463.31271652791656

=== Try 1 with buffer_m = 0 m ===
Using scale: 463.31271652791656, buffer_m: 0
Stations still all-NaN after buffer 0 m: {'40788', '99306', '40736'}

=== Try 2 with buffer_m = 1000 m ===
Using scale: 463.31271652791656, buffer_m: 1000
Stations still all-NaN after buffer 1000 m: set()
All stations have at least some non-NaN values. Stopping.
Final merged result saved to ..\output\GEE\PET\MOD16A2GF.csv


# Concat Data

In [3]:
folder = "../output/GEE/PET/"

all_dfs = []

for filepath in glob.glob(os.path.join(folder, "*.csv")):
    df = pd.read_csv(filepath)
    
    filename = os.path.basename(filepath)
    model_name, _ = os.path.splitext(filename)
    
    model_name = model_name.split("-")[0]

    df = df.rename(columns={model_name: "PET"})
    df["model"] = model_name

    all_dfs.append(df)

result = pd.concat(all_dfs, ignore_index=True)
result["date"] = pd.to_datetime(result["date"], format="mixed", dayfirst=True, errors="coerce")
result = result.sort_values(by=["Region", "St_Name", "model", "date"]).reset_index(drop=True)

result

Unnamed: 0,date,St_ID,Region,St_Ele,St_Lat,St_Lon,St_Name,region_id,PET,model
0,2000-01-01,99361,Mazandaran,1805.0,36.07,52.84,Alasht,MASA,22.9,MOD16A2GF
1,2000-01-09,99361,Mazandaran,1805.0,36.07,52.84,Alasht,MASA,10.6,MOD16A2GF
2,2000-01-17,99361,Mazandaran,1805.0,36.07,52.84,Alasht,MASA,17.7,MOD16A2GF
3,2000-01-25,99361,Mazandaran,1805.0,36.07,52.84,Alasht,MASA,19.9,MOD16A2GF
4,2000-02-02,99361,Mazandaran,1805.0,36.07,52.84,Alasht,MASA,16.6,MOD16A2GF
...,...,...,...,...,...,...,...,...,...,...
17245,2024-11-24,40735,Mazandaran,1855.4,36.23,51.30,Siahbisheh,MASA,16.8,MOD16A2GF
17246,2024-12-02,40735,Mazandaran,1855.4,36.23,51.30,Siahbisheh,MASA,17.8,MOD16A2GF
17247,2024-12-10,40735,Mazandaran,1855.4,36.23,51.30,Siahbisheh,MASA,15.5,MOD16A2GF
17248,2024-12-18,40735,Mazandaran,1855.4,36.23,51.30,Siahbisheh,MASA,18.5,MOD16A2GF


# Convert Long to Wide

In [4]:
result = result.pivot_table(
    index=["St_ID", "St_Name", "St_Lat", "St_Lon", "St_Ele", "date", "region_id"],
    columns="model",
    values="PET"
).reset_index()

result

model,St_ID,St_Name,St_Lat,St_Lon,St_Ele,date,region_id,MOD16A2GF
0,40732,Ramsar,36.90,50.68,-20.0,2000-01-01,MASA,15.7
1,40732,Ramsar,36.90,50.68,-20.0,2000-01-09,MASA,7.6
2,40732,Ramsar,36.90,50.68,-20.0,2000-01-17,MASA,19.7
3,40732,Ramsar,36.90,50.68,-20.0,2000-01-25,MASA,18.4
4,40732,Ramsar,36.90,50.68,-20.0,2000-02-02,MASA,18.3
...,...,...,...,...,...,...,...,...
17245,99361,Alasht,36.07,52.84,1805.0,2024-11-24,MASA,17.7
17246,99361,Alasht,36.07,52.84,1805.0,2024-12-02,MASA,18.8
17247,99361,Alasht,36.07,52.84,1805.0,2024-12-10,MASA,13.9
17248,99361,Alasht,36.07,52.84,1805.0,2024-12-18,MASA,18.8


# Concat geoinfo and results

In [5]:
result = pd.merge(
    left=result,
    right=geoinfo,
    left_on=["region_id", "St_Name"],
    right_on=["region_id", "station_name"],
    how="left"
).drop(columns=["St_ID", "St_Name", "St_Lat", "St_Lon", "St_Ele"])

cols = ["region_id", "region_name", "station_id", "station_name", "lat", "lon", "station_elevation"]
result = result[cols + [c for c in result.columns if c not in cols]]

result.sort_values(by=["region_id", "region_name", "station_id", "station_name", "date"], inplace=True)

result.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17250 entries, 0 to 17249
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   region_id          17250 non-null  object        
 1   region_name        17250 non-null  object        
 2   station_id         17250 non-null  object        
 3   station_name       17250 non-null  object        
 4   lat                17250 non-null  float64       
 5   lon                17250 non-null  float64       
 6   station_elevation  17250 non-null  float64       
 7   date               17250 non-null  datetime64[ns]
 8   MOD16A2GF          17250 non-null  float64       
dtypes: datetime64[ns](1), float64(4), object(4)
memory usage: 1.2+ MB


# Convert to Daily and Monthly

In [6]:
expanded_rows = []

for _, row in result.iterrows():
    for i in range(8):
        expanded_rows.append({
            'region_id': row['region_id'],
            'region_name': row['region_name'],
            'station_id': row['station_id'],
            'station_name': row['station_name'],
            'lat': row['lat'],
            'lon': row['lon'],
            'station_elevation': row['station_elevation'],            
            'date': row['date'] - pd.Timedelta(days=i),
            'MOD16A2GF': row['MOD16A2GF'] / 8
        })

daily_dataset = pd.DataFrame(expanded_rows)
daily_dataset["date"] = pd.to_datetime(daily_dataset["date"])
daily_dataset = daily_dataset.sort_values(by=["region_id", "region_name", "station_id", "station_name", "date"]).reset_index(drop=True)

daily_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 138000 entries, 0 to 137999
Data columns (total 9 columns):
 #   Column             Non-Null Count   Dtype         
---  ------             --------------   -----         
 0   region_id          138000 non-null  object        
 1   region_name        138000 non-null  object        
 2   station_id         138000 non-null  object        
 3   station_name       138000 non-null  object        
 4   lat                138000 non-null  float64       
 5   lon                138000 non-null  float64       
 6   station_elevation  138000 non-null  float64       
 7   date               138000 non-null  datetime64[ns]
 8   MOD16A2GF          138000 non-null  float64       
dtypes: datetime64[ns](1), float64(4), object(4)
memory usage: 9.5+ MB


In [7]:
tmp = daily_dataset.copy()
tmp["date"] = tmp["date"].dt.to_period("M").astype(str)

monthly_dataset = tmp.groupby(["region_id", "region_name", "station_id", "station_name", "date"])["MOD16A2GF"].sum(min_count=25).reset_index()
monthly_dataset["date"] = pd.to_datetime(monthly_dataset["date"]) + pd.offsets.MonthEnd(0)

monthly_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4515 entries, 0 to 4514
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   region_id     4515 non-null   object        
 1   region_name   4515 non-null   object        
 2   station_id    4515 non-null   object        
 3   station_name  4515 non-null   object        
 4   date          4515 non-null   datetime64[ns]
 5   MOD16A2GF     4500 non-null   float64       
dtypes: datetime64[ns](1), float64(1), object(4)
memory usage: 211.8+ KB


In [8]:
monthly_dataset = pd.merge(
    left=monthly_dataset,
    right=geoinfo,
    left_on=["region_id", "region_name", "station_id", "station_name"],
    right_on=["region_id", "region_name", "station_id", "station_name"],
    how="left"
)

cols = ["region_id", "region_name", "station_id", "station_name", "lat", "lon", "station_elevation"]
monthly_dataset = monthly_dataset[cols + [c for c in monthly_dataset.columns if c not in cols]]

monthly_dataset.sort_values(by=["region_id", "region_name", "station_id", "station_name", "date"], inplace=True)

monthly_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4515 entries, 0 to 4514
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   region_id          4515 non-null   object        
 1   region_name        4515 non-null   object        
 2   station_id         4515 non-null   object        
 3   station_name       4515 non-null   object        
 4   lat                4515 non-null   float64       
 5   lon                4515 non-null   float64       
 6   station_elevation  4515 non-null   float64       
 7   date               4515 non-null   datetime64[ns]
 8   MOD16A2GF          4500 non-null   float64       
dtypes: datetime64[ns](1), float64(4), object(4)
memory usage: 317.6+ KB


In [9]:
monthly_dataset

Unnamed: 0,region_id,region_name,station_id,station_name,lat,lon,station_elevation,date,MOD16A2GF
0,MASA,Mazandaran,40732,Ramsar,36.90,50.68,-20.0,1999-12-31,
1,MASA,Mazandaran,40732,Ramsar,36.90,50.68,-20.0,2000-01-31,61.3875
2,MASA,Mazandaran,40732,Ramsar,36.90,50.68,-20.0,2000-02-29,84.0125
3,MASA,Mazandaran,40732,Ramsar,36.90,50.68,-20.0,2000-03-31,140.3375
4,MASA,Mazandaran,40732,Ramsar,36.90,50.68,-20.0,2000-04-30,196.2250
...,...,...,...,...,...,...,...,...,...
4510,MASA,Mazandaran,99361,Alasht,36.07,52.84,1805.0,2024-08-31,225.5875
4511,MASA,Mazandaran,99361,Alasht,36.07,52.84,1805.0,2024-09-30,153.4125
4512,MASA,Mazandaran,99361,Alasht,36.07,52.84,1805.0,2024-10-31,124.9500
4513,MASA,Mazandaran,99361,Alasht,36.07,52.84,1805.0,2024-11-30,73.1000


In [10]:
conn = sqlite3.connect(DATABASE_PATH)

monthly_dataset.to_sql('gee_pet_monthly', conn, if_exists='replace', index=False)

conn.commit()
conn.close()