In [1]:
import os
import glob
import ee
import geemap
import geopandas as gpd
import pandas as pd
import xee
import xarray as xr


ee.Authenticate()
ee.Initialize(
    project = 'drought-monitoring-iran',
    opt_url = 'https://earthengine-highvolume.googleapis.com'
)

In [None]:
def extract_points_to_csv(image_collection_id: str, 
                          start_date: str, 
                          end_date: str, 
                          parameter: str, 
                          multiply: float = 1.0, 
                          add: float = 0.0, 
                          scale: float = 1000,
                          unit: str = None,
                          Cadence: str = None,
                          name: str = None,
                          points_shapefile: str = None, 
                          points_geojson: str = None, 
                          output_path: str = "output.csv") -> None:
  
    if points_shapefile:
        points_fc = geemap.shp_to_ee(points_shapefile)
    elif points_geojson:
        points_fc = geemap.geojson_to_ee(points_geojson)
    else:
        raise ValueError("A points_shapefile or points_geojson must be provided.")
    
    collection = ee.ImageCollection(image_collection_id).filterDate(start_date, end_date).select(parameter)
    
    def daily_to_monthly_sum(col):
        def add_month(img):
            date = ee.Date(img.get("system:time_start"))
            return img.set("month", date.format("YYYY-MM"))
        
        col_with_month = col.map(add_month)

        months = ee.List(col_with_month.aggregate_array("month")).distinct()

        band_names = ee.Image(col.first()).bandNames()

        def make_monthly_image(m):
            m = ee.String(m)
            month_coll = col_with_month.filter(ee.Filter.eq("month", m))
            month_sum = month_coll.sum()
            month_sum = month_sum.rename(band_names)

            date = ee.Date.parse("YYYY-MM", m)
            return month_sum.set("system:time_start", date.millis())

        monthly_ic = ee.ImageCollection(months.map(make_monthly_image))
        return monthly_ic
    
    if Cadence == "1 Day":
        collection = daily_to_monthly_sum(collection)
    
           
    if (multiply != 1.0) or (add != 0.0):
        def apply_scaling(img):
            scaled = ee.Image(img).multiply(multiply).add(add)
            return scaled.copyProperties(img, img.propertyNames())
        collection = collection.map(apply_scaling)
    
    def accumulate_points(image, feature_coll):
        feature_coll = ee.FeatureCollection(feature_coll)
        values = ee.Image(image).select(parameter).reduceRegions(
            collection=points_fc, 
            reducer=ee.Reducer.first().setOutputs(['value']), 
            scale=scale
        )
        date_str = ee.Date(image.get("system:time_start")).format("YYYY-MM-dd")
        values_with_date = values.map(lambda f: f.set("date", date_str))
        return feature_coll.merge(values_with_date)

    result_fc = ee.FeatureCollection(collection.iterate(accumulate_points, ee.FeatureCollection([])))
    df = geemap.ee_to_df(result_fc)
    

    
    if unit == "mm/month":
        df["value"] = df["value"]
    elif unit == "mm/day":
        df["value"] = df["value"] * df["date"].apply(lambda x: pd.Period(x, freq='M').days_in_month)
    elif unit == "mm/hr":
        df["value"] = df["value"] * 24 * df["date"].apply(lambda x: pd.Period(x, freq='M').days_in_month)
    
    if name:
        df.rename(columns = {"value": f"{name}"}, inplace=True)

    if output_path.lower().endswith(".xlsx"):
        df.to_excel(output_path, index=False)
    else:
        df.to_csv(output_path, index=False)
    
    print(f"Point extraction results saved to {output_path}")


In [None]:
DATASETS = {
    "GPM": {
        "image_collection_id": "NASA/GPM_L3/IMERG_MONTHLY_V07",
        "start_date": "1980-01-01",
        "end_date": "2026-01-01",
        "parameter": "precipitation",
        "multiply": 1.0,
        "add": 0.0,
        "scale": 11132,
        "unit": "mm/hr",
        "Cadence": "1 Month",
        "name": "GPM",
        "output_path": f"../output/GEE/Precipitation/GPM.csv"
    },
    "TRMM": {
        "image_collection_id": "TRMM/3B43V7",
        "start_date": "1980-01-01",
        "end_date": "2026-01-01",
        "parameter": "precipitation",
        "multiply": 1.0,
        "add": 0.0,
        "scale": 27830,
        "unit": "mm/hr",
        "Cadence": "1 Month",
        "name": "TRMM",
        "output_path": f"../output/GEE/Precipitation/TRMM.csv"
    },
    "ERA5": {
        "image_collection_id": "ECMWF/ERA5_LAND/MONTHLY_AGGR",
        "start_date": "1980-01-01",
        "end_date": "2026-01-01",
        "parameter": "total_precipitation_sum",
        "multiply": 1000,
        "add": 0.0,
        "scale": 11132,
        "unit": "mm/month",
        "Cadence": "1 Month",
        "name": "ERA5",
        "output_path": f"../output/GEE/Precipitation/ERA5.csv"
    },
    "TerraClimate": {
        "image_collection_id": "IDAHO_EPSCOR/TERRACLIMATE",
        "start_date": "1980-01-01",
        "end_date": "2026-01-01",
        "parameter": "pr",
        "multiply": 1,
        "add": 0.0,
        "scale": 4638.3,
        "unit": "mm/month",
        "Cadence": "1 Month",
        "name": "TerraClimate",
        "output_path": f"../output/GEE/Precipitation/TerraClimate.csv"
    },
    "PERSIANN_CDR-1980_1989": {
        "image_collection_id": "NOAA/PERSIANN-CDR",
        "start_date": "1980-01-01",
        "end_date": "1990-01-01",
        "parameter": "precipitation",
        "multiply": 1,
        "add": 0.0,
        "scale": 27830,
        "unit": "mm/month",
        "Cadence": "1 Day",
        "name": "PERSIANN_CDR",
        "output_path": f"../output/GEE/Precipitation/PERSIANN_CDR-1980_1989.csv"
    },
    "PERSIANN_CDR-1990_1999": {
        "image_collection_id": "NOAA/PERSIANN-CDR",
        "start_date": "1990-01-01",
        "end_date": "2000-01-01",
        "parameter": "precipitation",
        "multiply": 1,
        "add": 0.0,
        "scale": 27830,
        "unit": "mm/month",
        "Cadence": "1 Day",
        "name": "PERSIANN_CDR",
        "output_path": f"../output/GEE/Precipitation/PERSIANN_CDR-1990_1999.csv"
    },
    "PERSIANN_CDR-2000_2009": {
        "image_collection_id": "NOAA/PERSIANN-CDR",
        "start_date": "2000-01-01",
        "end_date": "2010-01-01",
        "parameter": "precipitation",
        "multiply": 1,
        "add": 0.0,
        "scale": 27830,
        "unit": "mm/month",
        "Cadence": "1 Day",
        "name": "PERSIANN_CDR",
        "output_path": f"../output/GEE/Precipitation/PERSIANN_CDR-2000_2009.csv"
    },
    "PERSIANN_CDR-2010_2019": {
        "image_collection_id": "NOAA/PERSIANN-CDR",
        "start_date": "2010-01-01",
        "end_date": "2020-01-01",
        "parameter": "precipitation",
        "multiply": 1,
        "add": 0.0,
        "scale": 27830,
        "unit": "mm/month",
        "Cadence": "1 Day",
        "name": "PERSIANN_CDR",
        "output_path": f"../output/GEE/Precipitation/PERSIANN_CDR-2010_2019.csv"
    },
    "PERSIANN_CDR-2020_2025": {
        "image_collection_id": "NOAA/PERSIANN-CDR",
        "start_date": "2020-01-01",
        "end_date": "2026-01-01",
        "parameter": "precipitation",
        "multiply": 1,
        "add": 0.0,
        "scale": 27830,
        "unit": "mm/month",
        "Cadence": "1 Day",
        "name": "PERSIANN_CDR",
        "output_path": f"../output/GEE/Precipitation/PERSIANN_CDR-2020_2025.csv"
    },
    "CHIRPS_1980_1989": {
        "image_collection_id": "UCSB-CHG/CHIRPS/DAILY",
        "start_date": "1980-01-01",
        "end_date": "1990-01-01",
        "parameter": "precipitation",
        "multiply": 1,
        "add": 0.0,
        "scale": 5566,
        "unit": "mm/month",
        "Cadence": "1 Day",
        "name": "CHIRPS",
        "output_path": f"../output/GEE/Precipitation/CHIRPS-1980_1989.csv"
    },
    "CHIRPS-1990_1999": {
        "image_collection_id": "UCSB-CHG/CHIRPS/DAILY",
        "start_date": "1990-01-01",
        "end_date": "2000-01-01",
        "parameter": "precipitation",
        "multiply": 1,
        "add": 0.0,
        "scale": 5566,
        "unit": "mm/month",
        "Cadence": "1 Day",
        "name": "CHIRPS",
        "output_path": f"../output/GEE/Precipitation/CHIRPS-1990_1999.csv"
    },
    "CHIRPS_2000_2009": {
        "image_collection_id": "UCSB-CHG/CHIRPS/DAILY",
        "start_date": "2000-01-01",
        "end_date": "2010-01-01",
        "parameter": "precipitation",
        "multiply": 1,
        "add": 0.0,
        "scale": 5566,
        "unit": "mm/month",
        "Cadence": "1 Day",
        "name": "CHIRPS",
        "output_path": f"../output/GEE/Precipitation/CHIRPS-2000_2009.csv"
    },
    "CHIRPS-2010_2019": {
        "image_collection_id": "UCSB-CHG/CHIRPS/DAILY",
        "start_date": "2010-01-01",
        "end_date": "2020-01-01",
        "parameter": "precipitation",
        "multiply": 1,
        "add": 0.0,
        "scale": 5566,
        "unit": "mm/month",
        "Cadence": "1 Day",
        "name": "CHIRPS",
        "output_path": f"../output/GEE/Precipitation/CHIRPS-2010_2019.csv"
    },
    "CHIRPS-2020_2025": {
        "image_collection_id": "UCSB-CHG/CHIRPS/DAILY",
        "start_date": "2020-01-01",
        "end_date": "2026-01-01",
        "parameter": "precipitation",
        "multiply": 1,
        "add": 0.0,
        "scale": 5566,
        "unit": "mm/month",
        "Cadence": "1 Day",
        "name": "CHIRPS",
        "output_path": f"../output/GEE/Precipitation/CHIRPS-2020_2025.csv"
    },
}

In [None]:
for name, config in DATASETS.items():
    print(f"Product: {name}")
    extract_points_to_csv(
        **config,
        points_geojson="../assets/geo_data/MazandaranStationsIRIMO.geojson",
    )

# Concat Data

In [11]:
folder = "../output/GEE/Precipitation/"

all_dfs = []

for filepath in glob.glob(os.path.join(folder, "*.csv")):
    df = pd.read_csv(filepath)
    
    filename = os.path.basename(filepath)
    model_name, _ = os.path.splitext(filename)
    
    model_name = model_name.split("-")[0]

    df = df.rename(columns={model_name: "Precipitation"})
    df["model"] = model_name
    
    all_dfs.append(df)

result = pd.concat(all_dfs, ignore_index=True)

# convert long to wi
result.pivot(index='date', columns='model', values='Precipitation')

ValueError: Index contains duplicate entries, cannot reshape