# This notebook performs a case study comparing GPROF V7, ERA5, and the SatRain retrievals


In [1]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
from typing import Tuple

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

## GPROF


In [2]:
def load_gprof_precip(path: Path, scan_start: int, scan_end: int) -> xr.Dataset:
    """
    Load GPROF precipitation for a range of scans.

    Args:
        path: A Path object pointing to the GPROF file to load.
        scan_start: The start scan of the collocation.
        scan_end: Index pointing to the first scan that is not part of
             the collocation anymore.

    Return:
        An xarray.Dataset with dimensions ('scan', 'pixel') containing the
        variables 'longitude', 'latitude', 'surface_precip',
        'probability_of_precip', 'precip_flag', 'probability_of_heavy_precip',
        and 'heavy_precip_flag' extracted from the GPROF file pointed to by
        'path'. Negative surface precipitation values are replaced by NaN.
    """
    scans = slice(scan_start, scan_end)
    # Open read-only explicitly; the context manager closes the file on exit,
    # so no additional close() call is required afterwards.
    with h5py.File(path, "r") as data:
        lats = data["S1/Latitude"][scans]
        lons = data["S1/Longitude"][scans]

        surface_precip = data["S1/surfacePrecipitation"][scans]
        # Negative values are treated as invalid and masked with NaN.
        invalid = surface_precip < 0
        surface_precip[invalid] = np.nan
        # NOTE(review): presumably stored in percent and rescaled to [0, 1] -- confirm.
        probability_of_precip = data["S1/probabilityOfPrecip"][scans] / 100.0
        precipitation_flag = data["S1/precipitationYesNoFlag"][scans]
        # The heavy-precip probability is not read from the file; it is derived
        # from the surface precipitation scaled by 1/100 and clipped to [0, 1].
        # NOTE(review): looks like a proxy score rather than a calibrated
        # probability -- confirm this is intended.
        probability_of_heavy_precip = np.clip(surface_precip / 100, 0.0, 1.0)

    dims = ("scan", "pixel")
    return xr.Dataset({
        "longitude": (dims, lons),
        "latitude": (dims, lats),
        "surface_precip": (dims, surface_precip),
        "probability_of_precip": (dims, probability_of_precip),
        "precip_flag": (dims, precipitation_flag),
        "probability_of_heavy_precip": (dims, probability_of_heavy_precip),
        # Pixels with surface precipitation above 10 are flagged as heavy;
        # NaN pixels compare False and therefore stay unflagged.
        "heavy_precip_flag": (dims, surface_precip > 10),
    })


In [3]:
from pyresample.geometry import SwathDefinition
from pansat.utils import resample_data
from pansat.products.satellite.gpm import l2a_gprof_gpm_gmi_v07a, l2a_gprof_gpm_gmi_v07b

def retrieve_gprof(input_data: xr.Dataset) -> xr.Dataset:
    """
    Retrieval callback function to load GPROF data corresponding to IPWGML SPR evaluation data.

    The GPROF granule is looked up by the mean time of the input data, first
    in the V07A product and, if nothing is found there, in the V07B product.
    The scans given by the 'scan_start' and 'scan_end' attributes of the input
    data are then resampled to the longitude/latitude grid of every batch
    element.

    Args:
        input_data: An xarray.Dataset containing the retrieval input data. It
            must provide the 'time', 'longitude' and 'latitude' variables, a
            'batch' dimension, and the 'scan_start' and 'scan_end' attributes.

    Return:
        An xarray.Dataset containing the retrieval results concatenated along
        the 'batch' dimension.

    Raises:
        IndexError: If neither the V07A nor the V07B product has a granule for
            the time of the input data.
    """
    time = input_data.time.mean().data
    recs = l2a_gprof_gpm_gmi_v07a.get(time)
    if len(recs) == 0:
        recs = l2a_gprof_gpm_gmi_v07b.get(time)
    if len(recs) == 0:
        # Same exception type as the former bare recs[0] lookup, but with a
        # message that names the actual problem.
        raise IndexError(
            f"No GPROF V07A or V07B granule found for time {time}."
        )
    rec = recs[0]
    scan_start = input_data.attrs["scan_start"]
    scan_end = input_data.attrs["scan_end"]
    results_gprof = load_gprof_precip(rec.local_path, scan_start, scan_end)

    n_batches = input_data.batch.size
    batch_results = []
    for batch_ind in range(n_batches):
        # Expand the 1D coordinate vectors of this batch element to 2D fields
        # describing the target grid.
        lons = input_data.longitude[batch_ind]
        lats = input_data.latitude[batch_ind]
        lons, lats = np.meshgrid(lons, lats)
        grid = SwathDefinition(lons, lats)
        # NOTE(review): radius_of_influence is presumably given in meters (10 km) -- confirm.
        results_gprof_r = resample_data(results_gprof, grid, radius_of_influence=10e3)
        batch_results.append(results_gprof_r.drop_vars(["latitude", "longitude"]))
    return xr.concat(batch_results, dim="batch")

  _set_context_ca_bundle_path(ca_bundle_path)


In [4]:
from satrain.evaluation import Evaluator
from satrain.target import TargetConfig

# Directory that receives one NetCDF file of evaluation results per domain.
results_dir = Path("results")
results_dir.mkdir(parents=True, exist_ok=True)

for domain in ["austria", "korea", "conus"]:
    evaluator = Evaluator(
        domain=domain,
        base_sensor="gmi",
        geometry="gridded",
        retrieval_input=[
            {"name": "gmi", "normalize": "minmax", "nan": -2},
        ],
        download=True,
    )
    evaluator.evaluate(retrieve_gprof)
    results = evaluator.get_results()
    # Calling to_netcdf() without a path only returns the serialized bytes and
    # writes nothing to disk, so the results of every domain would be lost.
    results.to_netcdf(results_dir / f"gprof_v7_{domain}.nc")

Output()

 ········


KeyboardInterrupt: 