In [1]:
import sys

import numpy as np
import xarray as xr

# prepend the parent directory so it'll find the local climate_indices in the path
if "/home/james/git/climate_indices" not in sys.path:
    sys.path.insert(0, "/home/james/git/climate_indices")
from climate_indices import compute, indices, utils

%matplotlib inline
%load_ext autoreload

In [2]:
# %autoreload 2
sys.path

['/home/james/git/climate_indices',
 '/home/james/git/climate_indices/notebooks',
 '/home/james/miniconda3/envs/spi_fit/lib/python37.zip',
 '/home/james/miniconda3/envs/spi_fit/lib/python3.7',
 '/home/james/miniconda3/envs/spi_fit/lib/python3.7/lib-dynload',
 '',
 '/home/james/miniconda3/envs/spi_fit/lib/python3.7/site-packages',
 '/home/james/miniconda3/envs/spi_fit/lib/python3.7/site-packages/IPython/extensions',
 '/home/james/.ipython']

In [32]:
%autoreload 2

In [3]:
# open the precipitation NetCDF as an xarray DataSet object
ds_lo = xr.open_dataset("/data/datasets/nclimgrid/nclimgrid_lowres_prcp.nc")
ds_hi = xr.open_dataset("/data/datasets/nclimgrid/nclimgrid_prcp.nc")

In [4]:
from climate_indices.compute import test_function

test_function("Found")

Found


In [5]:
# get the precipitation arrays, over which we'll compute the SPI
da_precip_lo = ds_lo["prcp"]
da_precip_hi = ds_hi["prcp"]

In [6]:
# make sure we have the arrays with time as the inner-most dimension
preferred_dims = ("lat", "lon", "time")
da_precip_lo = da_precip_lo.transpose(*preferred_dims)
da_precip_hi = da_precip_hi.transpose(*preferred_dims)

In [7]:
# we need for the lat and lon values to be in ascending order
# in order for the xr.apply_ufunc() to work as expected
# see https://stackoverflow.com/questions/53108606/xarray-apply-ufunc-with-groupby-unexpected-number-of-dimensions
data_arrays = {
    "low": da_precip_lo,
    "high": da_precip_hi,
}
for label, da in data_arrays.items():
    if da["lat"][0] > da["lat"][1]:
        print(f"The {label}-resolution DataArray's lats are descending -- flipping")
        da["lat"] = np.flip(da["lat"])
    if da["lon"][0] > da["lon"][1]:
        print(f"The {label}-resolution DataArray's lons are descending -- flipping")
        da["lon"] = np.flip(da["lon"])

The high-resolution DataArray's lats are descending -- flipping


In [38]:
initial_year = int(da_precip_lo["time"][0].dt.year)
scale_months = 3

In [8]:
total_lats = len(da_precip_lo["lat"])
total_lons = len(da_precip_lo["lon"])
monthly_vals_shape = (total_lats, total_lons, 12)
alphas = np.full(shape=monthly_vals_shape, fill_value=np.NaN)
betas = np.full(shape=monthly_vals_shape, fill_value=np.NaN)

In [35]:
from climate_indices.compute import Periodicity, scale_values

In [37]:
%%time
# loop over each lat/lon point and compute the gamma fitting parameters alpha and beta for 3-month scaled data
for lat_index in range(total_lats):
    for lon_index in range(total_lons):
        # get the values for the lat/lon grid cell
        values = da_precip_lo[lat_index, lon_index]

        # skip over this grid cell if all NaN values
        if (np.ma.is_masked(values) and values.mask.all()) or np.all(np.isnan(values)):
            continue

        # scale to 3-month convolutions
        scaled_values = scale_values(values, scale=3, periodicity=Periodicity.monthly)

        # compute the fitting parameters on the scaled data
        (
            alphas[lat_index, lon_index],
            betas[lat_index, lon_index],
        ) = compute.gamma_parameters(
            scaled_values,
            data_start_year=initial_year,
            calibration_start_year=1900,
            calibration_end_year=2000,
            periodicity=compute.Periodicity.monthly,
        )

CPU times: user 10min 53s, sys: 11.7 s, total: 11min 5s
Wall time: 11min 5s


In [39]:
spi_gamma_3month = np.full(shape=da_precip_lo.shape, fill_value=np.NaN)
gamma_params = {"alphas": alphas, "betas": betas}

In [42]:
da_precip_lo.shape

(38, 87, 1466)

In [41]:
%%time
# loop over each lat/lon point and compute the gamma fitting parameters alpha and beta for 3-month scaled data
for lat_index in range(total_lats):
    for lon_index in range(total_lons):
        # get the values for the lat/lon grid cell
        values = da_precip_lo[lat_index, lon_index]

        # skip over this grid cell if all NaN values
        if (np.ma.is_masked(values) and values.mask.all()) or np.all(np.isnan(values)):
            continue

        # compute SPI/gamma for the 3-month scale
        spi_gamma_3month[lat_index, lon_index] = indices.spi(
            values,
            scale=scale_months,
            distribution=indices.Distribution.gamma,
            data_start_year=initial_year,
            calibration_year_initial=1900,
            calibration_year_final=2000,
            periodicity=compute.Periodicity.monthly,
            fitting_params=gamma_params,
        )

ValueError: operands could not be broadcast together with shapes (123,12) (38,87,12) 

In [None]:
da_precip_lo_groupby = da_precip_lo.stack(point=("lat", "lon")).groupby("point")
da_precip_hi_groupby = da_precip_hi.stack(point=("lat", "lon")).groupby("point")

In [None]:
%%time
# use xarray's apply_ufunc to apply a function over each point
da_one_more_lo = xr.apply_ufunc(add_one, da_precip_lo_groupby)

In [None]:
da_one_more_lo_looper = da_precip_lo.copy()

In [None]:
%%time
# do it "by hand" by looping over each lat/lon point
for lat_index in range(len(da_one_more_lo_looper["lat"])):
    for lon_index in range(len(da_one_more_lo_looper["lon"])):
        da_one_more_lo_looper[lat_index, lon_index] = add_one(da_one_more_lo_looper[lat_index, lon_index])

In [None]:
%%time
# use xarray's apply_ufunc to apply a function over each point
da_one_more_hi = xr.apply_ufunc(add_one, da_precip_hi_groupby)

In [None]:
da_one_more_hi_looper = da_precip_hi.copy()

In [None]:
%%time
# do it "by hand" by looping over each lat/lon point
for lat_index in range(len(da_one_more_hi_looper["lat"])):
    for lon_index in range(len(da_one_more_hi_looper["lon"])):
        da_one_more_hi_looper[lat_index, lon_index] = add_one(da_one_more_hi_looper[lat_index, lon_index])

In [None]:
_FITTED_INDEX_VALID_MIN = -3.09
_FITTED_INDEX_VALID_MAX = 3.09


def spi(
    values: np.ndarray,
    scale: int,
    distribution: indices.Distribution,
    data_start_year: int,
    calibration_year_initial: int,
    calibration_year_final: int,
    periodicity: compute.Periodicity,
) -> np.ndarray:
    """
    Computes SPI (Standardized Precipitation Index).

    :param values: 1-D numpy array of precipitation values, in any units,
        first value assumed to correspond to January of the initial year if
        the periodicity is monthly, or January 1st of the initial year if daily
    :param scale: number of time steps over which the values should be scaled
        before the index is computed
    :param distribution: distribution type to be used for the internal
        fitting/transform computation
    :param data_start_year: the initial year of the input precipitation dataset
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param periodicity: the periodicity of the time series represented by the
        input data, valid/supported values are 'monthly' and 'daily'
        'monthly' indicates an array of monthly values, assumed to span full
         years, i.e. the first value corresponds to January of the initial year
         and any missing final months of the final year filled with NaN values,
         with size == # of years * 12
         'daily' indicates an array of full years of daily values with 366 days
         per year, as if each year were a leap year and any missing final months
         of the final year filled with NaN values, with array size == (# years * 366)
    :param fitting_params: optional dictionary of pre-computed distribution
        fitting parameters, if the distribution is gamma then this dict should
        contain two arrays, keyed as "alphas" and "betas", and if the
        distribution is Pearson then this dict should contain four arrays keyed
        as "probabilities_of_zero", "locs", "scales", and "skews"
    :return SPI values fitted to the gamma distribution at the specified time
        step scale, unitless
    :rtype: 1-D numpy.ndarray of floats of the same length as the input array
        of precipitation values
    """

    # we expect to operate upon a 1-D array, so if we've been passed a 2-D array
    # then we flatten it, otherwise raise an error
    shape = values.shape
    if len(shape) == 2:
        values = values.flatten()
    elif len(shape) != 1:
        message = f"Invalid shape of input array: {shape}" + " -- only 1-D and 2-D arrays are supported"
        _logger.error(message)
        raise ValueError(message)

    # if we're passed all missing values then we can't compute
    # anything, so we return the same array of missing values
    if (np.ma.is_masked(values) and values.mask.all()) or np.all(np.isnan(values)):
        return values

    # clip any negative values to zero
    if np.amin(values) < 0.0:
        _logger.warn("Input contains negative values -- all negatives clipped to zero")
        values = np.clip(values, a_min=0.0, a_max=None)

    # remember the original length of the array, in order to facilitate
    # returning an array of the same size
    original_length = values.size

    # get a sliding sums array, with each time step's value scaled
    # by the specified number of time steps
    values = compute.sum_to_scale(values, scale)

    # reshape precipitation values to (years, 12) for monthly,
    # or to (years, 366) for daily
    if periodicity is compute.Periodicity.monthly:
        values = utils.reshape_to_2d(values, 12)

    elif periodicity is compute.Periodicity.daily:
        values = utils.reshape_to_2d(values, 366)

    else:
        raise ValueError("Invalid periodicity argument: %s" % periodicity)

    if distribution is indices.Distribution.gamma:
        # fit the scaled values to a gamma distribution
        # and transform to corresponding normalized sigmas
        values = compute.transform_fitted_gamma(
            values,
            data_start_year,
            calibration_year_initial,
            calibration_year_final,
            periodicity,
        )
    elif distribution is indices.Distribution.pearson:
        # fit the scaled values to a Pearson Type III distribution
        # and transform to corresponding normalized sigmas
        values = compute.transform_fitted_pearson(
            values,
            data_start_year,
            calibration_year_initial,
            calibration_year_final,
            periodicity,
        )

    else:
        message = "Unsupported distribution argument: " + f"{distribution}"
        _logger.error(message)
        raise ValueError(message)

    # clip values to within the valid range, reshape the array back to 1-D
    values = np.clip(values, _FITTED_INDEX_VALID_MIN, _FITTED_INDEX_VALID_MAX).flatten()

    # return the original size array
    return values[0:original_length]

In [None]:
def apply_spi_gamma_monthly(
    data_array: xr.DataArray,
    months: int,
    data_start_year: int = 1895,
    calibration_year_initial: int = 1900,
    calibration_year_final: int = 2000,
) -> xr.DataArray:
    # stack the lat and lon dimensions into a new dimension named point, so at each lat/lon
    # we'll have a time series for the geospatial point, and group by these points
    da_precip_groupby = data_array.stack(point=("lat", "lon")).groupby("point")

    spi_args = {
        "scale": months,
        "distribution": indices.Distribution.gamma,
        "data_start_year": data_start_year,
        "calibration_year_initial": calibration_year_initial,
        "calibration_year_final": calibration_year_final,
        "periodicity": compute.Periodicity.monthly,
    }

    # apply the SPI function to the data array
    da_spi = xr.apply_ufunc(
        spi,
        da_precip_groupby,
        kwargs=spi_args,
    )

    # unstack the array back into original dimensions
    da_spi = da_spi.unstack("point")

    return da_spi

In [None]:
da_spi_lo = apply_spi_gamma_monthly(da_precip_lo, 3)

In [None]:
da_spi_lo[200].plot()

In [None]:
da_precip_hi = da_precip_hi.transpose("lat", "lon", "time")

In [None]:
da_precip_hi

In [None]:
da_precip_lo

In [None]:
da_spi_hi = apply_spi_gamma_monthly(da_precip_hi, 3)