# Custom Quantity Demo

In [None]:
import os
import sys
from functools import partial

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr

# Prefer an installed copy of the SDK. When it is not installed, fall back to a
# source checkout whose package directory sits one level above the working directory.
try:
    import salientsdk as sk
except ModuleNotFoundError as e:
    if os.path.exists("../salientsdk"):
        # Make the checkout importable, then retry the import.
        sys.path.append(os.path.abspath(".."))
        import salientsdk as sk
    else:
        # Chain the original error so the traceback still shows which module was
        # actually missing (it may be one of the SDK's own dependencies).
        raise ModuleNotFoundError("Install Salient SDK with: pip install salientsdk") from e

# Later cells build their CSV paths from sk.get_file_destination().
sk.set_file_destination("custom_quantities")

# NOTE(review): these look like placeholder credentials -- confirm how the SDK
# resolves them, or replace them with your own before running.
sk.login("SALIENT_USERNAME", "SALIENT_PASSWORD")

## Defining and Uploading a Custom Quantity

The goal of this notebook is to show you how to make your own custom quantities and get both historical occurrences of the quantity and forecasts from GEM and NOAA GEFS. Custom quantities can be either continuous or binary and can involve one or more core variables. These quantities involve extra processing steps to calculate, but can be more direct in answering the questions that matter most for your business; one example of a custom quantity is actually cooling and heating degree days, because these apply extra processing on the minimum and maximum temperatures. Although the processing for CDDs/HDDs is relatively simple, it still involves "extra steps" that one must take to get the final quantity.

To demonstrate our system, let's walk through the process of defining a complex or compound event: days with **both** high temperatures and low winds. At a high level, we are going to do the following:
1. Upload our definition of high temperatures
2. Upload our definition of low winds
3. Upload our definition for the complex event, which chains these two individual events together.

### Single Quantity
First, high temperatures. The `/upload_file` endpoint also has the ability to take CSVs with the parameters defining the quantity. **Key: to differentiate custom quantity files from location files, there must be 2 and only 2 columns, titled `parameter` and `value`**. For more information, please see the "Custom Quantity" section in our Notion help page: https://salientpredictions.notion.site/.

In [None]:
# Definition of the "hot temperatures" quantity: `tmax` above the 0.90
# seasonal quantile.
params = dict(
    name="hot_temps_example",
    description="my example of hot temperatures",
    variable="tmax",
    daily_threshold_value=0.90,
    daily_threshold_type="seasonal_quantile",
    daily_threshold_direction="above",
)

# One row per parameter, in the two-column parameter/value layout.
df = pd.DataFrame(list(params.items()), columns=["parameter", "value"])

# Write the definition into the SDK's file destination, then upload it.
hot_temps_file_path = f"{sk.get_file_destination()}/hot_temps.csv"
df.to_csv(hot_temps_file_path, index=False)
sk.upload_file(hot_temps_file_path, update=True, verbose=True)

One of a latitude/longitude pair, `location_file`, or shapefile can be included in the definition if you are interested in adding
spatial aggregations (such as population-weighted) or simply always want to look at a specific area or areas. A location can also simply be supplied when calling an endpoint, as seen below. This gives you the flexibility to see maps in the UI over an entire region, such as North America, and then limit individual API calls. The name of the variable in the returned dataset is the same as the name passed for the definition.

In [None]:
# Location used for the map-based requests in the following cells.
loc = sk.Location(shapefile="CONUS.geojson")

# Request the historical record of the uploaded quantity, then load the result.
file_dsc = sk.data_timeseries(
    loc=loc,
    custom_quantity="hot_temps_example",
    start="2020-10-16",
    end="2024-12-31",
)
hot_historical = xr.load_dataset(file_dsc)

# Trailing expression: rendered as the cell output.
hot_historical

In [None]:
# Definition of the "low winds" quantity: `wspd100` below the 0.10
# seasonal quantile.
params = dict(
    name="low_winds_example",
    description="my example of low winds",
    variable="wspd100",
    daily_threshold_value=0.10,
    daily_threshold_type="seasonal_quantile",
    daily_threshold_direction="below",
)

# One row per parameter, in the two-column parameter/value layout.
df = pd.DataFrame(list(params.items()), columns=["parameter", "value"])

# Write the definition into the SDK's file destination, then upload it.
low_winds_file_path = f"{sk.get_file_destination()}/low_winds.csv"
df.to_csv(low_winds_file_path, index=False)
sk.upload_file(low_winds_file_path, update=True, verbose=True)

In [None]:
# Historical record of the low-winds quantity for the same location and
# period as the hot-temperature request.
file_dsc = sk.data_timeseries(
    loc=loc,
    custom_quantity="low_winds_example",
    start="2020-10-16",
    end="2024-12-31",
)
wind_historical = xr.load_dataset(file_dsc)

In [None]:
# Pair each plot label with the historical array it describes, so the stacking
# order and the label order come from the same mapping.
event_layers = {
    "hot_days": hot_historical["hot_temps_example"],
    "low_wind": wind_historical["low_winds_example"],
}

# Stack the two events along a new "quantity" dimension and label its entries.
both_historical = xr.concat(list(event_layers.values()), dim="quantity").assign_coords(
    quantity=list(event_layers)
)
both_historical.name = "combined_event"

### Making a Complex Event

Custom quantities can also be complex or compound events where two or more individual events are chained together. To define a complex event, first be sure the individual components are all uploaded. Then, the key is to do something like: 

`combine`,event1&event2&event3&...

where "&" is used to chain the individual components. Finally, there is an `operator` keyword which can be "and" or "or"; note that `combine` always uses "&" despite the operator also having an "or" option.

Because the complex event uses the names of the individual components, if you make a change to one of them, the complex event will automatically be updated and use the new definition.

In [None]:
# Compound event: chains the two previously uploaded quantities by name and
# combines them with the "and" operator.
params = dict(
    name="complex_example",
    description="my example of complex event: high temps and low winds",
    combine="hot_temps_example&low_winds_example",
    operator="and",
)

# One row per parameter, in the two-column parameter/value layout.
df = pd.DataFrame(list(params.items()), columns=["parameter", "value"])

# Write the definition into the SDK's file destination, then upload it.
complex_file_path = f"{sk.get_file_destination()}/complex.csv"
df.to_csv(complex_file_path, index=False)
sk.upload_file(complex_file_path, update=True, verbose=True)

### Calling Endpoints

Once a custom quantity is uploaded, you can call `data_timeseries` to get historical occurrences of the events, and `forecast_timeseries` to get forecasts from both GEM and NOAA GEFS. Many parameters are no longer needed for the call, since things like `variable`, `field`, `units`, etc. are all defined within the event. Simply add `custom_quantity=name` to the call!

In [None]:
# Historical record of the compound event, requested exactly like a single
# quantity: only the custom quantity name changes.
file_dsc = sk.data_timeseries(
    loc=loc,
    custom_quantity="complex_example",
    start="2020-10-16",
    end="2024-12-31",
)
complex_historical = xr.load_dataset(file_dsc)

Let's look at one specific example for forecasts: late August 2023. Here, we see warm temperatures across much of South and Central Texas, with low winds seen across parts of West Texas. 

In [None]:
# Map each requested custom quantity to the label shown on the plots. Keeping
# the two together guarantees that a label can never be attached to the wrong
# variable, whatever order the merged dataset lists its variables in.
quantity_labels = {
    "hot_temps_example": "hot_days",
    "low_winds_example": "low_winds",
}

# One GEM daily forecast, issued on 2023/08/25, covering both quantities.
file_dsc = sk.forecast_timeseries(
    loc=loc,
    date="2023/08/25",
    model="gem",
    timescale="daily",
    format="nc",
    custom_quantity=list(quantity_labels),
)
forecast = xr.open_mfdataset(file_dsc.file_name)

# Select the variables by name instead of iterating `forecast.data_vars`.
# NOTE(review): assumes each forecast variable is named after its custom
# quantity, as in the historical datasets -- confirm against the API output.
combined_forecast = xr.concat([forecast[name] for name in quantity_labels], dim="quantity")
combined_forecast = combined_forecast.assign_coords(quantity=list(quantity_labels.values()))

## Visualizations

In [None]:
# Two-colour map and map projection shared by the figures in this section.
cmap = mcolors.ListedColormap(["white", "green"])
subplot_proj = {"projection": ccrs.AlbersEqualArea(central_latitude=35, central_longitude=-100)}

# One row of panels per quantity, one column per day in the selected window.
event_days = slice("2023/08/27", "2023/08/31")
im = both_historical.sel(time=event_days).plot(
    row="quantity",
    col="time",
    add_colorbar=False,
    cmap=cmap,
    subplot_kws=subplot_proj,
    transform=ccrs.PlateCarree(),
)

# Add state and country outlines to every panel and zoom to a common extent.
for panel in im.axs.flat:
    panel.add_feature(cfeature.STATES, linewidth=0.5)
    panel.add_feature(cfeature.BORDERS, linewidth=0.5)
    panel.set_extent([-110, -90, 24, 40])

Here, we can see that only areas that are shaded green show up in the complex event, owing to the "and" operator.

In [None]:
# Same days as the per-quantity figure, now for the compound event: a single
# row of panels, one per day.
event_days = slice("2023/08/27", "2023/08/31")
complex_window = complex_historical["complex_example"].sel(time=event_days)
im = complex_window.plot(
    col="time",
    cmap=cmap,
    add_colorbar=False,
    subplot_kws=subplot_proj,
    transform=ccrs.PlateCarree(),
)

# Add state and country outlines to every panel and zoom to a common extent.
for panel in im.axs.flat:
    panel.add_feature(cfeature.STATES, linewidth=0.5)
    panel.add_feature(cfeature.BORDERS, linewidth=0.5)
    panel.set_extent([-110, -90, 24, 40])

A few days prior, GEM captured the areas with elevated risk of both events well.

In [None]:
# Forecast values for the same days as the historical figures: one row of
# panels per quantity, one column per valid time, with a shared colorbar
# placed underneath.
forecast_days = slice("2023/08/27", "2023/08/31")
im = combined_forecast.sel(valid_time=forecast_days).plot(
    col="valid_time",
    row="quantity",
    cbar_kwargs={"location": "bottom", "pad": 0.02},
    subplot_kws=subplot_proj,
    transform=ccrs.PlateCarree(),
)

# Add state and country outlines to every panel and zoom to a common extent.
for panel in im.axs.flat:
    panel.add_feature(cfeature.STATES, linewidth=0.5)
    panel.add_feature(cfeature.BORDERS, linewidth=0.5)
    panel.set_extent([-110, -90, 24, 40])

## Multi-Day Event

Events can also be defined using rolling aggregations to get mean weekly temperature, 5-day precipitation totals, etc. Here, we reproduce an example shown in our April webinar for the GEM launch: 3-day HDD totals exceeding 50 HDDs. Note that if the `accumulated_threshold_*` parameters were not present, this would return **continuous data**: instead of getting a probability, you'd get our standard set of quantiles (0.01, 0.025, 0.05, ...).

In [None]:
# Multi-day event: `hdd` summed over a rolling 3-"D" window, flagged when the
# accumulated total is above the absolute threshold of 50.
params = dict(
    name="multi_day_example",
    description="my example of multi-day event",
    variable="hdd",
    rolling_window=3,
    rolling_window_unit="D",
    rolling_aggregation="sum",
    accumulated_threshold_value=50,
    accumulated_threshold_type="absolute",
    accumulated_threshold_direction="above",
)

# One row per parameter, in the two-column parameter/value layout.
df = pd.DataFrame(list(params.items()), columns=["parameter", "value"])

# Write the definition into the SDK's file destination, then upload it.
multi_day_hdd_file_path = f"{sk.get_file_destination()}/multi_day_hdd.csv"
df.to_csv(multi_day_hdd_file_path, index=False)
sk.upload_file(multi_day_hdd_file_path, update=True, verbose=True)

In [None]:
# Switch from the shapefile region to a single point; `loc` is reused by the
# forecast requests that follow.
loc = sk.Location(lat=32.7767, lon=-96.7970)  # dallas, tx

# Historical record of the multi-day event at that point.
file_dsc = sk.data_timeseries(
    loc=loc,
    custom_quantity="multi_day_example",
    start="2024-01-01",
    end="2025-02-28",
)
multi_day_historical = xr.load_dataset(file_dsc)

In [None]:
def _preprocess(ds, date):
    """Reduce one opened forecast file to a single valid time.

    Args:
        ds: Object exposing ``.sel`` that accepts a ``valid_time`` keyword.
        date: Value selected along ``valid_time``.

    Returns:
        Whatever ``ds.sel(valid_time=date)`` returns.
    """
    selection = {"valid_time": date}
    return ds.sel(**selection)


# Every daily initialization date from 2023/12/28 through 2024/01/29.
dates = xr.date_range(start="2023/12/28", end="2024/01/29", freq="D").to_list()

# Each forecast file is cut down to the single valid time 2024/01/29 as it is opened.
prep = partial(_preprocess, date="2024/01/29")

# Settings shared by both models: the same request, and the same way of
# stacking the per-date files along a new "forecast_date" dimension.
request_kwargs = dict(
    loc=loc,
    date=dates,
    timescale="daily",
    format="nc",
    custom_quantity="multi_day_example",
)
open_kwargs = dict(preprocess=prep, combine="nested", concat_dim="forecast_date")

# GEM forecasts.
file_dsc = sk.forecast_timeseries(model="gem", **request_kwargs)
multi_day_forecast_gem = xr.open_mfdataset(file_dsc.file_name, **open_kwargs).squeeze()

# NOAA GEFS forecasts.
file_dsc = sk.forecast_timeseries(model="noaa_gefs", **request_kwargs)
multi_day_forecast_gefs = xr.open_mfdataset(file_dsc.file_name, **open_kwargs).squeeze()

In [None]:
# Draw both models' forecasts of the multi-day event on one set of axes.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 4), constrained_layout=True)
multi_day_forecast_gem["multi_day_example"].plot(ax=ax, label="GEM")
multi_day_forecast_gefs["multi_day_example"].plot(ax=ax, label="GEFS")

# Set the title and axis labels after plotting so they are the ones displayed.
ax.set_title("Forecast for 3-day HDD starting 2024-01-29", fontsize=13, fontweight="bold", pad=1)
ax.set_xlabel("Forecast Initialization", fontsize=11, fontweight="bold")
ax.set_ylabel("Probability (%)", fontsize=11, fontweight="bold")

# The labels passed to .plot() are only shown once a legend is drawn; without
# it the GEM and GEFS lines cannot be told apart.
ax.legend()