# Generate DailyPoP calibration figure (Fig 8)

In this notebook we:
1. produce reliability diagrams for the Official and Automated DailyPoP forecasts over inland Australia (Fig 8), and
2. produce a map of the stations used.

In [None]:
import numpy as np
import pandas as pd
import xarray as xr

from scores.continuous import isotonic_fit
from scores.processing import broadcast_and_match_nan

import plotly.graph_objects as go
import plotly.express as px

### Load data and match missing values
Data is for the Automatic Weather Stations located over inland Australia.

In [None]:
# Official forecasts: open the file and keep only lead day 1.
official = xr.open_dataarray(
    "data/dailypop/Official_DailyPoP_00_20211201-20220228_inland.nc"
).sel(lead_day=1)

# Automated forecasts, also lead day 1. The 12Z AutoFcst is used because it was
# the automated guidance available to meteorologists for the afternoon (00Z)
# official forecast issue.
autofcst = xr.open_dataarray(
    "data/dailypop/AutoFcst_DailyPoP_12_20211201-20220228_inland.nc"
).sel(lead_day=1)

# Observations are binary: 0 (no rain) or 1 (rain).
obs = xr.open_dataarray("data/dailypop/obs_DailyPoP_20211201-20220228_inland.nc")

# Broadcast the three arrays against each other and propagate missing values,
# so that a NaN in any one of them is a NaN in all of them.
official, autofcst, obs = broadcast_and_match_nan(official, autofcst, obs)

### Perform Isotonic regression with 10,000 bootstrap samples
This should take under one minute to complete.

In [None]:
# Seed NumPy's global random state so that results are reproducible.
np.random.seed(0)

# Same number of bootstrap samples for both forecast systems.
n_bootstraps = 10_000

iso_official_dict = isotonic_fit(official, obs, bootstraps=n_bootstraps)
iso_autofcst_dict = isotonic_fit(autofcst, obs, bootstraps=n_bootstraps)

### Produce reliability diagram

In [None]:
# Reliability diagram comparing the Official and Automated DailyPoP forecasts.
# Layers are added in this order: diagonal reference line, forecast-frequency
# bars, then (for each forecast system) a confidence band and the isotonic
# regression line.
figure = go.Figure()
official_band_fillcolour = "rgba(230,159,0,0.5)"
official_line_colour = "rgba(230,159,0,1)"

autofcst_band_fillcolour = "rgba(86,180,233,0.5)"
autofcst_line_colour = "rgba(86,180,233,1)"

# End points (in percent) of the diagonal reference line.
total_min = 0
total_max = 100

# Eleven edges (0, 10, ..., 100) define ten bins for the forecast histograms.
bins = np.arange(0, 110, 10)
# One bar per bin, so use the ten bin centres (5, 15, ..., 95). `bins + 5`
# would give eleven x values for only ten bar heights.
bin_centres = bins[:-1] + 5

# Histogram of issued forecast values; each distinct forecast value is
# weighted by the number of times it occurs.
hist_official, _ = np.histogram(
    iso_official_dict["fcst_sorted"],
    bins=bins,
    weights=iso_official_dict["fcst_counts"],
)

hist_autofcst, _ = np.histogram(
    iso_autofcst_dict["fcst_sorted"],
    bins=bins,
    weights=iso_autofcst_dict["fcst_counts"],
)

# Diagonal where observed frequency equals forecast probability.
figure.add_shape(
    type="line",
    x0=total_min,
    y0=total_min,
    x1=total_max,
    y1=total_max,
    line=dict(color="black", dash="dot"),
)

# Forecast-frequency bars: percentage of all forecasts falling in each bin.
for hist, bar_colour in (
    (hist_official, official_band_fillcolour),
    (hist_autofcst, autofcst_band_fillcolour),
):
    figure.add_bar(
        x=bin_centres,
        y=100 * hist / hist.sum(),
        width=4,
        marker=dict(color=bar_colour),
        showlegend=False,
    )

# Confidence band and regression line, Official first and then Automated.
for label, iso_dict, band_colour, line_colour in (
    ("Official", iso_official_dict, official_band_fillcolour, official_line_colour),
    ("Automated", iso_autofcst_dict, autofcst_band_fillcolour, autofcst_line_colour),
):
    fcst_values = iso_dict["fcst_sorted"]

    # Upper edge of the band: an invisible line that the lower edge fills up to.
    figure.add_trace(
        go.Scatter(
            x=fcst_values,
            y=100 * iso_dict["confidence_band_upper_values"],
            mode="lines",
            line=dict(width=0, color=band_colour),
            showlegend=False,
        )
    )
    # Lower edge of the band; "tonexty" shades the area between this trace
    # and the one added immediately before it.
    figure.add_trace(
        go.Scatter(
            x=fcst_values,
            y=100 * iso_dict["confidence_band_lower_values"],
            mode="lines",
            line=dict(width=0, color=band_colour),
            fillcolor=band_colour,
            fill="tonexty",
            showlegend=False,
        )
    )
    # Isotonic regression curve; the only trace shown in the legend.
    figure.add_trace(
        go.Scatter(
            x=fcst_values,
            y=100 * iso_dict["regression_values"],
            name=label,
            mode="lines",
            line=dict(color=line_colour),
        )
    )

# Label the regions above and below the diagonal.
figure.add_annotation(x=30, y=70, text="Underforecast", textangle=-45, showarrow=False)

figure.add_annotation(x=70, y=30, text="Overforecast", textangle=-45, showarrow=False)

figure.update_layout(
    xaxis_title="Forecast probability (%)",
    yaxis_title="Observed frequency (%)",
    legend=dict(x=0.01, y=0.99),
    height=500,
    width=500,
    margin=go.layout.Margin(
        l=20,  # left margin
        r=20,  # right margin
        b=20,  # bottom margin
        t=20,  # top margin
    ),
)
# Grid lines and ticks every 10 % on both axes.
figure.update_xaxes(showgrid=True, tickmode="linear", tick0=0, dtick=10)
figure.update_yaxes(showgrid=True, tickmode="linear", tick0=0, dtick=10)
figure.show()

In [None]:
# Save the reliability diagram to disk.
reliability_pdf_path = "results/figures/dailypop.pdf"
figure.write_image(reliability_pdf_path)

In [None]:
# Station map: keep only the stations that appear in the forecast data.
df = pd.read_csv("data/aws_metadata/station_data.csv")
forecast_station_numbers = official.station_number.values
df = df[df["station_number"].isin(forecast_station_numbers)]

# One red marker per station.
fig = px.scatter_geo(
    data_frame=df,
    lat="LATITUDE",
    lon="LONGITUDE",
    color_discrete_sequence=["red"],
)

# Both axes share the same graticule styling (a grid line every 5 degrees).
graticule_style = dict(showgrid=True, gridcolor="gray", gridwidth=0.5, dtick=5)

fig.update_geos(
    resolution=50,
    showcoastlines=True,
    showland=True,
    showocean=True,
    oceancolor="rgb(144, 195, 245)",
    showcountries=True,
    showframe=True,
    # Restrict the view to longitudes 110 to 155 and latitudes -45 to -10.
    lonaxis=dict(range=[110, 155], **graticule_style),
    lataxis=dict(range=[-45, -10], **graticule_style),
)

fig.update_traces(marker=dict(size=4))
fig.update_layout(
    title="e)",  # presumably a panel label for a multi-panel figure; confirm
    height=350,
    width=400,
    margin=go.layout.Margin(
        l=0,  # left margin
        r=0,  # right margin
        b=0,  # bottom margin
        t=40,  # top margin (leaves room for the title)
    ),
)
fig.show()

In [None]:
# Save the station map to disk.
station_map_pdf_path = "results/station_maps/e_inland_stations.pdf"
fig.write_image(station_map_pdf_path)