# Generate wind experiments figures

In [83]:
import numpy as np
import xarray as xr

from scores.continuous import mse
from scores.processing import broadcast_and_match_nan
from scores.stats.statistical_tests import diebold_mariano

from plotly.subplots import make_subplots
import plotly.graph_objects as go

### Get data for wind max-in-hour experiment

In [84]:
# Forecasts and observations for the wind max-in-hour experiment.
official_max_exp = xr.open_dataarray(
    "../data/Official_WindMag_00_20190901-20191130.nc"
)
# Existing automated guidance (described as the 12Z AutoFcst) that was available
# to meteorologists for the afternoon (00Z) official forecast issue.
# NOTE(review): the file loaded here is tagged "_18_" while the hindcast file
# below is tagged "_12_" -- confirm which base time each dataset really has.
autofcst_max_exp = xr.open_dataarray(
    "../data/FCF_2_0_AutoFcst_WindMag_18_20190901-20191130.nc"
)
hindcast_max_exp = xr.open_dataarray(
    "../data/AutoFcstMax_WindMag_12_20190901-20191130.nc"
)
obs_max_exp = xr.open_dataarray(
    "../data/obs_WindMagMaxInHour_20190901-20191130.nc"
)

# Match missing data between datasets so every source is scored on the same cases.
(
    official_max_exp,
    autofcst_max_exp,
    hindcast_max_exp,
    obs_max_exp,
) = broadcast_and_match_nan(
    official_max_exp, autofcst_max_exp, hindcast_max_exp, obs_max_exp
)

### Get data for exposed peaks experiment

In [85]:
# Forecasts and observations for the exposed peaks experiment.
official_peaks = xr.open_dataarray(
    "../data/Official_WindMag_00_20171201-20180228_exposed_peaks.nc"
)
# Existing automated alternative for the 2017-2018 summer: FCF 1.0 with an 18Z base time.
automated_peaks = xr.open_dataarray(
    "../data/FCF_1_0_AutoFcst_WindMag_18_20171201-20180228_exposed_peaks.nc"
)
# The hindcast for this experiment was called FCF1.3.
hindcast_peaks = xr.open_dataarray(
    "../data/FCF_1_3_AutoFcst_WindMag_18_20171201-20180228_exposed_peaks.nc"
)
obs_peaks = xr.open_dataarray(
    "../data/obs_WindMag_20171201-20180228_exposed_peaks.nc"
)

# Match missing data between datasets so every source is scored on the same cases.
(
    official_peaks,
    automated_peaks,
    hindcast_peaks,
    obs_peaks,
) = broadcast_and_match_nan(
    official_peaks, automated_peaks, hindcast_peaks, obs_peaks
)

### Calculate MSE

In [86]:
# MSE of each forecast source against the observations, preserving only the
# lead_day dimension. These per-lead-day values are what the figure cell plots.

# Wind max-in-hour experiment
official_max_exp_mse = mse(
    official_max_exp, obs_max_exp, preserve_dims="lead_day"
)
autofcst_max_exp_mse = mse(
    autofcst_max_exp, obs_max_exp, preserve_dims="lead_day"
)
hindcast_max_exp_mse = mse(
    hindcast_max_exp, obs_max_exp, preserve_dims="lead_day"
)

# Exposed peaks experiment
official_peaks_mse = mse(
    official_peaks, obs_peaks, preserve_dims="lead_day"
)
autofcst_peaks_mse = mse(
    automated_peaks, obs_peaks, preserve_dims="lead_day"
)
hindcast_peaks_mse = mse(
    hindcast_peaks, obs_peaks, preserve_dims="lead_day"
)

In [87]:
# Two-panel figure of MSE by lead day:
#   (a) wind max-in-hour experiment, (b) exposed peaks experiment.
# Each forecast source keeps one colour and one label across both panels so that
# the single legend (drawn from the left panel) describes both.
official_line_colour = 'rgba(230,159,0,1)'
autofcst_line_colour = 'rgba(86,180,233,1)'
hindcast_line_colour = "#009E73"

# (legend label, line colour, panel (a) MSE, panel (b) MSE) per forecast source.
forecast_series = [
    ("Official", official_line_colour, official_max_exp_mse, official_peaks_mse),
    ("Existing automated", autofcst_line_colour, autofcst_max_exp_mse, autofcst_peaks_mse),
    ("Hindcast experiment", hindcast_line_colour, hindcast_max_exp_mse, hindcast_peaks_mse),
]

figure = make_subplots(rows=1, cols=2, subplot_titles=("<b>(a)</b>", "<b>(b)</b>", ))
# Shift the "(a)"/"(b)" subplot titles to the left of each panel.
figure.update_annotations(font_size=12, xshift=-160, xanchor="left")

# Traces are added panel by panel (left first), in the same source order each time.
for panel_col in (1, 2):
    for series_label, series_colour, max_exp_series, peaks_series in forecast_series:
        panel_mse = max_exp_series if panel_col == 1 else peaks_series
        figure.add_trace(
            go.Scatter(
                x=panel_mse.lead_day,
                y=panel_mse.values,
                line=dict(color=series_colour),
                name=series_label,
                # Same group in both panels: toggling a legend entry hides or
                # shows that forecast source in (a) and (b) together.
                legendgroup=series_label,
                # Only the left-panel traces contribute legend entries.
                showlegend=(panel_col == 1),
            ),
            row=1, col=panel_col,
        )

figure.update_layout(
    legend=dict(x=0.01, y=0.99),
    height=400,
    width=800,
    margin=go.layout.Margin(
        l=20,  # left margin
        r=20,  # right margin
        b=20,  # bottom margin
        t=20,  # top margin
    ),
)
# No row/col selector: the same axis settings are applied to both panels.
figure.update_yaxes(title_text="MSE (kt<sup>2</sup>)")
# Keep this call last: it returns the figure, which the notebook then displays.
figure.update_xaxes(title_text="Lead day", tickmode="linear", tick0=0, dtick=1)

In [88]:
# Export the two-panel MSE figure; the ../figures directory must already exist.
figure.write_image(file="../figures/wind_exp.svg")

### Statistical significance testing for wind max experiment

In [89]:
# MSE preserving both lead_day and valid_start; the significance tests below
# take differences of these fields.
# NOTE: this rebinds the *_max_exp_mse names that the figure cell reads with only
# a lead_day dimension, so re-run the "Calculate MSE" cell before re-plotting.
official_max_exp_mse = mse(
    official_max_exp, obs_max_exp, preserve_dims=["lead_day", "valid_start"]
)
autofcst_max_exp_mse = mse(
    autofcst_max_exp, obs_max_exp, preserve_dims=["lead_day", "valid_start"]
)
hindcast_max_exp_mse = mse(
    hindcast_max_exp, obs_max_exp, preserve_dims=["lead_day", "valid_start"]
)

In [90]:
# Diebold-Mariano test on the MSE difference between Official and the hindcast
# (wind max-in-hour experiment).
# The horizon coordinate "h" is attached along lead_day with the values 2..8,
# which requires lead_day to have exactly seven entries.
diff_official_hindcast = (
    official_max_exp_mse - hindcast_max_exp_mse
).assign_coords(h=("lead_day", list(range(2, 9))))
dm_result = diebold_mariano(diff_official_hindcast, "lead_day", "h")
dm_result

In [91]:
# Diebold-Mariano test on the MSE difference between the existing AutoFcst and
# the hindcast (wind max-in-hour experiment).
# The horizon coordinate "h" is attached along lead_day with the values 2..8,
# which requires lead_day to have exactly seven entries.
diff_autofcst_hindcast = (
    autofcst_max_exp_mse - hindcast_max_exp_mse
).assign_coords(h=("lead_day", list(range(2, 9))))
dm_result = diebold_mariano(diff_autofcst_hindcast, "lead_day", "h")
dm_result

In [92]:
# Diebold-Mariano test on the MSE difference between the existing AutoFcst and
# the Official forecast (wind max-in-hour experiment).
# The horizon coordinate "h" is attached along lead_day with the values 2..8,
# which requires lead_day to have exactly seven entries.
diff_autofcst_official = (
    autofcst_max_exp_mse - official_max_exp_mse
).assign_coords(h=("lead_day", list(range(2, 9))))
dm_result = diebold_mariano(diff_autofcst_official, "lead_day", "h")
dm_result

### Statistical significance testing for exposed peaks experiment

In [93]:
# MSE preserving both lead_day and valid_start for the exposed peaks experiment;
# the significance test below takes differences of these fields.
# NOTE: this rebinds the *_peaks_mse names that the figure cell reads with only
# a lead_day dimension, so re-run the "Calculate MSE" cell before re-plotting.
official_peaks_mse = mse(
    official_peaks, obs_peaks, preserve_dims=["lead_day", "valid_start"]
)
autofcst_peaks_mse = mse(
    automated_peaks, obs_peaks, preserve_dims=["lead_day", "valid_start"]
)
hindcast_peaks_mse = mse(
    hindcast_peaks, obs_peaks, preserve_dims=["lead_day", "valid_start"]
)

In [94]:
# Diebold-Mariano test on the MSE difference between the existing automated
# forecast and the hindcast (exposed peaks experiment).
# NOTE(review): official_peaks_mse is computed in the previous cell but is not
# tested here -- confirm that this is the only comparison intended.
# NOTE: diff_autofcst_hindcast and dm_result are rebound here, replacing the
# wind max-in-hour results that used the same names.
# The horizon coordinate "h" is attached along lead_day with the values 2..8,
# which requires lead_day to have exactly seven entries.
diff_autofcst_hindcast = (
    autofcst_peaks_mse - hindcast_peaks_mse
).assign_coords(h=("lead_day", list(range(2, 9))))
# NOTE(review): the recorded output of this cell carries a warning that the
# timeseries contains at least one NaN, which may affect the autocovariance
# calculation -- check whether the missing values need handling first.
dm_result = diebold_mariano(diff_autofcst_hindcast, "lead_day", "h")
dm_result


A least one NaN value was detected in `da_timeseries`. This may impact the calculation of autocovariances.

