# Wind speed around exposed peaks (fig 4)

In this notebook we:
1. evaluate the wind speed hindcast experiment for exposed peaks and generate figure 4,
2. produce a map of the stations used, and
3. test whether the difference in errors is statistically significant.

In [None]:
import pandas as pd
import xarray as xr

from scores.continuous import mse
from scores.processing import broadcast_and_match_nan
from scores.stats.statistical_tests import diebold_mariano

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

### Open and prepare data

In [None]:
# Files for the exposed peaks experiment, in the order they are unpacked below.
peaks_paths = (
    # Official forecast
    "data/exposed_peaks/Official_WindMag_00_20171201-20180228_exposed_peaks.nc",
    # Existing automated alternative for the 2017-2018 summer: FCF 1.0 with an
    # 18Z base time
    "data/exposed_peaks/FCF_1_0_AutoFcst_WindMag_18_20171201-20180228_exposed_peaks.nc",
    # Hindcast experiment, called FCF 1.3
    "data/exposed_peaks/FCF_1_3_AutoFcst_WindMag_18_20171201-20180228_exposed_peaks.nc",
    # Observations
    "data/exposed_peaks/obs_WindMag_20171201-20180228_exposed_peaks.nc",
)

# Open each file and match missing data between the datasets, so that all
# forecasts are scored against the same set of points.
official_peaks, automated_peaks, hindcast_peaks, obs_peaks = broadcast_and_match_nan(
    *(xr.open_dataarray(path) for path in peaks_paths)
)

### Calculate scores and produce figure

In [None]:
# Dimensions kept in every MSE result; all other dimensions are averaged over.
score_dims = ["lead_day", "valid_start"]

official_peaks_mse = mse(official_peaks, obs_peaks, preserve_dims=score_dims)
hindcast_peaks_mse = mse(hindcast_peaks, obs_peaks, preserve_dims=score_dims)
autofcst_peaks_mse = mse(automated_peaks, obs_peaks, preserve_dims=score_dims)

# Difference between AutoFcst and the hindcast. A positive value means the
# hindcast had the lower MSE.
# The Diebold-Mariano test reads an "h" value for each lead day from a
# coordinate on "lead_day".
# NOTE(review): the list has seven entries, so this assumes "lead_day" has
# exactly seven values - confirm if the input data changes.
h_values = [2, 3, 4, 5, 6, 7, 8]
diff_autofcst_hindcast = (autofcst_peaks_mse - hindcast_peaks_mse).assign_coords(
    h=("lead_day", h_values)
)

dm_result = diebold_mariano(diff_autofcst_hindcast, "lead_day", "h")
dm_result

In [None]:
# Line colours for the three forecast sources.
official_line_colour = "rgba(230,159,0,1)"
autofcst_line_colour = "rgba(86,180,233,1)"
hindcast_line_colour = "#009E73"

# Two side-by-side panels: (a) MSE by lead day, (b) difference in MSE.
figure = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("<b>(a)</b>", "<b>(b)</b>"),
)
# Shift the panel labels towards the left edge of each subplot.
figure.update_annotations(font_size=12, xshift=-160, xanchor="left")

# Panel (a): mean MSE over valid times for each forecast source. No row/col is
# given, so the traces go to the figure's default (first) subplot.
mse_series = (
    (official_peaks_mse, official_line_colour, "Official"),
    (autofcst_peaks_mse, autofcst_line_colour, "Existing automated"),
    (hindcast_peaks_mse, hindcast_line_colour, "Hindcast experiment"),
)
for score, colour, label in mse_series:
    figure.add_trace(
        go.Scatter(
            x=score.lead_day,
            y=score.mean(dim="valid_start").values,
            line=dict(color=colour),
            name=label,
        )
    )

# Panel (b): mean difference in MSE from the Diebold-Mariano result. The error
# bars run from ci_lower to ci_upper, given as distances from the mean.
dm_mean = dm_result["mean"]
upper_extent = dm_result["ci_upper"] - dm_mean
lower_extent = dm_mean - dm_result["ci_lower"]
figure.add_trace(
    go.Scatter(
        x=hindcast_peaks_mse.lead_day,
        y=dm_mean,
        line=dict(color="black"),
        error_y=dict(
            thickness=1,
            type="data",
            symmetric=False,
            array=upper_extent,
            arrayminus=lower_extent,
        ),
        showlegend=False,
    ),
    row=1,
    col=2,
)

# Zero line in panel (b), marking no difference in MSE.
figure.add_hline(y=0, row=1, col=2)

figure.update_layout(
    legend=dict(yanchor="top", y=0.65, xanchor="left", x=0.01),
    height=300,
    width=800,
    margin=go.layout.Margin(l=20, r=20, b=20, t=20),  # left, right, bottom, top
)
figure.update_yaxes(title_text="MSE (kt<sup>2</sup>)", row=1, col=1)
figure.update_yaxes(title_text="Difference in MSE (kt<sup>2</sup>)", row=1, col=2)
# One tick per lead day on both panels. This call returns the figure, so it
# also renders as the cell output.
figure.update_xaxes(title_text="Lead day", tickmode="linear", tick0=0, dtick=1)

In [None]:
# Export the two-panel MSE figure as a PDF.
# NOTE(review): assumes the output directory already exists - confirm.
figure.write_image(file="results/figures/wind_exposed_peaks.pdf")

### Generate station map

In [None]:
# Station metadata for all stations.
df = pd.read_csv("data/aws_metadata/station_data.csv")

# Keep only the stations present in the exposed peaks forecast data.
peak_station_numbers = official_peaks.station_number.values
df_exposed_peaks = df.loc[df["station_number"].isin(peak_station_numbers)]

In [None]:
# Graticule styling shared by the longitude and latitude axes (tick step of 5).
graticule = dict(showgrid=True, gridcolor="gray", gridwidth=0.5, dtick=5)

# Map window and base-map styling.
geo_options = dict(
    resolution=110,
    lonaxis_range=[110, 155],
    lataxis_range=[-45, -10],
    showcoastlines=True,
    showland=True,
    showocean=True,
    oceancolor="rgb(144, 195, 245)",
    showcountries=True,
    showframe=True,
    lonaxis=dict(graticule),
    lataxis=dict(graticule),
)

# One red marker per exposed-peaks station.
fig = px.scatter_geo(
    df_exposed_peaks,
    lat="LATITUDE",
    lon="LONGITUDE",
    color_discrete_sequence=["red"],
)
fig.update_geos(**geo_options)
fig.update_traces(marker={"size": 4})
fig.update_layout(
    title="b)",
    height=350,
    width=400,
    margin=go.layout.Margin(l=0, r=0, b=0, t=40),  # left, right, bottom, top
)
fig.show()

In [None]:
# Export the station map as a PDF.
# NOTE(review): assumes the output directory already exists - confirm.
fig.write_image(file="results/station_maps/b_wind_exposed_peaks_stations.pdf")