# Calculate metrics using climpred

---

Now that all of our model and observational data are ready for analysis, we can run this notebook to calculate various metrics with climpred across all models and seasons and compare how the models perform. We recommend using `dask` to run this notebook, because we are working with geospatial data and making maps. You can start a `dask` cluster by running the notebook `cluster.ipynb` and copying the scheduler TCP address into this notebook.

In [1]:
import cftime
import numpy as np
import xarray as xr
import pandas as pd
xr.set_options(keep_attrs=True)
import climpred
import intake
from tqdm import tqdm
import dask.array as da
import matplotlib.pyplot as plt
from matplotlib.ticker import FixedLocator
import xskillscore as xs
import warnings
warnings.filterwarnings("ignore")

from IPython.display import Image 
from dask.distributed import Client
import dask.config
dask.config.set({"array.slicing.split_large_chunks": False})


<dask.config.set at 0x7f807449e7f0>

In [2]:
# Additional schnickschnack for state-dependence
#conda install -c conda-forge xesmf
#! pip install intake-xarray
#conda install -c conda-forge regionmask
#!pip install intake-geopandas
#!pip install intake_geopandas

- Make sure you have copied the correct tcp here from the `cluster.ipynb` notebook

In [3]:
# Connect to an already-running dask scheduler. The address below belongs to one specific
# cluster session; replace it with the one reported by `cluster.ipynb` before running.
client = Client("tcp://10.12.206.54:35199")

## Here is where you choose your variable, metric and start/end time.

In [4]:
# User-tunable settings for this run.
variable = "t2m" #can be t2m, tp, gh_500
metric = "acc" #can be rps, rmse, acc
data = "anom" # can be anom, raw
lead = "biweekly" #biweekly or daily
area = "geospatial" # passed to the catalog entries as their `dim` argument
start = "1999-01-01" # first date kept when slicing hindcast inits / verification times
end = "2021-12-31" # last date kept when slicing hindcast inits / verification times
models = ["ECMWF","CESM2","NCEP"] #this notebook uses all three of these models

In [5]:
# Metric-dependent options: `comp`, `dim` and `ens` are the settings used for the metric
# calculation, `cmap` is the colormap used for the maps.
_METRIC_OPTIONS = {
    "acc": dict(comp="e2o", dim="init", ens="ensmean", cmap="RdBu_r"),
    "rmse": dict(comp="e2o", dim="init", ens="ensmean", cmap="viridis"),
    "rps": dict(comp="m2o", dim=["init", "member"], ens="", cmap="viridis"),
}
# Fail right here for an unsupported metric instead of hitting a NameError on
# `comp`/`dim`/`cmap` many cells further down.
if metric not in _METRIC_OPTIONS:
    raise ValueError(
        f"Unsupported metric {metric!r}; choose one of {sorted(_METRIC_OPTIONS)}"
    )
_metric_opts = _METRIC_OPTIONS[metric]
comp = _metric_opts["comp"]
dim = _metric_opts["dim"]
ens = _metric_opts["ens"]
cmap = _metric_opts["cmap"]

## Now we read in and load the data into `dask`

We are using the intake catalog to find the data and load it up. Make sure you have the file `ASP_data_catalog.yml` in your local directory. Or you can find it here: `/glade/campaign/mmm/c3we/jaye/S2S_zarr/`

We have an `if` statement here telling us to load in category_edge files, only if our metric of choice is `rps`

In [6]:
# Open the intake catalog. The path is relative, so the YAML file must be in the
# directory the notebook is run from.
cat = intake.open_catalog('ASP_data_catalog.yml')

In [7]:
# Open each model's hindcast and the observations from the catalog (via `to_dask()`),
# cast to float32. All entries are requested with the same data/lead/area settings.
catalog_kwargs = dict(data=data, lead=lead, dim=area)
hinds = {
    model: cat[model](**catalog_kwargs).to_dask().astype('float32')
    for model in models
}
verif = cat['OBS'](**catalog_kwargs).to_dask().astype('float32')

GroupNotFoundError: group not found at path ''

In [None]:
# Category edges are only loaded when the chosen metric is `rps`.
if metric == "rps":
    # Chunking shared by model and observation edges; model edges additionally have "lead".
    edge_chunks = {"category_edge": -1, "dayofyear": -1, "lat": 45, "lon": 60}
    hinds_edges = {}
    for m in models:
        hinds_edges[m] = (
            cat['cat_edges'](data=data, model=m, lead=lead, dim=area)
            .to_dask()
            .astype('float32')
            .chunk({**edge_chunks, "lead": -1})
            .persist()
        )
    # Request the observation edges with the same data/lead/area settings as the model
    # edges, so changing the settings cell cannot silently mix e.g. raw hindcast edges
    # with anomaly observation edges.
    verif_edges = (
        cat['cat_edges'](data=data, model='OBS', lead=lead, dim=area)
        .to_dask()
        .astype('float32')
        .chunk(edge_chunks)
        .persist()
    )

- All of the model data is now loaded into a dictionary so that we can have them all together for comparison purposes.

In [None]:
# Report which seasons each model has inits in, then restrict to [start, end] and rechunk.
for h in hinds:
    print(hinds[h].init.dt.season.to_index().unique(), 'freq =',hinds[h].init.to_index().freq) # freq would show weekly but calendar conversion breaks this
    # Subset *before* persisting so only the requested period is loaded into cluster
    # memory (previously the full record was persisted and sliced afterwards).
    hinds[h] = (
        hinds[h]
        .sel(init=slice(start, end))
        .chunk({"member": "auto", "init": -1, "lead": "auto", "lat": 45, "lon": 60})
        .persist()
    )
verif = verif.sel(time=slice(start, end))
verif = verif.chunk({"time": -1, "lat": 45, "lon": 60})

In [None]:
## Read in labels containing classification of projection onto large-scale pattern indexes

## I) +/- PNA  +/- NAO

In [None]:
import os.path

# Directory holding the index classification files.
path="/glade/work/berner/ASP2021_tutorials/s2s_verif_and_data/data/"


def _open_index(filename):
    """Open one index-classification NetCDF file located under `path`."""
    return xr.open_dataset(os.path.join(path, filename))


# PNA
ls_pos = _open_index('indexfield_PNA_pos.nc')
ls_neg = _open_index('indexfield_PNA_neg.nc')
ls_neu = _open_index('indexfield_PNA_neutral.nc')
# NAO (uncomment to classify by NAO instead of PNA)
#ls_pos = _open_index('indexfield_NAO_pos.nc')
#ls_neg = _open_index('indexfield_NAO_neg.nc')
#ls_neu = _open_index('indexfield_NAO_neutral.nc') # does not project on NAO
#
#ls_neu = _open_index('indexfield_NAO_and_PNA_neutral.nc') # projects on neither NAO nor PNA

In [None]:
# Inspect the positive-phase dataset as loaded (before the "init" coordinate is added).
ls_pos

In [None]:
 #nao_plus has dimension "time" and hinds has dimension "init"; here we map on onto the other
def _cftime_inits(times):
    """Convert datetime64 values to cftime.DatetimeProlepticGregorian dates.

    Only year, month and day are carried over. The DatetimeIndex is built once for the
    whole array instead of three one-element indexes per timestamp.
    """
    stamps = pd.DatetimeIndex(times)
    return [
        cftime.DatetimeProlepticGregorian(year, month, day)
        for year, month, day in zip(stamps.year, stamps.month, stamps.day)
    ]


# Add an "init" variable to each classification dataset so that its dates can be
# intersected with the hindcasts' `init` values.
for ls_subset in (ls_pos, ls_neg, ls_neu):
    ls_subset["init"] = _cftime_inits(ls_subset.time.values)

In [None]:
# Inspect the positive-phase dataset again, now including the added "init" variable.
ls_pos

In [None]:
def _select_common_inits(hind, labels):
    """Keep only the inits of `hind` that also occur in `labels.init`."""
    return hind.sel(init=np.intersect1d(hind.init, labels.init))


# Pick the hindcasts that project onto the large-scale pattern (positive / negative /
# neutral classification), one dictionary per class keyed by model name.
hinds_ls_pos, hinds_ls_neg, hinds_ls_neu = {}, {}, {}
for h, hind in hinds.items():
    hinds_ls_pos[h] = _select_common_inits(hind, ls_pos)
    hinds_ls_neg[h] = _select_common_inits(hind, ls_neg)
    hinds_ls_neu[h] = _select_common_inits(hind, ls_neu)

In [None]:
# CESM2 composite difference: mean over positive-class inits minus mean over negative-class inits.
cesm2_pos_mean = hinds_ls_pos["CESM2"].mean("init")
cesm2_neg_mean = hinds_ls_neg["CESM2"].mean("init")
diff = cesm2_pos_mean - cesm2_neg_mean

In [None]:
# Member-mean of the `gh_500` composite difference, one panel per lead.
diff.gh_500.mean("member").plot(col='lead',cmap=cmap,robust=True)

In [None]:
# Member-mean of the `t2m` composite difference, one panel per lead.
diff.t2m.mean("member").plot(col='lead',cmap=cmap,robust=True)

In [None]:
# Member-mean of the `tp` composite difference, one panel per lead.
diff.tp.mean("member").plot(col='lead',cmap=cmap,robust=True)

## MJO Index 

In [None]:
# Two MJO index files are available:
# - one computed from ERAI fields (1990-2015?):
#mjo=xr.open_dataset("/glade/work/acsubram/Reanalyses/ERA_Interim/MJO_PC_INDEX_ERAI.nc")
# - one computed from NOAA satellite OLR and NCEP winds (the one used here):
DSmjo=xr.open_dataset("/glade/work/acsubram/Reanalyses/NOAA_NCEP/MJO_PC_INDEX.nc")

In [None]:
# Flatten the MJO dataset into a pandas DataFrame with explicit date columns,
# the MJO index and its two principal components.
mjo_columns = {
    'Month': DSmjo['time.month'].data,
    'Year': DSmjo['time.year'].data,
    'Day': DSmjo['time.day'].data,
    'MJO_INDEX': np.array(DSmjo['MJO_INDEX']),
    'MJO_PC1': np.array(DSmjo['PC1']),
    'MJO_PC2': np.array(DSmjo['PC2']),
}
DFmjo = pd.DataFrame(mjo_columns)
DFmjo.head(10)

In [None]:
# Display the MJO DataFrame.
DFmjo

In [1]:

# Show the reference figure of MJO phases in PC space. `Image` comes from
# `IPython.display` in the imports cell, which must have been run first (the NameError
# recorded below this cell is from running it in a kernel without the imports).
Image(url='https://rmets.onlinelibrary.wiley.com/cms/asset/104802a6-dc1f-4176-9705-03cfbb34ad3e/met1780-fig-0004-m.jpg', width=600)

NameError: name 'Image' is not defined

## Here is an MJO Phase Primer:

To Get the Phase of the MJO_Index:

"eight phases that are divided evenly and each occupies 45° on the RMM phase diagram. The angle is defined by the arctangent of RMM2 over RMM1 in degrees and a negative value is converted to positive by adding 360°. As an example, phase 1 is located in the half-open range of (180°, 225°]. The RMM1 is generally nonzero so all angles are defined."


As a reminder this image shows the phases vs locations in PC space 

In [None]:
# Inspect the raw MJO dataset.
DSmjo

In [None]:
%%time
#we do the MJO phase calculation here:
MJO_phase = []
for ii in range(DFmjo.shape[0]):
    MJO_INDEX = DFmjo.iloc[ii]['MJO_INDEX']
    RMM1 = DFmjo.iloc[ii]['MJO_PC1']
    RMM2 = DFmjo.iloc[ii]['MJO_PC2']
    if np.isnan(MJO_INDEX):
        MJO_phase.append(np.nan)
    elif MJO_INDEX < 1:
        MJO_phase.append(0)
    else:
        ang = np.degrees(np.arctan2(RMM2,RMM1))
        if ang<0:
            ang = ang+360
        ang = ang+180 
        if ang > 360:
            ang=ang-360
        MJO_phase.append(np.floor((ang)/45)+1)     
DFmjo['MJO_Phase']=MJO_phase
DFmjo.head(10)
print('...ignore the initial nan values... the dates are there....')
xr_mjo=DFmjo.to_xarray()

In [None]:
# NOTE(review): this cell looks like a pasted xarray example rather than part of the
# analysis. `lon` and `lat` are not defined anywhere in the visible notebook, and `temp`
# is a 2-D array while three dimension names are given for "Temperature", so the cell
# cannot run as written. Confirm whether it can be removed.
# Take the following dataset as an example
data_set=xr.Dataset( coords={'lon': (['x', 'y'], lon),
                    'lat': (['x', 'y'], lat),
                    'time': pd.date_range('2014-09-06', periods=3)})
temp=np.array([[25, 24, 20, -12],[23, 21, 22, -11]])
data_set["Temperature"]=(['x', 'y', 'time'],  temp)

In [None]:
# Quick look at the computed phase values.
xr_mjo.MJO_Phase.plot()

In [None]:
# Inspect the xarray version of the MJO DataFrame (created with `DFmjo.to_xarray()`).
xr_mjo

In [None]:
# Split the MJO dataset by phase: mjo_region[p] holds the time steps classified as phase p.
# Previously every iteration rebound `mjo_region` itself, so only phase 8 survived the loop.
mjo_region = {}
for ii in range(1, 9):
    print(ii)
    # Positions of this phase in xr_mjo; DFmjo was built from DSmjo, so they are used
    # to index DSmjo.time directly.
    indis = np.where(xr_mjo.MJO_Phase == ii)
    mjo_region[ii] = DSmjo.sel(time=DSmjo.time[indis])

In [None]:
# Inspect the result of the phase loop.
mjo_region

In [None]:
# NOTE(review): leftover scratch code. `da` is bound to the `dask.array` module in the
# imports cell (not a DataArray), and `lon_2` is not defined anywhere in the visible
# notebook, so this cell cannot run as written. Confirm whether it can be removed.
data_set["Temperature"]=(['x', 'y', 'time'],  temp)
da.assign_coords({"lon_2": ("lon", lon_2)})

In [None]:
# `mjo_region3` was never defined in the notebook (it only existed as leftover kernel
# state), so build it here with the same selection the per-phase loop uses.
# NOTE(review): assumes `mjo_region3` is meant to be MJO phase 3 — confirm.
mjo_region3 = DSmjo.sel(time=DSmjo.time[np.where(xr_mjo.MJO_Phase == 3)])
# Add an "init" variable of cftime dates (year/month/day only) so the phase dates can be
# intersected with the hindcast inits.
_stamps3 = pd.DatetimeIndex(mjo_region3.time.values)
mjo_region3["init"] = [
    cftime.DatetimeProlepticGregorian(year, month, day)
    for year, month, day in zip(_stamps3.year, _stamps3.month, _stamps3.day)
]

In [None]:
def _mjo_phase_subset(phase):
    """Return the DSmjo time steps classified as MJO `phase`, with an "init" variable added.

    "init" holds cftime.DatetimeProlepticGregorian dates (year/month/day only) so the
    subset can be intersected with the hindcast inits.
    """
    subset = DSmjo.sel(time=DSmjo.time[np.where(xr_mjo.MJO_Phase == phase)])
    stamps = pd.DatetimeIndex(subset.time.values)
    subset["init"] = [
        cftime.DatetimeProlepticGregorian(year, month, day)
        for year, month, day in zip(stamps.year, stamps.month, stamps.day)
    ]
    return subset


# The per-phase datasets were never defined in the notebook (they only existed as
# leftover kernel state), so they are built explicitly here. `mjo_region3` is handled in
# the preceding cell.
# NOTE(review): assumes `mjo_regionN` is meant to be MJO phase N — confirm.
mjo_region1 = _mjo_phase_subset(1)
mjo_region2 = _mjo_phase_subset(2)

mjo_region4 = _mjo_phase_subset(4)
mjo_region5 = _mjo_phase_subset(5)
mjo_region6 = _mjo_phase_subset(6)
mjo_region7 = _mjo_phase_subset(7)
mjo_region8 = _mjo_phase_subset(8)

# Pick the hindcasts whose inits fall in MJO region 8 (the old comment said region 1).
hinds_mjo= {}
for h in hinds:
    hinds_mjo[h]=hinds[h].sel(init=np.intersect1d(hinds[h].init,mjo_region8.init))

In [None]:
def _hindcasts_in_region(region):
    """Restrict every model's hindcast to the inits that also occur in `region.init`."""
    return {
        h: hinds[h].sel(init=np.intersect1d(hinds[h].init, region.init))
        for h in hinds
    }


# Pick the hindcasts that project onto the MJO index, one dictionary per region 1-8.
hinds_mjo1 = _hindcasts_in_region(mjo_region1)
hinds_mjo2 = _hindcasts_in_region(mjo_region2)
hinds_mjo3 = _hindcasts_in_region(mjo_region3)
hinds_mjo4 = _hindcasts_in_region(mjo_region4)
hinds_mjo5 = _hindcasts_in_region(mjo_region5)
hinds_mjo6 = _hindcasts_in_region(mjo_region6)
hinds_mjo7 = _hindcasts_in_region(mjo_region7)
hinds_mjo8 = _hindcasts_in_region(mjo_region8)
                                                                                                                                      

In [None]:
# NOTE(review): the first assignment is overwritten by the second, so `diff` ends up
# holding the region-4 composite mean itself rather than a difference from the
# all-inits mean. Confirm which of the two is intended.
diff=hinds["CESM2"].mean("init")-hinds_mjo4["CESM2"].mean("init")
diff=hinds_mjo4["CESM2"].mean("init")

In [None]:
# Member-mean `tp` of the current `diff`, one panel per lead.
diff.tp.mean("member").plot(col='lead',cmap=cmap,robust=True)

In [None]:
# ECMWF: mean over all inits minus mean over the region-8 inits, plotted for `tp`.
diff=hinds["ECMWF"].mean("init")-hinds_mjo8["ECMWF"].mean("init")
diff.tp.mean("member").plot(col='lead',cmap=cmap,robust=True)
# NOTE(review): `diff` is rebound here after the plot (and to CESM2, not ECMWF); the next
# use of `diff` in the notebook reassigns it, so this value appears to be unused.
diff=hinds_mjo8["CESM2"].mean("init")

## Nino3.4 Index

In [None]:
# Nino3.4 index proxy: t2m at the first lead of the ensemble-mean forecast is used in
# place of SST (this should be fine). The data still contain seasonality, which needs
# to be removed.
SST_proxy = hinds["ECMWF"].isel(lead=0).mean("member").t2m
# NOTE(review): `isel` is positional, and lon=[170,120] selects exactly two grid columns
# rather than a range — confirm that these indices really cover the Nino3.4 box.
# Keep the index series in `Nino34`; previously the name was bound to the return value
# of `.plot()` instead of the data.
Nino34 = SST_proxy.isel(lat=slice(30, 50), lon=[170,120]).mean("lon").mean("lat")
Nino34.plot()

In [None]:
# Same Nino3.4 proxy as for ECMWF, computed from the CESM2 hindcast.
SST_proxy = hinds["CESM2"].isel(lead=0).mean("member").t2m
# NOTE(review): `isel` is positional, and lon=[170,120] selects exactly two grid columns
# rather than a range — confirm that these indices really cover the Nino3.4 box.
# Keep the index series in `Nino34`; previously the name was bound to the return value
# of `.plot()` instead of the data.
Nino34 = SST_proxy.isel(lat=slice(30, 50), lon=[170,120]).mean("lon").mean("lat")
Nino34.plot()

In [None]:
# Same Nino3.4 proxy, computed from the verification (observation) t2m.
SST_proxy = verif.t2m
# NOTE(review): `isel` is positional, and lon=[170,120] selects exactly two grid columns
# rather than a range — confirm that these indices really cover the Nino3.4 box.
# Keep the index series in `Nino34`; previously the name was bound to the return value
# of `.plot()` instead of the data.
Nino34 = SST_proxy.isel(lat=slice(30, 50), lon=[170,120]).mean("lon").mean("lat")
Nino34.plot()

## Subset hindcasts based on projections onto large-scale patterns

In [None]:
# Hack to reduce number of "neutral" forecasts
#for h in hinds:
#     hinds_ls_neu[h]=hinds_ls_neu[h].isel(init=slice(1,141)) 

In [None]:
# Verify the "neutral" subset of every model. `he`/`met` are the reference that the
# state-dependent scores are compared against further down.
he, met = {}, {}
for h in hinds:
    print(h)
    he[h] = climpred.HindcastEnsemble(hinds_ls_neu[h]).add_observations(verif)
    # `metric_kwargs` stays a notebook-level name: the seasonal cells reuse it.
    metric_kwargs = dict(metric=metric, comparison=comp, dim=dim, alignment="same_inits")
    edge_kwargs = {}
    if metric == "rps":
        edge_kwargs["category_edges"] = (verif_edges, hinds_edges[h])
    else:
        metric_kwargs["skipna"] = True
    met[h] = he[h].verify(**edge_kwargs, **metric_kwargs).compute()

In [None]:
# Verify the negative-class subset of every model.
he_ls_neg, met_ls_neg = {}, {}
for h in hinds:
    print(h)
    he_ls_neg[h] = climpred.HindcastEnsemble(hinds_ls_neg[h]).add_observations(verif)
    # `metric_kwargs` stays a notebook-level name: the seasonal cells reuse it.
    metric_kwargs = dict(metric=metric, comparison=comp, dim=dim, alignment="same_inits")
    edge_kwargs = {}
    if metric == "rps":
        edge_kwargs["category_edges"] = (verif_edges, hinds_edges[h])
    else:
        metric_kwargs["skipna"] = True
    met_ls_neg[h] = he_ls_neg[h].verify(**edge_kwargs, **metric_kwargs).compute()

In [None]:
# Verify the positive-class subset of every model.
he_ls_pos = {}
met_ls_pos = {}
for h in hinds:
    print(h)
    he_ls_pos[h] = climpred.HindcastEnsemble(hinds_ls_pos[h]).add_observations(verif)
    if metric=="rps":
        metric_kwargs = dict(metric=metric, comparison=comp, dim=dim, alignment="same_inits")
        # Store into `met_ls_pos`; the rps branch used to write to an undefined `met_ls`,
        # which raised a NameError whenever metric == "rps".
        met_ls_pos[h] = he_ls_pos[h].verify(category_edges=(verif_edges, hinds_edges[h]),**metric_kwargs)
    else:
        metric_kwargs = dict(metric=metric, comparison=comp, dim=dim, alignment="same_inits", skipna=True)
        met_ls_pos[h] = he_ls_pos[h].verify(**metric_kwargs)
    met_ls_pos[h] = met_ls_pos[h].compute()

In [None]:
# Verify the MJO subset (`hinds_mjo`) of every model.
he_mjo, met_mjo = {}, {}
for h in hinds:
    print(h)
    he_mjo[h] = climpred.HindcastEnsemble(hinds_mjo[h]).add_observations(verif)
    # `metric_kwargs` stays a notebook-level name: the seasonal cells reuse it.
    metric_kwargs = dict(metric=metric, comparison=comp, dim=dim, alignment="same_inits")
    edge_kwargs = {}
    if metric == "rps":
        edge_kwargs["category_edges"] = (verif_edges, hinds_edges[h])
    else:
        metric_kwargs["skipna"] = True
    met_mjo[h] = he_mjo[h].verify(**edge_kwargs, **metric_kwargs).compute()

In [None]:
# Stack the neutral-subset scores along a new "model" dimension. Built from `models`
# directly, so it works for any number of models rather than exactly three.
met_1 = xr.concat([met[m] for m in models], dim='model').assign_coords(model=models)

In [None]:
# Stack the negative-class scores along a new "model" dimension. Built from `models`
# directly, so it works for any number of models rather than exactly three.
met_2 = xr.concat([met_ls_neg[m] for m in models], dim='model').assign_coords(model=models)

In [None]:
# Stack the positive-class scores along a new "model" dimension. Built from `models`
# directly, so it works for any number of models rather than exactly three.
met_3 = xr.concat([met_ls_pos[m] for m in models], dim='model').assign_coords(model=models)

In [None]:
# Stack the MJO-subset scores along a new "model" dimension. Built from `models`
# directly, so it works for any number of models rather than exactly three.
met_4 = xr.concat([met_mjo[m] for m in models], dim='model').assign_coords(model=models)

In [None]:
# CESM2: score of the negative-class subset (met_2) minus score of the neutral subset (met_1).
diff=met_2.sel(model="CESM2")-met_1.sel(model="CESM2") # negative is better for rmse

In [None]:
# Negative values mean better skill of the state-dependent forecasts if metric = rmse; annual signal.
diff.t2m.plot(col='lead',cmap="seismic",robust=True)

In [None]:
# CESM2: score of the positive-class subset (met_3) minus score of the neutral subset (met_1).
diff=met_3.sel(model="CESM2")-met_1.sel(model="CESM2") # negative is better for rmse / worse for acc

In [None]:
# Map of the `t2m` score difference, one panel per lead.
diff.t2m.plot(col='lead',cmap="seismic",robust=True)

In [None]:
# ECMWF: score of the MJO subset (met_4) minus score of the neutral subset (met_1).
diff=met_4.sel(model="ECMWF")-met_1.sel(model="ECMWF") # negative is better for rmse / worse for acc

In [None]:
# Map of the `t2m` score difference, one panel per lead.
diff.t2m.plot(col='lead',cmap="seismic",robust=True)

## Seasonal data

Now we will create seasonal averages of the data. Prior to this we have been looking at annual data. We use `groupby` here to group into seasons and then run `verify` over each of the seasons and models for the metric of our choice. They are then concatenated together and plotted for `lead=15` (weeks 3-4).

In [None]:
# Seasonal scores of the reference ensembles in `he`. `metric_kwargs` is the dictionary
# left behind by the verification cells above.
groupby = "season"
met_seas = {}
for h in hinds:
    seasonal_scores = []
    season_labels = []
    # One group of inits per season.
    for season_label, season_group in tqdm(he[h].get_initialized().groupby(f"init.{groupby}")):
        # Verify using only this season's inits.
        season_he = he[h].sel(init=season_group.init)
        verify_kwargs = dict(metric_kwargs)
        if metric == "rps":
            verify_kwargs["category_edges"] = (verif_edges, hinds_edges[h])
        seasonal_scores.append(season_he.verify(**verify_kwargs))
        season_labels.append(season_label)
    stacked = xr.concat(seasonal_scores, dim=groupby).assign_coords(season=season_labels)
    met_seas[h] = stacked.compute()

In [None]:
# Seasonal scores of the negative-class ensembles. `metric_kwargs` is the dictionary
# left behind by the verification cells above.
groupby = "season"
met_seas_ls_neg = {}
for h in hinds:
    seasonal_scores = []
    season_labels = []
    # One group of inits per season.
    for season_label, season_group in tqdm(he_ls_neg[h].get_initialized().groupby(f"init.{groupby}")):
        # Verify using only this season's inits.
        season_he = he_ls_neg[h].sel(init=season_group.init)
        verify_kwargs = dict(metric_kwargs)
        if metric == "rps":
            verify_kwargs["category_edges"] = (verif_edges, hinds_edges[h])
        seasonal_scores.append(season_he.verify(**verify_kwargs))
        season_labels.append(season_label)
    stacked = xr.concat(seasonal_scores, dim=groupby).assign_coords(season=season_labels)
    met_seas_ls_neg[h] = stacked.compute()

In [None]:
# Seasonal scores of the positive-class ensembles. `metric_kwargs` is the dictionary
# left behind by the verification cells above.
groupby = "season"
met_seas_ls_pos = {}
for h in hinds:
    seasonal_scores = []
    season_labels = []
    # One group of inits per season.
    for season_label, season_group in tqdm(he_ls_pos[h].get_initialized().groupby(f"init.{groupby}")):
        # Verify using only this season's inits.
        season_he = he_ls_pos[h].sel(init=season_group.init)
        verify_kwargs = dict(metric_kwargs)
        if metric == "rps":
            verify_kwargs["category_edges"] = (verif_edges, hinds_edges[h])
        seasonal_scores.append(season_he.verify(**verify_kwargs))
        season_labels.append(season_label)
    stacked = xr.concat(seasonal_scores, dim=groupby).assign_coords(season=season_labels)
    met_seas_ls_pos[h] = stacked.compute()

In [None]:
# Seasonal scores of the MJO-subset ensembles. `metric_kwargs` is the dictionary left
# behind by the verification cells above.
groupby = "season"
met_seas_mjo = {}
for h in hinds:
    met_groups_mjo = []
    label_groups_mjo = []
    # Loops through all inits for a given season.
    for label_group_mjo, group in tqdm(he_mjo[h].get_initialized().groupby(f"init.{groupby}")):
        # select only season inits
        if metric=="rps":
            met_group_mjo = he_mjo[h].sel(init=group.init).verify(category_edges=(verif_edges, hinds_edges[h]),**metric_kwargs)
        else:
            met_group_mjo = he_mjo[h].sel(init=group.init).verify(**metric_kwargs)
        met_groups_mjo.append(met_group_mjo)
        # Was `label_groups_lmjo` (undefined name), which raised a NameError on the
        # first season.
        label_groups_mjo.append(label_group_mjo)
    met_groups_mjo = xr.concat(met_groups_mjo, dim=groupby).assign_coords(season=label_groups_mjo)
    met_seas_mjo[h] = met_groups_mjo.compute()

In [None]:
# Stack the seasonal reference scores along a new "model" dimension. Built from `models`
# directly, so it works for any number of models rather than exactly three.
met_seas_all = xr.concat([met_seas[m] for m in models], dim='model').assign_coords(model=models)

In [None]:
# Stack the seasonal negative-class scores along a new "model" dimension. Built from
# `models` directly, so it works for any number of models rather than exactly three.
met_seas_all_ls_neg = xr.concat([met_seas_ls_neg[m] for m in models], dim='model').assign_coords(model=models)

In [None]:
# Stack the seasonal positive-class scores along a new "model" dimension. Built from
# `models` directly, so it works for any number of models rather than exactly three.
met_seas_all_ls_pos = xr.concat([met_seas_ls_pos[m] for m in models], dim='model').assign_coords(model=models)

In [None]:
# Seasonal score difference: negative-class subset minus the reference scores, all models.
diff=met_seas_all_ls_neg-met_seas_all

In [None]:
#met_seas_all.sel(lead=15)[variable].plot(col=groupby, row='model', robust=True)
# One panel per season (columns) and model (rows) at lead=1.
# NOTE(review): "t2m" is hard-coded here instead of using the `variable` setting — confirm.
diff.sel(lead=1)["t2m"].plot(col=groupby, row='model', robust=True)

In [None]:
# negative values mean better/worse skill of state-dependent forecasts if metric = rmse/acc

In [None]:
#diff.sel(lead=1)["gh_500"].plot(col=groupby, row='model', robust=True)

In [None]:
# Seasonal score difference: positive-class subset minus the reference scores, all models.
diff=met_seas_all_ls_pos-met_seas_all

In [None]:
#met_seas_all.sel(lead=15)[variable].plot(col=groupby, row='model', robust=True)
# One panel per season (columns) and model (rows) at lead=1.
# NOTE(review): "t2m" is hard-coded here instead of using the `variable` setting — confirm.
diff.sel(lead=1)["t2m"].plot(col=groupby, row='model', robust=True)

In [None]:
#diff.sel(lead=1)["gh_500"].plot(col=groupby, row='model', robust=True)

## Area weighting

Next we run cosine area weighting over that data to get a weighted lat/lon average over the domain. We then print out the weights and plot them on bar charts to compare different seasons and models.

In [None]:
# JB comment: A global average no longer makes sense in this context.
# I suggest subsetting for regions: Europe and US.

In [None]:
def _area_weighted_mean(scores):
    """Cosine-of-latitude weighted lat/lon mean of `scores[variable]`, with 'skill' dropped."""
    cos_lat = np.cos(np.deg2rad(scores.lat))
    return scores.weighted(cos_lat).mean(("lat", "lon"))[variable].drop('skill')


weight = _area_weighted_mean(met_seas_all)
weight_ls_neg = _area_weighted_mean(met_seas_all_ls_neg)
weight_ls_pos = _area_weighted_mean(met_seas_all_ls_pos)
# NOTE(review): this cell used to reference `met_seas_all_ls`, which is not defined
# anywhere in the notebook (only the `_neg` and `_pos` variants exist), so it raised a
# NameError. `weight_ls` is pointed at the positive-class scores here so that the second
# bar chart runs; switch to `weight_ls_neg` if that is the intended comparison.
weight_ls = weight_ls_pos

In [None]:
# All seasons present in the weighted scores ...
seasons = np.array(weight.season)
# ... but this is immediately overridden, so only DJF is plotted below.
# NOTE(review): remove the next line to plot every season — confirm which is intended.
seasons = ["DJF"]

In [None]:
# Grouped bar chart of the area-weighted score: one group per lead window, one bar per model.
barWidth = 0.25
rw = np.arange(3)
rw1 = [x + barWidth + 0.025 for x in rw]
rw2 = [x + barWidth + 0.025 for x in rw1]

# y-limits: pad the data range by 60 %, with the lower limit pinned at 0 unless scores go negative.
ymin = weight.min()*0.6+weight.min() if weight.min() < 0. else 0.0
ymax = weight.max()*0.6+weight.max()

# (bar positions, model, colour, alpha) in drawing order: models[0], models[2], models[1].
bar_specs = [
    (rw, models[0], (0, 0.4470, 0.7410), None),
    (rw1, models[2], (0.6350, 0.0780, 0.1840), 0.8),
    (rw2, models[1], (0.4, .75, 0.1), 0.8),
]
tick_positions = [r + barWidth + 0.025 for r in range(3)]
tick_labels = ["Weeks 1-2", "Weeks 3-4", "Weeks 5-6"]

for s in seasons:
    for positions, model_name, color, alpha in bar_specs:
        plt.bar(
            positions,
            weight.sel(season=s, model=model_name),
            width=barWidth,
            color=color,
            alpha=alpha,
            edgecolor="white",
            label=model_name,
        )
    plt.xticks(tick_positions, tick_labels, fontsize=15)
    plt.ylim(ymin, ymax)
    plt.ylabel(metric.upper(), fontsize=18, fontweight="bold")
    plt.xlabel("Week", fontsize=18, fontweight="bold")
    plt.grid()
    plt.legend(borderaxespad=0.6, edgecolor="black", prop={'size': 15}, loc="upper right")
    plt.title(variable.upper()+" "+metric.upper()+" for season = "+s, fontsize=18, fontweight="bold")
    plt.show()

In [None]:
# Same grouped bar chart as for `weight`, drawn for the state-dependent scores `weight_ls`.
barWidth = 0.25
rw = np.arange(3)
rw1 = [x + barWidth + 0.025 for x in rw]
rw2 = [x + barWidth + 0.025 for x in rw1]

# NOTE(review): the y-limits are taken from `weight`, not `weight_ls`. That keeps both
# charts on the same axis, but confirm it is intentional.
ymin = weight.min()*0.6+weight.min() if weight.min() < 0. else 0.0
ymax = weight.max()*0.6+weight.max()

# (bar positions, model, colour, alpha) in drawing order: models[0], models[2], models[1].
bar_specs = [
    (rw, models[0], (0, 0.4470, 0.7410), None),
    (rw1, models[2], (0.6350, 0.0780, 0.1840), 0.8),
    (rw2, models[1], (0.4, .75, 0.1), 0.8),
]
tick_positions = [r + barWidth + 0.025 for r in range(3)]
tick_labels = ["Weeks 1-2", "Weeks 3-4", "Weeks 5-6"]

for s in seasons:
    for positions, model_name, color, alpha in bar_specs:
        plt.bar(
            positions,
            weight_ls.sel(season=s, model=model_name),
            width=barWidth,
            color=color,
            alpha=alpha,
            edgecolor="white",
            label=model_name,
        )
    plt.xticks(tick_positions, tick_labels, fontsize=15)
    plt.ylim(ymin, ymax)
    plt.ylabel(metric.upper(), fontsize=18, fontweight="bold")
    plt.xlabel("Week", fontsize=18, fontweight="bold")
    plt.grid()
    plt.legend(borderaxespad=0.6, edgecolor="black", prop={'size': 15}, loc="upper right")
    plt.title(variable.upper()+" "+metric.upper()+" for season = "+s, fontsize=18, fontweight="bold")
    plt.show()