# Generate Biweekly averages of the `lead` dimension

---

Creates biweekly averages along the `lead` dimension of the raw, anomaly, or climatology hindcast files.

At the bottom of the notebook we also compute biweekly averages for the verification data.

In [1]:
import cftime
import numpy as np
import xarray as xr
# Keep variable/dataset attributes through xarray operations (e.g. the means below).
xr.set_options(keep_attrs=True)
import climpred
from tqdm import tqdm
import dask.array as da
import matplotlib.pyplot as plt
from matplotlib.ticker import FixedLocator
import xskillscore as xs
import warnings
# NOTE(review): this silences *all* warnings for the session, including
# deprecation and performance warnings from xarray/dask.
warnings.filterwarnings("ignore")

from dask.distributed import Client
import dask.config
# Ask dask not to split the large chunks that slicing/indexing may produce.
dask.config.set({"array.slicing.split_large_chunks": False})

<dask.config.set at 0x146f4dad6730>

In [2]:
# Connect to a Dask scheduler at a fixed address.
# NOTE(review): the address looks session-specific; update it before re-running.
scheduler_address = "tcp://128.117.208.100:43889"
client = Client(scheduler_address)

Choose your model and data type

In [11]:
model = "CESM2_orig" # ECMWF, CESM2_SPPT, CESM2_orig, NCEP, or ECCC
data = "anom" # raw, anom, or climatology

In [12]:
# Open the daily hindcast store for the selected model / data type as float32.
hindcast_store = f"/glade/campaign/mmm/c3we/jaye/S2S_zarr/{model}.{data}.daily.geospatial.zarr/"
hinda = xr.open_zarr(hindcast_store, consolidated=True).astype('float32')

Because each model has a different `lead` dimension size, the expected number of daily leads per model is hard-coded below.

In [13]:
# Number of daily leads each supported model's hindcast is expected to contain.
expected_n_leads = {
    "ECMWF": 46,
    "CESM2_SPPT": 46,
    "CESM2_orig": 46,
    "NCEP": 43,
    "ECCC": 32,
}

# Fail early with a clear message instead of silently skipping the relabel
# (which would only surface later as an obscure error in make_biweekly).
if model not in expected_n_leads:
    raise ValueError(
        f"Unsupported model {model!r}; expected one of {sorted(expected_n_leads)}"
    )

n_leads = expected_n_leads[model]
if hinda.sizes["lead"] != n_leads:
    raise ValueError(
        f"{model} hindcast has {hinda.sizes['lead']} leads, expected {n_leads}"
    )

# Relabel `lead` as integer day offsets 0..n_leads-1 so that make_biweekly
# can select windows with plain integer ranges.
hinda["lead"] = range(n_leads)

A function to calculate the biweekly averages.

In [14]:
def make_biweekly(hind, model_name=None):
    """Average daily leads into biweekly windows along the ``lead`` dimension.

    Windows (inclusive lead days):
      * weeks 1-2: leads 1-15
      * weeks 3-4: leads 16-30
      * weeks 5-6: leads 31-44 (ECMWF / CESM2_SPPT / CESM2_orig) or
        leads 31-42 (NCEP); not built for ECCC.

    Parameters
    ----------
    hind : xarray Dataset or DataArray
        Hindcast whose ``lead`` coordinate holds integer day offsets.
    model_name : str, optional
        Model whose lead layout should be used. Defaults to the
        notebook-level ``model`` variable, so existing ``make_biweekly(hind)``
        calls keep working.

    Returns
    -------
    xarray Dataset or DataArray
        Window means concatenated along ``lead``. Each ``lead`` label is the
        first day of its window (units ``days``).

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported models.
    """
    if model_name is None:
        model_name = model  # fall back to the notebook-level selection

    # Length in days of the weeks 5-6 window per model; None means the
    # forecast is too short and no third window is produced.
    week56_days = {
        "ECMWF": 14,
        "CESM2_SPPT": 14,
        "CESM2_orig": 14,
        "NCEP": 12,
        "ECCC": None,
    }
    if model_name not in week56_days:
        # Previously this fell through every branch and died with an
        # UnboundLocalError on `hind_bw`.
        raise ValueError(
            f"Unsupported model {model_name!r}; expected one of {sorted(week56_days)}"
        )

    # (first lead day, number of days) for each biweekly window.
    windows = [(1, 15), (16, 15)]
    if week56_days[model_name] is not None:
        windows.append((31, week56_days[model_name]))

    # skipna=False: a window with any missing day stays missing rather than
    # being averaged over fewer days.
    window_means = [
        hind.sel(lead=range(first, first + ndays)).mean(dim='lead', skipna=False)
        for first, ndays in windows
    ]
    hind_bw = xr.concat(window_means, dim='lead')

    # Label each window by its first lead day. The second label used to be 15
    # although that window starts at lead 16; deriving the labels from the
    # window table keeps them consistent with the data actually averaged.
    # Downstream code that selected lead=15 must now select lead=16.
    hind_bw['lead'] = [first for first, _ in windows]
    hind_bw['lead'].attrs['units'] = 'days'
    return hind_bw

## Apply the function to make biweekly averages

In [15]:
# Collapse the daily `lead` axis of the hindcast into biweekly window means.
hind_bw = make_biweekly(hinda)

Rechunk and write the data to a new `zarr` file!

In [16]:
# Climatology files are indexed by day of year; raw/anomaly files are indexed
# by initialization date and ensemble member, so they need different chunking.
if data == "climatology":
    target_chunks = {"dayofyear": 1, "lead": -1, "lat": 181, "lon": 360}
else:
    target_chunks = {"member": -1, "init": 1, "lead": -1, "lat": 181, "lon": 360}

hind_bw = hind_bw.chunk(target_chunks).persist()

if data == "anom":
    # `drop_vars` replaces the deprecated `Dataset.drop`. errors="ignore"
    # keeps this cell re-runnable once "dayofyear" has already been removed.
    hind_bw = hind_bw.drop_vars("dayofyear", errors="ignore")

In [17]:
# Display the rechunked dataset to check shapes and chunk sizes before writing.
hind_bw

Unnamed: 0,Array,Chunk
Bytes,10.59 GiB,8.20 MiB
Shape,"(3, 1322, 11, 181, 360)","(3, 1, 11, 181, 360)"
Count,1 Graph Layer,1322 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 10.59 GiB 8.20 MiB Shape (3, 1322, 11, 181, 360) (3, 1, 11, 181, 360) Count 1 Graph Layer 1322 Chunks Type float32 numpy.ndarray",1322  3  360  181  11,

Unnamed: 0,Array,Chunk
Bytes,10.59 GiB,8.20 MiB
Shape,"(3, 1322, 11, 181, 360)","(3, 1, 11, 181, 360)"
Count,1 Graph Layer,1322 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.59 GiB,8.20 MiB
Shape,"(3, 1322, 11, 181, 360)","(3, 1, 11, 181, 360)"
Count,1 Graph Layer,1322 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 10.59 GiB 8.20 MiB Shape (3, 1322, 11, 181, 360) (3, 1, 11, 181, 360) Count 1 Graph Layer 1322 Chunks Type float32 numpy.ndarray",1322  3  360  181  11,

Unnamed: 0,Array,Chunk
Bytes,10.59 GiB,8.20 MiB
Shape,"(3, 1322, 11, 181, 360)","(3, 1, 11, 181, 360)"
Count,1 Graph Layer,1322 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.59 GiB,8.20 MiB
Shape,"(3, 1322, 11, 181, 360)","(3, 1, 11, 181, 360)"
Count,1 Graph Layer,1322 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 10.59 GiB 8.20 MiB Shape (3, 1322, 11, 181, 360) (3, 1, 11, 181, 360) Count 1 Graph Layer 1322 Chunks Type float32 numpy.ndarray",1322  3  360  181  11,

Unnamed: 0,Array,Chunk
Bytes,10.59 GiB,8.20 MiB
Shape,"(3, 1322, 11, 181, 360)","(3, 1, 11, 181, 360)"
Count,1 Graph Layer,1322 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.59 GiB,8.20 MiB
Shape,"(3, 1322, 11, 181, 360)","(3, 1, 11, 181, 360)"
Count,1 Graph Layer,1322 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 10.59 GiB 8.20 MiB Shape (3, 1322, 11, 181, 360) (3, 1, 11, 181, 360) Count 1 Graph Layer 1322 Chunks Type float32 numpy.ndarray",1322  3  360  181  11,

Unnamed: 0,Array,Chunk
Bytes,10.59 GiB,8.20 MiB
Shape,"(3, 1322, 11, 181, 360)","(3, 1, 11, 181, 360)"
Count,1 Graph Layer,1322 Chunks
Type,float32,numpy.ndarray


In [18]:
%time hind_bw.to_zarr("/glade/campaign/mmm/c3we/jaye/S2S_zarr/"+model+"."+data+".biweekly.geospatial.zarr/",mode="w",consolidated=True)

CPU times: user 2.28 s, sys: 124 ms, total: 2.4 s
Wall time: 40.1 s


<xarray.backends.zarr.ZarrStore at 0x146efba6be40>

2025-01-09 12:03:09,137 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client


# Make biweekly averages for verification data as well!

It's a different enough process that we want to do it separately.

In [13]:
# Open the daily verification store as float32.
verif_store = "/glade/campaign/mmm/c3we/jaye/S2S_zarr/OBS_SUBX.raw.daily.geospatial.zarr/"
verif = xr.open_zarr(verif_store, consolidated=True).astype('float32')

In [14]:
# Display the daily verification dataset to check its shape and chunking.
verif

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,90.73 MiB
Shape,"(8036, 181, 360)","(365, 181, 360)"
Count,2 Graph Layers,23 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.95 GiB 90.73 MiB Shape (8036, 181, 360) (365, 181, 360) Count 2 Graph Layers 23 Chunks Type float32 numpy.ndarray",360  181  8036,

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,90.73 MiB
Shape,"(8036, 181, 360)","(365, 181, 360)"
Count,2 Graph Layers,23 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,90.73 MiB
Shape,"(8036, 181, 360)","(365, 181, 360)"
Count,2 Graph Layers,23 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.95 GiB 90.73 MiB Shape (8036, 181, 360) (365, 181, 360) Count 2 Graph Layers 23 Chunks Type float32 numpy.ndarray",360  181  8036,

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,90.73 MiB
Shape,"(8036, 181, 360)","(365, 181, 360)"
Count,2 Graph Layers,23 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,90.73 MiB
Shape,"(8036, 181, 360)","(365, 181, 360)"
Count,2 Graph Layers,23 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.95 GiB 90.73 MiB Shape (8036, 181, 360) (365, 181, 360) Count 2 Graph Layers 23 Chunks Type float32 numpy.ndarray",360  181  8036,

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,90.73 MiB
Shape,"(8036, 181, 360)","(365, 181, 360)"
Count,2 Graph Layers,23 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,90.98 MiB
Shape,"(8036, 181, 360)","(366, 181, 360)"
Count,2 Graph Layers,22 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.95 GiB 90.98 MiB Shape (8036, 181, 360) (366, 181, 360) Count 2 Graph Layers 22 Chunks Type float32 numpy.ndarray",360  181  8036,

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,90.98 MiB
Shape,"(8036, 181, 360)","(366, 181, 360)"
Count,2 Graph Layers,22 Chunks
Type,float32,numpy.ndarray


In [15]:
verif_biweekly = verif.rolling(time=15, center=False).mean()
verif_biweekly = verif_biweekly.isel(time=slice(14, None)).assign_coords(time=verif.time.isel(time=slice(None, -14)))
%time verif_biweekly = verif_biweekly.chunk({'time':-1,'lon': 360, 'lat': 181}).persist()

CPU times: user 240 ms, sys: 598 µs, total: 240 ms
Wall time: 509 ms


In [17]:
# NOTE(review): the "dayofyear" drop is left disabled here; presumably the raw
# verification data carries no such coordinate — confirm before deleting it.
#verif_biweekly = verif_biweekly.drop("dayofyear")
# Display the biweekly verification dataset to check its shape and chunking.
verif_biweekly

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,1.95 GiB
Shape,"(8022, 181, 360)","(8022, 181, 360)"
Count,1 Graph Layer,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.95 GiB 1.95 GiB Shape (8022, 181, 360) (8022, 181, 360) Count 1 Graph Layer 1 Chunks Type float32 numpy.ndarray",360  181  8022,

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,1.95 GiB
Shape,"(8022, 181, 360)","(8022, 181, 360)"
Count,1 Graph Layer,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,1.95 GiB
Shape,"(8022, 181, 360)","(8022, 181, 360)"
Count,1 Graph Layer,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.95 GiB 1.95 GiB Shape (8022, 181, 360) (8022, 181, 360) Count 1 Graph Layer 1 Chunks Type float32 numpy.ndarray",360  181  8022,

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,1.95 GiB
Shape,"(8022, 181, 360)","(8022, 181, 360)"
Count,1 Graph Layer,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,1.95 GiB
Shape,"(8022, 181, 360)","(8022, 181, 360)"
Count,1 Graph Layer,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.95 GiB 1.95 GiB Shape (8022, 181, 360) (8022, 181, 360) Count 1 Graph Layer 1 Chunks Type float32 numpy.ndarray",360  181  8022,

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,1.95 GiB
Shape,"(8022, 181, 360)","(8022, 181, 360)"
Count,1 Graph Layer,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,1.95 GiB
Shape,"(8022, 181, 360)","(8022, 181, 360)"
Count,1 Graph Layer,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.95 GiB 1.95 GiB Shape (8022, 181, 360) (8022, 181, 360) Count 1 Graph Layer 1 Chunks Type float32 numpy.ndarray",360  181  8022,

Unnamed: 0,Array,Chunk
Bytes,1.95 GiB,1.95 GiB
Shape,"(8022, 181, 360)","(8022, 181, 360)"
Count,1 Graph Layer,1 Chunks
Type,float32,numpy.ndarray


In [18]:
%time verif_biweekly.to_zarr("/glade/campaign/mmm/c3we/jaye/S2S_zarr/OBS_SUBX.raw.biweekly.geospatial.zarr/",mode="w",consolidated=True)

CPU times: user 17.1 ms, sys: 24.4 ms, total: 41.5 ms
Wall time: 2.63 s


<xarray.backends.zarr.ZarrStore at 0x1459db027c10>

2025-01-07 13:40:49,181 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client
