# Concat Operator

* https://xarray.pydata.org/en/stable/user-guide/combining.html
* https://xarray.pydata.org/en/stable/user-guide/io.html#reading-multi-file-datasets

In [1]:
import xarray as xr
import pandas as pd

In [2]:
from pathlib import Path
# Root directory that is searched for the CMIP6 decadal NetCDF files.
# NOTE(review): absolute, user-specific path — adjust before running elsewhere.
data_dir = Path("/Users/pingu/data/cmip6-decadal/orig")

In [3]:
# Recursively gather every *.nc file below data_dir, then order the paths
# so the file list (and everything derived from it) is deterministic.
cmip6_files = list(data_dir.glob('**/*.nc'))
cmip6_files.sort()
cmip6_files

[PosixPath('/Users/pingu/data/cmip6-decadal/orig/tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s1970-r10i1p1f1_gn_197011-198012.nc'),
 PosixPath('/Users/pingu/data/cmip6-decadal/orig/tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s1970-r1i1p1f1_gn_197011-198012.nc'),
 PosixPath('/Users/pingu/data/cmip6-decadal/orig/tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s1970-r2i1p1f1_gn_197011-198012.nc'),
 PosixPath('/Users/pingu/data/cmip6-decadal/orig/tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s1970-r3i1p1f1_gn_197011-198012.nc'),
 PosixPath('/Users/pingu/data/cmip6-decadal/orig/tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s1970-r4i1p1f1_gn_197011-198012.nc'),
 PosixPath('/Users/pingu/data/cmip6-decadal/orig/tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s1970-r5i1p1f1_gn_197011-198012.nc'),
 PosixPath('/Users/pingu/data/cmip6-decadal/orig/tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s1970-r6i1p1f1_gn_197011-198012.nc'),
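 PosixPath('/Users/pingu/data/cmip6-decadal/orig/tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s1970-r7i1p1f1_gn_197011-198012.nc'),
 PosixPath('/Users/pingu/data/cmip6-decadal/orig/tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s1970-r8i1p1f1_gn_197011-198012.nc'),
 PosixPath('/Users/pingu/data/cmip6-decadal/orig/tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s1970-r9i1p1f1_gn_197011-198012.nc')]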

In [4]:
# Open only the first file of the sorted list to inspect a single dataset
# before combining all of them.
ds0 = xr.open_dataset(cmip6_files[0])
ds0

In [5]:
# Attribute-style lookup of `realization_index` on the first dataset; the
# output below is a plain integer (presumably a global file attribute — TODO confirm).
ds0.realization_index

10

In [6]:
# Open each discovered file as its own Dataset, keeping the order of cmip6_files.
datasets = list(map(xr.open_dataset, cmip6_files))
len(datasets)

10

In [7]:
# Read the realization_index of every opened dataset, in the same order as
# `datasets`, so each value lines up with its ensemble member.
realization_values = [getattr(member, "realization_index") for member in datasets]
realization_values

[10, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [8]:
# Wrap the per-file realization numbers in a named pandas Index. The Index is
# passed to xr.concat as the concat dimension, and its name ("realization")
# is reused wherever the dimension has to be referenced by name.
realization_dim = pd.Index(realization_values, name="realization")

In [9]:
# Concatenate the per-file datasets, one entry per dataset in list order.
# Passing a pandas Index as `dim` lets xarray take the dimension name from the
# Index name and (per the xarray docs) use its values as the coordinate.
ds_concat = xr.concat(datasets, dim=realization_dim)
ds_concat

In [10]:
# Positional selection: take the first entry along `realization`.
# NOTE(review): with the file order shown earlier ([10, 1, 2, ...]) position 0
# should be realization 10, not realization 1 — confirm.
ds_concat.isel(realization=0)

## Average over the `realization` dimension

In [11]:
# Ensemble mean: reduce over the `realization` dimension. skipna=True is
# passed so missing values are skipped, and keep_attrs=True so attributes are
# carried over to the result.
ds_mean = ds_concat.mean(dim=realization_dim.name, skipna=True, keep_attrs=True)
ds_mean

In [12]:
# Write the ensemble mean to NetCDF. to_netcdf does not create missing parent
# directories, so make sure the target directory exists before writing.
# NOTE(review): "decdal" in the filename looks like a typo for "decadal"; it is
# left unchanged because other code may read the file under this name.
Path("out").mkdir(parents=True, exist_ok=True)
ds_mean.to_netcdf("out/cmip6_decdal_mean.nc")

## Use `open_mfdataset`

In [17]:
# Alternative to the manual open + concat: open all files in one call and,
# with combine='nested', join them in list order along `concat_dim`.
# NOTE(review): only the dimension name is passed here, not the pd.Index, so
# the realization numbers are presumably not attached as coordinate values — confirm.
ds_agg = xr.open_mfdataset(cmip6_files, concat_dim=realization_dim.name, combine='nested')

In [18]:
# Mean over the `realization` dimension of the multi-file dataset, skipping
# missing values (skipna=True) and keeping attributes (keep_attrs=True).
ds_avg = ds_agg.mean(dim=realization_dim.name, skipna=True, keep_attrs=True)

In [19]:
# Write the open_mfdataset-based mean to NetCDF. to_netcdf does not create
# missing parent directories, so make sure the target directory exists first.
# NOTE(review): "decdal" in the filename looks like a typo for "decadal"; it is
# left unchanged because other code may read the file under this name.
Path("out").mkdir(parents=True, exist_ok=True)
ds_avg.to_netcdf("out/cmip6_decdal_avg.nc")