# Weighted Average

* https://docs.xarray.dev/en/stable/examples/area_weighted_temperature.html
* https://docs.xarray.dev/en/stable/generated/xarray.DataArray.weighted.html
* https://docs.xarray.dev/en/stable/user-guide/weather-climate.html
* https://towardsdatascience.com/xarray-recipes-for-earth-scientists-c12a10c6a293#c729
* https://towardsdatascience.com/the-correct-way-to-average-the-globe-92ceecd172b7
* https://xcdat.readthedocs.io/en/stable/generated/xarray.Dataset.spatial.average.html
* https://xcdat.readthedocs.io/en/stable/examples/spatial-average.html

In [None]:
import cf_xarray as cfxr
import xarray as xr
import pandas as pd
import numpy as np

## Search the intake catalog

In [None]:
import intake

In [None]:
# YAML intake catalog published in the cp4cds/c3s_34g_manifests repository.
cat_url = "https://raw.githubusercontent.com/cp4cds/c3s_34g_manifests/master/intake/catalogs/c3s.yaml"

# Open the catalog and display the entries it offers.
cat = intake.open_catalog(cat_url)
list(cat)

In [None]:
# Load the 'c3s-cmip6' catalog entry into memory and preview its first rows.
# NOTE(review): the cells below treat the result as a pandas DataFrame with
# one column per facet plus a 'path' column — confirm against the catalog.
df_cmip6 = cat['c3s-cmip6'].read()
df_cmip6.head()

In [None]:
# Target dataset:
# c3s-cmip6.ScenarioMIP.INM.INM-CM5-0.ssp245.r1i1p1f1.Amon.rlds.gr1.v20190619

# Facet values a catalog row must carry to be selected.
facets = {
    "experiment_id": "ssp245",
    "activity_id": "ScenarioMIP",
    "institution_id": "INM",
    "member_id": "r1i1p1f1",
    "table_id": "Amon",
    "source_id": "INM-CM5-0",
    "variable_id": "rlds",
}

# One equality test per facet, AND-ed together left to right into a row mask.
conditions = [df_cmip6[column] == wanted for column, wanted in facets.items()]
matches = conditions[0]
for condition in conditions[1:]:
    matches = matches & condition

df = df_cmip6.loc[matches]
df.head()

In [None]:
# Pick the dataset path to download. The unique paths are taken in catalog
# order, so the choice is reproducible between sessions; going through a set
# would make the selected element depend on hash ordering.
unique_paths = df["path"].drop_duplicates()
if unique_paths.empty:
    # Same exception type as indexing an empty list, but with a usable message.
    raise IndexError("no catalog entry matches the selected facets")
ds_path = unique_paths.iloc[0]
ds_path

In [None]:
# Build the download URL by appending the catalog path to a THREDDS
# fileServer root. Alternative root on the Copernicus node, kept for reference:
# ds_url = f"https://data.mips.copernicus-climate.eu/thredds/fileServer/esg_c3s-cmip6/{ds_path}"
ds_url = f"http://esgf3.dkrz.de/thredds/fileServer/cmip6/{ds_path}"
ds_url

## Download test data

In [None]:
from pathlib import Path
# Directory that is searched for the downloaded test file.
data_dir = Path("/tmp")

In [None]:
!wget $ds_url -O /tmp/test.nc

In [None]:
# Look only for the file downloaded directly into data_dir. A recursive
# '**/test.nc' pattern would also match same-named files in unrelated
# subdirectories, and those sort ahead of the top-level file, so nc_files[0]
# could point at the wrong dataset.
nc_files = sorted(data_dir.glob('test.nc'))
nc_files

## Calculate the weighted average

In [None]:
# Disabled global alternative to passing keep_attrs=True on individual calls:
# xr.set_options(keep_attrs=True)

from roocs_utils.xarray_utils.xarray_utils import open_xr_dataset

# Open the first file found, using the roocs_utils helper rather than xarray
# directly. The plain-xarray call is kept for reference:
# ds = xr.open_dataset(nc_files[0], use_cftime=True)
ds = open_xr_dataset(nc_files[0].as_posix())
ds

In [None]:
# Replace the time coordinate with the array form of its index.
# NOTE(review): presumably normalises the time values for later steps — confirm.
ds['time'] = ds.indexes['time'].to_numpy()
# Remove the time bounds variable.
# NOTE(review): presumably so the bounds are not averaged with the data — confirm.
ds = ds.drop_vars(["time_bnds"])
ds

In [None]:
# Weights proportional to the cosine of latitude (latitude given in degrees).
weights = np.cos(np.deg2rad(ds.lat))
weights.name = "weights"
# fillna returns a new object rather than modifying in place, so the result
# must be assigned back; xarray's weighted() rejects weights with missing values.
weights = weights.fillna(0)
weights

In [None]:
# Attach the latitude weights to the dataset; the returned object is used
# below to compute a weighted mean.
ds_weighted = ds.weighted(weights)
ds_weighted

In [None]:
# Weighted mean over the "lon" and "lat" dimensions, keeping attributes.
weighted_mean = ds_weighted.mean(("lon", "lat"), keep_attrs=True)
weighted_mean