# Calculate target distances averaged over lags

Uses the outputs of `0-1_target_distance.ipynb`.

In [8]:
import os
import sys
import numpy as np
import pandas as pd
import xarray as xr

# Make the sibling ``src`` directory importable: resolve it to an absolute
# path and register it on sys.path only if it is not already there.
module_path = os.path.abspath('../src')
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)

from utils import to_monthly

In [15]:
# Parameters
vname = 'sst'
grid = '2x2'
lat_slice = (-10, 10)
lon_slice = (120, 290)
data_dir = '../data/cesm2'
is_scaled = True         # if True, divide the distance by the per-month variance
lags = [0, -3, -6, -9]   # lags (in months) to average over

# Original target distance
f = f'{data_dir}/target_distance.nc'
msd = xr.open_dataarray(f)

# Anomaly data, cut down to the latitude/longitude window set above
lat_window, lon_window = slice(*lat_slice), slice(*lon_slice)
da = xr.open_dataarray(f'{data_dir}/{vname}_anomaly_{grid}.nc')
da = da.sel(lat=lat_window, lon=lon_window)
# NOTE(review): to_monthly comes from utils; presumably it splits time into
# (year, month), which the 'year' reduction below relies on -- confirm.
da_month = to_monthly(da)

# Variance across ensemble members and years, then averaged over the domain
pointwise_variance = da_month.var(dim=['ens', 'year'])
variance = pointwise_variance.mean(dim=['lat', 'lon'])

# Scale MSD by that variance so that each month's contribution is equal
msd = msd / variance if is_scaled else msd

  result = getattr(npmodule, name)(values, axis=axis, **kwargs)


In [10]:
# Stack (year, month) into a single `time` dimension
msd_stack = msd.stack(time=('year', 'month'))

# Find dates to calculate shadowing distance: one timestamp (first day of the
# month) for every (year, month) entry of the stacked time axis.
dates = pd.DataFrame(list(msd_stack.time.data), columns=['year', 'month'])
dates['day'] = 1
# The Series is named 'time'; presumably that name becomes the dimension name
# when the Series is later used as the concat dimension -- confirm.
dates = pd.to_datetime(dates).rename('time')

# Keep only the dates for which every lagged month is available. Trim by the
# most negative / most positive lag rather than by the last list entry, so the
# result does not depend on the order of `lags`.
# Assumes the stacked time axis holds consecutive months.
n_before = max(0, -min(lags))   # months needed before the target date
n_after = max(0, max(lags))     # months needed after the target date
dates = dates.iloc[n_before:len(dates) - n_after]

In [13]:
def _lagged_distance(target, lag):
    """Return the stacked distance at `target` shifted by `lag` months.

    The slice is selected at the lagged (year, month) and its `lyear`
    coordinate is offset by the number of calendar years between the target
    date and the lagged date, so slices taken at different lags can be
    aligned on `lyear`.
    """
    source = target + pd.DateOffset(months=lag)
    shifted_lyear = msd_stack.lyear.data + target.year - source.year
    snapshot = msd_stack.sel(time=(source.year, source.month), drop=True)
    return snapshot.assign_coords(lyear=shifted_lyear)


msd_dates = []
for d in dates:
    per_lag = xr.concat([_lagged_distance(d, lag) for lag in lags],
                        pd.Index(lags, name='lag'))
    # skipna=False: a missing value at any lag makes the lag mean missing
    msd_dates.append(per_lag.mean(dim='lag', skipna=False))

# One entry per target date, concatenated along the dates Series
msd_shadow = xr.concat(msd_dates, dates, join='inner')
msd_shadow = to_monthly(msd_shadow)

In [16]:
# Output file name: <input path without extension>[_scaled]_<lags>.nc
lags_str = '_'.join(str(lag) for lag in lags)
# splitext only strips the trailing extension; splitting on ".nc" would cut the
# path at the first ".nc" found anywhere in it (e.g. inside a directory name).
stem = os.path.splitext(f)[0]
scaled_tag = '_scaled' if is_scaled else ''
outf = f'{stem}{scaled_tag}_{lags_str}.nc'

# Save as float32. The encoding key must be the name of the variable being
# written, so take it from msd_shadow itself rather than from msd.
encoding = {msd_shadow.name: {'dtype': 'float32'}}
msd_shadow.to_netcdf(outf, encoding=encoding)
print(outf)

../data/cesm2/target_distance_scaled_0_-3_-6_-9.nc


In [17]:
# Display the lag-averaged distance as the cell output
msd_shadow