<h1> Demo. Data Access </h1>

This notebook aims at documenting how to access & manipulate the input datasets for one "ocean data challenge".
Two *tar.gz* archives with Sea Surface Height (SSH) datasets are available on the MEOM opendap server.
The **dc_ref** dataset refers to the reference simulation, a.k.a. the NATL60-CMJ165 nature run carried out by the MEOM Team. The **dc_obs** dataset corresponds to the observation datasets (for various altimeter missions) based on nadir (TOPEX/Poseidon, Jason1, Envisat, Geosat-2) and large swath (SWOT) orbits constructed with the [SWOTsimulator](https://github.com/SWOTsimulator/swotsimulator) package.  

In [None]:
import xarray as xr
import cftime
import geoviews as gv
import matplotlib.pylab as plt
import numpy as np
from datetime import datetime
# Select bokeh as the geoviews plotting backend for the interactive displays below.
gv.extension('bokeh')

#### 1) Download & extract **dc_ref** dataset 

In [None]:
%time
import gcsfs
fs = gcsfs.GCSFileSystem('pangeo-181919', requester_pays=True)
mapfilesref = fs.get_mapper('pangeo-meom/data-challenge-test/dc_ref')
dc_ref = xr.open_zarr(mapfilesref)

dc_ref

##### Extract daily mean 1 month sample

In [None]:
# Bounds (YYYYMMDD strings) of the window extracted from the reference run.
time_min, time_max = '20130901', '20130930'

# Cut the window first, then average it into one field per day.
dc_ref_window = dc_ref.sel(time=slice(time_min, time_max))
dc_ref_sample = dc_ref_window.resample(time="1D").mean()
dc_ref_sample

##### Display dc_ref_sample sea surface height (variable sossheig) dataset

In [None]:
# Key dimensions of the gridded field; 'sossheig' is the plotted value.
ssh_key_dims = ['lon', 'lat', 'time']
dataset = gv.Dataset(dc_ref_sample, ssh_key_dims, 'sossheig')

# Convert the dataset to image elements and style them.
images = dataset.to(gv.Image)
image_style = dict(cmap='gist_stern', colorbar=True, width=500, height=400)
images.opts(**image_style)

#### 2) Download & extract **dc_obs** dataset

In [None]:
# Download the dc_obs archive from the MEOM opendap server into the current working directory.
!wget https://ige-meom-opendap.univ-grenoble-alpes.fr/thredds/fileServer/meomopendap/extract/ocean-data-challenges/dc_data1/dc_obs.tar.gz 

In [None]:
# Unpack the archive in the current working directory; later cells read the files from ./dc_obs/.
!tar -xvf dc_obs.tar.gz

In [None]:
# Ref start date of NATL60 simulation
# Used as the epoch of the 'seconds since ...' units string when the
# observation time axes are decoded in the preprocessing functions.
simu_start_date = '2012-10-01'

##### Read nadir (Envisat Theoretical track)

In [None]:
def preprocess_time_en(ds):
    """Decode the time axis and blank out zero-filled samples of one Envisat file.

    Meant to be passed as the ``preprocess`` callback of ``xr.open_mfdataset``.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset of a single file with ``time``, ``ssh_obs`` and ``ssh_model``.
        ``time`` is handled as a number of seconds (it is decoded with a
        'seconds since ...' units string).

    Returns
    -------
    xarray.Dataset
        The same dataset object, updated in place: ``time`` is shifted back by
        22.10114 days and decoded relative to ``simu_start_date``; ``ssh_obs``
        and ``ssh_model`` are NaN wherever ``ssh_model`` equals 0.
    """
    # This preprocessing must be avoided in future dataset release
    time_shift_en = 22.10114 # days
    ds['time'] = cftime.num2date(ds['time'].values - time_shift_en * 24 * 3600, 'seconds since ' + simu_start_date)
    # Change 0 as nan for the first and last cycles
    # The mask is built once, before either variable is touched, and applied
    # with `where` so the data are not forced into memory the way an
    # assignment to `.values` would.
    non_zero_model = ds['ssh_model'] != 0.
    ds['ssh_obs'] = ds['ssh_obs'].where(non_zero_model)
    ds['ssh_model'] = ds['ssh_model'].where(non_zero_model)
    return ds

# Open all Envisat files as one dataset; each file goes through
# preprocess_time_en before the pieces are concatenated along 'time'.
ds_en_nadir = xr.open_mfdataset(
    './dc_obs/en/*.nc',
    preprocess=preprocess_time_en,
    combine='nested',
    concat_dim='time',
)
ds_en_nadir

##### Read nadir (Geosat-2 Theoretical track)

In [None]:
def preprocess_time_g2(ds):
    """Decode the time axis and blank out zero-filled samples of one Geosat-2 file.

    Meant to be passed as the ``preprocess`` callback of ``xr.open_mfdataset``.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset of a single file with ``time``, ``ssh_obs`` and ``ssh_model``.
        ``time`` is handled as a number of seconds (it is decoded with a
        'seconds since ...' units string).

    Returns
    -------
    xarray.Dataset
        The same dataset object, updated in place: ``time`` is shifted back by
        15.08489 days and decoded relative to ``simu_start_date``; ``ssh_obs``
        and ``ssh_model`` are NaN wherever ``ssh_model`` equals 0.
    """
    # This preprocessing must be avoided in future dataset release
    time_shift_g2 = 15.08489 # days
    ds['time'] = cftime.num2date(ds['time'].values - time_shift_g2 * 24 * 3600, 'seconds since ' + simu_start_date)
    # Change 0 as nan for the first and last cycles
    # The mask is built once, before either variable is touched, and applied
    # with `where` so the data are not forced into memory the way an
    # assignment to `.values` would.
    non_zero_model = ds['ssh_model'] != 0.
    ds['ssh_obs'] = ds['ssh_obs'].where(non_zero_model)
    ds['ssh_model'] = ds['ssh_model'].where(non_zero_model)
    return ds

# Open all Geosat-2 files as one dataset; each file goes through
# preprocess_time_g2 before the pieces are concatenated along 'time'.
ds_g2_nadir = xr.open_mfdataset(
    './dc_obs/g2/*.nc',
    preprocess=preprocess_time_g2,
    combine='nested',
    concat_dim='time',
)
ds_g2_nadir

##### Read nadir (Jason-1 Theoretical track)

In [None]:
def preprocess_time_j1(ds):
    """Decode the time axis and blank out zero-filled samples of one Jason-1 file.

    Meant to be passed as the ``preprocess`` callback of ``xr.open_mfdataset``.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset of a single file with ``time``, ``ssh_obs`` and ``ssh_model``.
        ``time`` is handled as a number of seconds (it is decoded with a
        'seconds since ...' units string).

    Returns
    -------
    xarray.Dataset
        The same dataset object, updated in place: ``time`` is shifted back by
        3.736615 days and decoded relative to ``simu_start_date``; ``ssh_obs``
        and ``ssh_model`` are NaN wherever ``ssh_model`` equals 0.
    """
    # This preprocessing must be avoided in future dataset release
    time_shift_j1 = 3.736615 # days
    ds['time'] = cftime.num2date(ds['time'].values - time_shift_j1 * 24 * 3600, 'seconds since ' + simu_start_date)
    # Change 0 as nan for the first and last cycles
    # The mask is built once, before either variable is touched, and applied
    # with `where` so the data are not forced into memory the way an
    # assignment to `.values` would.
    non_zero_model = ds['ssh_model'] != 0.
    ds['ssh_obs'] = ds['ssh_obs'].where(non_zero_model)
    ds['ssh_model'] = ds['ssh_model'].where(non_zero_model)
    return ds

# Open all Jason-1 files as one dataset; each file goes through
# preprocess_time_j1 before the pieces are concatenated along 'time'.
ds_j1_nadir = xr.open_mfdataset(
    './dc_obs/j1/*.nc',
    preprocess=preprocess_time_j1,
    combine='nested',
    concat_dim='time',
)
ds_j1_nadir

##### Read nadir (Tpn Theoretical track)

In [None]:
def preprocess_time_tpn(ds):
    """Decode the time axis and blank out zero-filled samples of one Tpn file.

    Meant to be passed as the ``preprocess`` callback of ``xr.open_mfdataset``.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset of a single file with ``time``, ``ssh_obs`` and ``ssh_model``.
        ``time`` is handled as a number of seconds (it is decoded with a
        'seconds since ...' units string).

    Returns
    -------
    xarray.Dataset
        The same dataset object, updated in place: ``time`` is shifted back by
        3.731883 days and decoded relative to ``simu_start_date``; ``ssh_obs``
        and ``ssh_model`` are NaN wherever ``ssh_model`` equals 0.
    """
    # This preprocessing must be avoided in future dataset release
    time_shift_tpn = 3.731883 # days
    ds['time'] = cftime.num2date(ds['time'].values - time_shift_tpn * 24 * 3600, 'seconds since ' + simu_start_date)
    # Change 0 as nan for the first and last cycles
    # The mask is built once, before either variable is touched, and applied
    # with `where` so the data are not forced into memory the way an
    # assignment to `.values` would.
    non_zero_model = ds['ssh_model'] != 0.
    ds['ssh_obs'] = ds['ssh_obs'].where(non_zero_model)
    ds['ssh_model'] = ds['ssh_model'].where(non_zero_model)
    return ds

# Open all Tpn files as one dataset; each file goes through
# preprocess_time_tpn before the pieces are concatenated along 'time'.
ds_tpn_nadir = xr.open_mfdataset(
    './dc_obs/tpn/*.nc',
    preprocess=preprocess_time_tpn,
    combine='nested',
    concat_dim='time',
)
ds_tpn_nadir

##### Read SWOT nadir (Science orbit)

In [None]:
def preprocess_time_swot_nadir(ds):
    """Decode the numeric ``time`` of one SWOT nadir file into dates.

    Meant to be passed as the ``preprocess`` callback of ``xr.open_mfdataset``.
    The values of ``ds['time']`` are read as seconds elapsed since
    ``simu_start_date``; no time shift is applied. The dataset is updated in
    place and returned.
    """
    # This preprocessing must be avoided in future dataset release
    time_units = 'seconds since ' + simu_start_date
    elapsed_seconds = ds['time'].values
    ds['time'] = cftime.num2date(elapsed_seconds, time_units)
    return ds

# Open all SWOT nadir files as one dataset; each file goes through
# preprocess_time_swot_nadir before the pieces are concatenated along 'time'.
ds_swot_nadir = xr.open_mfdataset(
    './dc_obs/swot/BOOST-SWOT_SWOT_nadir_GULFSTREAM_*.nc',
    preprocess=preprocess_time_swot_nadir,
    combine='nested',
    concat_dim='time',
)
ds_swot_nadir

##### Read SWOT swath (Science orbit)

In [None]:
def preprocess_time_swot_karin(ds):
    """Decode ``time`` of one SWOT swath file and flatten the swath to points.

    Meant to be passed as the ``preprocess`` callback of ``xr.open_mfdataset``.
    The values of ``ds['time']`` are read as seconds elapsed since
    ``simu_start_date``. The ``nC`` and ``time`` dimensions are then stacked
    into a single ``z`` dimension, and the stacked dataset is returned.
    """
    # This preprocessing must be avoided in future dataset release
    time_units = 'seconds since ' + simu_start_date
    ds['time'] = cftime.num2date(ds['time'].values, time_units)
    # Duplicate time over across track dimension for swath
    return ds.stack(z=('nC', 'time'))

# Open all SWOT swath files as one dataset; each file is stacked to a 'z'
# dimension by preprocess_time_swot_karin, and the pieces are concatenated along it.
ds_swot_karin = xr.open_mfdataset(
    './dc_obs/swot/BOOST-SWOT_SWOT_GULFSTREAM_c*.nc',
    preprocess=preprocess_time_swot_karin,
    combine='nested',
    concat_dim='z',
)
ds_swot_karin

##### Make example of nadir and large swath SSH observations, reference SSH fields 

In [None]:
# Time window used to subset every observation dataset in the next cell.
selection_start_date = '2012-10-01'
selection_end_date = '2012-10-10'
# Date of the reference SSH field drawn next to the observations.
central_date = '2012-10-05'

In [None]:
# Nadir datasets are indexed by time directly: slice them with the date strings.
nadir_window = slice(selection_start_date, selection_end_date)
ds_en_nadir_sel = ds_en_nadir.sel(time=nadir_window)
ds_g2_nadir_sel = ds_g2_nadir.sel(time=nadir_window)
ds_tpn_nadir_sel = ds_tpn_nadir.sel(time=nadir_window)
ds_j1_nadir_sel = ds_j1_nadir.sel(time=nadir_window)
ds_swot_nadir_sel = ds_swot_nadir.sel(time=nadir_window)

# The swath dataset is selected with datetime bounds instead of strings.
swath_window = slice(
    datetime.strptime(selection_start_date, '%Y-%m-%d'),
    datetime.strptime(selection_end_date, '%Y-%m-%d'),
)
ds_swot_karin_sel = ds_swot_karin.sel(time=swath_window)

In [None]:
# Three panels: SSH along the nadir tracks, SSH over the SWOT swath, and the
# reference SSH field at the central date.
nadir_tracks = [ds_en_nadir_sel, ds_j1_nadir_sel, ds_g2_nadir_sel, ds_tpn_nadir_sel, ds_swot_nadir_sel]

# All nadir missions share one colour scale. Without common limits each
# scatter call autoscales on its own data, and the colourbar (attached to the
# last scatter only) would not describe the other tracks.
nadir_ssh = np.concatenate([np.ravel(track.ssh_model.values) for track in nadir_tracks])
nadir_ssh = nadir_ssh[np.isfinite(nadir_ssh)]
if nadir_ssh.size:
    ssh_min, ssh_max = nadir_ssh.min(), nadir_ssh.max()
else:
    # No valid sample in the window: let matplotlib autoscale as before.
    ssh_min = ssh_max = None

fig, (ax_nadir, ax_swath, ax_ref) = plt.subplots(1, 3, figsize=(15, 5))

for track in nadir_tracks:
    nadir_points = ax_nadir.scatter(track.lon, track.lat, c=track.ssh_model, s=20,
                                    cmap='gist_stern', vmin=ssh_min, vmax=ssh_max)
ax_nadir.set_xlabel('longitude', fontweight='bold')
ax_nadir.set_ylabel('latitude', fontweight='bold')
ax_nadir.set_title('SSH model @ nadirs')
fig.colorbar(nadir_points, ax=ax_nadir, orientation='horizontal')

swath_points = ax_swath.scatter(ds_swot_karin_sel.lon, ds_swot_karin_sel.lat,
                                c=ds_swot_karin_sel.ssh_model, s=10, cmap='gist_stern')
ax_swath.set_xlabel('longitude', fontweight='bold')
ax_swath.set_ylabel('latitude', fontweight='bold')
ax_swath.set_title('SSH model @ SWOT swath')
fig.colorbar(swath_points, ax=ax_swath, orientation='horizontal')

# Reference longitudes are wrapped with % 360 before plotting; the field is the
# mean over all reference time steps selected by central_date.
ref_field = dc_ref.sossheig.sel(time=central_date).mean(dim='time')
ref_mesh = ax_ref.pcolormesh(dc_ref.lon % 360, dc_ref.lat, ref_field, cmap='gist_stern')
ax_ref.set_xlabel('longitude', fontweight='bold')
ax_ref.set_ylabel('latitude', fontweight='bold')
ax_ref.set_title('SSH model')
fig.colorbar(ref_mesh, ax=ax_ref, orientation='horizontal')