# Preprocessing of WOD18 data
    - Calculates potential density, N2 and MLD from discrete samples
    - Covers both the WOD (World Ocean Database) bins and the WOA (World Ocean Atlas) climatology

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import sklearn as skl
import gsw
import cartopy.crs as ccrs
from scipy.interpolate import interp1d

### First, prepare the O2, T, S, and stratification data from WOD

In [4]:
# Directory and file-name pieces of the binned WOD18 observational files.
# File names are assembled as diro + var[i] + <infix> + year + '.nc'.
diro = '/glade/scratch/ito/WOD18_OSDCTD/'
# infixes selecting the osd, ctd and merged products
fosd, fctd, fmer = '_1x1bin_osd_', '_1x1bin_ctd_', '_1x1bin_merged_'
# variable prefixes of the file names
var = ['t', 's', 'o2']

In [5]:
# Open the basin-mask file as an xarray dataset
dsm = xr.open_dataset('/glade/work/ito/dataset/model/basin_mask_01.nc')

In [6]:
# potential density and stratification for the OSD bins
def calc_potdens(year):
    """Derive sigma0, N2 and mixed-layer depth for one year of binned OSD data.

    Reads the temperature and salinity files ``diro + var[0|1] + fosd + year + '.nc'``,
    computes absolute salinity (SA), conservative temperature (CT) and potential
    density anomaly (sigma0) with GSW, then for every (month, lat, lon) column:

    * N2: ``9.8/1025 * d(sigma0)/d(depth)`` between adjacent levels, linearly
      interpolated from layer mid-depths back onto the original levels; the
      shallowest level is copied from the second one.
    * MLD_125 / MLD_030: depth at which sigma0 equals its shallowest-level value
      plus 0.125 / 0.030, found by linear interpolation of depth against sigma0.

    The result is written to ``diro + 'TSN2' + fosd + year + '.nc'``.

    Parameters
    ----------
    year : int
        Year used to build the input and output file names.

    Returns
    -------
    int
        Always 1 (completion flag collected by ``Pool.map``).
    """
    # The context manager closes both input files once the output is written,
    # so pool workers that process many years do not accumulate open handles.
    with xr.open_dataset(diro+var[0]+fosd+str(year)+'.nc') as dt, \
         xr.open_dataset(diro+var[1]+fosd+str(year)+'.nc') as ds:
        # NOTE(review): gsw.SA_from_SP takes sea pressure (dbar) as its second
        # argument; depth is passed here -- confirm the approximation is intended.
        SA = gsw.SA_from_SP(ds.s,ds.depth,ds.lon,ds.lat)
        # NOTE(review): CT_from_pt expects potential temperature -- confirm that
        # `t` in these files is not in-situ temperature.
        CT = gsw.CT_from_pt(SA,dt.t)
        sig0 = gsw.sigma0(SA,CT).to_numpy()      # (mon, depth, lat, lon)
        z = ds.depth.to_numpy()
        # array sizes follow the data instead of being hard-coded
        Nm, Nz, Ny, Nx = sig0.shape
        N21 = np.full((Nm,Nz,Ny,Nx), np.nan)
        MLD_125 = np.full((Nm,Ny,Nx), np.nan)
        MLD_030 = np.full((Nm,Ny,Nx), np.nan)
        # stratification on layer mid-depths z1 (one value per pair of levels)
        z1 = .5*(z[:-1]+z[1:])
        N20 = 9.8/1025*np.diff(sig0,axis=1)/np.diff(z)[None,:,None,None]
        # Only columns with at least one sigma0 value are interpolated. The old
        # test, np.isnan(np.nansum(...)), was never true (nansum of an all-NaN
        # column is 0), so every empty bin paid for two interp1d constructions
        # that could only return NaN.
        has_data = ~np.isnan(sig0).all(axis=1)   # (mon, lat, lon)
        for nm, ny, nx in zip(*np.nonzero(has_data)):
            col = sig0[nm,:,ny,nx]
            n21 = interp1d(z1,N20[nm,:,ny,nx],'linear',bounds_error=False,fill_value=np.nan)
            N21[nm,:,ny,nx] = n21(z)
            # depth as a function of density; NaN when the target density is not bracketed
            mld0 = interp1d(col,z,'linear',bounds_error=False,fill_value=np.nan)
            MLD_125[nm,ny,nx] = mld0(col[0]+0.125)
            MLD_030[nm,ny,nx] = mld0(col[0]+0.030)
        # the shallowest level lies above the first mid-depth, so reuse level 1
        N21[:,0,:,:] = N21[:,1,:,:]
        # -------
        # write it out as a combined netCDF file
        coords4d = {'mon':ds.mon,'depth':ds.depth,'lat':ds.lat,'lon':ds.lon}
        coords3d = {'mon':ds.mon,'lat':ds.lat,'lon':ds.lon}
        da = xr.DataArray(data=sig0,name='sigma0',dims=['mon','depth','lat','lon'],coords=coords4d)
        dd = da.to_dataset()
        dd['N2'] = xr.DataArray(data=N21,name='N2',dims=['mon','depth','lat','lon'],coords=coords4d)
        dd['MLD_125'] = xr.DataArray(data=MLD_125,name='MLD_125',dims=['mon','lat','lon'],coords=coords3d)
        dd['MLD_030'] = xr.DataArray(data=MLD_030,name='MLD_030',dims=['mon','lat','lon'],coords=coords3d)
        dd['SA'] = SA
        dd['CT'] = CT
        dd.to_netcdf(diro+'TSN2'+fosd+str(year)+'.nc')
    return 1

In [None]:
# Run calc_potdens for every year 1965..2020 on a pool of 8 worker processes
from multiprocessing import Pool
yrs = np.arange(1965, 2021)
if __name__ == '__main__':
    with Pool(processes=8) as pool:
        status = pool.map(calc_potdens, yrs)
        print(status)

In [7]:
# potential density and stratification for the CTD bins
def calc_potdens_ctd(year):
    """Derive sigma0, N2 and mixed-layer depth for one year of binned CTD data.

    Reads the temperature and salinity files ``diro + var[0|1] + fctd + year + '.nc'``,
    computes absolute salinity (SA), conservative temperature (CT) and potential
    density anomaly (sigma0) with GSW, then for every (month, lat, lon) column:

    * N2: ``9.8/1025 * d(sigma0)/d(depth)`` between adjacent levels, linearly
      interpolated from layer mid-depths back onto the original levels; the
      shallowest level is copied from the second one.
    * MLD_125 / MLD_030: depth at which sigma0 equals its shallowest-level value
      plus 0.125 / 0.030, found by linear interpolation of depth against sigma0.

    The result is written to ``diro + 'TSN2' + fctd + year + '.nc'``.

    Parameters
    ----------
    year : int
        Year used to build the input and output file names.

    Returns
    -------
    int
        Always 1 (completion flag collected by ``Pool.map``).
    """
    # The context manager closes both input files once the output is written,
    # so pool workers that process many years do not accumulate open handles.
    with xr.open_dataset(diro+var[0]+fctd+str(year)+'.nc') as dt, \
         xr.open_dataset(diro+var[1]+fctd+str(year)+'.nc') as ds:
        # NOTE(review): gsw.SA_from_SP takes sea pressure (dbar) as its second
        # argument; depth is passed here -- confirm the approximation is intended.
        SA = gsw.SA_from_SP(ds.s,ds.depth,ds.lon,ds.lat)
        # NOTE(review): CT_from_pt expects potential temperature -- confirm that
        # `t` in these files is not in-situ temperature.
        CT = gsw.CT_from_pt(SA,dt.t)
        sig0 = gsw.sigma0(SA,CT).to_numpy()      # (mon, depth, lat, lon)
        z = ds.depth.to_numpy()
        # array sizes follow the data instead of being hard-coded
        Nm, Nz, Ny, Nx = sig0.shape
        N21 = np.full((Nm,Nz,Ny,Nx), np.nan)
        MLD_125 = np.full((Nm,Ny,Nx), np.nan)
        MLD_030 = np.full((Nm,Ny,Nx), np.nan)
        # stratification on layer mid-depths z1 (one value per pair of levels)
        z1 = .5*(z[:-1]+z[1:])
        N20 = 9.8/1025*np.diff(sig0,axis=1)/np.diff(z)[None,:,None,None]
        # Only columns with at least one sigma0 value are interpolated. The old
        # test, np.isnan(np.nansum(...)), was never true (nansum of an all-NaN
        # column is 0), so every empty bin paid for two interp1d constructions
        # that could only return NaN.
        has_data = ~np.isnan(sig0).all(axis=1)   # (mon, lat, lon)
        for nm, ny, nx in zip(*np.nonzero(has_data)):
            col = sig0[nm,:,ny,nx]
            n21 = interp1d(z1,N20[nm,:,ny,nx],'linear',bounds_error=False,fill_value=np.nan)
            N21[nm,:,ny,nx] = n21(z)
            # depth as a function of density; NaN when the target density is not bracketed
            mld0 = interp1d(col,z,'linear',bounds_error=False,fill_value=np.nan)
            MLD_125[nm,ny,nx] = mld0(col[0]+0.125)
            MLD_030[nm,ny,nx] = mld0(col[0]+0.030)
        # the shallowest level lies above the first mid-depth, so reuse level 1
        N21[:,0,:,:] = N21[:,1,:,:]
        # -------
        # write it out as a combined netCDF file
        coords4d = {'mon':ds.mon,'depth':ds.depth,'lat':ds.lat,'lon':ds.lon}
        coords3d = {'mon':ds.mon,'lat':ds.lat,'lon':ds.lon}
        da = xr.DataArray(data=sig0,name='sigma0',dims=['mon','depth','lat','lon'],coords=coords4d)
        dd = da.to_dataset()
        dd['N2'] = xr.DataArray(data=N21,name='N2',dims=['mon','depth','lat','lon'],coords=coords4d)
        dd['MLD_125'] = xr.DataArray(data=MLD_125,name='MLD_125',dims=['mon','lat','lon'],coords=coords3d)
        dd['MLD_030'] = xr.DataArray(data=MLD_030,name='MLD_030',dims=['mon','lat','lon'],coords=coords3d)
        dd['SA'] = SA
        dd['CT'] = CT
        dd.to_netcdf(diro+'TSN2'+fctd+str(year)+'.nc')
    return 1

In [None]:
# Run calc_potdens_ctd for every year 1987..2020 on a pool of 8 worker processes
from multiprocessing import Pool
yrs = np.arange(1987, 2021)
if __name__ == '__main__':
    with Pool(processes=8) as pool:
        status = pool.map(calc_potdens_ctd, yrs)
        print(status)

In [8]:
# potential density and stratification for the merged (OSD + CTD) bins
def calc_potdens_mer(year):
    """Derive sigma0, N2 and mixed-layer depth for one year of the merged bins.

    Reads the temperature and salinity files ``diro + var[0|1] + fmer + year + '.nc'``,
    computes absolute salinity (SA), conservative temperature (CT) and potential
    density anomaly (sigma0) with GSW, then for every (month, lat, lon) column:

    * N2: ``9.8/1025 * d(sigma0)/d(depth)`` between adjacent levels, linearly
      interpolated from layer mid-depths back onto the original levels; the
      shallowest level is copied from the second one.
    * MLD_125 / MLD_030: depth at which sigma0 equals its shallowest-level value
      plus 0.125 / 0.030, found by linear interpolation of depth against sigma0.

    The result is written to ``diro + 'TSN2' + fmer + year + '.nc'``.

    Parameters
    ----------
    year : int
        Year used to build the input and output file names.

    Returns
    -------
    int
        Always 1 (completion flag collected by ``Pool.map``).
    """
    # The context manager closes both input files once the output is written,
    # so pool workers that process many years do not accumulate open handles.
    with xr.open_dataset(diro+var[0]+fmer+str(year)+'.nc') as dt, \
         xr.open_dataset(diro+var[1]+fmer+str(year)+'.nc') as ds:
        # NOTE(review): gsw.SA_from_SP takes sea pressure (dbar) as its second
        # argument; depth is passed here -- confirm the approximation is intended.
        SA = gsw.SA_from_SP(ds.s,ds.depth,ds.lon,ds.lat)
        # NOTE(review): CT_from_pt expects potential temperature -- confirm that
        # `t` in these files is not in-situ temperature.
        CT = gsw.CT_from_pt(SA,dt.t)
        sig0 = gsw.sigma0(SA,CT).to_numpy()      # (mon, depth, lat, lon)
        z = ds.depth.to_numpy()
        # array sizes follow the data instead of being hard-coded
        Nm, Nz, Ny, Nx = sig0.shape
        N21 = np.full((Nm,Nz,Ny,Nx), np.nan)
        MLD_125 = np.full((Nm,Ny,Nx), np.nan)
        MLD_030 = np.full((Nm,Ny,Nx), np.nan)
        # stratification on layer mid-depths z1 (one value per pair of levels)
        z1 = .5*(z[:-1]+z[1:])
        N20 = 9.8/1025*np.diff(sig0,axis=1)/np.diff(z)[None,:,None,None]
        # Only columns with at least one sigma0 value are interpolated. The old
        # test, np.isnan(np.nansum(...)), was never true (nansum of an all-NaN
        # column is 0), so every empty bin paid for two interp1d constructions
        # that could only return NaN.
        has_data = ~np.isnan(sig0).all(axis=1)   # (mon, lat, lon)
        for nm, ny, nx in zip(*np.nonzero(has_data)):
            col = sig0[nm,:,ny,nx]
            n21 = interp1d(z1,N20[nm,:,ny,nx],'linear',bounds_error=False,fill_value=np.nan)
            N21[nm,:,ny,nx] = n21(z)
            # depth as a function of density; NaN when the target density is not bracketed
            mld0 = interp1d(col,z,'linear',bounds_error=False,fill_value=np.nan)
            MLD_125[nm,ny,nx] = mld0(col[0]+0.125)
            MLD_030[nm,ny,nx] = mld0(col[0]+0.030)
        # the shallowest level lies above the first mid-depth, so reuse level 1
        N21[:,0,:,:] = N21[:,1,:,:]
        # -------
        # write it out as a combined netCDF file
        coords4d = {'mon':ds.mon,'depth':ds.depth,'lat':ds.lat,'lon':ds.lon}
        coords3d = {'mon':ds.mon,'lat':ds.lat,'lon':ds.lon}
        da = xr.DataArray(data=sig0,name='sigma0',dims=['mon','depth','lat','lon'],coords=coords4d)
        dd = da.to_dataset()
        dd['N2'] = xr.DataArray(data=N21,name='N2',dims=['mon','depth','lat','lon'],coords=coords4d)
        dd['MLD_125'] = xr.DataArray(data=MLD_125,name='MLD_125',dims=['mon','lat','lon'],coords=coords3d)
        dd['MLD_030'] = xr.DataArray(data=MLD_030,name='MLD_030',dims=['mon','lat','lon'],coords=coords3d)
        dd['SA'] = SA
        dd['CT'] = CT
        dd.to_netcdf(diro+'TSN2'+fmer+str(year)+'.nc')
    return 1

In [None]:
# Run calc_potdens_mer for every year 1965..2020 on a pool of 8 worker processes
from multiprocessing import Pool
yrs = np.arange(1965, 2021)
if __name__ == '__main__':
    with Pool(processes=8) as pool:
        status = pool.map(calc_potdens_mer, yrs)
        print(status)

# Prepare the gridded WOA T, S, N2 data

In [None]:
# WOA18 climatology: directory, month tags and zero-filled result buffers
diro = '/glade/scratch/ito/woa18/'
# two-digit month strings '01' .. '12' used in the WOA file names
mon = ['%.2d' % m for m in range(1, 13)]
Nz = 57
# per-month 3-D fields, indexed (month, depth, lat, lon)
sigma0 = np.zeros((12, Nz, 180, 360))
N21 = np.zeros((12, Nz, 180, 360))
sa = np.zeros((12, Nz, 180, 360))
ct = np.zeros((12, Nz, 180, 360))
o2 = np.zeros((12, Nz, 180, 360))
# per-month 2-D fields, indexed (month, lat, lon)
MLD_125 = np.zeros((12, 180, 360))
MLD_030 = np.zeros((12, 180, 360))

In [None]:
# For each month: read the WOA18 T, S and O2 analyses, derive SA, CT and sigma0,
# then N2 and the two density-threshold mixed-layer depths for every column.
for i in range(12):
    fn='/glade/scratch/ito/woa18/woa18_decav_t'+mon[i]+'_01.nc'
    dst=xr.open_dataset(fn,decode_times=False)
    fn='/glade/scratch/ito/woa18/woa18_decav_s'+mon[i]+'_01.nc'
    dss=xr.open_dataset(fn,decode_times=False)
    fn='/glade/scratch/ito/woa18/woa18_all_o'+mon[i]+'_01.nc'
    dso=xr.open_dataset(fn,decode_times=False)
    o2[i,:,:,:]=dso.o_an.to_numpy()
    #
    # NOTE(review): gsw.SA_from_SP takes sea pressure (dbar) as its second argument;
    # depth is passed here -- confirm the approximation is intended.
    SA = gsw.SA_from_SP(dss.s_an,dss.depth,dss.lon,dss.lat)
    # NOTE(review): CT_from_pt expects potential temperature -- confirm t_an is not in-situ.
    CT = gsw.CT_from_pt(SA,dst.t_an)
    sig0 = gsw.sigma0(SA,CT)
    z=dss.depth.to_numpy()
    Nz=np.size(z)
    sig0=sig0.to_numpy()              # (1, depth, lat, lon): one record per monthly file
    sigma0[i,:,:,:]=sig0
    # stratification between adjacent levels, located at the layer mid-depths z1
    z1=.5*(z[:-1]+z[1:])
    N20=9.8/1025*np.diff(sig0,axis=1)/np.diff(z)[None,:,None,None]
    #
    # Columns without any sigma0 value are set to NaN outright. The previous test,
    # np.isnan(np.nansum(...)), was never true (nansum of all-NaN is 0), so those
    # columns went through interp1d only to produce NaN.
    N21[i,:,:,:]=np.nan
    MLD_125[i,:,:]=np.nan
    MLD_030[i,:,:]=np.nan
    has_data=~np.isnan(sig0[0]).all(axis=0)   # (lat, lon)
    for ny, nx in zip(*np.nonzero(has_data)):
        col=sig0[0,:,ny,nx]
        n21 = interp1d(z1,N20[0,:,ny,nx],'linear',bounds_error=False,fill_value=np.nan)
        N21[i,:,ny,nx]=n21(z)
        mld0 = interp1d(col,z,'linear',bounds_error=False,fill_value=np.nan)
        # scalar target density: the old sig0[:,0,ny,nx] produced a length-1 array,
        # and storing that into a single element is deprecated in recent NumPy
        MLD_125[i,ny,nx]=mld0(col[0]+0.125)
        MLD_030[i,ny,nx]=mld0(col[0]+0.030)
    # the shallowest level lies above the first mid-depth, so reuse level 1
    N21[i,0,:,:]=N21[i,1,:,:]
    sa[i,:,:,:]=SA
    ct[i,:,:,:]=CT

In [None]:
# Collect the monthly WOA fields into one dataset and write it to netCDF.
# The integer month index gets its own name: reusing `mon` would replace the
# list of '01'..'12' file tags, and re-running the reading loop would then fail
# when it concatenates mon[i] into a file name.
months=np.arange(1,13,1)
ds=dss
coords4d={'mon':months,'depth':ds.depth,'lat':ds.lat,'lon':ds.lon}
coords3d={'mon':months,'lat':ds.lat,'lon':ds.lon}
da = xr.DataArray(data=sigma0,name='sigma0',dims=['mon','depth','lat','lon'],coords=coords4d)
dd = da.to_dataset()
# depth-resolved fields
for vname, field in (('N2',N21),('O2',o2),('SA',sa),('CT',ct)):
    dd[vname] = xr.DataArray(data=field,name=vname,dims=['mon','depth','lat','lon'],coords=coords4d)
# mixed-layer depths (no depth axis)
for vname, field in (('MLD_125',MLD_125),('MLD_030',MLD_030)):
    dd[vname] = xr.DataArray(data=field,name=vname,dims=['mon','lat','lon'],coords=coords3d)
dd.to_netcdf(diro+'woa18_TSN2.nc')

In [None]:
# quick look: CT averaged over the 12 months at the first depth level
dd['CT'].mean(dim='mon').isel(depth=0).plot()

In [None]:
# display the summary of the assembled dataset
dd