In [1]:
import numpy as np
import pandas as pd
import xarray as xr

import os
from glob import glob

import matplotlib.pyplot as plt

import histlib.matchup as match
import histlib.diagnosis as diag
from histlib.cstes import labels, zarr_dir, matchup_dir, var
from histlib.matchup import _data_var, _stress_var, _aviso_var

from xhistogram.xarray import histogram


import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.geodesic as cgeo
crs = ccrs.PlateCarree()
import cmocean.cm as cm




In [2]:
# Label used by the single-label test cells below: first entry of `labels`
# (imported from histlib.cstes).
l = labels[0]

In [3]:
# Start a dask cluster and attach a client to it.
# `Client` is needed whichever cluster type is used, so import it once up front.
from dask.distributed import Client

if True:
    # PBS batch cluster: 28 cores split over 28 processes, 8 h walltime, one job.
    # Configurations tried previously (both with walltime='04:00:00'):
    #   cores=56, processes=28   and   cores=7, processes=7
    from dask_jobqueue import PBSCluster
    cluster = PBSCluster(cores=28, processes=28, walltime='08:00:00')
    w = cluster.scale(jobs=1)
else:
    # Fallback: cluster running on the local machine.
    from dask.distributed import LocalCluster
    cluster = LocalCluster()

client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: http://10.148.0.201:8787/status,

0,1
Dashboard: http://10.148.0.201:8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.148.0.201:52983,Workers: 0
Dashboard: http://10.148.0.201:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## IMPORT DATA, build dataset
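We keep only the ERA* (erastar) wind term, reject the PEACHY data, and keep only colocations within the distance and time thresholds `DL` and `DT` set in the next cell. Note: this text originally stated deltaT < 1 h, but `DT` is currently set to 1800 s, i.e. 30 min.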

In [4]:
# Colocation thresholds and bin width used throughout the notebook.
DL = 25e3  # meters  -- upper bound applied to alti___distance
DT = 1800  # seconds -- upper bound applied to alti___time_difference
dl = 5     # lon/lat bin width (presumably degrees -- TODO confirm)

# Variables read from each matchup store.
# NOTE: this rebinds `var`, which was also imported from histlib.cstes in the
# import cell; from here on `var` is this list.
var = [
    # drifter_* terms
    'drifter_acc_x_0', 'drifter_acc_y_0',
    'drifter_coriolis_x_0', 'drifter_coriolis_y_0',
    # es_cstrio_z15_* wind terms, at altimeter and drifter positions
    'es_cstrio_z15_alti_wd_x', 'es_cstrio_z15_drifter_wd_x',
    'es_cstrio_z15_alti_wd_y', 'es_cstrio_z15_drifter_wd_y',
    # alti_ggx_* along-track terms
    'alti_ggx_adt_filtered',
    'alti_ggx_adt_filtered_ocean_tide',
    'alti_ggx_adt_filtered_ocean_tide_internal_tide',
    # aviso_* terms, at altimeter and drifter positions
    'aviso_alti_ggx_adt', 'aviso_drifter_ggx_adt',
    'aviso_alti_ggy_adt', 'aviso_drifter_ggy_adt',
]

def bin_lonlat_ms(ds, l, dl):
    """Bin a matchup dataset on a regular lon/lat grid and return per-bin mean squares.

    Parameters
    ----------
    ds : xarray.Dataset
        Matchup dataset along 'obs'. It is first passed through
        `match.add_except_sum` (see histlib.matchup for what that adds).
    l : str
        Label stored in the output as the 'drifter_sat_year' coordinate.
    dl : number
        Bin width, applied to both lon and lat.

    Returns
    -------
    xarray.Dataset
        For every column other than lon/lat, the mean of its square in each
        (latbin, lonbin) cell, plus 'nb_coloc_bin' (number of obs per cell).
        The result is reindexed on np.arange(-180, 180, dl) x np.arange(-90, 90, dl)
        and the bin coordinates are then shifted by dl/2.
    """
    matchup = match.add_except_sum(ds)
    matchup = matchup.reset_coords(['lon', 'lat', 'time']).drop(['id_comb', 'time'])

    # One row per obs; each bin is labelled by its lower edge.
    table = matchup.to_dask_dataframe().set_index('obs')
    table["latbin"] = (table.lat // dl) * dl
    table["lonbin"] = (table.lon // dl) * dl

    bin_keys = ["latbin", "lonbin"]

    # Mean square: square every non-positional column, then average per bin.
    squared = table.drop(['lon', 'lat', 'lonbin', 'latbin'], axis=1) ** 2
    # merge() is called without keys and the two frames share no column name, so the
    # join is presumably done on the common 'obs' index -- TODO confirm.
    ms_lazy = table[['lonbin', 'latbin']].merge(squared).groupby(bin_keys).mean()

    # Number of observations per bin.
    counts_lazy = table.reset_index()[['obs', 'latbin', 'lonbin']].groupby(bin_keys).count().obs
    counts = counts_lazy.compute().to_xarray()
    ms = ms_lazy.compute().to_xarray()

    # Carry over the attributes of variables that exist in the input dataset.
    for name in list(ms.variables):
        if name in ds:
            ms[name].attrs = ds[name].attrs

    # Merge statistics and counts, and tag the result with the label.
    out = xr.merge([ms, counts.rename('nb_coloc_bin')])
    out['drifter_sat_year'] = l
    out = out.expand_dims('drifter_sat_year').set_coords('drifter_sat_year')

    # Put every label on the same grid, then shift bin coordinates by half a bin.
    lon_bins = np.arange(-180, 180, dl)
    lat_bins = np.arange(-90, 90, dl)
    out = out.reindex({'lonbin': lon_bins, 'latbin': lat_bins})
    half_bin = dl / 2
    out['lonbin'] = out['lonbin'] + half_bin
    out['latbin'] = out['latbin'] + half_bin

    return out

def run_mslonlat(l):
    """Compute and store the binned mean-square maps for one matchup label.

    Opens `matchup_{l}.zarr` from `matchup_dir`, keeps the variables listed in the
    notebook-level `var` plus the drogue flag and the colocation distance/time,
    drops obs with missing values, and keeps only obs with
    alti___distance <= DL and alti___time_difference <= DT.

    Up to three stores are then written under `zarr_dir + '_ok'`/mslonlat, each
    produced by `bin_lonlat_ms`:
      * all selected obs            -> mslonlat_<DL km>_<DT>_<dl>_<l>.zarr
      * obs where drogue_status     -> ..._drogued_<l>.zarr    (skipped if empty)
      * obs where not drogue_status -> ..._undrogued_<l>.zarr  (skipped if empty)
    A store that already exists on disk is left untouched.

    Parameters
    ----------
    l : str
        Matchup label (an entry of `labels`).

    Returns
    -------
    None

    Notes
    -----
    NOTE(review): the time filter is one-sided (`<= DT`). If
    alti___time_difference is signed, large negative differences pass the filter;
    confirm against the matchup builder.
    """
    # `logging` is not imported in the notebook's import cell: without this the
    # function raised NameError right after writing the first store, so the
    # drogued / undrogued stores were never reached in the same call.
    import logging

    dsm = xr.open_dataset(os.path.join(matchup_dir, f'matchup_{l}.zarr'))
    dsm = dsm[var + ['drogue_status', 'alti___distance', 'alti___time_difference']]
    dsm = dsm.dropna('obs').chunk({'obs': 500})
    dsm = dsm.where(dsm.alti___distance <= DL, drop=True)
    dsm = dsm.where(dsm.alti___time_difference <= DT, drop=True)
    dsm = dsm.drop(['alti___distance', 'alti___time_difference'])

    prefix = f'mslonlat_{int(DL//1000)}_{DT}_{dl}'
    encoding = {'drifter_sat_year': {'dtype': 'U32'}}

    # (file-name tag, selector, skip when the selection is empty)
    # The full set is written even when empty, as before; only the two drogue
    # subsets are skipped when they contain no obs.
    subsets = (
        ('', lambda d: d, False),
        ('drogued_', lambda d: d.where(d.drogue_status, drop=True), True),
        ('undrogued_', lambda d: d.where(np.logical_not(d.drogue_status), drop=True), True),
    )

    for tag, select, skip_if_empty in subsets:
        store = os.path.join(zarr_dir+'_ok', f'mslonlat/{prefix}_{tag}{l}.zarr')
        # Check the target first so the selection is not evaluated for a store
        # that would not be written anyway.
        if os.path.isdir(store):
            continue
        subset = select(dsm).drop('drogue_status')
        if skip_if_empty and subset.dims['obs'] == 0:
            continue
        bin_lonlat_ms(subset, l, dl).to_zarr(store, encoding=encoding, mode='w')
        logging.info(f"{l} storred in {store}")
    

    

# Test on a single label

In [5]:
# Same selection as in run_mslonlat, applied step by step to the test label `l`.
dsm = xr.open_dataset(os.path.join(matchup_dir, f'matchup_{l}.zarr'))
dsm = dsm[var + ['drogue_status', 'alti___distance', 'alti___time_difference']]
dsm = dsm.dropna('obs').chunk({'obs': 500})

# Keep only colocations within the distance / time thresholds.
dsm = dsm.where(dsm.alti___distance <= DL, drop=True)
dsm = dsm.where(dsm.alti___time_difference <= DT, drop=True)
dsm = dsm.drop(['alti___distance', 'alti___time_difference'])

# Split by drogue flag, then drop the flag from all three datasets.
dsmd = dsm.where(dsm.drogue_status, drop=True).drop('drogue_status')
dsmnd = dsm.where(np.logical_not(dsm.drogue_status), drop=True).drop('drogue_status')
dsm = dsm.drop('drogue_status')

# Binned mean squares for the full (drogued + undrogued) selection.
ds = bin_lonlat_ms(dsm, l, dl)

In [6]:
# Display the binned dataset returned by bin_lonlat_ms for the test label.
ds

In [11]:
# Drogued per-label stores for the current DL / DT / dl settings.
# glob() returns paths in arbitrary order, so sort them to make `files[3]` below
# reproducible from one run to the next.
files = sorted(glob(os.path.join(zarr_dir+'_ok',f'mslonlat/mslonlat_{int(DL//1000)}_{DT}_{dl}_drogued_*.zarr')))
# Keep only the stores whose file name contains 'gps'; test the base name so that
# a 'gps' substring in a parent directory cannot make every path match.
files = [f for f in files if 'gps' in os.path.basename(f)]

# Open one store as a sample (index 3 is an arbitrary pick).
ds = xr.open_dataset(files[3])

In [14]:
# Sanity check: open every selected store and print len(ds), i.e. the number of
# data variables it holds, to verify that all stores have the same content.
# Note that `ds` is rebound on each iteration and ends up as the last store opened.
for f in files :
    ds = xr.open_dataset(f)
    print(len(ds))

66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66
66


In [16]:
# Close the client before the cluster so it is not left connected to a scheduler
# that no longer exists, then shut the cluster down.
client.close()
cluster.close()