In [7]:
import xarray as xr
import dask
from dask.delayed import delayed
from dask.distributed import Client
import numpy as np
import datetime
import matplotlib.pyplot as plt

In [8]:
# Along-track SLA input file used throughout this notebook.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
nc_file = (
    '/Users/mballaro/Public/scuba_grid_cls/test_case/inputs/alongtrack/sla_al_20140101-20140131.nc'
)

In [9]:
# Open the along-track NetCDF file as an xarray Dataset and display its summary.
ds = xr.open_dataset(filename_or_obj=nc_file)
ds

<xarray.Dataset>
Dimensions:    (time: 1459545)
Coordinates:
  * time       (time) datetime64[ns] 2013-12-31T23:54:41.244443136 ... 2014-01-31T23:07:54.916014336
    longitude  (time) float64 ...
    latitude   (time) float64 ...
Data variables:
    cycle      (time) int16 ...
    track      (time) int16 ...
    SLA        (time) float32 ...
Attributes:
    Conventions:                     CF-1.6
    cdm_data_type:                   Swath
    date_created:                    2018-06-19T11:57:31Z
    date_issued:                     2018-06-19T11:57:31Z
    date_modified:                   2018-06-19T11:57:31Z
    geospatial_lat_max:              81.502809
    geospatial_lat_min:              -78.378573
    geospatial_lat_resolution:       0.041001500000000135
    geospatial_lat_units:            degrees_north
    geospatial_lon_max:              360.001034
    geospatial_lon_min:              0.000334
    geospatial_lon_resolution:       0.01672200000001567
    geospatial_lon_units:   

In [10]:
def _scatter_on_grid(values_by_box, nb_lat, nb_lon, fill_value, dtype):
    """Place per-box values (indexed by flat box id) on a (nb_lat, nb_lon) grid."""
    grid = np.full((nb_lat, nb_lon), fill_value, dtype=dtype)
    box_ids = np.asarray(values_by_box.index)
    # Index with a (rows, cols) tuple so that each value lands in exactly one
    # cell.  Indexing with a *list* of index arrays (``grid[[rows, cols]]``)
    # is not equivalent: recent NumPy interprets it as a single index array
    # applied to the first axis.
    grid[box_ids // nb_lon, box_ids % nb_lon] = values_by_box.values
    return grid


def gridding(darr, pas):
    """Bin a 1-D along-track DataArray on a regular latitude/longitude grid.

    Every sample is assigned to a box of size ``pas`` (in degrees) from its
    ``latitude`` and ``longitude`` coordinates; the mean, standard deviation
    and number of samples are then computed for each box.

    Parameters
    ----------
    darr : xarray.DataArray
        One-dimensional, named DataArray carrying ``latitude`` and
        ``longitude`` coordinates along its single dimension.  Longitudes are
        wrapped into [0, 360).  The input object is not modified.
    pas : number or sequence of two numbers
        Grid step in degrees.  A sequence is read as ``(step_lat, step_lon)``;
        a scalar is used for both directions.

    Returns
    -------
    xarray.Dataset
        Variables ``<name>_MEAN``, ``<name>_STD`` (NaN where a box is empty)
        and ``<name>_COUNTS`` (0 where a box is empty) on dimensions
        ``(latitude, longitude)``, where ``<name>`` is ``darr.name``.  If the
        grid has a single longitude column that dimension is squeezed out;
        otherwise, if it has a single latitude row, that one is squeezed out.

    Raises
    ------
    ValueError
        If ``darr`` is not one-dimensional or a grid step is not positive.
    """
    # Accept either a (lat, lon) pair or a single scalar step.
    try:
        pas_lat, pas_lon = pas[0], pas[1]
    except (TypeError, IndexError):
        pas_lat = pas_lon = pas

    if pas_lat <= 0 or pas_lon <= 0:
        raise ValueError("grid steps must be strictly positive, got %r" % (pas,))
    if darr.ndim != 1:
        raise ValueError("gridding expects a one-dimensional DataArray, got dims %r" % (darr.dims,))
    sample_dim = darr.dims[0]

    # Number of boxes in each direction.
    nb_lon = int(np.ceil(360 / pas_lon))
    nb_lat = int(np.ceil(180 / pas_lat))

    # Box indices of every sample (longitude wrapped into [0, 360)).
    id_lon = np.floor(((darr['longitude'].values + 360) % 360) / pas_lon).astype(np.int64)
    id_lat = np.floor((darr['latitude'].values + 90) / pas_lat).astype(np.int64)
    # A latitude of exactly +90 (or a longitude rounding up to the upper
    # edge) would otherwise produce an index one past the grid; fold it into
    # the last box.  Only the upper bound is clamped so that invalid (e.g.
    # NaN) coordinates are not silently binned into box 0.
    id_lon = np.minimum(id_lon, nb_lon - 1)
    id_lat = np.minimum(id_lat, nb_lat - 1)

    # Unique integer identifier of the box each sample falls in.
    box_id = id_lon + nb_lon * id_lat

    # Grid coordinates (box centres).
    # NOTE(review): when 360/pas_lon or 180/pas_lat is not an integer these
    # evenly spaced values only approximate the true box centres.
    lon = np.linspace(pas_lon / 2, 360 - pas_lon / 2, nb_lon)
    lat = np.linspace(-90 + pas_lat / 2, 90 - pas_lat / 2, nb_lat)

    # Attach the box identifier as a coordinate on a *copy* so the caller's
    # DataArray is left untouched, then convert to a dask dataframe.
    darr_boxed = darr.assign_coords(index_boite=(sample_dim, box_id))
    dask_data = xr.Dataset({'data': darr_boxed}).to_dask_dataframe()

    # Aggregate only the data column, and evaluate the three statistics in a
    # single dask.compute call.
    grouped = dask_data.groupby('index_boite')['data']
    data_avg, data_std, data_counts = dask.compute(
        grouped.mean(), grouped.std(), grouped.count()
    )

    # Put the per-box statistics back on the 2-D grid.
    grid_mean = _scatter_on_grid(data_avg, nb_lat, nb_lon, np.nan, np.float64)
    grid_std = _scatter_on_grid(data_std, nb_lat, nb_lon, np.nan, np.float64)
    grid_counts = _scatter_on_grid(data_counts, nb_lat, nb_lon, 0, np.int32)

    grid_coords = [('latitude', lat), ('longitude', lon)]
    xr_grid_mean = xr.DataArray(grid_mean, coords=grid_coords)
    xr_grid_std = xr.DataArray(grid_std, coords=grid_coords)
    xr_grid_counts = xr.DataArray(grid_counts, coords=grid_coords)

    dset2 = xr.Dataset({darr.name + '_MEAN': xr_grid_mean,
                        darr.name + '_STD': xr_grid_std,
                        darr.name + '_COUNTS': xr_grid_counts})

    # Collapse a degenerate (single-box) direction, dropping its coordinate.
    if dset2.longitude.size == 1:
        return dset2.squeeze('longitude', drop=True)
    if dset2.latitude.size == 1:
        return dset2.squeeze('latitude', drop=True)

    return dset2

In [12]:
# Grid the SLA variable with a 2-degree step in both latitude and longitude.
ds_res = gridding(darr=ds['SLA'], pas=(2, 2))

  df = func(*args, **kwargs)


In [13]:
# Display the summary of the gridded dataset returned by gridding().
ds_res

<xarray.Dataset>
Dimensions:     (latitude: 90, longitude: 180)
Coordinates:
  * latitude    (latitude) float64 -89.0 -87.0 -85.0 -83.0 ... 85.0 87.0 89.0
  * longitude   (longitude) float64 1.0 3.0 5.0 7.0 ... 353.0 355.0 357.0 359.0
Data variables:
    SLA_MEAN    (latitude, longitude) float64 nan nan nan nan ... nan nan nan
    SLA_STD     (latitude, longitude) float64 nan nan nan nan ... nan nan nan
    SLA_COUNTS  (latitude, longitude) int32 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0