# NetCDF to Zarr

This notebook gives examples of manipulating NetCDF files and converting them to the Zarr format.

**Cells are not meant to be run. These are examples to illustrate the file format conversions.**

In [None]:
import os
import glob
import xarray as xr

In [None]:
# Convert a single NetCDF file into a Zarr store written next to it.
ncroot = os.path.expanduser('~/gmaze/git/github/gmaze/lopsdata/clustering/data/')
ncfile = 'Global_Argo_VerticalMean_Temperature.nc'
# Swap only the trailing extension: str.replace('.nc', '.zarr') would rewrite
# every '.nc' occurrence in the file name, not just the suffix.
zarr_store = os.path.join(ncroot, os.path.splitext(ncfile)[0] + '.zarr')
# The context manager closes the NetCDF file as soon as the store is written.
with xr.open_dataset(os.path.join(ncroot, ncfile)) as ds:
    ds.to_zarr(zarr_store)

In [None]:
# Merge two NetCDF subset files into one dataset and write it as a single Zarr store.
ncroot = os.path.expanduser('~/gmaze/git/github/gmaze/lopsdata/clustering/data/')
ncfiles = ['GLOB_HOMOGENEOUS_variables_3subset_1.nc','GLOB_HOMOGENEOUS_variables_3subset_2.nc']
ncpaths = [os.path.join(ncroot, name) for name in ncfiles]
# The context manager releases the source files once the store is written.
with xr.open_mfdataset(ncpaths) as merged:
    # Flag these variables as coordinates; an ordered list is passed rather
    # than a set literal so the call reads the same on every run.
    ds = merged.set_coords(['LATITUDE', 'LONGITUDE', 'TIME'])
    # Tag the dataset with global attributes before writing it out.
    ds.attrs['subset_ID'] = '1-2'
    ds.attrs['creationDate'] = '2019/01/22 22:07:00'
    print(ds)
    ds.to_zarr(os.path.join(ncroot, 'GLOB_HOMOGENEOUS_variables.zarr'))

In [None]:
# Inspect a NetCDF file (in-memory size and content) ahead of a possible Zarr conversion.
ncfile = 'temp_20180611_try2.nc'
ncroot = os.path.expanduser('~/data/SOMOVAR/WP3/kenneth/testcode/8.isas')
ds = xr.open_dataset(os.path.join(ncroot, ncfile))
size_gb = ds.nbytes / 1e9
print('This dataset holds: %.3f GB' % size_gb)
print(ds)
# From here on ncroot points at the output directory used by the (disabled) conversion below.
ncroot = os.path.expanduser('~/gmaze/git/github/obidam/m2poc2019/data/')
# ds.to_zarr(os.path.join(ncroot,ncfile.replace('.nc','.zarr')))

In [None]:
# Combine subset files 1 to 6 of the NATL "7subset" series into one Zarr store.
ncroot = os.path.expanduser('~/data/ARGO/copoda_db/setup_H/db_thd_config6_last/gmm')
ncfiles = ["NATL_HOMOGENEOUS_variables_7subset_%i.nc" % k for k in range(1, 7)]
# print(ncfiles)
ncpaths = [os.path.join(ncroot, name) for name in ncfiles]
# The context manager releases the source files once the store is written.
with xr.open_mfdataset(ncpaths) as opened:
    # Re-chunk along N_PROF in blocks of 20000 before writing.
    ds = opened.chunk(chunks={'N_PROF': 20000})
    print('This dataset holds: %.3f MB' % (ds.nbytes / 1e6))
    print(ds)
    # Store name: 'NATL_HOMOGENEOUS_variables_7subset_1.nc' -> 'NATL_HOMOGENEOUS_variables.zarr'
    ds.to_zarr(os.path.join(ncroot, ncfiles[0].replace('_7subset_1.nc', '.zarr')))

In [None]:
# Combine subset files 1 to 7 of the NATL series and write them as a single "_all" Zarr store.
ncroot = os.path.expanduser('~/data/ARGO/copoda_db/setup_H/db_thd_config6_last/gmm')
ncfiles = ["NATL_HOMOGENEOUS_variables_7subset_%i.nc" % num for num in range(1, 8)]
# print(ncfiles)
ncpaths = [os.path.join(ncroot, fname) for fname in ncfiles]
ds = xr.open_mfdataset(ncpaths)
# Re-chunk along N_PROF in blocks of 20000 before writing.
ds = ds.chunk(chunks={'N_PROF': 20000})
print('This dataset holds: %.3f MB' % (ds.nbytes / 1e6))
print(ds)
# Store name: 'NATL_HOMOGENEOUS_variables_7subset_1.nc' -> 'NATL_HOMOGENEOUS_variables_all.zarr'
zarr_name = ncfiles[0].replace('_7subset_1.nc', '_all.zarr')
ds.to_zarr(os.path.join(ncroot, zarr_name))