In [80]:
import xarray as xr

In [132]:
# Thin netCDF
# Dataset: NUCAPS
# --------------------
# Input and output directories. Both keep their trailing slash because file
# paths in this notebook are built by joining strings directly.
indir = 'input/'
odir = 'output/'

# Granule name without its extension; the output file name reuses it.
fname = "NUCAPS-EDR_v2r0_j01_s20190425175959_e20190425180557_c20190425180557"

# Note: decode_times=False prevents xarray from converting the time values
# to a datetime format.
nucaps = xr.open_dataset(f"{indir}{fname}.nc", decode_times=False)

# Names of all variables in the file; used later to decide what to drop.
tmp = [*nucaps.variables]

In [133]:
# Variables that must stay in the file; every other variable is removed.
keeps = [
    'Temperature',
    'Time',
    'Latitude',
    'Longitude',
    'Pressure',
]

In [134]:
# Build the list of variables to drop (`tmp`) and the per-variable encoding
# for the variables being kept.
# `tmp` is recomputed from the dataset rather than edited in place, so
# re-running this cell gives the same result instead of failing on a name
# that was already removed.
missing = [keep for keep in keeps if keep not in nucaps.variables]
if missing:
    # Same exception type that list.remove() raised, with a clearer message.
    raise ValueError(f"Variables listed in `keeps` are not in the file: {missing}")
tmp = [name for name in nucaps.variables if name not in keeps]

# The encoding is optional, but zlib is recommended because it compresses
# the output file.
# NOTE(review): every kept variable is written as float32, including 'Time' —
# confirm float32 has enough precision for the time values in this product.
encoding_dict = {keep: {"dtype": "float32", "zlib": True} for keep in keeps}

In [135]:
# Remove every variable that is not in `keeps`.
# `drop_vars` is the current API for dropping variables by name; passing
# variable names to `Dataset.drop` is deprecated.
nucaps_thinned = nucaps.drop_vars(tmp)

In [136]:
# Write the thinned dataset to the output directory, reusing the input file
# name with "_thinned" appended.
nucaps_thinned.to_netcdf(
    path=f"{odir}{fname}_thinned.nc",
    engine="netcdf4",
    encoding=encoding_dict,
)

In [113]:
# Close the original NUCAPS dataset that was opened with xr.open_dataset.
nucaps.close()

In [74]:
# Thin HDF
# Dataset: IMERG
# --------------------
# Granule name without its extension, and the HDF5 group to read from.
fname = '3B-HHR.MS.MRG.3IMERG.20170827-S120000-E122959.0720.V06B'
grp_name = "Grid/"

# Open only that group of the HDF5 file (`indir` is set in the NUCAPS
# section of this notebook).
imerg = xr.open_dataset(f"{indir}{fname}.HDF5", group=grp_name)

# Names of all variables in the group; used later to decide what to drop.
tmp = [*imerg.variables]

In [75]:
# Variables to leave in the file; all others will be removed.
keeps = ['precipitationCal', 'lon', 'lat', 'time', 'precipitationQualityIndex']

# `tmp` (the variables to drop) is recomputed from the dataset rather than
# edited in place, so re-running this cell gives the same result instead of
# failing on a name that was already removed.
missing = [keep for keep in keeps if keep not in imerg.variables]
if missing:
    # Same exception type that list.remove() raised, with a clearer message.
    raise ValueError(f"Variables listed in `keeps` are not in the file: {missing}")
tmp = [name for name in imerg.variables if name not in keeps]

# Per-variable output encoding: float32, zlib compression and a fill value.
# NOTE(review): this encoding is also applied to 'lon', 'lat' and 'time' —
# confirm that float32 and a _FillValue are appropriate for those variables.
encoding_dict = {
    keep: {"dtype": "float32", "zlib": True, '_FillValue': -9999.9}
    for keep in keeps
}

In [76]:
# Remove every variable that is not in `keeps`.
# `drop_vars` is the current API for dropping variables by name; passing
# variable names to `Dataset.drop` is deprecated.
imerg_thinned = imerg.drop_vars(tmp)

In [77]:
# Option: Keep only data inside a bounding box
# Bounding box given as [lon_min, lat_min, lon_max, lat_max].
coverage = [-94.3, 28.9, -88.8, 33.1]

# The mask is driven by `coverage`. Previously `coverage` was defined but
# unused and the filter hard-coded rounded bounds (28, 33, -94, -88), so the
# subset did not match the declared box.
lon_min, lat_min, lon_max, lat_max = coverage
in_box = (
    (imerg_thinned.lat > lat_min) & (imerg_thinned.lat < lat_max)
    & (imerg_thinned.lon > lon_min) & (imerg_thinned.lon < lon_max)
)

# drop=True trims the coordinates that fall entirely outside the mask.
imerg_thinned_small = imerg_thinned.where(in_box, drop=True)

In [78]:
# Write the thinned, spatially subset dataset to the output directory,
# reusing the input file name with "_thinned" appended.
imerg_thinned_small.to_netcdf(
    path=f"{odir}{fname}_thinned.nc",
    engine="netcdf4",
    encoding=encoding_dict,
)

In [79]:
# Close the original IMERG dataset that was opened with xr.open_dataset.
imerg.close()