In [1]:
import os

import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import numpy as np
import datetime
from matplotlib.ticker import FormatStrFormatter
from matplotlib.ticker import ScalarFormatter

import h5py
import datetime as dt
from scipy.interpolate import griddata
import xarray as xr

In [2]:
# Input granule; the name is reused further down to derive the acquisition
# date and the name of the NetCDF output file.
FILE_NAME = 'MLS-Aura_L2GP-H2O_v04-23-c01_2019d222.he5'

# Read-only HDF5 handle shared by the following cells.
# NOTE(review): the handle is never closed in the visible cells -- call
# file.close() once nothing reads from it any more.
file = h5py.File(FILE_NAME, mode='r')

In [3]:
# Sentinel used by the file for missing values; matching entries become NaN.
FILL_VALUE = -999.99
# Screening thresholds applied below.
# NOTE(review): presumably taken from the MLS v4.2 data quality document for
# H2O -- confirm against the product documentation.
QUALITY_REJECT_AT_OR_BELOW = 1.45
CONVERGENCE_REJECT_AT_OR_ABOVE = 2.0


def _read_swath(h5file, swath_name):
    """Read one swath from ``HDFEOS/SWATHS/<swath_name>`` into memory.

    Returns a dict with the product field (named like the swath), its
    ``<swath_name>Precision`` field, the ``Status``, ``Quality`` and
    ``Convergence`` fields, and the ``Latitude``, ``Longitude`` and
    ``Pressure`` geolocation fields, all as NumPy arrays.
    """
    swath = h5file['HDFEOS']['SWATHS'][swath_name]
    data_fields = swath['Data Fields']
    geo_fields = swath['Geolocation Fields']
    return {
        'value': data_fields[swath_name][:, :],
        'precision': data_fields[swath_name + 'Precision'][:],
        'status': data_fields['Status'][:],
        'quality': data_fields['Quality'][:],
        'convergence': data_fields['Convergence'][:],
        'lat': geo_fields['Latitude'][:],
        'lon': geo_fields['Longitude'][:],
        'pres': geo_fields['Pressure'][:],
    }


def _screen_in_place(swath):
    """Set rejected entries of ``swath['value']`` to NaN (modifies the array).

    A value is rejected when it, or the latitude/longitude it is indexed by,
    equals the fill value; when its precision is negative; when the quality is
    <= 1.45; when the convergence is >= 2.0; or when the status is odd.
    Masks that are shorter than ``value`` index its first axis only, so they
    blank whole rows.
    """
    value = swath['value']
    value[value == FILL_VALUE] = np.nan
    value[swath['lat'] == FILL_VALUE] = np.nan
    value[swath['lon'] == FILL_VALUE] = np.nan
    value[swath['precision'] < 0] = np.nan
    value[swath['quality'] <= QUALITY_REJECT_AT_OR_BELOW] = np.nan
    value[swath['convergence'] >= CONVERGENCE_REJECT_AT_OR_ABOVE] = np.nan
    value[swath['status'] % 2 != 0] = np.nan


_h2o = _read_swath(file, 'H2O')
_h2o_apriori = _read_swath(file, 'H2O-APriori')
_screen_in_place(_h2o)
_screen_in_place(_h2o_apriori)

# Retrieved product and its diagnostics.
data_h2o = _h2o['value']
data_h2o_precision = _h2o['precision']
data_h2o_status = _h2o['status']
data_quality = _h2o['quality']
data_convergence = _h2o['convergence']
data_lat = _h2o['lat']
data_lon = _h2o['lon']
data_pres = _h2o['pres']

# A priori product; the trailing underscore marks the a priori counterpart of
# a name that is shared with the retrieved product.
data_h2o_apriori = _h2o_apriori['value']
data_h2o_apriori_precision = _h2o_apriori['precision']
data_h2o_apriori_status = _h2o_apriori['status']
data_quality_ = _h2o_apriori['quality']
data_convergence_ = _h2o_apriori['convergence']
data_lat_ = _h2o_apriori['lat']
data_lon_ = _h2o_apriori['lon']
data_pres_ = _h2o_apriori['pres']

# The date is taken from the file name, not from the 'Time' field.
data_time = FILE_NAME

# The last '_'-separated token of the file stem is '<year>d<day-of-year>'
# (e.g. '2019d222'). Parsing that token instead of fixed character offsets
# keeps this working when FILE_NAME carries a directory prefix or a product /
# version string of a different length.
_date_token = os.path.splitext(os.path.basename(data_time))[0].rsplit('_', 1)[-1]
dt_ = datetime.datetime.strptime(_date_token, '%Yd%j')

In [4]:
# Regular target grid with 2.5-degree spacing. Axis 0 of grid_x / grid_y runs
# over longitude (-180 ... 177.5) and axis 1 over latitude (-82.5 ... 82.5).
grid_x, grid_y = np.mgrid[-180:180:2.5, -82.5:84.5:2.5]

# (n_profiles, 2) float64 array of (longitude, latitude) sample locations.
points = np.column_stack((data_lon, data_lat)).astype(np.float64)

n_levels = data_h2o.shape[1]
n_lon, n_lat = grid_x.shape

# Output layout is (time, levels, latitude, longitude) with a single time step.
out_data_h2o = np.zeros((1, n_levels, n_lat, n_lon))
out_data_h2o_a = np.zeros((1, n_levels, n_lat, n_lon))

# Which profile is nearest to a grid cell depends only on `points`, not on the
# pressure level or on the field being gridded. Interpolating the profile
# *index* once therefore replaces one griddata call per level and per field.
# Indices are passed as floats and cast back, which is exact for any
# realistic number of profiles.
nearest_profile = griddata(
    points,
    np.arange(points.shape[0], dtype=np.float64),
    (grid_x, grid_y),
    method='nearest',
).astype(np.intp)

# Fancy indexing yields (n_lon, n_lat, n_levels); reorder to
# (n_levels, n_lat, n_lon). NaNs in the source profiles are carried over
# unchanged, exactly as with per-level nearest-neighbour gridding.
# Fill values were already replaced by NaN when the fields were screened.
out_data_h2o[0] = data_h2o[nearest_profile].transpose(2, 1, 0)
# The a priori field is gridded on the retrieved product's locations and
# limited to the retrieved product's number of levels.
out_data_h2o_a[0] = data_h2o_apriori[:, :n_levels][nearest_profile].transpose(2, 1, 0)

In [15]:
# Both gridded fields share the same four dimensions.
_dims = ('time', 'levels', 'latitude', 'longitude')

ds = xr.Dataset(
    data_vars={
        'h2o': (_dims, out_data_h2o),
        'h2o_a': (_dims, out_data_h2o_a),
    },
    coords={
        'time': np.atleast_1d(dt_),
        'levels': data_pres,
        # grid_y varies along axis 1 and grid_x along axis 0, so these slices
        # are the 1-D latitude and longitude axes of the grid.
        'latitude': grid_y[0, :],
        'longitude': grid_x[:, 0],
    },
)

# Descriptive metadata attached to each variable and coordinate.
_attrs_by_name = {
    'h2o': {'long_name': 'Water Vapor', 'units': 'ppmv'},
    'h2o_a': {'long_name': 'Water Vapor apriori', 'units': 'ppmv'},
    'latitude': {'long_name': 'latitude', 'units': 'degree_north'},
    'longitude': {'long_name': 'longitude', 'units': 'degree_east'},
    'levels': {'long_name': 'pressure', 'units': 'hPa'},
}
for _name, _attrs in _attrs_by_name.items():
    ds[_name].attrs.update(_attrs)

In [20]:
# Write the gridded dataset next to the input, swapping the input's extension
# for '.nc'. splitext is used instead of slicing off a fixed number of
# characters so that extensions of any length ('.he5', '.h5', ...) work.
ds.to_netcdf(os.path.splitext(FILE_NAME)[0] + '.nc')