In [16]:
# Load relevant libraries
import numpy as np
import h5py

In [17]:
# Create HDF5 File object to extract data from a sample atmospheric dataset pulled from NASA Earthdata
filename = "OMI.L2.TraceGasOMSO2Strip200kmAlongCloudSat.2011.05.26.183648Z.v003.he5"
fi = h5py.File(filename, 'r')

# Display set of dataset dictionary-like keys
set_list = list(fi.keys())
print(set_list)

['HDFEOS', 'HDFEOS INFORMATION']


In [18]:
# Find available table keys within HDFEOS dataset
dataset1 = fi['HDFEOS']
print(dataset1.keys())

<KeysViewHDF5 ['ADDITIONAL', 'SWATHS']>


In [19]:
# Find available table keys within HDFEOS\SWATHS dataset
print(dataset1['SWATHS'].keys())

<KeysViewHDF5 ['OMI Total Column Amount SO2']>


In [20]:
# Find available table keys within HDFEOS\SWATHS\OMI Total Column Amount SO2 dataset on atmospheric sulfur
print(dataset1['SWATHS']['OMI Total Column Amount SO2'].keys())

<KeysViewHDF5 ['Data Fields', 'Geolocation Fields']>


In [21]:
# Display available Data Fields about atmospheric sulfur
print(dataset1['SWATHS']['OMI Total Column Amount SO2']['Data Fields'].keys())

<KeysViewHDF5 ['AlgorithmFlag_PBL', 'AlgorithmFlag_STL', 'AlgorithmFlag_TRL', 'AlgorithmFlag_TRM', 'ChiSquareLfit', 'CloudPressure', 'ColumnAmountO3', 'ColumnAmountSO2_PBL', 'ColumnAmountSO2_PBLbrd', 'ColumnAmountSO2_STL', 'ColumnAmountSO2_STLbrd', 'ColumnAmountSO2_TRL', 'ColumnAmountSO2_TRM', 'ColumnAmountSO2_TRMbrd', 'LayerEfficiency', 'QualityFlags_PBL', 'QualityFlags_STL', 'QualityFlags_TRL', 'QualityFlags_TRM', 'RadiativeCloudFraction', 'Reflectivity331', 'Residual', 'ResidualAdjustment', 'Rlambda1st', 'Rlambda2nd', 'SO2indexP1', 'SO2indexP2', 'SO2indexP3', 'TerrainPressure', 'UVAerosolIndex', 'Wavelength', 'dN_dSO2_STL', 'dN_dSO2_TRL', 'dN_dSO2_TRM', 'deltaO3', 'deltaRefl', 'fc']>


In [22]:
# Display available Geolocation Fields about atmospheric sulfur
print(dataset1['SWATHS']['OMI Total Column Amount SO2']['Geolocation Fields'].keys())

<KeysViewHDF5 ['GroundPixelQualityFlags', 'Latitude', 'Longitude', 'RelativeAzimuthAngle', 'SecondsInDay', 'SolarAzimuthAngle', 'SolarZenithAngle', 'SpacecraftAltitude', 'SpacecraftLatitude', 'SpacecraftLongitude', 'SubsetStartFrameIndices', 'TerrainHeight', 'Time', 'ViewingAzimuthAngle', 'ViewingZenithAngle']>


In [23]:
# Find the dimensions of sulfur dataset
print(dataset1['SWATHS']['OMI Total Column Amount SO2']['Geolocation Fields']['Latitude'].shape)

(1643, 9)


In [24]:
# Use index slicing to obtain data points on latitude/longitude coordinates in Numpy Array form
lat1 = dataset1['SWATHS']['OMI Total Column Amount SO2']['Geolocation Fields']['Latitude'][:1643]
long1 = dataset1['SWATHS']['OMI Total Column Amount SO2']['Geolocation Fields']['Longitude'][:1643]

# Find the horizontal average coordinates in the sulfur observations Numpy Array
lat2 = np.mean(lat1, axis=1)
long2 = np.mean(long1, axis=1)

# Find the indexes of coordinate Numpy Arrays that match the location of the Puyehue volcano in Chile (40 S, 72 W)
# And confirm that there is a range of coinciding matching indexes for latitude and longtitude
lat_matches = np.where(np.logical_and(lat2<=-39, lat2>=-42))
long_matches = np.where(np.logical_and(long2<=-70, long2>=-74))
print(np.intersect1d(lat_matches, long_matches))

[249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
 267 268 269 270 271 272 273]


In [25]:
# Create sample set of sulfur measurements using ColumnAmountSO2_TRL variable and the matching coordinate indexes
sulfur1 = np.mean(dataset1['SWATHS']['OMI Total Column Amount SO2']['Data Fields']['ColumnAmountSO2_TRL'][:1643],
                  axis=1)[249:273]
print(sulfur1)

[-1.26765060e+30 -1.12680054e+30 -1.26765060e+30 -1.26765060e+30
 -1.26765060e+30 -1.26765060e+30 -1.26765060e+30 -1.26765060e+30
 -1.26765060e+30 -1.26765060e+30 -1.26765060e+30 -1.26765060e+30
 -1.26765060e+30 -1.26765060e+30 -1.26765060e+30 -1.26765060e+30
 -1.26765060e+30 -1.26765060e+30 -1.26765060e+30 -1.26765060e+30
 -1.26765060e+30 -1.26765060e+30 -1.26765060e+30 -1.26765060e+30]
