In [59]:
# this notebook explores working with HDF5 and GOSAT files
# to create a binary, refer to capture/GOSAT L2 CH4 to bin.ipynb

# TANSO-FTS/GOSAT L2 CH4 column amount (SWIR) product

# this cell explores accessing HDF5 attributes.
import h5py
import numpy as np

f = h5py.File("1606280000-01/1606280000-01/SWIR_L2_CH4_CLMN_V02.60_F150915031133042001/F150915031133042001.h5", "r")
print f.name
print f.items()
print f.keys()
print f.values()

f.get("attribute/imageCorner/upperLeftLongitude")[0]
grp = f.get("attribute")
print grp.values()
subgrp = grp["imageCorner"]
print subgrp.items()
def p (x): print x +  " " + x[0]
subgrp.visit(p)
print subgrp["upperLeftLongitude"][0]
print subgrp["upperLeftLatitude"][0]
print subgrp["lowerRightLongitude"][0]
print subgrp["lowerRightLatitude"][0]
data = f.get("Data")
b1 = data["band1Image"]
b1.attrs.items()

In [None]:
#  pretty sure this doesn't work
# see GDAL Translate to GTiff.ipynb for working code to clone back here...
def convert(inputFile):
    f = h5py.File(inputFile, "r")
    ulx = f.get("attribute/imageCorner/upperLeftLongitude")[0]
    uly = f.get("attribute/imageCorner/upperLeftLatitude")[0]
    lrx = f.get("attribute/imageCorner/lowerRightLongitude")[0]
    lry = f.get("attribute/imageCorner/lowerRightLatitude")[0]
  
    proj = '-a_srs "EPSG:4326'
    bounds = '-a_ullr %s %s %s %s' % (ulx, uly, lrx, lry)
    outputFormat = '-of "GTiff"'
    inputString = "'HDF5:" + '"' + inputFile + '"' + "://Data/band1Image'"
    outputFile = inputFile[:-3] + "tiff"
    command = 'gdal_translate ' + proj + ' ' + outputFormat + ' ' + bounds + ' ' + \
          + inputString + ' ' + outputFile
    print command    
    !$command


In [227]:
# extracts lat, lon, time, and CH4 value and saves to a csv

f = h5py.File("c:/Users/markegge/work/ch4/capture/1606280000-01/1606280000-01/GOSATTFTS20150915_02C02SV0260R16062800000.h5", 'r')
numObs = f.get('scanAttribute')['numScan'][0]
lats = f.get('Data').get('geolocation')['latitude'][:]
lons = f.get('Data').get('geolocation')['longitude'][:]
heights = f.get('Data').get('geolocation')['height'][:]
xch4s = f.get('Data').get('mixingRatio')['XCH4'][:]
times = f.get('scanAttribute')['time'][:]
epoch_times = []
for f in times:
    seconds = int((datetime.strptime(f, '%Y-%m-%d %H:%M:%S.%f') - datetime(1970, 1, 1)).total_seconds())
    epoch_times.append(seconds)

vals = np.column_stack((epoch_times, lats, lons, xch4s))
print vals.shape
print vals[0:5]
np.savetxt("test.csv", vals, delimiter=",", fmt=['%10d', '%3.7f', '%3.7f', '%1.7f'])


(194L, 4L)
[[  1.44228670e+09   5.93873863e+01   1.62880127e+02   1.82433510e+00]
 [  1.44228677e+09   5.45988770e+01   1.59531479e+02   1.82656777e+00]
 [  1.44228677e+09   5.45926857e+01   1.59524551e+02   1.82018328e+00]
 [  1.44228678e+09   5.45882874e+01   1.59519211e+02   1.84255111e+00]
 [  1.44228684e+09   5.21493454e+01   1.58092834e+02   1.83938289e+00]]
[[1442286695.0, 59.387386322021484, 162.880126953125, 1.8243350982666016], [1442286768.0, 54.598876953125, 159.53147888183594, 1.8265677690505981], [1442286773.0, 54.59268569946289, 159.52455139160156, 1.820183277130127], [1442286778.0, 54.588287353515625, 159.5192108154297, 1.8425511121749878], [1442286837.0, 52.14934539794922, 158.09283447265625, 1.8393828868865967]]


In [234]:
# extracts lat, lon, time, and CH4 value and saves to a csv and a binary file
# that works with the /html/index.html file

import array
import h5py
import numpy as np
from os import listdir
from os.path import isfile, join
from datetime import datetime

# set directory containing TANSO-FTS/GOSAT L2 CH4 column amount (SWIR) product files
# e.g. "/Users/markegge/ch4/capture/1606280000-01/1606280000-01/GOSATTFTS20150915_02C02SV0260R16062800000.h5"

root_dir = "c:/Users/markegge/work/ch4/capture/1606280000-01/1606280000-01"

def read_file(file_path):
    f = h5py.File(file_path, 'r')
    numObs = f.get('scanAttribute')['numScan'][0]
    lats = f.get('Data').get('geolocation')['latitude'][:]
    lons = f.get('Data').get('geolocation')['longitude'][:]
    heights = f.get('Data').get('geolocation')['height'][:]
    xch4s = f.get('Data').get('mixingRatio')['XCH4'][:]
    times = f.get('scanAttribute')['time'][:]
    epoch_times = []
    for f in times:
        seconds = int((datetime.strptime(f, '%Y-%m-%d %H:%M:%S.%f') - datetime(1970, 1, 1)).total_seconds())
        epoch_times.append(seconds)

    vals = np.column_stack((lons, lats, epoch_times, xch4s))
    return vals

h5files = [f for f in listdir(root_dir) if isfile(join(root_dir, f)) and f[-2:] == 'h5']

data = np.empty([1,4])
first = True
for file in h5files:
    if first:
        data = read_file(join(root_dir, file))
        first = False
    else:
        data = np.vstack((data, read_file(join(root_dir, file))))
    print data.shape

# lons (x), lats (y), epoch_times (unix epoch), xch4s (ppmv)
np.savetxt("values.csv", data, delimiter=",", fmt=['%10d', '%3.7f', '%3.7f', '%1.7f'])  
array.array('f', data.toList()).tofile(open('gosat.bin', 'wb'))

(194L, 4L)
(502L, 4L)
(849L, 4L)
(1215L, 4L)
(1602L, 4L)
(1943L, 4L)
(2331L, 4L)
(2733L, 4L)
(3050L, 4L)
(3343L, 4L)
(3648L, 4L)
(3865L, 4L)
(4130L, 4L)
(4410L, 4L)
(4744L, 4L)


NameError: name 'array' is not defined

In [181]:
# extract out the footprints
f = h5py.File("c:/Users/markegge/work/ch4/capture/1606280000-01/1606280000-01/GOSATTFTS20150915_02C02SV0260R16062800000.h5", 'r')
numObs = f.get('scanAttribute')['numScan'][0]
lats = f.get('Data').get('geolocation')['footPrintLatitude'][:,:]
print lats.shape
lons = f.get('Data').get('geolocation')['footPrintLongitude'][:,:]

from datetime import datetime
def read_file(file_path):
    f = h5py.File(file_path, 'r')
    numObs = f.get('scanAttribute')['numScan'][0]
    lats = f.get('Data').get('geolocation')['latitude'][:]
    lons = f.get('Data').get('geolocation')['longitude'][:]
    heights = f.get('Data').get('geolocation')['height'][:]
    xch4s = f.get('Data').get('mixingRatio')['XCH4'][:]
    times = f.get('scanAttribute')['time'][:]
    epoch_times = []
    for f in times:
        seconds = int((datetime.strptime(f, '%Y-%m-%d %H:%M:%S.%f') - datetime(1970, 1, 1)).total_seconds())
        epoch_times.append(seconds)

    vals = np.column_stack((lons, lats, epoch_times, xch4s))
    return vals

vals = np.concatenate((lats, lons)).reshape(2,194,36)
print vals.shape
print vals[:,0,0]

vals2 = np.dstack((lats,lons))
print vals2.shape

#vals3 = np.split(vals2,[0,0],2)
#print len(vals3)
#print vals3[2]

vals4 = vals2.reshape(194*36,2)
print vals4.shape
print vals4
#print vals

np.savetxt("footprint-test.csv", vals4, delimiter=",", fmt='%3.6f')
#a = np.asarray(vals4)
#a.tofile('footprint-test.csv', sep=',', format='%3.6f')

(194L, 36L)
(2L, 194L, 36L)
[  59.34601212  162.9564209 ]
(194L, 36L, 2L)
(6984L, 2L)
[[  59.34601212  162.9564209 ]
 [  59.34058762  162.94187927]
 [  59.33587265  162.92715454]
 ..., 
 [  23.37578011 -152.12011719]
 [  23.36853027 -152.11378479]
 [  23.36042786 -152.10908508]]
