In [1]:
import os
import time
import numpy as np
from pyhdf.SD import SD, SDC
import h5py

In [2]:
# Limits of the selected bounding box, in degrees
box_name = 'Conus'
solat, nolat = 22, 50       # south / north latitude bounds
welon, ealon = -130, -60    # west / east longitude bounds

In [3]:
# Input and output folders, both located under ../data
_data_root = os.path.join('..', 'data')
datadir = os.path.join(_data_root, 'tmpa_raw_data')    # folder scanned for the raw .HDF files
outdir = os.path.join(_data_root, 'tmpa_conus_data')   # folder receiving the extracted HDF5 file

In [4]:
# HDF5 chunk shape of the output array (the name is misspelled, but it is
# kept because the extraction cell refers to it)
chnunkshape = (1, 1, 1000)

# Reference instant used to report the total execution time
start_time = time.time()


def _timestamp_key(name):
    """Sort key for a file name: characters 5-12 (date) followed by 14-15 (hour)."""
    return name[5:13] + name[14:16]


# Every '.HDF' entry of the input folder, in chronological order
filenames = [entry for entry in os.listdir(datadir) if entry.endswith('.HDF')]
filenames.sort(key=_timestamp_key)
# For a quick trial run the list can be truncated here, e.g. filenames[:1000]

numfiles = len(filenames)
print('Number of files: ', numfiles)

Number of files:  2433


In [5]:
# Coordinate axes of the full grid, spaced 0.25 apart
lon = np.arange(-179.875, 179.876, 0.25)  # West to East
lat = np.arange(-49.875, 49.876, 0.25)    # South to North

# Number of grid points along each axis
nlat, nlon = lat.size, lon.size

In [6]:
# Boolean masks marking the grid coordinates that fall inside the box
# (both bounds are inclusive)
bblat = (lat >= solat) & (lat <= nolat)
bblon = (lon >= welon) & (lon <= ealon)

# Coordinates retained by the box
boxlon = lon[bblon]
boxlat = lat[bblat]

# Integer positions of the retained coordinates along each full-grid axis
boxy = np.arange(nlat, dtype=int)[bblat]
boxx = np.arange(nlon, dtype=int)[bblon]

# Number of retained grid points along each axis
nblon, nblat = boxx.size, boxy.size

In [7]:
def _read_box_accumulation(path, x_window, y_window):
    """Read one input file and return its 3-hour accumulation over the box.

    Parameters
    ----------
    path : str
        Path of the HDF4 file to open.
    x_window, y_window : slice
        Index windows along the first and second axis of the
        'precipitation' field (longitude and latitude respectively,
        per the 'rows'/'cols' attributes written to the output).

    Returns
    -------
    The selected window of 'precipitation' multiplied by 3. The stored
    values are treated as rates and converted to accumulations.

    NOTE(review): any missing-data fill value present in the source field
    is scaled by 3 as well -- confirm how downstream code handles it.
    """
    hdf = SD(path, SDC.READ)
    try:
        sds = hdf.select('precipitation')
        try:
            rates = sds[x_window, y_window]  # read only the box, not the full grid
        finally:
            sds.endaccess()
    finally:
        # Release the HDF4 handle explicitly instead of relying on garbage collection
        hdf.end()
    return rates * 3


# Fail early, BEFORE the output is opened in 'w' mode: that mode truncates any
# existing file, so a run without inputs would otherwise wipe a previous result.
if numfiles == 0:
    raise FileNotFoundError("no '.HDF' files found in %s" % datadir)
if nblon == 0 or nblat == 0:
    raise ValueError('the selected bounding box contains no grid points')

os.makedirs(outdir, exist_ok=True)  # h5py does not create missing folders

# Contiguous index windows spanning the box along each axis of the source field
lon_window = slice(int(boxx[0]), int(boxx[-1]) + 1)
lat_window = slice(int(boxy[0]), int(boxy[-1]) + 1)

full_shape = (nblon, nblat, numfiles)
# h5py rejects chunks larger than the dataset in any dimension (e.g. a trial
# run with fewer files than the chunk length), so clamp to the dataset shape.
chunk_shape = tuple(min(c, s) for c, s in zip(chnunkshape, full_shape))

# Upper bound for the in-memory write buffer used below
MAX_BUFFER_BYTES = 256 * 1024 ** 2

with h5py.File(os.path.join(outdir, 'data_tmpa_3h.hdf5'), 'w') as f:
    # ---- datasets -----------------------------------------------------
    dset = f.create_dataset('prcp', full_shape, chunks=chunk_shape, dtype='f')
    print(dset.shape)

    dset_lat = f.create_dataset('lat', (nblat,), dtype='f')
    dset_lat[:] = boxlat
    dset_lon = f.create_dataset('lon', (nblon,), dtype='f')
    dset_lon[:] = boxlon

    # Date and hour are parsed from fixed positions of each file name
    dset_dates = f.create_dataset('dates', (numfiles,), dtype='int32')
    dset_dates[:] = [int(name[5:13]) for name in filenames]
    dset_hours = f.create_dataset('hours', (numfiles,), dtype='int32')
    dset_hours[:] = [int(name[14:16]) for name in filenames]

    # ---- metadata -----------------------------------------------------
    dset.attrs['north_bound'] = nolat
    dset.attrs['south_bound'] = solat
    dset.attrs['west_bound'] = welon
    dset.attrs['east_bound'] = ealon
    dset.attrs['start_date'] = filenames[0][5:13]
    dset.attrs['start_time'] = filenames[0][14:16]
    dset.attrs['end_date'] = filenames[-1][5:13]
    dset.attrs['end_time'] = filenames[-1][14:16]
    dset.attrs['variable'] = 'PRCP 3-hr ACCUMULATION [mm]'
    dset.attrs['time_res'] = '3h'
    dset.attrs['space_res'] = '0.25deg'
    # Derived from the chunking actually in use ('1x1x1000' unless clamped above)
    dset.attrs['chunks_shape'] = 'x'.join(str(c) for c in dset.chunks)
    dset.attrs['first_corner'] = 'south_west as in original dataset'
    dset.attrs['rows'] = 'longitude (as in the original TMPA dataset)'
    dset.attrs['cols'] = 'latitude (as in the original TMPA datset)'

    # ---- precipitation ------------------------------------------------
    # Chunks are long along the time axis, so writing a single time step
    # touches every chunk of the dataset. Collect several time steps in
    # memory and write them together, so each chunk is visited far less often.
    bytes_per_step = nblon * nblat * dset.dtype.itemsize
    steps_per_flush = max(1, min(chunk_shape[2], MAX_BUFFER_BYTES // bytes_per_step))
    buffer = np.empty((nblon, nblat, steps_per_flush), dtype=dset.dtype)

    for block_start in range(0, numfiles, steps_per_flush):
        block_names = filenames[block_start:block_start + steps_per_flush]
        for offset, name in enumerate(block_names):
            print(block_start + offset, name)
            buffer[:, :, offset] = _read_box_accumulation(
                os.path.join(datadir, name), lon_window, lat_window)
        block_stop = block_start + len(block_names)
        # The last block may be shorter than the buffer
        dset[:, :, block_start:block_stop] = buffer[:, :, :len(block_names)]

# TIME of EXECUTION of the script
# (start_time is set in the file-listing cell, so this also counts everything
# that happened between that cell and the end of this one)
execution_time = time.time() - start_time
print('extract_bounding_box:')
print("---execution time was %s minutes ---" % (execution_time/60))

0 3B42.20080101.00.7A.HDF
(280, 112, 2433)
1 3B42.20180101.00.7.HDF
2 3B42.20180101.03.7.HDF
3 3B42.20180101.06.7.HDF
4 3B42.20180101.09.7.HDF
5 3B42.20180101.12.7.HDF
6 3B42.20180101.15.7.HDF
7 3B42.20180101.18.7.HDF
8 3B42.20180101.21.7.HDF
9 3B42.20180102.00.7.HDF
10 3B42.20180102.03.7.HDF
11 3B42.20180102.06.7.HDF
12 3B42.20180102.09.7.HDF
13 3B42.20180102.12.7.HDF
14 3B42.20180102.15.7.HDF
15 3B42.20180102.18.7.HDF
16 3B42.20180102.21.7.HDF
17 3B42.20180103.00.7.HDF
18 3B42.20180103.03.7.HDF
19 3B42.20180103.06.7.HDF
20 3B42.20180103.09.7.HDF
21 3B42.20180103.12.7.HDF
22 3B42.20180103.15.7.HDF
23 3B42.20180103.18.7.HDF
24 3B42.20180103.21.7.HDF
25 3B42.20180104.00.7.HDF
26 3B42.20180104.03.7.HDF
27 3B42.20180104.06.7.HDF
28 3B42.20180104.09.7.HDF
29 3B42.20180104.12.7.HDF
30 3B42.20180104.15.7.HDF
31 3B42.20180104.18.7.HDF
32 3B42.20180104.21.7.HDF
33 3B42.20180105.00.7.HDF
34 3B42.20180105.03.7.HDF
35 3B42.20180105.06.7.HDF
36 3B42.20180105.09.7.HDF
37 3B42.20180105.12.7.HDF
38 3