https://www.earthdatascience.org/courses/use-data-open-source-python/hierarchical-data-formats-hdf/open-MODIS-hdf4-files-python/


In [1]:
# Import packages
import os
import re  # regular expressions
import warnings
import matplotlib.pyplot as plt
import numpy as np
import numpy.ma as ma
import rasterio as rio
from rasterio.plot import plotting_extent
import geopandas as gpd
import earthpy as et
import earthpy.plot as ep
import earthpy.spatial as es
import earthpy.mask as em

warnings.simplefilter('ignore')

# Set working directory
os.chdir(os.path.join(et.io.HOME, 'nyu', 'rbda_project_data'))


In [2]:
# Create a path to the pre-fire MODIS h4 data
vegetation_path = os.path.join("vegetation_indices_11_28",
                               "MOD13A2.A2019113.h08v05.006.2019129235845.hdf")


In [3]:
# View dataset metadata
with rio.open(vegetation_path) as dataset:
    print(dataset)
    hdf4_meta = dataset.meta

# Notice that there are metadata at the highest level of the file
hdf4_meta


<open DatasetReader name='vegetation_indices_11_28/MOD13A2.A2019113.h08v05.006.2019129235845.hdf' mode='r'>


{'driver': 'HDF4',
 'dtype': 'float_',
 'nodata': None,
 'width': 512,
 'height': 512,
 'count': 0,
 'crs': None,
 'transform': Affine(1.0, 0.0, 0.0,
        0.0, 1.0, 0.0)}

In [4]:
# Print all of the subdatasets in the data
with rio.open(vegetation_path) as dataset:
    crs = dataset.read_crs()
    for name in dataset.subdatasets:
        print(name)


HDF4_EOS:EOS_GRID:vegetation_indices_11_28/MOD13A2.A2019113.h08v05.006.2019129235845.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days NDVI
HDF4_EOS:EOS_GRID:vegetation_indices_11_28/MOD13A2.A2019113.h08v05.006.2019129235845.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days relative azimuth angle
HDF4_EOS:EOS_GRID:vegetation_indices_11_28/MOD13A2.A2019113.h08v05.006.2019129235845.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days composite day of the year
HDF4_EOS:EOS_GRID:vegetation_indices_11_28/MOD13A2.A2019113.h08v05.006.2019129235845.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days pixel reliability
HDF4_EOS:EOS_GRID:vegetation_indices_11_28/MOD13A2.A2019113.h08v05.006.2019129235845.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days EVI
HDF4_EOS:EOS_GRID:vegetation_indices_11_28/MOD13A2.A2019113.h08v05.006.2019129235845.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days VI Quality
HDF4_EOS:EOS_GRID:vegetation_indices_11_28/MOD13A2.A2019113.h08v05.006.2019129235845.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days red reflectance
HDF4_EOS:EOS_GR

In [5]:
# Create list to append arrays (of all type of data)
vegetation_data = []
vegetation_names = []

# Open the precipitation HDF5 file 
with rio.open(vegetation_path) as dataset:
    
    # loop through each subdataset in HDF5 file
    for name in dataset.subdatasets:
        
        # Open the subdataset 
        with rio.open(name) as subdataset:
            modis_meta = subdataset.profile
            
            # Read data as a  2 dimensional array and append to list
            vegetation_data.append(subdataset.read(1))
            vegetation_names.append(name);
#             np.savetxt(str(name)[-10:-1] + ".csv", subdataset.read(1), delimiter=",")


vegetation_data_stacked = np.stack(vegetation_data)

In [6]:
# vegetation_data_stacked

In [7]:
# ep.plot_bands(vegetation_data_stacked,
#               scale=False)
# plt.show()

In [8]:
# Plot MODIS RGB
# ep.plot_rgb(vegetation_data_stacked,
#             rgb=[0, 3, 2],
#             title='RGB Image of MODIS Data',
#             stretch=True,
#             figsize=(7, 7))

# plt.show()


# Process multiple files in a directory
https://realpython.com/working-with-files-in-python/

## Store name of files in file_names list

In [9]:
entries = os.scandir('./')

In [10]:
entries

<posix.ScandirIterator at 0x7fbf11089c70>

In [11]:
file_names  = []
with os.scandir('../rbda_project_data/vegetation_indices_all_time_testing/') as entries:
    for entry in entries:
        file_names.append(entry.name)
#         print(entry.name)

In [12]:
file_names

['MOD13A2.A2000113.h08v05.006.2015137035140.hdf',
 'MOD13A2.A2000065.h08v05.006.2015136022932.hdf',
 'MOD13A2.A2000161.h08v05.006.2015137044400.hdf',
 'MOD13A2.A2000049.h08v05.006.2015136104428.hdf',
 'MOD13A2.A2000145.h08v05.006.2015137095459.hdf',
 'MOD13A2.A2000193.h08v05.006.2015138073343.hdf',
 'MOD13A2.A2000145.h08v04.006.2015137094710.hdf',
 'MOD13A2.A2000177.h08v04.006.2015138073355.hdf',
 'MOD13A2.A2000113.h08v04.006.2015137032541.hdf',
 'MOD13A2.A2000177.h08v05.006.2015138071538.hdf',
 'MOD13A2.A2000129.h08v04.006.2015137052605.hdf',
 'MOD13A2.A2000049.h08v04.006.2015136104431.hdf',
 'MOD13A2.A2000081.h08v04.006.2015136041919.hdf',
 'MOD13A2.A2000161.h08v04.006.2015137045247.hdf',
 'MOD13A2.A2000097.h08v05.006.2015136040245.hdf',
 'MOD13A2.A2000081.h08v05.006.2015136041359.hdf',
 'MOD13A2.A2000097.h08v04.006.2015136041351.hdf',
 'MOD13A2.A2000129.h08v05.006.2015137052212.hdf',
 'MOD13A2.A2000193.h08v04.006.2015138073331.hdf',
 'MOD13A2.A2000065.h08v04.006.2015136023134.hdf']

## Process multiple HDF files

In [13]:
os.scandir('./')

<posix.ScandirIterator at 0x7fbf11089730>

In [14]:
# with os.scandir('./vegetation_indices_all_time_testing/') as entries:
#     for entry in entries:
#         file_names.append(entry.name)
#         print(entry.name)

In [15]:
paths_to_files = []
for file in file_names: 
    paths_to_files.append(os.path.join("vegetation_indices_all_time_testing",
                               str(file)))

In [16]:
paths_to_files

['vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.2015137035140.hdf',
 'vegetation_indices_all_time_testing/MOD13A2.A2000065.h08v05.006.2015136022932.hdf',
 'vegetation_indices_all_time_testing/MOD13A2.A2000161.h08v05.006.2015137044400.hdf',
 'vegetation_indices_all_time_testing/MOD13A2.A2000049.h08v05.006.2015136104428.hdf',
 'vegetation_indices_all_time_testing/MOD13A2.A2000145.h08v05.006.2015137095459.hdf',
 'vegetation_indices_all_time_testing/MOD13A2.A2000193.h08v05.006.2015138073343.hdf',
 'vegetation_indices_all_time_testing/MOD13A2.A2000145.h08v04.006.2015137094710.hdf',
 'vegetation_indices_all_time_testing/MOD13A2.A2000177.h08v04.006.2015138073355.hdf',
 'vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v04.006.2015137032541.hdf',
 'vegetation_indices_all_time_testing/MOD13A2.A2000177.h08v05.006.2015138071538.hdf',
 'vegetation_indices_all_time_testing/MOD13A2.A2000129.h08v04.006.2015137052605.hdf',
 'vegetation_indices_all_time_testing/MOD13A2.A2000049

In [17]:
# Print all of the subdatasets in the data
for path_to_file in paths_to_files:
    with rio.open(path_to_file) as dataset:
        crs = dataset.read_crs()
        for name in dataset.subdatasets:
            print(name)

HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.2015137035140.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days NDVI
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.2015137035140.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days relative azimuth angle
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.2015137035140.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days composite day of the year
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.2015137035140.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days pixel reliability
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.2015137035140.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days EVI
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.2015137035140.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days VI Quality
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.201513703

HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000145.h08v04.006.2015137094710.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days NDVI
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000145.h08v04.006.2015137094710.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days relative azimuth angle
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000145.h08v04.006.2015137094710.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days composite day of the year
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000145.h08v04.006.2015137094710.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days pixel reliability
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000145.h08v04.006.2015137094710.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days EVI
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000145.h08v04.006.2015137094710.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days VI Quality
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000145.h08v04.006.201513709

HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000081.h08v04.006.2015136041919.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days NDVI
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000081.h08v04.006.2015136041919.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days relative azimuth angle
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000081.h08v04.006.2015136041919.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days composite day of the year
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000081.h08v04.006.2015136041919.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days pixel reliability
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000081.h08v04.006.2015136041919.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days EVI
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000081.h08v04.006.2015136041919.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days VI Quality
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000081.h08v04.006.201513604

HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000193.h08v04.006.2015138073331.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days NDVI
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000193.h08v04.006.2015138073331.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days relative azimuth angle
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000193.h08v04.006.2015138073331.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days composite day of the year
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000193.h08v04.006.2015138073331.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days pixel reliability
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000193.h08v04.006.2015138073331.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days EVI
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000193.h08v04.006.2015138073331.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days VI Quality
HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000193.h08v04.006.201513807

### Used paths_to_files[0] to process the first HDF file of dataset

In [18]:
# Create list to append arrays (of all type of data)
vegetation_data_test = []
vegetation_names_test = []

# Open the precipitation HDF5 file 
with rio.open(paths_to_files[0]) as dataset:
    
    # loop through each subdataset in HDF5 file
    for name in dataset.subdatasets:
        
        # Open the subdataset 
        with rio.open(name) as subdataset:
            modis_meta = subdataset.profile
            
            # Read data as a  2 dimensional array and append to list
            vegetation_data_test.append(subdataset.read(1))
            vegetation_names_test.append(name);
#             np.savetxt(str(name)[-10:-1] + ".csv", subdataset.read(1), delimiter=",")


# vegetation_data_stacked = np.stack(vegetation_data)

In [19]:
## all NVDI test 
# all_nvdi = []

# for path_to_file in paths_to_files:
    
#     with rio.open(path_to_file) as dataset:
        
#         for name in dataset.subdatasets:
#             if re.search("NDVI.\_1$", name):
            
#                 with rio.open(name) as subdataset:
#                     modis_meta = subdataset.profile
                    
#                     all_nvdi.append(subdataset.read(1))

In [20]:
vegetation_data_test

[array([[-3000, -3000, -3000, ...,  2469,  2233,  1909],
        [-3000, -3000, -3000, ...,  1858,  1862,  1567],
        [-3000, -3000, -3000, ...,  2110,  6304,  1412],
        ...,
        [ 2444,  2580,  2598, ...,  1458,  1565,  1758],
        [ 2486,  2511,  2453, ...,  2077,  1866,  1691],
        [ 2427,  2371,  2373, ...,  1686,  1764,  1728]], dtype=int16),
 array([[-4000, -4000, -4000, ..., -4844, -5385, -5386],
        [-4000, -4000, -4000, ..., -5095, -5095, -5095],
        [-4000, -4000, -4000, ..., -5127, -4877, -5096],
        ...,
        [13379, 13353, 13351, ..., 13099, 13097, 13096],
        [13378, 13376, 13374, ..., 13119, 13118, 13094],
        [13396, 13386, 13384, ..., 13140, 13127, 13126]], dtype=int16),
 array([[ -1,  -1,  -1, ..., 124, 115, 115],
        [ -1,  -1,  -1, ..., 115, 115, 115],
        [ -1,  -1,  -1, ..., 115, 124, 115],
        ...,
        [115, 115, 115, ..., 114, 114, 114],
        [115, 115, 115, ..., 114, 114, 114],
        [115, 115, 115

In [21]:
vegetation_names_test

['HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.2015137035140.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days NDVI',
 'HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.2015137035140.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days relative azimuth angle',
 'HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.2015137035140.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days composite day of the year',
 'HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.2015137035140.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days pixel reliability',
 'HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.2015137035140.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days EVI',
 'HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.2015137035140.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days VI Quality',
 'HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A20

### Extract only NDVI subdataset from first hdf file

In [60]:
# Create list to append arrays (of all type of data)
ndvi_and_quality = []
ndvi_and_quality_names = []

# Open the precipitation HDF5 file 
with rio.open(paths_to_files[0]) as dataset:
    
    # loop through each subdataset in HDF5 file
    for name in dataset.subdatasets:
        
        # Use regular expression to identify if subdataset has b0 in the name (the bands)
        if re.search("NDVI$", name):

            # Open the subdataset 
            with rio.open(name) as subdataset:
                modis_meta = subdataset.profile

                # Read data as a  2 dimensional array and append to list
                ndvi_and_quality.append(subdataset.read(1))
                ndvi_and_quality_names.append(name);
    #             np.savetxt(str(name)[-10:-1] + ".csv", subdataset.read(1), delimiter=",")
                np.savetxt(str(name)[-10:-1] + ".csv", subdataset.read(1), delimiter=",")
        
# vegetation_data_stacked = np.stack(vegetation_data)

In [61]:
ndvi_and_quality

[array([[-3000, -3000, -3000, ...,  2469,  2233,  1909],
        [-3000, -3000, -3000, ...,  1858,  1862,  1567],
        [-3000, -3000, -3000, ...,  2110,  6304,  1412],
        ...,
        [ 2444,  2580,  2598, ...,  1458,  1565,  1758],
        [ 2486,  2511,  2453, ...,  2077,  1866,  1691],
        [ 2427,  2371,  2373, ...,  1686,  1764,  1728]], dtype=int16)]

In [62]:
ndvi_and_quality_names

['HDF4_EOS:EOS_GRID:vegetation_indices_all_time_testing/MOD13A2.A2000113.h08v05.006.2015137035140.hdf:MODIS_Grid_16DAY_1km_VI:1 km 16 days NDVI']

In [63]:
ndvi_and_quality_names[0][54:95] + "_ndvi" + ".csv"

'MOD13A2.A2000113.h08v05.006.2015137035140_ndvi.csv'