https://www.earthdatascience.org/courses/use-data-open-source-python/hierarchical-data-formats-hdf/open-MODIS-hdf4-files-python/


In [None]:
# Import packages
import os
import re  # regular expressions
import warnings
import matplotlib.pyplot as plt
import numpy as np
import numpy.ma as ma
import rasterio as rio
from rasterio.plot import plotting_extent
import geopandas as gpd
import earthpy as et
import earthpy.plot as ep
import earthpy.spatial as es
import earthpy.mask as em

# Silence every warning for the rest of the session
warnings.simplefilter('ignore')

# Work from the project data folder located under et.io.HOME
project_data_dir = os.path.join(et.io.HOME, 'nyu', 'rbda_project_data')
os.chdir(project_data_dir)


In [None]:
# Relative location of the MODIS HDF4 vegetation-indices file:
# <directory>/<file name>, joined with the platform's path separator
vegetation_dir = "vegetation_indices_11_28"
vegetation_file = "MOD13A2.A2019113.h08v05.006.2019129235845.hdf"
vegetation_path = os.path.join(vegetation_dir, vegetation_file)


In [None]:
# Open the HDF4 file, print the dataset object and keep its metadata
with rio.open(vegetation_path) as hdf_file:
    print(hdf_file)
    hdf4_meta = hdf_file.meta

# Metadata are available at the top level of the file
hdf4_meta


In [None]:
# List the name of every subdataset contained in the file
with rio.open(vegetation_path) as hdf_file:
    crs = hdf_file.read_crs()
    subdataset_names = hdf_file.subdatasets
    for subdataset_name in subdataset_names:
        print(subdataset_name)


In [None]:
# Gather every subdataset of the vegetation HDF file: the band arrays go into
# vegetation_data and the matching subdataset names into vegetation_names
vegetation_data = []
vegetation_names = []

with rio.open(vegetation_path) as hdf_file:
    for subdataset_name in hdf_file.subdatasets:
        with rio.open(subdataset_name) as layer:
            # modis_meta ends up holding the profile of the last subdataset
            modis_meta = layer.profile

            # Band 1 of each subdataset, kept in file order
            band = layer.read(1)
            vegetation_data.append(band)
            vegetation_names.append(subdataset_name)

# One array with a leading axis that indexes the subdatasets
vegetation_data_stacked = np.stack(vegetation_data)

In [None]:
# vegetation_data_stacked

In [None]:
# ep.plot_bands(vegetation_data_stacked,
#               scale=False)
# plt.show()

In [None]:
# Plot MODIS RGB
# ep.plot_rgb(vegetation_data_stacked,
#             rgb=[0, 3, 2],
#             title='RGB Image of MODIS Data',
#             stretch=True,
#             figsize=(7, 7))

# plt.show()


# Process multiple files in a directory
https://realpython.com/working-with-files-in-python/

## Store the names of the files in the `file_names` list

In [None]:
# Snapshot the entries of the current working directory.
# os.scandir() returns an iterator that holds an open directory handle, so it
# is consumed inside a `with` block and stored as a list instead of being
# left open.
with os.scandir('./') as current_dir:
    entries = list(current_dir)

In [None]:
# Show the value currently bound to `entries`
entries

In [None]:
# Collect the names of the HDF files in the testing directory.
# Only regular files ending in ".hdf" are kept, so sub-directories and
# sidecar files are not handed to rasterio later on. The names are sorted
# because os.scandir() yields entries in arbitrary order, which would make
# "the first file" differ between runs.
with os.scandir('../rbda_project_data/vegetation_indices_all_time_testing/') as entries:
    file_names = sorted(
        entry.name
        for entry in entries
        if entry.is_file() and entry.name.lower().endswith('.hdf')
    )

In [None]:
# Show the collected file names
file_names

## Process multiple HDF files

In [None]:
# List the names in the current working directory; the `with` block closes
# the directory iterator that a bare os.scandir() call would leave open.
with os.scandir('./') as current_dir:
    current_dir_names = [entry.name for entry in current_dir]
current_dir_names

In [None]:
# with os.scandir('./vegetation_indices_all_time_testing/') as entries:
#     for entry in entries:
#         file_names.append(entry.name)
#         print(entry.name)

In [None]:
# Prefix each file name with the testing directory to get a relative path
paths_to_files = [
    os.path.join("vegetation_indices_all_time_testing", str(file_name))
    for file_name in file_names
]

In [None]:
# Show the relative paths built from file_names
paths_to_files

In [None]:
# List the subdataset names of every file in paths_to_files
for hdf_path in paths_to_files:
    with rio.open(hdf_path) as hdf_file:
        crs = hdf_file.read_crs()
        for subdataset_name in hdf_file.subdatasets:
            print(subdataset_name)

### Use `paths_to_files[0]` to process the first HDF file of the dataset

In [None]:
# Gather every subdataset of the first HDF file: the band arrays go into
# vegetation_data_test and the matching names into vegetation_names_test
vegetation_data_test = []
vegetation_names_test = []

with rio.open(paths_to_files[0]) as hdf_file:
    for subdataset_name in hdf_file.subdatasets:
        with rio.open(subdataset_name) as layer:
            # modis_meta ends up holding the profile of the last subdataset
            modis_meta = layer.profile

            # Band 1 of each subdataset, kept in file order
            band = layer.read(1)
            vegetation_data_test.append(band)
            vegetation_names_test.append(subdataset_name)
#             np.savetxt(str(name)[-10:-1] + ".csv", subdataset.read(1), delimiter=",")


# vegetation_data_stacked = np.stack(vegetation_data)

In [None]:
## all NVDI test 
# all_nvdi = []

# for path_to_file in paths_to_files:
    
#     with rio.open(path_to_file) as dataset:
        
#         for name in dataset.subdatasets:
#             if re.search("NDVI.\_1$", name):
            
#                 with rio.open(name) as subdataset:
#                     modis_meta = subdataset.profile
                    
#                     all_nvdi.append(subdataset.read(1))

In [None]:
# Show the arrays read from the first HDF file
vegetation_data_test

In [None]:
# Show the subdataset names read from the first HDF file
vegetation_names_test

### Extract only the NDVI subdataset from the first HDF file

In [None]:
# Collect the NDVI layer of the first HDF file and export it to a CSV file
ndvi_and_quality = []
ndvi_and_quality_names = []

first_hdf_path = paths_to_files[0]

# Name the CSV after the HDF file itself (base name without its extension)
# instead of slicing fixed character positions out of the subdataset name:
# fixed offsets only work for one particular directory and file name length.
ndvi_csv_stem = os.path.splitext(os.path.basename(first_hdf_path))[0]

with rio.open(first_hdf_path) as dataset:

    # Loop through each subdataset of the HDF file
    for name in dataset.subdatasets:

        # Keep only the subdatasets whose name ends in "NDVI"
        if re.search(r"NDVI$", name):

            with rio.open(name) as subdataset:
                modis_meta = subdataset.profile

                # Read band 1 once and reuse it for both the list and the CSV
                ndvi_band = subdataset.read(1)
                ndvi_and_quality.append(ndvi_band)
                ndvi_and_quality_names.append(name)
                np.savetxt(ndvi_csv_stem + "_ndvi" + ".csv", ndvi_band, delimiter=",")
        
# vegetation_data_stacked = np.stack(vegetation_data)

In [None]:
# Show the collected NDVI array(s)
ndvi_and_quality

In [None]:
# Show the names of the subdatasets that matched the NDVI filter
ndvi_and_quality_names

In [None]:
# Preview a CSV file name: characters 54-94 of the first matching subdataset
# name followed by the "_ndvi.csv" suffix.
# NOTE(review): presumably the slice isolates the HDF file stem; confirm,
# since fixed offsets depend on the length of the path inside the name.
ndvi_and_quality_names[0][54:95] + "_ndvi" + ".csv"