In [None]:
import sys
import os
import numpy as np
from glob import glob
from pprint import pprint
from osgeo import gdal
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Run configuration for the annual composite.
# Year whose sub-directory of the archive is processed.
year = 2020
# Glob pattern matched against file names inside each sub-directory of the year folder.
tile = '*28v08*.hdf'
# Root of the MCD64A1 archive; a <year>/ sub-directory is expected underneath it.
path_ = '/path/to/modis/Collection6/L3/MCD64A1-BurnArea/'
# Base directory under which the MCD64A1-BurnArea-Annual/<year>/ output folder is created.
output_path = '/path/to/projects/modis_water/data/burn_scar_products'

In [None]:
def getMatFromHDF(hdf, substr, excludeStr):
    """Read one sub-dataset of an HDF file as a 0/1 matrix.

    Parameters
    ----------
    hdf : str
        Path of the HDF container to open with GDAL.
    substr : str
        Text that the sub-dataset name must contain.
    excludeStr : str
        Text that the sub-dataset name must NOT contain.

    Returns
    -------
    numpy.ndarray
        Band 1 of the first matching sub-dataset, with every value greater
        than zero mapped to 1 and everything else mapped to 0.

    Raises
    ------
    IndexError
        If no sub-dataset name satisfies both conditions.
    """
    container = gdal.Open(hdf)
    candidates = []
    for sd_name, _description in container.GetSubDatasets():
        if substr in sd_name and excludeStr not in sd_name:
            candidates.append(sd_name)
    chosen = candidates[0]
    print('Opening: {}'.format(chosen))
    # Drop the container handle before opening the sub-dataset itself.
    del container
    layer = gdal.Open(chosen)
    values = layer.GetRasterBand(1).ReadAsArray()
    binary = np.where(values > 0, 1, 0)
    del layer
    return binary

def getAllFiles(path, year, tile):
    """Return one file per sub-directory of ``<path>/<year>`` matching ``tile``.

    Parameters
    ----------
    path : str
        Root directory of the archive. It is joined with ``str(year)``.
    year : int or str
        Name of the year sub-directory under ``path``.
    tile : str
        Glob pattern applied inside every sub-directory of the year folder.

    Returns
    -------
    list of str
        For each sub-directory (in sorted order), the first match of ``tile``
        in sorted order. The list is also pretty-printed.

    Raises
    ------
    IndexError
        If a sub-directory contains no file matching ``tile``.
    """
    # Use the ``path`` argument rather than a notebook-level global so the
    # function honours what the caller passes in.
    year_dir = os.path.join(path, str(year))
    subdirhdfs = []
    for subdir in sorted(os.listdir(year_dir)):
        pattern = os.path.join(year_dir, subdir, tile)
        # glob() returns matches in arbitrary order; sort so that the pick is
        # deterministic when more than one file matches.
        matches = sorted(glob(pattern))
        if not matches:
            raise IndexError('No file matching {} found'.format(pattern))
        subdirhdfs.append(matches[0])
    pprint(subdirhdfs)
    return subdirhdfs

def logical_or_mat(mat_list, plot=True):
    """Combine several matrices into one 0/1 mask and optionally plot it.

    The matrices are summed element-wise; every cell whose sum is greater
    than zero becomes 1, every other cell becomes 0.

    Parameters
    ----------
    mat_list : sequence of numpy.ndarray
        Matrices to combine. The first one defines the output shape.
    plot : bool, optional
        When True (the default) the result is drawn with ``plt.matshow`` on a
        new 15x15 inch figure. Pass False to skip plotting.

    Returns
    -------
    numpy.ndarray
        Integer array containing only 0 and 1.

    Raises
    ------
    IndexError
        If ``mat_list`` is empty.
    """
    # The accumulator must start at zero: np.empty() returns uninitialised
    # memory, which would leak arbitrary values into the sum.
    output_mat = np.zeros(mat_list[0].shape)
    for mat in mat_list:
        output_mat = output_mat + mat
    output_mat = np.where(output_mat > 0, 1, 0)
    if plot:
        fig = plt.figure(figsize=(15, 15))
        # Draw into the figure that was just created rather than assuming it
        # is figure number 1.
        plt.matshow(output_mat, fignum=fig.number)
    return output_mat

def setupOutput(year, tile, output_path):
    """Build the output GeoTIFF path and make sure its directory exists.

    Parameters
    ----------
    year : int or str
        Year used in both the directory name and the file name.
    tile : str
        Tile pattern; every '*' and '.hdf' is stripped from it to form the
        tile part of the file name.
    output_path : str
        Base directory that receives ``MCD64A1-BurnArea-Annual/<year>``.

    Returns
    -------
    str
        Full path of the output file (the file itself is not created).
        The directory and the file path are printed along the way.
    """
    tile_id = tile.replace('*', '').replace('.hdf', '')
    tif_name = 'MCD64A1-BurnArea_Annual_A{}.{}.tif'.format(year, tile_id)
    annual_dir = os.path.join(output_path,
                              'MCD64A1-BurnArea-Annual/{}'.format(year))
    print(annual_dir)
    if os.path.exists(annual_dir):
        pass
    else:
        os.makedirs(annual_dir)
    destination = os.path.join(annual_dir, tif_name)
    print(destination)
    return destination

def getRasterInfo(file):
    """Report the georeferencing and size of a file's 'Burn Date' layer.

    Opens ``file`` read-only, picks the first sub-dataset whose name contains
    'Burn Date' but not 'Uncertainty', and prints and returns its metadata.

    Parameters
    ----------
    file : str
        Path of the HDF container.

    Returns
    -------
    tuple
        ``(geotransform, projection, width, height)`` as reported by GDAL.

    Raises
    ------
    IndexError
        If no sub-dataset name satisfies both conditions.
    """
    container = gdal.Open(file, gdal.GA_ReadOnly)
    burn_date_layers = []
    for sd_name, _description in container.GetSubDatasets():
        if 'Burn Date' in sd_name and 'Uncertainty' not in sd_name:
            burn_date_layers.append(sd_name)
    layer = gdal.Open(burn_date_layers[0], gdal.GA_ReadOnly)
    # The container is no longer needed once the sub-dataset is open.
    del container
    info = (
        layer.GetGeoTransform(),
        layer.GetProjection(),
        layer.RasterXSize,
        layer.RasterYSize,
    )
    labels = ('Transform', 'Projection', 'Width', 'Height')
    for label, value in zip(labels, info):
        print(label)
        print(value)
    # Release the GDAL handle before returning.
    layer = None
    return info

def output_raster(outPath, outmat, geo, proj, ncols, nrows):
    """Write a matrix to a single-band, LZW-compressed Int16 GeoTIFF.

    Parameters
    ----------
    outPath : str
        Path of the GeoTIFF to create.
    outmat : numpy.ndarray
        Matrix written to band 1.
    geo : sequence
        Geotransform passed to ``SetGeoTransform``.
    proj : str
        Projection passed to ``SetProjection``.
    ncols, nrows : int
        Raster width and height in pixels.

    Raises
    ------
    RuntimeError
        If GDAL cannot create the output dataset.
    """
    driver = gdal.GetDriverByName('GTiff')
    outDs = driver.Create(outPath, ncols, nrows, 1, gdal.GDT_Int16, options=['COMPRESS=LZW'])
    # Without gdal.UseExceptions(), Create() signals failure by returning None.
    if outDs is None:
        raise RuntimeError('Could not create output raster: {}'.format(outPath))
    outDs.SetGeoTransform(geo)
    outDs.SetProjection(proj)
    outBand = outDs.GetRasterBand(1)
    outBand.WriteArray(outmat)
    outDs.FlushCache()
    # Release the band before the dataset that owns it; dropping the last
    # dataset reference is what closes the file.
    outBand = None
    outDs = None
    driver = None

In [None]:
# Collect one HDF per sub-directory of the year folder, then read each file's
# 'Burn Date' layer (skipping any 'Uncertainty' layer) as a 0/1 matrix.
subdirhdfs = getAllFiles(path=path_, year=year, tile=tile)
matSet = [getMatFromHDF(subdir, 'Burn Date', 'Uncertainty') for subdir in subdirhdfs]

In [None]:
# Merge the per-file masks into a single 0/1 mask for the year (also plots it).
outmat = logical_or_mat(matSet)

In [None]:
# Sanity check of the mask values.
# np.histogram returns (counts, bin_edges): x holds the counts, y the bin edges.
x, y = np.histogram(outmat)
print(x)
print(y)

In [None]:
# Build the output GeoTIFF path, creating its directory if it does not exist.
outpath = setupOutput(year=year, tile=tile, output_path=output_path)

In [None]:
# Take the georeferencing and raster size from the first input file's 'Burn Date' layer.
geo, proj, ncols, nrows = getRasterInfo(subdirhdfs[0])

In [None]:
# Write the combined mask to the GeoTIFF path built above.
output_raster(outPath=outpath, outmat=outmat, geo=geo, proj=proj, ncols=ncols, nrows=nrows)