# Creating data cube stacks

## 1. Importing libraries

Libraries reference:

+ [gdal](https://gdal.org/api/python.html)
+ [numpy](https://numpy.org/install/)
+ [glob](https://docs.python.org/3/library/glob.html)
+ [subprocess](https://docs.python.org/3/library/subprocess.html)
+ [time](https://docs.python.org/3/library/time.html)
+ [datetime](https://docs.python.org/3/library/datetime.html)
+ [os](https://docs.python.org/3/library/os.html)

In [None]:
import gdal
import glob
import numpy
import subprocess
import time
import datetime
from os.path import isfile
import os

## 2. Changing working folder location

In [None]:
# Folder where all data is stored: the 'Data' directory located two levels
# above the current working directory.
# os.path is used instead of splitting the path on '/' so that the parent
# folders are also resolved correctly on platforms whose separator is not '/'.
data_folder = os.path.join(os.path.dirname(os.path.dirname(os.getcwd())), 'Data')
os.chdir(data_folder)

## 3. Creating cubes

In [None]:
def run_command(args):
    """Run an external command, echo its output and fail on a non-zero exit.

    Used instead of the IPython-only ``!{command}`` syntax so that this cell
    also works as plain Python, and so that a failing GDAL call stops the
    processing instead of being silently ignored.

    Args:
        args: Command and its arguments as a list of strings (no shell is
            involved, so paths containing spaces are passed through intact).

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    result = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            universal_newlines=True)
    if result.stdout:
        print(result.stdout, end='')
    result.check_returncode()


# Folder where the data cube stacks are saved. It is created when missing so
# that numpy.save and the GDAL tools below do not fail on a fresh checkout.
save_folder = './cubes/raw/'
os.makedirs(save_folder, exist_ok=True)

# Years to create the data cube stacks for.
years = [2019]

# Revisit time (in days) according to the sensor.
revisit_time = 5 # 16 for Landsat

# The BDC grid cells used to create the cubes.
# This example is for Sentinel data in western Bahia. This information
# can be obtained from the name of the downloaded bands.
# The cubes region is defined by their cell in BDC.
cells = ['089098']

# Bands to create stacks with. One stack per year per band per cell is created.
bands = ['band2', 'band3', 'band4', 'band8a', 'band11', 'band12', 'NDVI', 'EVI', 'Fmask4']
    # For Landsat:
    #     ['band2', 'band3', 'band4', 'band5', 'band6', 'band7', 'NDVI', 'EVI', 'Fmask4']

# Start dates for the cubes according to the year. The start date must exist in the
# downloaded files, so every supported year has to be listed here explicitly.
start_dates = {
    2018: '2017-07-04',
    2019: '2018-07-04',
}

for year in years:
    # Fail early with a clear message instead of hitting an undefined (or stale,
    # left over from a previous year) start date further down.
    if year not in start_dates:
        raise ValueError(f'No start date configured for year {year}; add it to start_dates.')

    for cell in cells:
        print('-------------- '+cell+' - '+str(year)+'--------------')

        cube_identifier = f'{year}.{cell}'

        # A band full of nodata (-9999) must be provided with the same dimensions of the
        # clipped bands to be used in the cube creation. Same width and height in pixels,
        # exactly the same boundaries, same EPSG.
        # This is used as a placeholder for the dates that do not have images available
        # even though they are expected to exist. It will be completely filled during
        # the gap filling process.
        empty_path = f'./ref/nodata_{cell}.tif'

        # Dates in datetime format.
        current = datetime.datetime.strptime(start_dates[year], '%Y-%m-%d')
        last = datetime.datetime.strptime(f'{year}-08-31', '%Y-%m-%d')  # end date for the stacks

        # All expected image dates in the cube. The interval between them is
        # always the same and equal to the revisit time.
        dates = []
        while current <= last:
            dates.append(current)
            current = current + datetime.timedelta(days=revisit_time)

        # The list of days does not depend on the band, so it is built and saved
        # once per year/cell instead of once per band.
        days = [date.strftime('%Y-%m-%d') for date in dates]
        print('Saving days...')
        numpy.save(save_folder+'days.'+cube_identifier+'.npy', numpy.asarray(days))

        # Iterating through the bands to create one data cube stack for each one of them.
        for band in bands:
            time_start = time.time()
            print('---------- Cube '+band+' ----------')
            print('Getting files path...')

            # Paths of the bands that will be used in the stack, one per expected date.
            # For dates that have no image available, the 'empty' placeholder is used.
            # NOTE(review): the pattern only looks directly inside ./bands/clipped/
            # (a glob '*' does not cross directory separators). The original cell also
            # defined an unused per-cell subfolder pattern - confirm the actual layout.
            files = []
            for date_str in days:
                # Sorted so the chosen file is deterministic when several match.
                matches = sorted(glob.glob(f'./bands/clipped/*{cell}*{date_str}*{band}*.tif'))
                files.append(matches[0] if matches else empty_path)

            print('Total of images:', len(files))

            # Without the placeholder the stack could end up with fewer layers than
            # saved days, so its absence is reported before any GDAL tool is run.
            if empty_path in files and not os.path.isfile(empty_path):
                raise FileNotFoundError(
                    f'Placeholder band {empty_path} is required for dates without images.')

            vrt_path  = f'{save_folder}{cube_identifier}.{band}.vrt'
            cube_path = f'{save_folder}{cube_identifier}.{band}.tif'

            # Creates a VRT (one layer per date) that is afterwards translated.
            print('Creating VRT...')
            run_command(['gdalbuildvrt', '-vrtnodata', '-9999', '-separate', vrt_path] + files)

            print('Creating cube...')
            run_command(['gdal_translate', '-co', 'COMPRESS=LZW', '-co', 'BIGTIFF=YES',
                         '-of', 'GTiff', vrt_path, cube_path])

            print('Elapsed time: %.3f minutes.' % ((time.time()-time_start)/60))
            print('Stack created!\n')
        print('----------------------------')
        print('All cubes '+cell+' for '+str(year)+' done!')
        print('----------------------------')