# Automates the reading, masking, and stacking of Sentinel-2 Imagery

I use this code after I download S2 imagery from Copernicus. (https://scihub.copernicus.eu/dhus/#/home)

Stacks only the visible bands, NIR, RE and SWIR (bands 2, 3, 4, 8, 8A, 12). The 20m bands (8A and 12) are resampled to 10m so they match the other bands. Additional bands can be included, but doing so requires editing the code below.

Parameters:
dir_list: list of str. One entry for every image you want processed; each entry is the path to that image's 'IMG_DATA' folder. Sentinel-2 L2A products should all be laid out the same way, which is what lets this code work on any of them.

veg_only: bool. If set to True, masks everything but vegetation.  If set to False, masks out bad pixels (1), cloud shadows (3), water (6), high probability clouds (9) and snow/ice (11).

no_mask: bool. If set to True, only No Data values are "masked" (essentially, masking is turned off; negates the veg_only parameter).

Scene Classification (used for masking): 
- 0 = No Data (missing data)
- 1 = Saturated or defective pixel
- 2 = Dark features/Shadows
- 3 = Cloud shadows
- 4 = Vegetation
- 5 = Not-vegetated
- 6 = Water
- 7 = Unclassified
- 8 = Cloud medium probability
- 9 = Cloud high probability
- 10 = Thin cirrus
- 11 = Snow or ice

WINDOWS USERS: You will need to change the "/" to "\" where indicated. The direction of the slash matters or the code won't work for you.

In [None]:
import os
import numpy as np
import numpy.ma as ma
import rasterio
from rasterio.enums import Resampling

In [None]:
### This is the only block you will need to edit ###

# Masking switches. no_mask takes precedence over veg_only when both are set.
no_mask = False
veg_only = False

# One 'IMG_DATA' directory per Sentinel-2 L2A image to process.
dir_list = [
    '.../S2B_MSIL2A_20201029T190519_N0214_R013_T10TEQ_20201029T213221.SAFE/GRANULE/L2A_T10TEQ_A019058_20201029T190650/IMG_DATA',
]

In [None]:
def untile_image(img_fp):
    '''
    Write an untiled GeoTIFF copy of band 1 of an image.

    Tiling is removed so the image can be resampled to match other imagery.

    Parameters:
    img_fp: string, path to the source raster. Only band 1 is read.

    Returns:
    string, path of the file that was written: the input path with its
    extension replaced by '_untiled.tif'.
    '''
    # splitext copes with extensions of any length, unlike slicing off the
    # last four characters.
    untiled_fp = '%s_untiled.tif' % os.path.splitext(img_fp)[0]

    # Read everything needed inside the context manager so the source
    # dataset is closed before the copy is written.
    with rasterio.open(img_fp) as img:
        img_array = img.read(1)
        profile = img.profile

    profile['tiled'] = False
    profile['driver'] = 'GTiff'

    with rasterio.Env():
        with rasterio.open(untiled_fp, 'w', **profile) as dst:
            dst.write(img_array, 1)

    return untiled_fp

def rescale(scale_factor, dataset, ref_dataset, output_fp):
    '''
    Resample an image with bilinear interpolation and write it to disk.

    Parameters:
    scale_factor: number, multiplier applied to the image height and width
        (2 turns a 20m band into a 10m band).
    dataset: string, path to the image to resample.
    ref_dataset: string, path to a reference image. Its profile is reused for
        the output file, so the resampled array is expected to match the
        reference image's dimensions and band count.
    output_fp: string, path of the untiled GeoTIFF to write.

    Returns:
    None. The result is only written to output_fp.
    '''
    with rasterio.open(dataset) as src:
        # Resample data to the target shape while reading.
        data = src.read(
            out_shape=(
                src.count,
                int(src.height * scale_factor),
                int(src.width * scale_factor)
            ),
            resampling=Resampling.bilinear
        )

    # Only the profile is needed from the reference image; close it right away.
    with rasterio.open(ref_dataset) as ref_img:
        profile = ref_img.profile

    profile['tiled'] = False
    profile['driver'] = 'GTiff'

    with rasterio.Env():
        with rasterio.open(output_fp, 'w', **profile) as dst:
            dst.write(data)
            
def get_image_name(image_directory):
    '''
    Return the identifier shared by every band file of an image.

    The identifier is the first 22 characters of a .jp2 file name in the
    'R10m' sub-folder (e.g. 'T10TEQ_20201029T190519' for a file named
    'T10TEQ_20201029T190519_B02_10m.jp2').

    Parameters:
    image_directory: string, path to the image's 'IMG_DATA' folder.

    Returns:
    string, the 22-character image identifier.

    Raises:
    FileNotFoundError if the 'R10m' folder contains no .jp2 files.
    '''
    # os.path.join picks the right separator, so no edit is needed on Windows.
    band_dir = os.path.join(image_directory, 'R10m')

    # Only look at band files, in a fixed order, so stray files
    # (e.g. '.DS_Store') cannot be picked up by accident.
    band_files = sorted(name for name in os.listdir(band_dir)
                        if name.lower().endswith('.jp2'))
    if not band_files:
        raise FileNotFoundError('No .jp2 band files found in %s' % band_dir)

    return band_files[0][:22]

def rescale_mask(scale_factor, dataset, ref_dataset):
    '''
    Resample a mask with nearest-neighbour interpolation.

    Nearest-neighbour keeps the mask values unchanged instead of blending
    them. The resampled mask is also written to 'Resampled_Mask.tif' in the
    current working directory as an untiled uint8 GeoTIFF.

    Parameters:
    scale_factor: number, multiplier applied to the mask height and width.
    dataset: string, path to the mask image to resample.
    ref_dataset: string, path to a reference image whose profile is reused
        for the output file.

    Returns:
    numpy array of shape (bands, height * scale_factor, width * scale_factor)
    holding the resampled mask.
    '''
    with rasterio.open(dataset) as src:
        # Resample data to the target shape while reading.
        data = src.read(
            out_shape=(
                src.count,
                int(src.height * scale_factor),
                int(src.width * scale_factor)
            ),
            resampling=Resampling.nearest
        )

    # Only the profile is needed from the reference image; close it right away.
    with rasterio.open(ref_dataset) as ref_img:
        profile = ref_img.profile

    profile['tiled'] = False
    profile['dtype'] = 'uint8'
    profile['driver'] = 'GTiff'

    with rasterio.Env():
        with rasterio.open('Resampled_Mask.tif', 'w', **profile) as dst:
            dst.write(data)

    return data

In [None]:
# WINDOWS USERS:  Paths in this block are built with os.path.join, so no
#                 slash changes are needed in this block.

def _untiled_or_original(image_paths):
    '''
    Return image_paths with every tiled image swapped for an untiled copy.

    Images that are not tiled are kept as they are, so the returned list
    always has the same length and order as the input.
    '''
    result = []
    for image in image_paths:
        with rasterio.open(image) as img:
            is_tiled = img.profile.get('tiled', False)
        result.append(untile_image(image) if is_tiled else image)
    return result


# Resolve every directory before the loop: os.chdir below would otherwise
# change what a relative entry later in dir_list points to.
for fp in [os.path.abspath(d) for d in dir_list]:
    # Outputs (Mask.tif, Resampled_Mask.tif, the final stack) are written
    # relative to the image directory.
    os.chdir(fp)

    img_identifier = get_image_name(fp)

    re = os.path.join('R20m', '%s_B8A_20m.jp2' % img_identifier)
    swir = os.path.join('R20m', '%s_B12_20m.jp2' % img_identifier)

    # Scene classification layer, used to build the mask
    qi = os.path.join('R20m', '%s_SCL_20m.jp2' % img_identifier)

    ref_img = os.path.join('R10m', '%s_B02_10m.jp2' % img_identifier)

    # 10m bands: untile where needed, keeping every band in order
    stack_list = _untiled_or_original(
        [os.path.join('R10m', '%s_%s_10m.jp2' % (img_identifier, band))
         for band in ('B02', 'B03', 'B04', 'B08')])

    # 20m bands to be incorporated into the stack: untile where needed
    images_20m = _untiled_or_original([re, swir])

    # Resample 20m images to 10m and add them to the list to be stacked
    for image in images_20m:
        rescaled_fp = os.path.splitext(image)[0] + '_resampled.tif'
        rescale(2, image, ref_img, rescaled_fp)
        stack_list.append(rescaled_fp)

    # Create a mask (True = pixel will be masked out)
    with rasterio.open(qi) as scl:
        mask_array = scl.read(1)
        kwds = scl.profile

    if no_mask:
        # Masking turned off: only No Data pixels are masked
        qa_mask = (mask_array == 0)
    elif veg_only:
        # Mask everything but vegetation
        qa_mask = (mask_array != 4)
    else:
        # Add additional classes here to mask out as desired
        qa_mask = np.isin(mask_array, [1, 3, 6, 9, 11])

    # Convert to integer array
    mask_int = qa_mask.astype('uint8')

    # The mean of a boolean array is the fraction of True values; unlike
    # indexing the counts from np.unique, this also works when no pixel
    # (or every pixel) is masked.
    percent_masked = float(qa_mask.mean())
    print('Percent of image masked: ' + str(percent_masked*100))

    # Write mask to file
    kwds['nodata'] = 0
    kwds['driver'] = 'GTiff'

    with rasterio.open('Mask.tif', 'w', **kwds) as dst:
        dst.write(mask_int, 1)

    # Rescale the mask to match 10m
    new_mask = rescale_mask(2, 'Mask.tif', ref_img)

    # Convert the mask back to bool; band 0 of the (bands, rows, cols)
    # array gives a 2-D mask that lines up with a single band.
    new_mask_bool = new_mask[0] == 1

    # Open all bands, mask them, and write to new file
    # Read metadata of first file
    with rasterio.open(stack_list[0]) as src0:
        meta = src0.meta

    # Update meta to reflect the number of layers
    meta['count'] = len(stack_list)
    meta['nodata'] = 0
    meta['tiled'] = True
    # The output is a .tif whatever format the first input happens to be
    meta['driver'] = 'GTiff'

    new_stack = '%s_masked_stack.tif' % img_identifier

    # Open the output once and write every band into it
    with rasterio.open(new_stack, 'w', **meta) as dst:
        for num, image in enumerate(stack_list):
            print('Masking and writing band ', num+1)
            with rasterio.open(image) as org:
                org_band = org.read(1)
            # Masked pixels get the nodata value
            org_band[new_mask_bool] = 0
            dst.write(org_band, num+1)