In [1]:
import os
import matplotlib.pyplot as plt
from osgeo import gdal
import geopandas as gpd
import pandas as pd
import shutil

The goal of this notebook is to merge the GeoTIFF shadow-mask tiles exported from Google Earth Engine (GEE) into a single GeoTIFF per glacier (one output file per RGIId).

In [2]:
# Every location used below lives under the same Windows user profile,
# so build that prefix once and hang the individual folders off it.
user_home = os.path.join('C:', os.sep, 'Users', 'lzell')

# AGVA project folder (holds the RGI outlines)
folder_AGVA = os.path.join(user_home, 'OneDrive - Colostate', 'Desktop', "AGVA")

# input folder with the raw tiles to merge, and output folder for the merged products
raw_folder_path = os.path.join(user_home, 'Documents', 'shadows_to_merge')
merged_folder_path = os.path.join(user_home, 'Documents', 'shadows_merged')

# RGI shapefile for the Alaska region
rgi_path = os.path.join(folder_AGVA, 'RGI', "01_rgi60_Alaska", "01_rgi60_Alaska.shp")

# read only the attribute table of the shapefile; the geometry column is not loaded
rgi_df = gpd.read_file(rgi_path, ignore_geometry=True)

In [3]:
# Inventory of the raw tiles waiting to be merged.
# os.listdir returns *every* entry in the folder, so keep only GeoTIFFs: sidecar or
# system files (e.g. *.aux.xml, desktop.ini) would otherwise be given a bogus RGIId
# and be handed to GDAL as mosaic inputs in the merge step.
tif_names = [
    name for name in os.listdir(raw_folder_path)
    if name.lower().endswith(('.tif', '.tiff'))
]
raw_images_df = pd.DataFrame({"file_name": tif_names})

# the RGIId sits at characters 3-16 of the file name,
# e.g. "S2_RGI60-01.01390_..." -> "RGI60-01.01390"
raw_images_df['RGIId'] = [name[3:17] for name in raw_images_df['file_name']]

# sort by RGIId, then by file name, so the order of tiles within a glacier
# (and therefore which tile counts as the "first" one) is the same on every run
raw_images_df = raw_images_df.sort_values(by=['RGIId', 'file_name'], ascending=True)

# one entry per glacier; this drives the merge loop
unique_rgiids = raw_images_df['RGIId'].unique()

In [5]:
# Mosaic the raw tiles of each RGIId into a single compressed GeoTIFF.
# The output is named after the first 39 characters of the glacier's first tile
# (e.g. "S2_RGI60-01.01390_2018-01-01_2023-01-01"). Outputs that already exist are
# skipped, so this cell can simply be re-run to resume an interrupted job.

# make sure the destination folder exists before GDAL tries to write into it
os.makedirs(merged_folder_path, exist_ok=True)

# Scratch VRT that describes the mosaic; it is rebuilt for every glacier.
# Built with os.path.join: plain string concatenation dropped the path separator
# and placed the file beside, rather than inside, the merged folder.
vrt_path = os.path.join(merged_folder_path, 'merged.vrt')

for i, id_i in enumerate(unique_rgiids):

    # file names of all raw tiles belonging to this RGIId
    names_to_use = raw_images_df.loc[raw_images_df['RGIId'] == id_i, 'file_name'].values

    # full paths of those tiles, and how many will be merged
    image_paths = [os.path.join(raw_folder_path, name) for name in names_to_use]
    n_images = len(names_to_use)

    # define the path to save the merged file to
    out_name = names_to_use[0][:39]
    out_path = os.path.join(merged_folder_path, f"{out_name}.tif")

    # already merged on a previous run -> nothing to do
    if os.path.exists(out_path):
        continue

    # merge them with gdal. note that we also apply zstd compression to these, because they are the largest files
    print(f"{i} of {len(unique_rgiids)} - {out_name} is about to be mosaiced to destination. n={n_images}")

    # build the virtual mosaic; without gdal.UseExceptions() GDAL reports failure by returning None
    vrt = gdal.BuildVRT(vrt_path, image_paths)
    if vrt is None:
        raise RuntimeError(f"gdal.BuildVRT failed for {id_i} ({n_images} input images)")

    # write the mosaic out as a tiled, ZSTD-compressed byte GeoTIFF
    merged = gdal.Translate(out_path, vrt, outputType=gdal.GDT_Byte, creationOptions = ['PREDICTOR=1','COMPRESS=ZSTD', 'ZSTD_LEVEL=1', "TILED=TRUE"])
    translate_ok = merged is not None

    # drop both dataset handles so GDAL flushes the output and releases the VRT file
    merged = None
    vrt = None

    # the VRT is only a scratch file: delete it (best effort, a leftover VRT is harmless)
    try:
        os.remove(vrt_path)
    except OSError as err:
        print(f"could not delete temporary VRT {vrt_path}: {err}")

    if not translate_ok:
        # remove any partial output, otherwise the exists-check above would skip
        # this glacier forever on later runs
        if os.path.exists(out_path):
            os.remove(out_path)
        raise RuntimeError(f"gdal.Translate failed while writing {out_path}")

    print("mosaicing done")


0 of 1 - S2_RGI60-01.01390_2018-01-01_2023-01-01 is about to be mosaiced to destination. n=4




mosaicing done
