<a href="https://colab.research.google.com/github/lucas-zeller/Glacier-Snow-Lines/blob/main/MergeS2Class.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
"""
Created on Fri Apr 15 08:59:25 2022

@author: lzell

https://www.youtube.com/watch?v=sBBMKbAj8XE
https://gdal.org/drivers/raster/gtiff.html
https://gdal.org/programs/gdal_translate.html
https://gdal.org/programs/gdal_merge.html

Designed to be run in Colab
"""

# Notebook setup: imports, optional dependency install, and Google Drive mount.
from IPython.display import clear_output

# Uncomment the next line to install rasterio into the runtime; it is only needed
# if the commented-out rasterio imports further down are re-enabled.
# !pip install rasterio
clear_output()  # clears this cell's output so far (presumably the pip install log when the install is enabled)

import os
# import rasterio as rio
# import numpy as np
# from rasterio.merge import merge as riomerge
# from rasterio.plot import show as rioshow
import matplotlib.pyplot as plt
from osgeo import gdal
import geopandas as gpd
import pandas as pd
# import glob
# import subprocess
import shutil

# Mount Google Drive at /content/drive; the data folders used by the later cells
# are all located under '/content/drive/My Drive/'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


The goal of this notebook is to merge the output GeoTIFFs exported from Google Earth Engine (GEE) into a single GeoTIFF per glacier (one per RGI ID). Specifically, we merge the images in the "S2_Classified_Cloudmasked_Raw" folder and save the results in "S2_Classified_Cloudmasked_Merged".

We will also open the RGI outlines (attribute table only, without the geometries) so that we can filter the glaciers by RGI second-order region (`O2Region`).

In [21]:
# set folder paths for the raw and merged products
# NOTE: both paths must keep their trailing '/', because the later cells build
# file paths by plain string concatenation (f'{folder}{file_name}').
raw_folder_path = '/content/drive/My Drive/AGVA Snow Lines/S2_Classified_Cloudmasked_Raw/'
merged_folder_path = '/content/drive/My Drive/AGVA Snow Lines/S2_Classified_Cloudmasked_Merged/'

# make sure the output folder exists before anything is written to it; without it
# the copy / mosaic step fails on the very first glacier. No-op if already present.
os.makedirs(merged_folder_path, exist_ok=True)

# # set folder path for metadata
# meta_folder_path = '/content/drive/My Drive/AGVA Snow Lines/metadata/'

# set path to RGI shapefile
rgi_path = '/content/drive/My Drive/01_rgi60_Alaska/01_rgi60_Alaska.shp'

# open rgi data, but drop the geometry column: only the attribute table
# (RGIId, O2Region, ...) is used, and skipping the geometries gives a plain table
rgi_df = gpd.read_file(rgi_path, ignore_geometry=True)

# quick look at the first rows to confirm the attribute table loaded as expected
print(rgi_df.head())

            RGIId         GLIMSId   BgnDate   EndDate   CenLon  CenLat  \
0  RGI60-01.00001  G213177E63689N  20090703  -9999999 -146.823  63.689   
1  RGI60-01.00002  G213332E63404N  20090703  -9999999 -146.668  63.404   
2  RGI60-01.00003  G213920E63376N  20090703  -9999999 -146.080  63.376   
3  RGI60-01.00004  G213880E63381N  20090703  -9999999 -146.120  63.381   
4  RGI60-01.00005  G212943E63551N  20090703  -9999999 -147.057  63.551   

  O1Region O2Region   Area  Zmin  ...  Slope  Aspect  Lmax  Status  Connect  \
0        1        2  0.360  1936  ...   42.0     346   839       0        0   
1        1        2  0.558  1713  ...   16.0     162  1197       0        0   
2        1        2  1.685  1609  ...   18.0     175  2106       0        0   
3        1        2  3.681  1273  ...   19.0     195  4175       0        0   
4        1        2  2.573  1494  ...   16.0     181  2981       0        0   

   Form  TermType  Surging  Linkages  Name  
0     0         0        9         

In [36]:
# compile list of all the image names in the raw folder, format as df.
# Only GeoTIFFs are kept: os.listdir returns every entry in the folder, and a stray
# sidecar file (e.g. "<image>.tif.aux.xml") would otherwise carry a valid RGIId,
# survive the region filter and be counted (and later mosaiced) as an image.
raw_file_names = [name for name in os.listdir(raw_folder_path) if name.lower().endswith('.tif')]

# create column for rgi id: characters 3..16 of the file name, i.e. the 14-character
# id that follows the 3-character "S2_" prefix (S2_RGI60-01.01546_... -> RGI60-01.01546)
raw_images_df = pd.DataFrame({
    "file_name": raw_file_names,
    "RGIId": [name[3:17] for name in raw_file_names],
})

# sort by rgi id, then by file name, so that the order of the tiles belonging to
# one glacier is deterministic instead of depending on the os.listdir order
raw_images_df = raw_images_df.sort_values(by=['RGIId', 'file_name'], ascending=True)

# create column for O2 region (left join: files whose id is not in the RGI table
# get a missing O2Region and are dropped by the region filter below)
raw_images_df = raw_images_df.merge(rgi_df[['RGIId', 'O2Region']], on='RGIId', how='left')

# subset to just the O2 region(s) which you want
# (region codes are given as strings, matching how they compare against the RGI table)
target_regions = ["4"]
raw_images_df = raw_images_df[raw_images_df['O2Region'].isin(target_regions)]

# now get list of the unique RGIIds
unique_rgiids = raw_images_df['RGIId'].unique()

# print(raw_images_df.head())
# print(unique_rgiids[:40])

                                       file_name           RGIId O2Region
110  S2_RGI60-01.01546_2018-01-01_2023-01-01.tif  RGI60-01.01546        4
111  S2_RGI60-01.01560_2018-01-01_2023-01-01.tif  RGI60-01.01560        4
112  S2_RGI60-01.01573_2018-01-01_2023-01-01.tif  RGI60-01.01573        4
113  S2_RGI60-01.01589_2018-01-01_2023-01-01.tif  RGI60-01.01589        4
114  S2_RGI60-01.01602_2018-01-01_2023-01-01.tif  RGI60-01.01602        4


In [47]:
# now for each RGIId, grab all the images of it and merge them into one GeoTIFF

# The intermediate VRT is kept in GDAL's in-memory filesystem (/vsimem/) so that no
# stray "merged.vrt" is left behind in the merged folder on Drive.
vrt_path = '/vsimem/merged.vrt'

# GeoTIFF creation options for the mosaics: tiled, ZSTD-compressed (level 1), no predictor
mosaic_creation_options = ['PREDICTOR=1','COMPRESS=ZSTD', 'ZSTD_LEVEL=1', "TILED=TRUE"]

# RGIIds whose mosaic could not be produced; reported once the loop has finished
failed_rgiids = []

# iterate through the list of unique rgiids
for i, id_i in enumerate(unique_rgiids):

    # subset raw_images_df to the rows with this rgiid
    subset_df = raw_images_df[raw_images_df['RGIId']==id_i]

    # grab the image names
    names_to_use = subset_df['file_name'].values

    # make raw image names to paths
    image_paths = [f'{raw_folder_path}{name}' for name in names_to_use]

    # count the number of images that will be merged
    n_images = len(names_to_use)

    # define the path to save merged file to: the first 39 characters of the file
    # name, e.g. "S2_RGI60-01.01546_2018-01-01_2023-01-01"
    out_name = names_to_use[0][:39]
    out_path = f'{merged_folder_path}{out_name}.tif'

    # if there is only a single image, we can just copy this image to the destination path
    if n_images==1:
        shutil.copy2(image_paths[0], out_path) #copy2 will overwrite any existing file by this name
        print(f"{i} of {len(unique_rgiids)} - {out_name} copied to destination")
        continue

    # otherwise, we will merge them with gdal. note that we also apply zstd compression to these, because they are the largest files
    print(f"{i} of {len(unique_rgiids)} - {out_name} is about to be mosaiced to destination. n={n_images}")

    # build the virtual mosaic; when GDAL exceptions are not enabled, a failure is
    # signalled by a None return value rather than by an exception
    vrt = gdal.BuildVRT(vrt_path, image_paths)
    if vrt is None:
        print(f"mosaicing FAILED for {out_name}: could not build VRT")
        failed_rgiids.append(id_i)
        continue

    try:
        # do the mosaicing
        merged_ds = gdal.Translate(out_path, vrt, outputType=gdal.GDT_Byte, creationOptions=mosaic_creation_options)
        translate_ok = merged_ds is not None

        # close the output dataset so that it is fully flushed to disk
        merged_ds = None
    finally:
        # release the vrt dataset and delete the in-memory vrt file
        vrt = None
        gdal.Unlink(vrt_path)

    if translate_ok:
        print("mosaicing done")
    else:
        print(f"mosaicing FAILED for {out_name}: gdal.Translate returned no dataset")
        failed_rgiids.append(id_i)

    # we need to forcibly remount the drive to make sure it doesn't disconnect
    # drive.mount("/content/drive", force_remount=True)

# summarise anything that needs to be re-run
if failed_rgiids:
    print(f"{len(failed_rgiids)} glacier(s) could not be mosaiced: {failed_rgiids}")

0 of 483 - S2_RGI60-01.01546_2018-01-01_2023-01-01 copied to destination
1 of 483 - S2_RGI60-01.01560_2018-01-01_2023-01-01 copied to destination
2 of 483 - S2_RGI60-01.01573_2018-01-01_2023-01-01 copied to destination
3 of 483 - S2_RGI60-01.01589_2018-01-01_2023-01-01 copied to destination
4 of 483 - S2_RGI60-01.01602_2018-01-01_2023-01-01 copied to destination
5 of 483 - S2_RGI60-01.01606_2018-01-01_2023-01-01 copied to destination
6 of 483 - S2_RGI60-01.01607_2018-01-01_2023-01-01 copied to destination
7 of 483 - S2_RGI60-01.01623_2018-01-01_2023-01-01 copied to destination
8 of 483 - S2_RGI60-01.01625_2018-01-01_2023-01-01 copied to destination
9 of 483 - S2_RGI60-01.01632_2018-01-01_2023-01-01 copied to destination
10 of 483 - S2_RGI60-01.01637_2018-01-01_2023-01-01 copied to destination
11 of 483 - S2_RGI60-01.01644_2018-01-01_2023-01-01 copied to destination
12 of 483 - S2_RGI60-01.01647_2018-01-01_2023-01-01 copied to destination
13 of 483 - S2_RGI60-01.01648_2018-01-01_2023-01