# Preprocessing datasets for RGI7 (RGI6 too for testing)

In [1]:
import os 
import glob # finds all the pathnames matching a specified pattern according to the rules used by the Unix shell
import numpy as np
import pandas as pd
import geopandas as gpd

import rasterio
from rasterio.merge import merge
from rasterio.plot import show
from rasterio.enums import Resampling

# Import libraries
import rioxarray as riox
from rioxarray.merge import merge_arrays


## Preprocess Millan et al. 2022 dataset

In [2]:
# Original author's pattern, kept for reference:
# list_files = glob.glob("/home/rooda/Dropbox/Patagonia/GIS South/Glaciers/Millan_2022/THICKNESS*.tif")
# The pattern below has no wildcard, so it matches at most the single RGI-17.2 thickness tile.
# TODO: decide whether the RGI-17.3 tile should be included as well (the merge step that
# follows accepts several rasters) and whether the accompanying error rasters are needed.
millan_thickness_pattern = "/Users/milliespencer/Desktop/data_for_preprocessing/M22/Millan_2022_data/RGI-17_thickness/THICKNESS_RGI-17.2_2022August24.tif"
list_files = glob.glob(millan_thickness_pattern)

In [3]:
# Open every raster found above and reproject it to EPSG:32719.
# (The original workflow used EPSG:32718; this study region lies slightly further east.)
glacier_list = [
    riox.open_rasterio(raster_path).rio.reproject("EPSG:32719")
    for raster_path in list_files
]

# Mosaic the reprojected rasters with rioxarray's merge_arrays at a resolution of (50, 50),
# using the 'max' merge method where inputs overlap.
merged_raster = merge_arrays(
    dataarrays=glacier_list,
    res=(50, 50),
    crs="EPSG:32719",
    method="max",
)

# Round to whole numbers and store as 16-bit integers.
# NOTE(review): if the mosaic contains NaN nodata cells the int16 cast is not well defined -- confirm.
merged_raster = merged_raster.round(0)
merged_raster = merged_raster.astype("int16")

# Write the mosaic to disk.
# Original author's output path, kept for reference:
# merged_raster.rio.to_raster("/home/rooda/Dropbox/Patagonia/GIS South/Glaciers/Thickness_2022.tif")
merged_raster.rio.to_raster("/Users/milliespencer/Desktop/data_for_preprocessing/M22/Millan_2022_data/Thickness_2022.tif")

## Preprocess Farinotti et al. 2019 dataset

In [46]:
# RGI 6.0 outlines are used here rather than RGI 7, following Rodrigo's suggestion.
# Original author's path, kept for reference:
# RGI6 = gpd.read_file("/home/rooda/Dropbox/Patagonia/GIS South/Glaciers/RGI6.shp")
rgi6_shapefile = "/Users/milliespencer/Desktop/data_for_preprocessing/RGI6/17_rgi60_SouthernAndes/17_rgi60_SouthernAndes.shp"
RGI6 = gpd.read_file(rgi6_shapefile)

# Keep only glaciers whose CenLat lies in [-40, -36.13] (both bounds inclusive).
# The original workflow kept CenLat < -40.5 instead (glaciers in southern Chile-Argentina).
# The northern bound of -36.13 was chosen to leave out Glacier Universidad: it is already
# covered by many studies, and it is not a volcano.
# TODO: consider using the border of the Maule or Nuble region instead of a latitude line.
in_latitude_band = RGI6.CenLat.between(-40, -36.13)
RGI6 = RGI6[in_latitude_band]
def split(a, n):
    """Lazily cut the sequence *a* into *n* consecutive chunks of near-equal length.

    The chunk lengths differ by at most one; when ``len(a)`` is not a multiple
    of *n*, the leading chunks are the ones that receive the extra element.
    Used below to process the raster paths in a few batches.
    """
    base, extra = divmod(len(a), n)

    def chunk(idx):
        # The first `extra` chunks are one element longer than the rest.
        start = idx * base + min(idx, extra)
        stop = start + base + (1 if idx < extra else 0)
        return a[start:stop]

    return (chunk(idx) for idx in range(n))

# File and folder paths
# Original author's pattern, kept for reference:
# list_files = glob.glob("/home/rooda/Dropbox/Patagonia/GIS South/Glaciers/Farinotti_2019/RGI*.tif")
list_files = glob.glob("/Users/milliespencer/Desktop/data_for_preprocessing/F19/RGI60-17/*.tif")  # every .tif file in this folder
list_files_df = pd.DataFrame(list_files, columns=["dir"])  # one row per raster path

# The RGI ID is the first 14 characters of the file name. Slicing the base name instead of
# the absolute path (previously characters 65:79, and 64:77 on the original author's machine)
# keeps the extraction correct when the data directory changes.
list_files_df["RGIId"] = list_files_df["dir"].map(os.path.basename).str[:14]

# Keep only the rasters of glaciers present in the filtered RGI6 table.
list_files_df = list_files_df[list_files_df["RGIId"].isin(RGI6.RGIId)]
list_files = list_files_df["dir"].tolist()  # 818 glaciers (author's count)

# Divide the paths into 5 batches that are mosaicked separately.
# Original note: "maximum number of glaciers: 4000 app" -- presumably an approximate per-merge
# limit in the original workflow; with far fewer glaciers here the batching may be unnecessary.
list_files_split = list(split(list_files, 5))

In [49]:
# Mosaic each batch of glacier rasters into a temporary group_<i>.tif file.
# Iterating over list_files_split directly keeps the number of groups in sync with
# however many batches were created, instead of repeating the count here.
for i, sublist in enumerate(list_files_split):
    if not sublist:
        # An empty batch has nothing to mosaic, so no file is written for it.
        print(f"group {i} is empty, skipped")
        continue

    glacier_list = []
    for glacier in sublist:
        glacier_i = riox.open_rasterio(glacier)
        glacier_i = glacier_i.rio.reproject("EPSG:32719", resampling=Resampling.bilinear)
        glacier_list.append(glacier_i)

    # Mosaic the batch at a resolution of (50, 50) using the 'max' merge method.
    merged_raster = merge_arrays(dataarrays=glacier_list, res=(50, 50), crs="EPSG:32719", method="max")
    merged_raster.rio.to_raster(f"/Users/milliespencer/Desktop/data_for_preprocessing/RGI6/group_{i}.tif")
    print(i)  # progress indicator

0
1
2
3
4


In [50]:
# Collect the temporary per-group mosaics (5 groups) so they can be merged into one raster.
group_pattern = "/Users/milliespencer/Desktop/data_for_preprocessing/RGI6/group_*.tif"
list_files = glob.glob(group_pattern)

# Open each group raster and reproject it to EPSG:32719 with bilinear resampling.
glacier_list = [
    riox.open_rasterio(group_path).rio.reproject("EPSG:32719", resampling=Resampling.bilinear)
    for group_path in list_files
]

# Mosaic the groups with rioxarray's merge_arrays at a resolution of (100, 100),
# using the 'max' merge method where inputs overlap.
merged_raster = merge_arrays(
    dataarrays=glacier_list,
    res=(100, 100),
    crs="EPSG:32719",
    method="max",
)

# Write the final thickness mosaic to disk.
merged_raster.rio.to_raster("/Users/milliespencer/Desktop/data_for_preprocessing/Thickness_2019.tif")

In [51]:
# Remove the temporary per-group mosaics.
# The group_*.tif pattern is globbed again here instead of reusing `list_files`: that
# variable is rebound by other cells, so running this cell out of order could otherwise
# delete input data rather than the temporary files.
for temp_group_path in glob.glob("/Users/milliespencer/Desktop/data_for_preprocessing/RGI6/group_*.tif"):
    os.remove(temp_group_path)

## Preprocess Hugonnet et al. 2021 dataset

DOI: https://doi.org/10.6096/13
Data downloaded on July 29, 2024
Picked the 2000-2020 elevation change data (period 2000-01-01 to 2020-01-01, as in the data folder name)

In [53]:
## dhdt
# Original author's pattern, kept for reference:
# list_files = glob.glob("/home/rooda/Dropbox/Patagonia/GIS South/Glaciers/dhdt/*.tif")
dhdt_pattern = "/Users/milliespencer/Desktop/data_for_preprocessing/H21/17_rgi60_2000-01-01_2020-01-01/dhdt/*.tif"
list_files = glob.glob(dhdt_pattern)

# Open each dhdt raster and reproject it to EPSG:32719 with bilinear resampling.
glacier_list = [
    riox.open_rasterio(tile_path).rio.reproject("EPSG:32719", resampling=Resampling.bilinear)
    for tile_path in list_files
]

# Mosaic the rasters with rioxarray's merge_arrays at a resolution of (100, 100),
# using the 'max' merge method where inputs overlap.
merged_raster = merge_arrays(
    dataarrays=glacier_list,
    res=(100, 100),
    crs="EPSG:32719",
    method="max",
)

# Write the dhdt mosaic to disk.
merged_raster.rio.to_raster("/Users/milliespencer/Desktop/data_for_preprocessing/dhdt_2021.tif")

In [54]:
# dhdt_error
# Original author's pattern, kept for reference:
# list_files = glob.glob("/home/rooda/Dropbox/Patagonia/GIS South/Glaciers/dhdt_error/*.tif")
dhdt_err_pattern = "/Users/milliespencer/Desktop/data_for_preprocessing/H21/17_rgi60_2000-01-01_2020-01-01/dhdt_err/*.tif"
list_files = glob.glob(dhdt_err_pattern)

# Open each dhdt_err raster and reproject it to EPSG:32719 with bilinear resampling.
glacier_list = [
    riox.open_rasterio(tile_path).rio.reproject("EPSG:32719", resampling=Resampling.bilinear)
    for tile_path in list_files
]

# Mosaic the rasters with rioxarray's merge_arrays at a resolution of (100, 100),
# using the 'max' merge method where inputs overlap.
merged_raster = merge_arrays(
    dataarrays=glacier_list,
    res=(100, 100),
    crs="EPSG:32719",
    method="max",
)

# Write the dhdt error mosaic to disk.
# Original author's output path, kept for reference:
# merged_raster.rio.to_raster("/home/rooda/Dropbox/Patagonia/GIS South/Glaciers/dhdt_error_2021.tif")
merged_raster.rio.to_raster("/Users/milliespencer/Desktop/data_for_preprocessing/dhdt_error_2021.tif")