# Greenland peripheral glacier terminus image processing

### Jukes Liu

## 1) Import packages, set base path, set glaciers of interest by BoxID

In [2]:
import subprocess
import numpy as np
import os
import pandas as pd
import rasterio

#SET basepath to your own folder
#(keep the trailing slash: file paths are built from it by string concatenation)
basepath = '/home/jukes/Documents/Sample_glaciers/'

#ENTER the glaciers of interest as three-character BoxID strings
#make this into a widget where you can enter them in?
BOXIDS = [
    '001', '002', '004', '033', '120',
    '174', '235', '259', '277', '531',
]

## 2) Create buffer zone around terminus boxes and rasterize/subset terminus boxes

The following code pulls the buffer distances around the terminus boxes from an existing .csv file containing the exported attribute table for the peripheral glacier terminus boxes. These buffer distances will be used to create a buffer zone to subset the Landsat scenes.

In [4]:
#READ the exported attribute table of the terminus boxes
df = pd.read_csv(basepath + 'Boxes_attributes.csv', sep=',')

#KEEP independent copies of the two columns that are needed
df_buffdist = df["Buff_dist"].copy()
df_boxid = df["BoxID"].copy()
df_b = df[["Buff_dist", "BoxID"]].copy()  # same two columns, side by side

#BUILD the lookup {BoxID: [buffer distance]}
#after set_index only the Buff_dist column is left, so every value is a one-element list
bd = df_b.set_index('BoxID').T.to_dict(orient='list')

#REPORT the buffer distance of every glacier of interest
for BoxID in BOXIDS:
    bd_key = int(BoxID)  # BOXIDS holds strings; the dictionary is looked up by integer
    buff_dist = str(int(bd[bd_key][0]))
    print("Box{}: {} meters".format(BoxID, buff_dist))


Box001: 1766 meters
Box002: 6741 meters
Box004: 7112 meters
Box033: 5424 meters
Box120: 1720 meters
Box174: 3729 meters
Box235: 2620 meters
Box259: 6974 meters
Box277: 2273 meters
Box531: 2657 meters


The next section creates a buffer zone using GDAL command **ogr2ogr** with the following syntax:

    ogr2ogr Buffer###.shp path_to_terminusbox###.shp  -dialect sqlite -sql "SELECT ST_Buffer(geometry, buffer_distance) AS geometry,*FROM 'Box###'" -f "ESRI Shapefile"

In [5]:
#export GDAL path command (prefixed to every shell call so the GDAL programs are found):
export_GDALpath = 'export PATH=/Library/Frameworks/GDAL.framework/Programs:$PATH ; '

#CREATE one buffer shapefile per glacier with ogr2ogr
for BoxID in BOXIDS:
    #SET path to the terminus box shapefile and to the buffer shapefile to create
    box_dir = basepath+"Box"+BoxID+"/"
    terminusbox_path = box_dir+"Box"+BoxID+".shp"
    outputbuffer_path = box_dir+"Buffer"+BoxID+".shp"

    #PULL the buffer distance as a string from the bd dictionary using the BoxID key
    bd_key = int(BoxID)
    buff_dist = str(int(bd[bd_key][0]))

    #SET buffer command (uncomment the print to check it)
    buffer_cmd = (
        'ogr2ogr {out} {src} -dialect sqlite '
        '-sql "SELECT ST_Buffer(geometry, {dist}) AS geometry,*FROM \'Box{box}\'" '
        '-f "ESRI Shapefile"'
    ).format(out=outputbuffer_path, src=terminusbox_path, dist=buff_dist, box=BoxID)
    #print(export_GDALpath, buffer_cmd)

    #RUN the command and keep its exit status: a failed ogr2ogr must not be
    #reported as if the buffer had been written
    status = subprocess.call(export_GDALpath+buffer_cmd, shell=True)

    if status == 0:
        print("Box"+BoxID)
    else:
        print("Box"+BoxID+": ogr2ogr failed with exit code", status)


Box001
Box002
Box004
Box033
Box120
Box174
Box235
Box259
Box277
Box531


The terminus box shapefiles are then rasterized (to be used as a mask during the WTMM filtering) using the GDAL **gdal_rasterize** command and subset to the buffer zone using the GDAL **gdalwarp** command using the following syntax:

1) Rasterize

    gdal_rasterize -burn 1.0 -tr x_resolution y_resolution -a_nodata 0.0 path_to_terminusbox.shp path_to_terminusbox_raster.TIF

The x_resolution and y_resolution are set to be 15.0 (meters) to match the Landsat B8 resolution.
    
2) Subset

    gdalwarp -cutline path_to_Buffer###.shp -crop_to_cutline path_to_terminusbox_raster.TIF path_to_subset_raster_cut.TIF

In [141]:
#export GDAL path command (prefixed to every shell call so the GDAL programs are found):
export_GDALpath = 'export PATH=/Library/Frameworks/GDAL.framework/Programs:$PATH ; '

#RASTERIZE each terminus box and cut the raster to the glacier's buffer zone
for BoxID in BOXIDS:
    box_dir = basepath+"Box"+BoxID+"/"

    #SET paths to the terminus box and buffer shapefiles
    terminusbox_path = box_dir+"Box"+BoxID+".shp"
    buffer_path = box_dir+"Buffer"+BoxID+".shp"

    #output raster paths:
    terminusraster_path = box_dir+"Box"+BoxID+".TIF"
    cutraster_path = box_dir+"Box"+BoxID+"_cut.TIF"

    #SET commands (uncomment the prints to check them)
    #burn value 1.0 inside the box, 15.0 x 15.0 pixel size, 0.0 as nodata
    rasterize_cmd = 'gdal_rasterize -burn 1.0 -tr 15.0 15.0 -a_nodata 0.0 '+terminusbox_path+' '+terminusraster_path
    subsetbuffer_cmd = 'gdalwarp -cutline '+buffer_path+' -crop_to_cutline '+terminusraster_path+" "+cutraster_path
    #print(export_GDALpath+rasterize_cmd)
    #print(export_GDALpath+subsetbuffer_cmd)

    #RASTERIZE & SUBSET, keeping both exit codes so failures are not reported as success
    rasterize_status = subprocess.call(export_GDALpath+rasterize_cmd, shell=True)
    subset_status = subprocess.call(export_GDALpath+subsetbuffer_cmd, shell=True)

    if rasterize_status == 0 and subset_status == 0:
        print("Box"+BoxID)
    else:
        print("Box"+BoxID+": failed (gdal_rasterize exit code", str(rasterize_status)+", gdalwarp exit code", str(subset_status)+")")
    

Box001
Box002
Box004
Box033
Box120
Box174
Box235
Box259
Box277
Box531


## 3) Subset downloaded Landsat scenes

In [6]:
#READ the Landsat path/row table for the glaciers
#only csv columns 1-3 are loaded, and every value is kept as a string (dtype=str)
pathrows_csv = basepath + 'LS_pathrows_multiple.csv'
pathrows_df = pd.read_csv(pathrows_csv, usecols=[1, 2, 3], dtype=str)
pathrows_df

Unnamed: 0,BoxID,Path,Row
0,1,36,4
1,1,37,4
2,1,35,5
3,1,33,5
4,1,34,5
5,1,31,5
6,1,32,5
7,2,33,5
8,2,35,5
9,2,34,5


The following section of code accesses the Landsat 8 scenes downloaded and stored in Path###_Row### folders to 1) reproject them into Greenland Polar Stereographic Coordinates and 2) subset them using the buffer zone shapefiles created from the previous step. The two GDAL commands (**gdalwarp**) use the following syntax:

1) Reproject

    gdalwarp -t_srs '+proj=stere +lat_ts=70 +lat_0=90 +lon_0=-45 +y=0 +x=0 +k=1 +datum=WGS84 +units=m' path_to_input_image.TIF path_to_renamed_output.TIF

2) Subset

    gdalwarp -cutline path_to_Buffer###.shp -crop_to_cutline path_to_input_image.TIF path_to_renamed_output.TIF

The reprojected images B8_PS.TIF are saved in the Path_Row folders along with the original B8 images while the subsets are saved in the BoxID folders.

In [11]:
#set path to LS8 scenes
path_toscenedirectory = '/media/jukes/jukes1/LS8aws/'

#glaciers to process in this pass
BoxIDs_sub = ['001', '002', '004', '033', '120', '174', '235', '259', '277', '531']

#export GDAL path command (prefixed to every shell call so the GDAL programs are found):
export_GDALpath = 'export PATH=/Library/Frameworks/GDAL.framework/Programs:$PATH ; '

#target projection handed to gdalwarp -t_srs (single quotes are part of the shell command)
#NOTE(review): "+y=0 +x=0" look like they were meant to be the PROJ false-origin
#parameters "+x_0=0 +y_0=0" -- confirm; the string is deliberately left unchanged here.
ps_srs = "'+proj=stere +lat_ts=70 +lat_0=90 +lon_0=-45 +y=0 +x=0 +k=1 +datum=WGS84 +units=m'"

#WALK through every (Path, Row, BoxID) entry of the path/row table
for path, row, BoxID in zip(pathrows_df['Path'], pathrows_df['Row'], pathrows_df['BoxID']):
    if BoxID not in BoxIDs_sub:
        continue

    #SELECT folder with LS8 scenes for each glacier by Path and Row folder name
    foldername = "Path"+path+"_Row"+row
    path_toscenes = path_toscenedirectory+foldername+"/"
    path_toBoxIDscenes = path_toscenedirectory+"Box"+BoxID+"/"

    #set path to buffer zone
    buffer_path = basepath+"Box"+BoxID+"/Buffer"+BoxID+".shp"

    #set output path for subset images (the BoxID folder in the scene directory)
    subsetout = path_toscenedirectory+'Box'+BoxID+'/'

    if not os.path.exists(path_toscenes):
        print("Path", path, "Row", row, "Box"+BoxID, "has been deleted")
        continue

    print("Starting Path", path, "Row", row, "Box", BoxID)
    scenes_in_PRfolder = os.listdir(path_toscenes)

    #STEPS for each scene listed in the BoxID folder:
    for scene in os.listdir(path_toBoxIDscenes):
        #only entries whose name carries "LGN" just before its last two characters
        #AND that also exist in the PATH_ROW folder are processed
        if not (scene.endswith("LGN", -5, -2) and scene in scenes_in_PRfolder):
            continue

        #band files of this scene live in the PATH_ROW scene folder
        path_tobands = path_toscenes+scene+'/'
        reprojected_path = path_tobands+scene+"_B8_PS.TIF"

        #CHECK IF IMAGES HAVE ALREADY BEEN REPROJECTED OR SUBSET
        projected = any(image.endswith('B8_PS.TIF') for image in os.listdir(path_tobands))
        subset = any(image.endswith(".TIF") and scene in image for image in os.listdir(subsetout))

        #REPROJECT if the images have not already been reprojected
        if projected:
            print(scene, 'has already been reprojected.')
        else:
            for image in os.listdir(path_tobands):
                if not image.endswith('B8.TIF'):
                    continue
                print(image)
                #REPROJECT into Greenland Polar Stereographic
                reproject_cmd = "gdalwarp -t_srs "+ps_srs+" "+path_tobands+image+" "+reprojected_path
                status = subprocess.call(export_GDALpath+reproject_cmd, shell=True)

                #delete the original B8 ONLY when gdalwarp succeeded and its output exists;
                #removing it unconditionally would lose the image after a failed reprojection
                if status == 0 and os.path.exists(reprojected_path):
                    print("Reprojected.")
                    try:
                        os.remove(path_tobands+image)
                        print("B8 removed.")
                    except OSError as err:
                        print("Could not remove", image, "-", err)
                else:
                    print("Reprojection of", image, "failed with exit code", status, "- original B8 kept.")

        #SUBSET LS8 scenes to buffer zones around the terminus box if they haven't already been subset
        if subset:
            print(scene, 'has already been subset.')
        else:
            #the folder is listed again so an image reprojected just above is picked up
            for image in os.listdir(path_tobands):
                if not image.endswith("B8_PS.TIF"):
                    continue
                #save subsetted scenes as TIF files to BoxID folders in scene directory
                subsetbuffer_cmd = 'gdalwarp -cutline '+buffer_path+' -crop_to_cutline '+reprojected_path+" "+subsetout+scene+"_B8_PS_Buffer"+BoxID+".TIF"
                status = subprocess.call(export_GDALpath+subsetbuffer_cmd, shell=True)
                if status == 0:
                    print("Subset.")
                else:
                    print("Subset of", scene, "failed with exit code", status)

    print("Path", path, "Row", row, "Box"+BoxID, "done")

Starting Path 036 Row 004 Box 001
LC80360042017077LGN00 has already been reprojected.
LC80360042017077LGN00 has already been subset.
LC80360042015232LGN00 has already been reprojected.
LC80360042015232LGN00 has already been subset.
LC80360042015072LGN00 has already been reprojected.
LC80360042015072LGN00 has already been subset.
LC80360042013258LGN00 has already been reprojected.
LC80360042013258LGN00 has already been subset.
LC80360042016267LGN00 has already been reprojected.
LC80360042016267LGN00 has already been subset.
LC80360042016203LGN00 has already been reprojected.
LC80360042016203LGN00 has already been subset.
LC80360042015280LGN00 has already been reprojected.
LC80360042015280LGN00 has already been subset.
LC80360042014245LGN00 has already been reprojected.
LC80360042014245LGN00 has already been subset.
LC80360042015168LGN00 has already been reprojected.
LC80360042015168LGN00 has already been subset.
LC80360042016219LGN00 has already been reprojected.
LC80360042016219LGN00 h

LC82320182014338LGN00 has already been reprojected.
LC82320182014338LGN00 has already been subset.
LC82320182017074LGN00 has already been reprojected.
LC82320182017074LGN00 has already been subset.
LC82320182016328LGN00 has already been reprojected.
LC82320182016328LGN00 has already been subset.
LC82320182013255LGN00 has already been reprojected.
LC82320182013255LGN00 has already been subset.
LC82320182014130LGN00 has already been reprojected.
LC82320182014130LGN00 has already been subset.
LC82320182016040LGN00 has already been reprojected.
LC82320182016040LGN00 has already been subset.
LC82320182014098LGN00 has already been reprojected.
LC82320182014098LGN00 has already been subset.
LC82320182016088LGN00 has already been reprojected.
LC82320182016088LGN00 has already been subset.
LC82320182013351LGN00 has already been reprojected.
LC82320182013351LGN00 has already been subset.
LC82320182017010LGN00 has already been reprojected.
LC82320182017010LGN00 has already been subset.
LC82320182

B8 removed.
LC82320172016168LGN00 has already been subset.
LC82320172016328LGN00 has already been reprojected.
LC82320172016328LGN00 has already been subset.
LC82320172015085LGN00 has already been reprojected.
LC82320172015085LGN00 has already been subset.
LC82320172014130LGN00 has already been reprojected.
LC82320172014130LGN00 has already been subset.
LC82320172016312LGN00 has already been reprojected.
LC82320172016312LGN00 has already been subset.
LC82320172016264LGN00_B8.TIF
Reprojected.
B8 removed.
LC82320172016264LGN00 has already been subset.
LC82320172014306LGN00 has already been reprojected.
LC82320172014306LGN00 has already been subset.
LC82320172015341LGN00 has already been reprojected.
LC82320172015341LGN00 has already been subset.
LC82320172014322LGN00 has already been reprojected.
LC82320172014322LGN00 has already been subset.
LC82320172014210LGN00 has already been reprojected.
LC82320172014210LGN00 has already been subset.
LC82320172015005LGN00 has already been reproject

LC80120022017101LGN00 has already been reprojected.
LC80120022017101LGN00 has already been subset.
LC80120022016115LGN00 has already been reprojected.
LC80120022016115LGN00 has already been subset.
LC80120022016147LGN00 has already been reprojected.
LC80120022016147LGN00 has already been subset.
LC80120022015256LGN00 has already been reprojected.
LC80120022015256LGN00 has already been subset.
LC80120022016259LGN00 has already been reprojected.
LC80120022016259LGN00 has already been subset.
LC80120022013122LGN01 has already been reprojected.
LC80120022013122LGN01 has already been subset.
LC80120022016131LGN00 has already been reprojected.
LC80120022016131LGN00 has already been subset.
Path 012 Row 002 Box531 done
Starting Path 013 Row 002 Box 531
LC80130022014180LGN00 has already been reprojected.
LC80130022014180LGN00 has already been subset.
LC80130022016106LGN00 has already been reprojected.
LC80130022016106LGN00 has already been subset.
LC80130022016186LGN00 has already been reproje

## Determine which BoxIDs have been fully reprojected & subset

In [12]:
#set path to LS8 scenes
path_toscenedirectory = '/media/jukes/jukes1/LS8aws/'

#BoxIDs whose number of scene folders differs from their number of subset images
BoxIDs_sub = []

#The check only needs each glacier's BoxID folder, so BOXIDS is iterated directly.
#(Looking up pathrows_df by the BOXIDS position paired unrelated Path/Row rows with
#each glacier, and none of the values derived from them were used.)
for BoxID in BOXIDS:
    #folder holding both the scene folders and the subset images of this glacier
    subsetout = path_toscenedirectory+'Box'+BoxID+'/'

    allfiles = os.listdir(subsetout)
    tot_count = 0     # every entry starting with "LC"
    scene_count = 0   # entries with "LGN" just before the last two characters
    subset_count = 0  # entries ending in ".TIF"

    #count all scene folders and subset images (total)
    for file in allfiles:
        if not file.startswith("LC"):
            continue
        tot_count += 1
        if file.endswith("LGN", -5, -2):
            scene_count += 1
        if file.endswith(".TIF"):
            subset_count += 1

    print("Box", BoxID, ", Total", tot_count, ", Scenes", scene_count, ", Subsets", subset_count)
    if scene_count != subset_count:
        print("Not all images analyzed for Box", BoxID)
        BoxIDs_sub.append(BoxID)

print(BoxIDs_sub)

Box 001 , Total 400 , Scenes 200 , Subsets 200
Box 002 , Total 106 , Scenes 53 , Subsets 53
Box 004 , Total 152 , Scenes 76 , Subsets 76
Box 033 , Total 310 , Scenes 155 , Subsets 155
Box 120 , Total 276 , Scenes 138 , Subsets 138
Box 174 , Total 84 , Scenes 42 , Subsets 42
Box 235 , Total 236 , Scenes 118 , Subsets 118
Box 259 , Total 242 , Scenes 121 , Subsets 121
Box 277 , Total 82 , Scenes 41 , Subsets 41
Box 531 , Total 632 , Scenes 316 , Subsets 316
[]


## 4) Calculate average flow direction (weighted by magnitude) for each glacier

The following code processes 2016-2017 ice velocity data from the ESA Cryoportal to determine each glacier of interest's weighted average flow direction. The ice velocity direction (calculated from yx velocity) and the velocity magnitude at each glacier's terminus are subset with the terminus box shapefile using a GDAL command (**gdalwarp**) with the following syntax:

    gdalwarp -cutline path_to_terminusbox.shp -crop_to_cutline path_to_input_velocity.TIF path_to_output_velocity_at_term###.TIF

In [20]:
#export GDAL path command (prefixed to every shell call so the GDAL programs are found):
export_GDALpath = 'export PATH=/Library/Frameworks/GDAL.framework/Programs:$PATH ; '

#SET paths to the velocity rasters (the same inputs for every glacier)
v_dir_path = basepath+'dir_degree_yx_velocity.tif'
v_mag_path = basepath+'magnitude_velocity.tif'

#CUT both velocity rasters to each glacier's terminus box
for BoxID in BOXIDS:
    box_dir = basepath+"Box"+BoxID+"/"

    #SET path to the terminus box shapefile
    terminusbox_path = box_dir+"Box"+BoxID+".shp"

    #SET output paths
    v_dir_output = box_dir+"dir_degree_yx_velocity_at_term"+BoxID+".tif"
    v_mag_output = box_dir+"magnitude_velocity_at_term"+BoxID+".tif"

    #SET velocity subset commands (uncomment the prints to check them)
    v_subset_dir_cmd = 'gdalwarp -cutline '+terminusbox_path+' -crop_to_cutline '+v_dir_path+" "+v_dir_output
    v_subset_mag_cmd = 'gdalwarp -cutline '+terminusbox_path+' -crop_to_cutline '+v_mag_path+" "+v_mag_output
    #print(export_GDALpath+v_subset_dir_cmd)
    #print(export_GDALpath+v_subset_mag_cmd)

    #SUBSET velocity rasters, keeping both exit codes so failures are not reported as success
    dir_status = subprocess.call(export_GDALpath+v_subset_dir_cmd, shell=True)
    mag_status = subprocess.call(export_GDALpath+v_subset_mag_cmd, shell=True)

    if dir_status == 0 and mag_status == 0:
        print("Box"+BoxID)
    else:
        print("Box"+BoxID+": gdalwarp failed (direction exit code", str(dir_status)+", magnitude exit code", str(mag_status)+")")

Box001
Box002
Box004
Box033
Box120
Box174
Box235
Box259
Box277
Box531


Next, these subset velocity rasters are opened using the **rasterio** package and read into arrays. They are filtered for anomalous values and the velocity magnitudes are converted into weights. Then the **numpy.average()** function is used to calculate the weighted average flow direction, so that the flow directions of the pixels with the highest velocities are weighted the most.

The resulting average flow direction will be representative of the glacier's main flow. These directions will be used to rotate the images of the glaciers so that their flow is due right.

In [22]:
#CREATE dictionaries of average flow direction and maximum flow magnitude, keyed by BoxID:
rot_angles = {}
max_magnitudes = {}

for BoxID in BOXIDS:
    box_dir = basepath+'Box'+BoxID+'/'

    #READ band 1 of the direction and magnitude rasters at the terminus as flat arrays;
    #the with-blocks close the datasets again (they were previously left open)
    with rasterio.open(box_dir+'dir_degree_yx_velocity_at_term'+BoxID+'.tif', "r") as direction:
        dir2 = direction.read(1).ravel()
    with rasterio.open(box_dir+'magnitude_velocity_at_term'+BoxID+'.tif', "r") as magnitude:
        mag2 = magnitude.read(1).ravel()

    #each direction must be paired with the magnitude of the same pixel
    if dir2.shape != mag2.shape:
        raise ValueError("Box"+BoxID+": direction and magnitude rasters have different sizes")

    #REMOVE anomalous values with ONE shared mask so the two arrays stay pixel-aligned
    #(masking them separately can misalign directions and weights)
    #direction must be between 180 and -180 degrees
    mask_dir2 = (dir2 < 180) & (dir2 > -180)
    #magnitude must be above 0 and below 100
    #NOTE(review): the earlier comment said "between 0 and 10 m/d" while the code
    #uses 100 -- confirm the intended cutoff
    mask_mag2 = (mag2 < 100) & (mag2 > 0)
    valid = mask_dir2 & mask_mag2
    masked_dir = dir2[valid]
    masked_mag = mag2[valid]

    if masked_mag.size == 0:
        raise ValueError("Box"+BoxID+": no valid velocity pixels inside the terminus box")

    #CALCULATE weights (0 - 1) from magnitudes by min-max scaling
    mag_range = masked_mag.max() - masked_mag.min()
    if mag_range > 0:
        stretch = 1/mag_range
        weights = stretch*(masked_mag - masked_mag.min())
        #CALCULATE the weighted average rotation angle
        avg_dir = np.average(masked_dir, weights=weights)
    else:
        #all magnitudes are equal: scaling would divide by zero, so weight pixels equally
        avg_dir = masked_dir.mean()
    #NOTE(review): this is an arithmetic mean of angles; for flow directions that
    #straddle +/-180 degrees a circular mean may be needed -- confirm

    #STORE the rotation angle and the maximum flow magnitude
    rot_angles[BoxID] = avg_dir
    max_magnitudes[BoxID] = masked_mag.max()


print(rot_angles)
print(max_magnitudes)

#EXPORT TO CSV for rotations???

{'001': 56.284283, '002': 155.98723, '004': -3.4834337, '033': 142.11816, '120': -77.3864, '174': 12.677643, '235': -145.85077, '259': 98.999275, '277': -65.120186, '531': 78.83521}
{'001': 0.04377438, '002': 3.5832264, '004': 0.62308246, '033': 0.77165776, '120': 0.27788857, '174': 0.91450316, '235': 0.15709679, '259': 3.0749009, '277': 0.2860196, '531': 0.040784776}


## 5) Call FIJI scripts to perform rotations & resize

In [48]:
# RUN Rotate_LS.ijm

# RUN resize.ijm

## 6) Call Tcl scripts to perform 2D WTMM and filter for terminus line

In [None]:
#RUN parameters_gaussian.tcl
#RUN scr_gaussian.tcl

## 7) Call post-processing scripts