# Greenland peripheral glacier pre-image download processing

#### Jukes Liu
__Last modified 10-15-2019.__

## 1) Import packages, set base path, set glaciers of interest by BoxID

In [2]:
import subprocess
import numpy as np
import os
import pandas as pd
import rasterio

#SET basepath to your own folder; keep the trailing slash, because the cells
#below build every file path by plain string concatenation onto basepath
basepath = '/home/jukes/Documents/Sample_glaciers/'

#ENTER the glaciers of interest as BoxID strings (the same strings are used in
#the "Box###" folder and file names)
#TODO: make this into a widget where you can enter them in?
BOXIDS = [
    '001', '002', '004', '033', '120',
    '174', '235', '259', '277', '531',
]

## 2) Create buffer zone around terminus boxes and rasterize/subset terminus boxes

The following code pulls the buffer distances around the terminus boxes from an existing .csv file with the exported attributes tables for the peripheral glacier terminus boxes. These buffer distances will be used to create a buffer zone to subset the Landsat scenes.

In [4]:
#READ the attribute table exported for the terminus boxes
df = pd.read_csv(basepath + 'Boxes_attributes.csv', sep=',')

#COPY the two columns that are needed (each one is a pandas Series)
df_buffdist = df["Buff_dist"].copy()   #buffer distance of every box
df_boxid = df["BoxID"].copy()          #BoxID of every box

#JOIN the two Series side by side into one two-column DataFrame
df_b = pd.concat([df_buffdist, df_boxid], axis=1)

#CREATE dictionary of buffer distances with BoxID as the key.
#Every value is a one-element list, so a distance is read back as bd[key][0].
bd = df_b.set_index('BoxID').T.to_dict('list')

#PRINT buffer distances for the glaciers of interest
for BoxID in BOXIDS:
    bd_key = int(BoxID)                       #dictionary is looked up with the BoxID as an integer
    buff_dist = str(int(bd[bd_key][0]))       #distance truncated to a whole number
    print("Box{}:".format(BoxID), buff_dist, "meters")


Box001: 1766 meters
Box002: 6741 meters
Box004: 7112 meters
Box033: 5424 meters
Box120: 1720 meters
Box174: 3729 meters
Box235: 2620 meters
Box259: 6974 meters
Box277: 2273 meters
Box531: 2657 meters


The next section creates a buffer zone using GDAL command **ogr2ogr** with the following syntax:

    ogr2ogr Buffer###.shp path_to_terminusbox###.shp  -dialect sqlite -sql "SELECT ST_Buffer(geometry, buffer_distance) AS geometry,*FROM 'Box###'" -f "ESRI Shapefile"

In [5]:
#export GDAL path command (no longer used by this cell; kept defined so that any
#other cell that still builds shell command strings keeps working):
export_GDALpath = 'export PATH=/Library/Frameworks/GDAL.framework/Programs:$PATH ; '

#ENVIRONMENT for the GDAL subprocess: a copy of the current environment with the
#GDAL programs folder put first on PATH (same effect as the export command above).
#Passing it through env= lets ogr2ogr run WITHOUT a shell, so file paths that
#contain spaces or shell metacharacters reach GDAL unchanged.
gdal_env = dict(os.environ)
gdal_env['PATH'] = '/Library/Frameworks/GDAL.framework/Programs' + os.pathsep + os.environ.get('PATH', '')

for BoxID in BOXIDS:
    #SET paths to the input terminus box shapefile and the output buffer shapefile
    terminusbox_path = basepath+"Box"+BoxID+"/Box"+BoxID+".shp"
    outputbuffer_path = basepath+"Box"+BoxID+"/Buffer"+BoxID+".shp"

    #PULL the buffer distance as a string from the bd dictionary using the BoxID key
    bd_key = int(BoxID)
    buff_dist = str(int(bd[bd_key][0]))

    #SET buffer command as an argument list (one item per command-line argument)
    buffer_sql = "SELECT ST_Buffer(geometry, "+buff_dist+") AS geometry,*FROM 'Box"+BoxID+"'"
    buffer_cmd = ['ogr2ogr', outputbuffer_path, terminusbox_path,
                  '-dialect', 'sqlite',
                  '-sql', buffer_sql,
                  '-f', 'ESRI Shapefile']

    #RUN ogr2ogr; a non-zero exit status is reported instead of being silently
    #dropped, but the loop carries on with the remaining glaciers
    returncode = subprocess.call(buffer_cmd, env=gdal_env)
    if returncode != 0:
        print("WARNING: ogr2ogr exited with status", returncode, "for Box"+BoxID)

    print("Box"+BoxID)


Box001
Box002
Box004
Box033
Box120
Box174
Box235
Box259
Box277
Box531


The terminus box shapefiles are then rasterized (to be used as a mask during the WTMM filtering) with the GDAL **gdal_rasterize** command and subset to the buffer zone with the GDAL **gdalwarp** command, using the following syntax:

1) Rasterize

    gdal_rasterize -burn 1.0 -tr x_resolution y_resolution -a_nodata 0.0 path_to_terminusbox.shp path_to_terminusbox_raster.TIF

The x_resolution and y_resolution are set to be 15.0 (meters) to match the Landsat B8 resolution.
    
2) Subset

    gdalwarp -cutline path_to_Buffer###.shp -crop_to_cutline path_to_terminusbox_raster.TIF path_to_subset_raster_cut.TIF

In [141]:
#export GDAL path command (no longer used by this cell; kept defined so that any
#other cell that still builds shell command strings keeps working):
export_GDALpath = 'export PATH=/Library/Frameworks/GDAL.framework/Programs:$PATH ; '

#ENVIRONMENT for the GDAL subprocesses: a copy of the current environment with the
#GDAL programs folder put first on PATH (same effect as the export command above).
#Passing it through env= lets the commands run WITHOUT a shell, so file paths that
#contain spaces or shell metacharacters reach GDAL unchanged.
gdal_env = dict(os.environ)
gdal_env['PATH'] = '/Library/Frameworks/GDAL.framework/Programs' + os.pathsep + os.environ.get('PATH', '')

for BoxID in BOXIDS:
    #SET paths to the terminus box and buffer shapefiles
    terminusbox_path = basepath+"Box"+BoxID+"/Box"+BoxID+".shp"
    buffer_path = basepath+"Box"+BoxID+"/Buffer"+BoxID+".shp"

    #output raster paths:
    terminusraster_path = basepath+"Box"+BoxID+"/Box"+BoxID+".TIF"
    cutraster_path = basepath+"Box"+BoxID+"/Box"+BoxID+"_cut.TIF"

    #SET commands as argument lists (one item per command-line argument)
    #rasterize: burn value 1.0, 15.0 x 15.0 pixel size, nodata value 0.0
    rasterize_cmd = ['gdal_rasterize', '-burn', '1.0', '-tr', '15.0', '15.0',
                     '-a_nodata', '0.0', terminusbox_path, terminusraster_path]
    #subset: crop the terminus box raster to the buffer shapefile
    subsetbuffer_cmd = ['gdalwarp', '-cutline', buffer_path, '-crop_to_cutline',
                        terminusraster_path, cutraster_path]

    #RASTERIZE & SUBSET (in that order); a non-zero exit status is reported
    #instead of being silently dropped, and both commands are always attempted
    for gdal_cmd in (rasterize_cmd, subsetbuffer_cmd):
        returncode = subprocess.call(gdal_cmd, env=gdal_env)
        if returncode != 0:
            print("WARNING:", gdal_cmd[0], "exited with status", returncode, "for Box"+BoxID)

    print("Box"+BoxID)
    

Box001
Box002
Box004
Box033
Box120
Box174
Box235
Box259
Box277
Box531


## 3) Calculate average flow direction (weighted by magnitude) for each glacier

The following code processes 2016-2017 ice velocity data from the ESA Cryoportal to determine each glacier of interest's weighted average flow direction. The ice velocity direction (calculated from yx velocity) and the velocity magnitude at each glacier's terminus are subset using the terminus box shapefile with a GDAL command (**gdalwarp**) that has the following syntax:

    gdalwarp -cutline path_to_terminusbox.shp -crop_to_cutline path_to_input_velocity.TIF path_to_output_velocity_at_term###.TIF

In [20]:
#export GDAL path command (no longer used by this cell; kept defined so that any
#other cell that still builds shell command strings keeps working):
export_GDALpath = 'export PATH=/Library/Frameworks/GDAL.framework/Programs:$PATH ; '

#ENVIRONMENT for the GDAL subprocesses: a copy of the current environment with the
#GDAL programs folder put first on PATH (same effect as the export command above).
#Passing it through env= lets gdalwarp run WITHOUT a shell, so file paths that
#contain spaces or shell metacharacters reach GDAL unchanged.
gdal_env = dict(os.environ)
gdal_env['PATH'] = '/Library/Frameworks/GDAL.framework/Programs' + os.pathsep + os.environ.get('PATH', '')

#SET paths to the velocity data (the same two input rasters for every glacier)
v_dir_path = basepath+'dir_degree_yx_velocity.tif'
v_mag_path = basepath+'magnitude_velocity.tif'

for BoxID in BOXIDS:
    #SET path to the terminus box shapefile used as the cutline
    terminusbox_path = basepath+"Box"+BoxID+"/Box"+BoxID+".shp"

    #SET output paths
    v_dir_output = basepath+"Box"+BoxID+"/dir_degree_yx_velocity_at_term"+BoxID+".tif"
    v_mag_output = basepath+"Box"+BoxID+"/magnitude_velocity_at_term"+BoxID+".tif"

    #SET velocity subset commands as argument lists (one item per command-line argument)
    v_subset_dir_cmd = ['gdalwarp', '-cutline', terminusbox_path, '-crop_to_cutline',
                        v_dir_path, v_dir_output]
    v_subset_mag_cmd = ['gdalwarp', '-cutline', terminusbox_path, '-crop_to_cutline',
                        v_mag_path, v_mag_output]

    #SUBSET velocity rasters; a non-zero exit status is reported instead of being
    #silently dropped, and both subsets are always attempted
    for gdal_cmd in (v_subset_dir_cmd, v_subset_mag_cmd):
        returncode = subprocess.call(gdal_cmd, env=gdal_env)
        if returncode != 0:
            print("WARNING: gdalwarp exited with status", returncode,
                  "for Box"+BoxID, "->", gdal_cmd[-1])

    print("Box"+BoxID)

Box001
Box002
Box004
Box033
Box120
Box174
Box235
Box259
Box277
Box531


Next, these subset velocity rasters are opened using the **rasterio** package and read into arrays. They are filtered for anomalous values and the velocity magnitudes are converted into weights. Then the **numpy.average()** function is used to calculate the weighted average flow direction, in which the flow directions of the pixels with the highest velocities are weighted more heavily.

The resulting average flow direction will be representative of the glacier's main flow. These directions will be used to rotate the images of the glaciers so that their flow is due right.

In [6]:
def weighted_flow_direction(dir_values, mag_values):
    """Return (weighted average flow direction, maximum magnitude) for one glacier.

    Parameters
    ----------
    dir_values : array of flow directions in degrees, one value per pixel.
    mag_values : array of velocity magnitudes with the same shape as dir_values,
        so that element i of both arrays describes the same pixel.

    A pixel is kept only when BOTH its direction lies strictly between -180 and
    180 and its magnitude lies strictly between 0 and 100. Using one combined
    mask keeps every direction paired with the weight of its own pixel.
    The kept magnitudes are rescaled linearly to weights between 0 and 1 (the
    slowest kept pixel gets weight 0, the fastest gets weight 1). When all kept
    magnitudes are equal that rescaling is undefined, so the plain (unweighted)
    mean of the directions is returned instead.

    Raises
    ------
    ValueError : if the two arrays differ in shape, or if no pixel passes both
        filters.
    """
    dir_values = np.asarray(dir_values)
    mag_values = np.asarray(mag_values)
    if dir_values.shape != mag_values.shape:
        raise ValueError("direction and magnitude arrays differ in shape: "
                         + str(dir_values.shape) + " vs " + str(mag_values.shape))

    #FLATTEN both arrays the same way so the pixel order stays aligned
    dir2 = dir_values.ravel()
    mag2 = mag_values.ravel()

    #REMOVE anomalous values with one mask shared by both arrays
    #direction must be between 180 and -180 degrees
    #magnitude must be between 0 and 100
    #NOTE(review): the original comment here said "between 0 and 10 m/d" while the
    #code has always tested < 100 - confirm the intended upper bound and units
    valid = (dir2 < 180) & (dir2 > -180) & (mag2 < 100) & (mag2 > 0)
    masked_dir = dir2[valid]
    masked_mag = mag2[valid]
    if masked_mag.size == 0:
        raise ValueError("no pixel has both a valid flow direction and a valid magnitude")

    #CALCULATE weights (0 - 1) from magnitudes
    mag_range = masked_mag.max() - masked_mag.min()
    if mag_range > 0:
        stretch = 1/mag_range
        weights = stretch*(masked_mag - masked_mag.min())
        #NOTE(review): this is an arithmetic mean of angles, so directions that
        #straddle the +180/-180 wrap-around average towards 0; a circular mean
        #would avoid that - left unchanged to keep existing results
        avg_dir = np.average(masked_dir, weights=weights)
    else:
        #every kept pixel has the same magnitude: weights would be 0/0
        avg_dir = np.average(masked_dir)

    return avg_dir, masked_mag.max()


#CREATE lists of BoxIDs, average flow directions and maximum flow magnitudes:
boxes = []
rot_angles = []
max_magnitudes = []

for BoxID in BOXIDS:
    #READ band 1 of the velocity direction and magnitude rasters at the terminus;
    #the with-blocks close each raster file as soon as it has been read
    with rasterio.open(basepath+'Box'+BoxID+'/dir_degree_yx_velocity_at_term'+BoxID+'.tif', "r") as direction:
        dir_array = direction.read(1)
    with rasterio.open(basepath+'Box'+BoxID+'/magnitude_velocity_at_term'+BoxID+'.tif', "r") as magnitude:
        mag_array = magnitude.read(1)

    #CALCULATE the weighted average rotation angle and the maximum flow magnitude
    try:
        avg_dir, max_mag = weighted_flow_direction(dir_array, mag_array)
    except ValueError as err:
        #same exception type, but say which glacier failed
        raise ValueError("Box"+BoxID+": "+str(err))

    #APPEND the results for this glacier to the lists
    rot_angles.append(avg_dir)
    max_magnitudes.append(max_mag)
    boxes.append(BoxID)

#create dataframe with the calculations
velocities_df = pd.DataFrame(list(zip(boxes, rot_angles, max_magnitudes)), columns=['BoxID','Flow_dir', 'Max_speed'])
velocities_df = velocities_df.sort_values(by='BoxID')
velocities_df

Unnamed: 0,BoxID,Flow_dir,Max_speed
0,1,56.284283,0.043774
1,2,155.987228,3.583226
2,4,-3.483434,0.623082
3,33,142.118164,0.771658
4,120,-77.386398,0.277889
5,174,12.677643,0.914503
6,235,-145.850769,0.157097
7,259,98.999275,3.074901
8,277,-65.120186,0.28602
9,531,78.835213,0.040785


In [8]:
#EXPORT the table of average flow direction and maximum speed per glacier
#to a .csv file inside basepath (the DataFrame index is written as well)
velocities_csv_path = basepath + 'Glacier_velocities.csv'
velocities_df.to_csv(velocities_csv_path, sep=',')