# Greenland peripherial glacier pre-image download processing

#### Jukes Liu
__Last modified 10-15-2019.__

## 1) Import packages, set base path, set glaciers of interest by BoxID

In [22]:
import subprocess
import numpy as np
import os
import pandas as pd
import rasterio
import fiona
from shapely.geometry import Polygon, Point
import shapely
import math

#SET basepath to your own folder
basepath='/home/jukes/Documents/Sample_glaciers/'

#ENTER list of glaciers of interest by BoxID
#make this into a widget where you can enter them in?
# BOXIDS = ['001', '002', '004', '033', '120', '174', '235', '259', '277', '531'];
BOXIDS = ['Alison', 'Helheim']

In [26]:
def distance(x1, y1, x2, y2):
    dist = math.sqrt(((x2-x1)**2) + ((y2-y1)**2))
    return dist

## 2) Create buffer zone around terminus boxes and rasterize/subset terminus boxes

The following code pulls the buffer distances around the terminus boxes from an existing .csv file with the exported attributes tables for the peripheral glacier terminus boxes. These buffer distances will be used to create a buffer zone to subset the Landsat scenes.

In [60]:
buffers = []

#Calculate a buffer distance around the terminus box using the UTM projected boxes
for BoxID in BOXIDS:
    buff_distances = []

    for file in os.listdir(basepath+'Box'+BoxID+'/'):
        if 'UTM' in file and '.shp' in file:
            print(file)
            boxpath = basepath+"Box"+BoxID+"/"+file
#             print(boxpath)
            
            termbox = fiona.open(boxpath)
            #grab the box feature:
            box = termbox.next()
            box_geom= box.get('geometry')
            box_coords = box_geom.get('coordinates')[0]
#             print(box_geom)
            
            points = []
            for coord_pair in box_coords:
                lat = coord_pair[0]
                lon = coord_pair[1]
                
                points.append([lat, lon])
            
            #Calculate distance between 1 and 2 and distance between 2 and 3
            #pick the longer one (length)
            coord1 = points[0]
            coord2 = points[1]
            coord3 = points[2]
            
            #1 and 2
            dist1 = distance(coord1[0], coord1[1], coord2[0], coord2[1])       
            #2 and 3
            dist2 = distance(coord2[0], coord2[1], coord3[0], coord3[1])
            
            buff_dist = int(np.max([dist1, dist2]))
#             print(buff_dist)
            buff_distances.append(buff_dist)
    
    buffer = buff_distances[0]
    buffers.append(buffer)

buff_df = pd.DataFrame(list(zip(BOXIDS, buffers)), columns=['BoxID', 'Buff_dist_m'])
buff_df

BoxAlison_UTM_22.shp
BoxAlison_UTM_21.shp
BoxHelheim_UTM_24.shp


  from ipykernel import kernelapp as app


Unnamed: 0,BoxID,Buff_dist_m
0,Alison,7664
1,Helheim,8193


The next section creates a buffer zone using GDAL command **ogr2ogr** with the following syntax:

    ogr2ogr Buffer###.shp path_to_terminusbox###.shp  -dialect sqlite -sql "SELECT ST_Buffer(geometry, buffer_distance) AS geometry,*FROM 'Box###'" -f "ESRI Shapefile"

In [64]:
for index, row in buff_df.iterrows():
    BoxID = row['BoxID']
    buff_dist = str(row['Buff_dist_m'])
    
    #SET path to the terminus box shapefiles
    terminusbox_path = basepath+"Box"+BoxID+"/Box"+BoxID+".shp"
    outputbuffer_path = basepath+"Box"+BoxID+"/Buffer"+BoxID+".shp"
    
    #SET buffer command and print to check it
    buffer_cmd = 'ogr2ogr '+outputbuffer_path+" "+terminusbox_path+' -dialect sqlite -sql "SELECT ST_Buffer(geometry, '+buff_dist+") AS geometry,*FROM 'Box"+BoxID+"'"+'" -f "ESRI Shapefile"'
    print(export_GDALpath, buffer_cmd)
    
    subprocess.call(buffer_cmd, shell=True)
    
    print("Box"+BoxID)

export PATH=/Library/Frameworks/GDAL.framework/Programs:$PATH ;  ogr2ogr /home/jukes/Documents/Sample_glaciers/BoxAlison/BufferAlison.shp /home/jukes/Documents/Sample_glaciers/BoxAlison/BoxAlison.shp -dialect sqlite -sql "SELECT ST_Buffer(geometry, 7664) AS geometry,*FROM 'BoxAlison'" -f "ESRI Shapefile"
BoxAlison
export PATH=/Library/Frameworks/GDAL.framework/Programs:$PATH ;  ogr2ogr /home/jukes/Documents/Sample_glaciers/BoxHelheim/BufferHelheim.shp /home/jukes/Documents/Sample_glaciers/BoxHelheim/BoxHelheim.shp -dialect sqlite -sql "SELECT ST_Buffer(geometry, 8193) AS geometry,*FROM 'BoxHelheim'" -f "ESRI Shapefile"
BoxHelheim


The terminus box shapefiles are then rasterized (to be used as a mask during the WTMM filering) using the GDAL **gdal_rasterize** command and subset to the buffer zone using the GDAL **gdalwarp** command using the following syntax:

1) Rasterize

    gdal_rasterize -burn 1.0 -tr x_resolution y_resolution -a_nodata 0.0 path_to_terminusbox.shp path_to_terminusbox_raster.TIF

The x_resolution and y_resolution are set to be 15.0 (meters) to match the Landsat B8 resolution.
    
2) Subset

    gdalwarp -cutline path_to_Buffer###.shp -crop_to_cutline path_to_terminusbox_raster.TIF path_to_subset_raster_cut.TIF

In [65]:
for index, row in buff_df.iterrows():
    BoxID = row['BoxID']
    #SET path to the terminus box shapefiles
    terminusbox_path = basepath+"Box"+BoxID+"/Box"+BoxID+".shp"
    buffer_path = basepath+"Box"+BoxID+"/Buffer"+BoxID+".shp"
    
    #output raster path:
    terminusraster_path = basepath+"Box"+BoxID+"/Box"+BoxID+".TIF"
    cutraster_path = basepath+"Box"+BoxID+"/Box"+BoxID+"_cut.TIF"
    
    #SET commands and print to check
    rasterize_cmd = 'gdal_rasterize -burn 1.0 -tr 15.0 15.0 -a_nodata 0.0 '+terminusbox_path+' '+terminusraster_path
    subsetbuffer_cmd = 'gdalwarp -cutline '+buffer_path+' -crop_to_cutline '+terminusraster_path+" "+cutraster_path
    #print(export_GDALpath+rasterize_cmd)
    #print(export_GDALpath+subsetbuffer_cmd)
    
    #RASTERIZE & SUBSET
    subprocess.call(rasterize_cmd, shell=True)
    subprocess.call(subsetbuffer_cmd, shell=True)
    
    print("Box"+BoxID)
    

BoxAlison
BoxHelheim


## 3) Calculate average flow direction (weighted by magnitude) for each glacier

The following code processes 2016-2017 ice velocity data from the ESA Cryoportal to determine each glacier of interest's weighted average flow direction. The ice velocity direction (calculated from yx velocity) and the velocity magnitude at each glacier's terminus  is subset using the terminus box shapefile using a GDAL command (**gdalwarp**) with the following syntax:

    gdalwarp -cutline path_to_terminusbox.shp -crop_to_cutline path_to_input_velocity.TIF path_to_output_velocity_at_term###.TIF

In [2]:
for BoxID in BOXIDS:
    #SET paths to the terminus box shapefiles and velocity data
    terminusbox_path = basepath+"Box"+BoxID+"/Box"+BoxID+".shp"
    v_dir_path = basepath+'dir_degree_yx_velocity.tif'
    v_mag_path = basepath+'magnitude_velocity.tif'  
    
    #SET output paths
    v_dir_output = basepath+"Box"+BoxID+"/dir_degree_yx_velocity_at_term"+BoxID+".tif"
    v_mag_output = basepath+"Box"+BoxID+"/magnitude_velocity_at_term"+BoxID+".tif"
    
    #SET velocity subset commands and print to check it
    v_subset_dir_cmd = 'gdalwarp -cutline '+terminusbox_path+' -crop_to_cutline '+v_dir_path+" "+v_dir_output
    v_subset_mag_cmd = 'gdalwarp -cutline '+terminusbox_path+' -crop_to_cutline '+v_mag_path+" "+v_mag_output
    #print(export_GDALpath+v_subset_dir_cmd)
    #print(export_GDALpath+v_subset_mag_cmd)
    
    #SUBSET velocity rasters
    subprocess.call(v_subset_dir_cmd, shell=True)
    subprocess.call(v_subset_mag_cmd, shell=True)
    
    print("Box"+BoxID)

BoxAlison
BoxHelheim


Next, these subset velocity rasters are opened using the **rasterio** package and read into arrays. They are filtered for anomalous values and the velocity magnitudes are converted into weights. Then the **numpy.average()** function is used to calculated the weighted average flow directions where the flow directions of the pixels where the highest velocities are found are weighted more. 

The resulting average flow direction will be representative of the glacier's main flow. These directions will be used to rotate the images of the glaciers so that their flow is due right.

In [6]:
#CREATE list of glacier average flow directions:
boxes = []
rot_angles = []
max_magnitudes = []


for BoxID in BOXIDS :    
        #READ velocity direction and magnitude data at terminus for each glacier into an array
        direction = rasterio.open(basepath+'Box'+BoxID+'/dir_degree_yx_velocity_at_term'+BoxID+'.tif', "r")
        dir_array = direction.read()
        #print(dir_array.shape)
        magnitude = rasterio.open(basepath+'Box'+BoxID+'/magnitude_velocity_at_term'+BoxID+'.tif', "r")
        mag_array = magnitude.read()
        #print(mag_array.shape)
        
        
        #RESHAPE direction array and remove anomalous values
        dir2 = dir_array.reshape(dir_array.shape[1] * dir_array.shape[2])
        #direction must be between 180 and -180 degrees
        mask_dir2 = (dir2 < 180) & (dir2 > -180)
        masked_dir = dir2[mask_dir2]
        #print(masked_dir.min(), masked_dir.max())

        #RESHAPE magnitude array and remove anomalous values
        mag2 = mag_array.reshape(mag_array.shape[1] * mag_array.shape[2])
        #magnitude must be between 0 and 10 m/d
        mask_mag2 = (mag2 < 100) & (mag2 > 0)
        masked_mag = mag2[mask_mag2]
        #print(masked_mag.min(), masked_mag.max())
        
    
        #CALCULATE weights (0 - 1) from magnitudes
        mag_range = masked_mag.max() - masked_mag.min()
        stretch = 1/mag_range
        weights = stretch*(masked_mag - masked_mag.min())
        #print(weights.min(), weights.max()) #should be between 0 and 1
        #print(weights.shape, masked_dir.shape)
        
        
        #CALCULATE the weighted average rotation angle
        avg_dir = np.average(masked_dir, weights=weights)
        #print(avg_dir)
        
        #APPEND the rotation angles to the dictionary
        rot_angles.append(avg_dir)
        
        #APPEND the maximum flow magnitude
        max_magnitudes.append(masked_mag.max())
        
        #APPEND the BoxID
        boxes.append(BoxID)

#create dataframe with the calculations
velocities_df = pd.DataFrame(list(zip(boxes, rot_angles, max_magnitudes)), columns=['BoxID','Flow_dir', 'Max_speed'])
velocities_df = velocities_df.sort_values(by='BoxID')
velocities_df

Unnamed: 0,BoxID,Flow_dir,Max_speed
0,1,56.284283,0.043774
1,2,155.987228,3.583226
2,4,-3.483434,0.623082
3,33,142.118164,0.771658
4,120,-77.386398,0.277889
5,174,12.677643,0.914503
6,235,-145.850769,0.157097
7,259,98.999275,3.074901
8,277,-65.120186,0.28602
9,531,78.835213,0.040785


In [8]:
#EXPORT MAX VELOCITY AND AVERAGE FLOW DIRECTION TO A .CSV FILE
#write the data frame to csv file
velocities_df.to_csv(path_or_buf = basepath+'Glacier_velocities.csv', sep=',')