# Greenland peripherial glacier pre-image download processing

#### Jukes Liu
__Last modified 10-15-2019.__

## 1) Import packages, set base path, set glaciers of interest by BoxID

In [2]:
import subprocess
import numpy as np
import os
import pandas as pd
import rasterio
import fiona
from shapely.geometry import Polygon, Point
import shapely
import math

#SET basepath to your own folder
basepath='/home/jukes/Documents/Sample_glaciers/'

#ENTER list of glaciers of interest by BoxID
#make this into a widget where you can enter them in?
BOXIDS = ['001', '002', '004', '033', '120', '174', '235', '259', '277', '531'];
# BOXIDS = ['Alison', 'Helheim']
# BOXIDS = ['147', '148', '149', '150', '152', '190', '191', '192', '193', '194', '195', '195', '196', '213', '214', '215']

In [3]:
def distance(x1, y1, x2, y2):
    dist = math.sqrt(((x2-x1)**2) + ((y2-y1)**2))
    return dist

## 2) Create buffer zone around terminus boxes and rasterize/subset terminus boxes

The following code pulls the buffer distances around the terminus boxes from an existing .csv file with the exported attributes tables for the peripheral glacier terminus boxes. These buffer distances will be used to create a buffer zone to subset the Landsat scenes.

In [5]:
buffers = []

#Calculate a buffer distance around the terminus box using the UTM projected boxes
for BoxID in BOXIDS:
    buff_distances = []

    for file in os.listdir(basepath+'Box'+BoxID+'/'):
        if 'UTM' in file and '.shp' in file:
            print(file)
            boxpath = basepath+"Box"+BoxID+"/"+file
#             print(boxpath)
            
            termbox = fiona.open(boxpath)
            #grab the box feature:
            box = termbox.next()
            box_geom= box.get('geometry')
            box_coords = box_geom.get('coordinates')[0]
#             print(box_geom)
            
            points = []
            for coord_pair in box_coords:
                lat = coord_pair[0]
                lon = coord_pair[1]
                
                points.append([lat, lon])
            
            #Calculate distance between 1 and 2 and distance between 2 and 3
            #pick the longer one (length)
            coord1 = points[0]
            coord2 = points[1]
            coord3 = points[2]
            
            #1 and 2
            dist1 = distance(coord1[0], coord1[1], coord2[0], coord2[1])       
            #2 and 3
            dist2 = distance(coord2[0], coord2[1], coord3[0], coord3[1])
            
            buff_dist = int(np.max([dist1, dist2]))
#             print(buff_dist)
            buff_distances.append(buff_dist)
    
    buffer = buff_distances[0]
    buffers.append(buffer)

buff_df = pd.DataFrame(list(zip(BOXIDS, buffers)), columns=['BoxID', 'Buff_dist_m'])
buff_df

Box147_UTM_23.shp
Box147_UTM_24.shp
Buffer147_UTM_23.shp
Buffer147_UTM_24.shp
Box148_UTM_23.shp
Box148_UTM_24.shp
Buffer148_UTM_24.shp
Buffer148_UTM_23.shp
Box149_UTM_24.shp
Box149_UTM_23.shp
Buffer149_UTM_23.shp
Buffer149_UTM_24.shp
Buffer150_UTM_24.shp
Box150_UTM_24.shp
Box150_UTM_23.shp
Buffer150_UTM_23.shp
Box152_UTM_23.shp
Buffer152_UTM_24.shp
Box152_UTM_24.shp
Buffer152_UTM_23.shp
Box190_UTM_24.shp
Buffer190_UTM_24.shp
Box190_UTM_23.shp
Buffer190_UTM_23.shp
Box191_UTM_24.shp
Box191_UTM_23.shp
Buffer191_UTM_23.shp
Buffer191_UTM_24.shp
Buffer192_UTM_23.shp
Buffer192_UTM_24.shp
Box192_UTM_23.shp
Box192_UTM_24.shp
Box193_UTM_23.shp
Buffer193_UTM_23.shp
Buffer193_UTM_24.shp
Box193_UTM_24.shp
Buffer194_UTM_24.shp
Box194_UTM_23.shp
Box194_UTM_24.shp
Buffer194_UTM_23.shp
Buffer195_UTM_23.shp
Buffer195_UTM_24.shp
Box195_UTM_24.shp
Box195_UTM_23.shp
Buffer195_UTM_23.shp
Buffer195_UTM_24.shp
Box195_UTM_24.shp
Box195_UTM_23.shp
Buffer196_UTM_24.shp
Buffer196_UTM_23.shp
Box196_UTM_23.shp
Box1

  from ipykernel import kernelapp as app


Unnamed: 0,BoxID,Buff_dist_m
0,147,1083
1,148,861
2,149,662
3,150,34
4,152,934
5,190,1946
6,191,1685
7,192,33
8,193,1509
9,194,87


The next section creates a buffer zone using GDAL command **ogr2ogr** with the following syntax:

    ogr2ogr Buffer###.shp path_to_terminusbox###.shp  -dialect sqlite -sql "SELECT ST_Buffer(geometry, buffer_distance) AS geometry,*FROM 'Box###'" -f "ESRI Shapefile"

In [6]:
for index, row in buff_df.iterrows():
    BoxID = row['BoxID']
    buff_dist = str(row['Buff_dist_m'])
    
    #SET path to the terminus box shapefiles
    terminusbox_path = basepath+"Box"+BoxID+"/Box"+BoxID+".shp"
    outputbuffer_path = basepath+"Box"+BoxID+"/Buffer"+BoxID+".shp"
    
    #SET buffer command and print to check it
    buffer_cmd = 'ogr2ogr '+outputbuffer_path+" "+terminusbox_path+' -dialect sqlite -sql "SELECT ST_Buffer(geometry, '+buff_dist+") AS geometry,*FROM 'Box"+BoxID+"'"+'" -f "ESRI Shapefile"'
    print(buffer_cmd)
    
    subprocess.call(buffer_cmd, shell=True)
    
    print("Box"+BoxID)

ogr2ogr /home/jukes/Documents/Sample_glaciers/Box147/Buffer147.shp /home/jukes/Documents/Sample_glaciers/Box147/Box147.shp -dialect sqlite -sql "SELECT ST_Buffer(geometry, 1083) AS geometry,*FROM 'Box147'" -f "ESRI Shapefile"
Box147
ogr2ogr /home/jukes/Documents/Sample_glaciers/Box148/Buffer148.shp /home/jukes/Documents/Sample_glaciers/Box148/Box148.shp -dialect sqlite -sql "SELECT ST_Buffer(geometry, 861) AS geometry,*FROM 'Box148'" -f "ESRI Shapefile"
Box148
ogr2ogr /home/jukes/Documents/Sample_glaciers/Box149/Buffer149.shp /home/jukes/Documents/Sample_glaciers/Box149/Box149.shp -dialect sqlite -sql "SELECT ST_Buffer(geometry, 662) AS geometry,*FROM 'Box149'" -f "ESRI Shapefile"
Box149
ogr2ogr /home/jukes/Documents/Sample_glaciers/Box150/Buffer150.shp /home/jukes/Documents/Sample_glaciers/Box150/Box150.shp -dialect sqlite -sql "SELECT ST_Buffer(geometry, 34) AS geometry,*FROM 'Box150'" -f "ESRI Shapefile"
Box150
ogr2ogr /home/jukes/Documents/Sample_glaciers/Box152/Buffer152.shp /home

The terminus box shapefiles are then rasterized (to be used as a mask during the WTMM filering) using the GDAL **gdal_rasterize** command and subset to the buffer zone using the GDAL **gdalwarp** command using the following syntax:

1) Rasterize

    gdal_rasterize -burn 1.0 -tr x_resolution y_resolution -a_nodata 0.0 path_to_terminusbox.shp path_to_terminusbox_raster.TIF

The x_resolution and y_resolution are set to be 15.0 (meters) to match the Landsat B8 resolution.
    
2) Subset

    gdalwarp -cutline path_to_Buffer###.shp -crop_to_cutline path_to_terminusbox_raster.TIF path_to_subset_raster_cut.TIF

In [27]:
for index, row in buff_df.iterrows():
    BoxID = row['BoxID']
    #SET path to the terminus box shapefiles
    terminusbox_path = basepath+"Box"+BoxID+"/Box"+BoxID+".shp"
    buffer_path = basepath+"Box"+BoxID+"/Buffer"+BoxID+".shp"
    
    #output raster path:
    terminusraster_path = basepath+"Box"+BoxID+"/Box"+BoxID+".TIF"
    cutraster_path = basepath+"Box"+BoxID+"/Box"+BoxID+"_cut.TIF"
    
    #SET commands and print to check
    rasterize_cmd = 'gdal_rasterize -burn 1.0 -tr 15.0 15.0 -a_nodata 0.0 '+terminusbox_path+' '+terminusraster_path
    subsetbuffer_cmd = 'gdalwarp -cutline '+buffer_path+' -crop_to_cutline '+terminusraster_path+" "+cutraster_path
    #print(export_GDALpath+rasterize_cmd)
    #print(export_GDALpath+subsetbuffer_cmd)
    
    #RASTERIZE & SUBSET
    subprocess.call(rasterize_cmd, shell=True)
    subprocess.call(subsetbuffer_cmd, shell=True)
    
    print("Box"+BoxID)
    

Box147
Box148
Box149
Box150
Box152
Box190
Box191
Box192
Box193
Box194
Box195
Box195
Box196
Box213
Box214
Box215


## 3) Calculate average flow direction (weighted by magnitude) for each glacier

The following code processes 2016-2017 ice velocity data from the ESA Cryoportal to determine each glacier of interest's weighted average flow direction. The ice velocity direction (calculated from yx velocity) and the velocity magnitude at each glacier's terminus  is subset using the terminus box shapefile using a GDAL command (**gdalwarp**) with the following syntax:

    gdalwarp -cutline path_to_terminusbox.shp -crop_to_cutline path_to_input_velocity.TIF path_to_output_velocity_at_term###.TIF

In [4]:
for BoxID in BOXIDS:
    #SET paths to the terminus box shapefiles and velocity data
    terminusbox_path = basepath+"Box"+BoxID+"/Box"+BoxID+".shp"

    for vdate in ['2014_15', '2015_16', '2016_17']:
        vdir = vdate+'_velocity_dir_degree_yx.tif'
        vmag = vdate+'_velocity_mag.tif'
        
        #set input paths
        vdir_in = basepath+vdir
        vmag_in = basepath+vmag
    
        #SET output paths
        vdir_out = basepath+"Box"+BoxID+"/"+vdir
        vmag_out = basepath+"Box"+BoxID+"/"+vmag

        #SET velocity subset commands and print to check it
        v_subset_dir_cmd = 'gdalwarp -cutline '+terminusbox_path+' -crop_to_cutline '+vdir_in+" "+vdir_out
        v_subset_mag_cmd = 'gdalwarp -cutline '+terminusbox_path+' -crop_to_cutline '+vmag_in+" "+vmag_out
        print(v_subset_dir_cmd)
        print(v_subset_mag_cmd)

        #SUBSET velocity rasters
        subprocess.call(v_subset_dir_cmd, shell=True)
        subprocess.call(v_subset_mag_cmd, shell=True)
    
    print("Box"+BoxID)

gdalwarp -cutline /home/jukes/Documents/Sample_glaciers/Box001/Box001.shp -crop_to_cutline /home/jukes/Documents/Sample_glaciers/2014_15_velocity_dir_degree_yx.tif /home/jukes/Documents/Sample_glaciers/Box001/2014_15_velocity_dir_degree_yx.tif
gdalwarp -cutline /home/jukes/Documents/Sample_glaciers/Box001/Box001.shp -crop_to_cutline /home/jukes/Documents/Sample_glaciers/2014_15_velocity_mag.tif /home/jukes/Documents/Sample_glaciers/Box001/2014_15_velocity_mag.tif
gdalwarp -cutline /home/jukes/Documents/Sample_glaciers/Box001/Box001.shp -crop_to_cutline /home/jukes/Documents/Sample_glaciers/2015_16_velocity_dir_degree_yx.tif /home/jukes/Documents/Sample_glaciers/Box001/2015_16_velocity_dir_degree_yx.tif
gdalwarp -cutline /home/jukes/Documents/Sample_glaciers/Box001/Box001.shp -crop_to_cutline /home/jukes/Documents/Sample_glaciers/2015_16_velocity_mag.tif /home/jukes/Documents/Sample_glaciers/Box001/2015_16_velocity_mag.tif
gdalwarp -cutline /home/jukes/Documents/Sample_glaciers/Box001/B

gdalwarp -cutline /home/jukes/Documents/Sample_glaciers/Box235/Box235.shp -crop_to_cutline /home/jukes/Documents/Sample_glaciers/2015_16_velocity_dir_degree_yx.tif /home/jukes/Documents/Sample_glaciers/Box235/2015_16_velocity_dir_degree_yx.tif
gdalwarp -cutline /home/jukes/Documents/Sample_glaciers/Box235/Box235.shp -crop_to_cutline /home/jukes/Documents/Sample_glaciers/2015_16_velocity_mag.tif /home/jukes/Documents/Sample_glaciers/Box235/2015_16_velocity_mag.tif
gdalwarp -cutline /home/jukes/Documents/Sample_glaciers/Box235/Box235.shp -crop_to_cutline /home/jukes/Documents/Sample_glaciers/2016_17_velocity_dir_degree_yx.tif /home/jukes/Documents/Sample_glaciers/Box235/2016_17_velocity_dir_degree_yx.tif
gdalwarp -cutline /home/jukes/Documents/Sample_glaciers/Box235/Box235.shp -crop_to_cutline /home/jukes/Documents/Sample_glaciers/2016_17_velocity_mag.tif /home/jukes/Documents/Sample_glaciers/Box235/2016_17_velocity_mag.tif
Box235
gdalwarp -cutline /home/jukes/Documents/Sample_glaciers/B

Next, these subset velocity rasters are opened using the **rasterio** package and read into arrays. They are filtered for anomalous values and the velocity magnitudes are converted into weights. Then the **numpy.average()** function is used to calculated the weighted average flow directions where the flow directions of the pixels where the highest velocities are found are weighted more. 

The resulting average flow direction will be representative of the glacier's main flow. These directions will be used to rotate the images of the glaciers so that their flow is due right.

In [9]:
#CREATE list of glacier average flow directions:
boxes = []
avg_rot = []
max_mag = []
years = []


for BoxID in BOXIDS :
    rot_angles = []
    max_magnitudes = []
    
    for vdate in ['2014_15', '2015_16', '2016_17']:
        #READ velocity direction and magnitude data at terminus for each glacier into an array
        vdir = vdate+'_velocity_dir_degree_yx.tif'
        vmag = vdate+'_velocity_mag.tif'  
                
        direction = rasterio.open(basepath+"Box"+BoxID+"/"+vdir, "r")
        dir_array = direction.read()

        magnitude = rasterio.open(basepath+"Box"+BoxID+"/"+vmag, "r")
        mag_array = magnitude.read()
       
        #RESHAPE direction array and remove anomalous values
        dir2 = dir_array.reshape(dir_array.shape[1] * dir_array.shape[2])
        #direction must be between 180 and -180 degrees
        mask_dir2 = (dir2 < 180) & (dir2 > -180)
        masked_dir = dir2[mask_dir2]
#         print(masked_dir.min(), masked_dir.max())

        #RESHAPE magnitude array and remove anomalous values
        mag2 = mag_array.reshape(mag_array.shape[1] * mag_array.shape[2])
        #magnitude must be between 0 and 10 m/d
        mask_mag2 = (mag2 < 100) & (mag2 > 0)
        masked_mag = mag2[mask_mag2]
#         print(masked_mag.min(), masked_mag.max())
        
        print(len(masked_dir), len(masked_mag))
    
        #CALCULATE weights (0 - 1) from magnitudes
        mag_range = masked_mag.max() - masked_mag.min()
        stretch = 1/mag_range
        weights = stretch*(masked_mag - masked_mag.min())
        #print(weights.min(), weights.max()) #should be between 0 and 1
#         print(weights.shape, masked_dir.shape)
        
        
        #CALCULATE the weighted average rotation angle
        avg_dir = np.average(masked_dir, weights=weights)
        avg_dir = avg_dir*180/np.pi

#         print(avg_dir)
        
        #APPEND the rotation angles to the dictionary
        if masked_dir.min() == masked_dir.max():
            rot_angles.append(masked_dir.min())
        else:
            rot_angles.append(avg_dir)
        
        #APPEND the maximum flow magnitude
        max_magnitudes.append(masked_mag.max())
    
    rot_angles = rot_angles[:-1]
    max_magnitudes = max_magnitudes[:-1]
        
    #APPEND the final values
#     print(BoxID)
#     print(rot_angles)
#     print(max_magnitudes)
    avg_rot.append(np.average(rot_angles))
    max_mag.append(np.average(max_magnitudes))
    boxes.append(BoxID)
#   years.append(vdate)

#create dataframe with the calculations
velocities_df = pd.DataFrame(list(zip(boxes,avg_rot, max_mag)), columns=['BoxID','Flow_dir', 'Max_speed'])
velocities_df = velocities_df.sort_values(by='BoxID')
velocities_df = velocities_df.drop_duplicates()
velocities_df

4 4
4 4
4 4
100 100
100 100
94 94
95 95
94 95


TypeError: Axis must be specified when shapes of a and weights differ.

In [57]:
#EXPORT MAX VELOCITY AND AVERAGE FLOW DIRECTION TO A .CSV FILE
#write the data frame to csv file
velocities_df.to_csv(path_or_buf = basepath+'Glacier_velocities_SE.csv', sep=',')

In [58]:
print(list(velocities_df.Flow_dir))

[-101.90171300115752, -16.13300927240767, 94.15238868963074, -0.147796630859375, 60.97293650371104, 36.81103132421692, 63.42911521792994, -89.90698215817176, -110.636708741998, -125.85510104302762, -4.4225679781507665, -0.04147058725357056, 86.27414114242538, -11.079167036633997, -53.44974951031963]


## 4) Rotate all images by flow direction

In [None]:
# #make results directory in BoxID folder if it doesn't already exist
# for BoxID in BoxIDs:
#     if os.path.exists(basepath+"Box"+BoxID+'/rotated/'):
#         print("Already exists.")
#         #OTHERWISE, create the folder and download into it
#     else:
#         os.mkdir(basepath+"Box"+BoxID+'/rotated/')
#         print("Folder made for Box"+BoxID)

In [None]:
# for folder in os.listdir(basepath):
#     BoxID = folder[3:]
#     for file in os.listdir(basepath+folder):
# #         print(file)
#         if file.endswith('PS_cut.png') and len(file) == 50:
#             img  = Image.open(basepath+folder+'/'+file)
#             #rotate the image by the flow direction from flowspeed_df
#             rotated     = img.rotate(-float(flowspeed_df.loc[BoxID, 'Flow_dir']))
#             rotated.save(basepath+folder+'/rotated/R_'+file)
# #             print(file)
# #             print(flowspeed_df.loc['001', 'Flow_dir'])

## 5) Resize images to minimum dimensions