# Overview
This notebook goes through locating repeat tracks and estimating the elevation change between points on repeated tracks with a distance of < 5m. We specifically focus on tracks from the 2019 thaw season

In [51]:
import numpy as np
import pandas as pd

# Locating potential repeat tracks

In [60]:
def find_repeat_tracks(data_dir):
    "Searches through a list of ATL06 files in the specified drectorry (data_dir) and finds potential repeats, defined as two beam of the same RGT and spot that are \
    within acorss-track distances of 45 m or less. The resulting list is returned as a pndas dataframe"
    #import packages
    import glob
    import pandas as pd
    import os
    import numpy as np
    import time
    #define helper functions for pulling information from the file names
    def get_rgt(fname):
        return int(fname[31:35])
    def get_cycle(fname):
        return int(fname[36])
    def get_date(fname):
        return fname[16:24]
    def get_beam(fname):
        return(fname[47:51])
    def read_h5(fname, vnames=[]):
        """ Simple HDF5 reader. """
        with h5py.File(fname, 'r') as f:
            return [f[v][:] for v in vnames]
    
    start = time.time() #start timer
    os.chdir(data_dir)
    files = glob.glob('*.h5') #get list of all ATL06 files
    #intialize lists to store variables
    #file names
    file1 = []
    file2 = []
    rgt = [] #RGT number
    #dates of repeat passes in YYYMMDD format
    date_1 = []
    date_2 = []
    #dates of repeat passes in year/decimal format
    date_1_decimal = []
    date_2_decimal = []
    
    #loop through files
    for i in range(1, len(files)):
        #read in files and coordinates
        file_1 = files[i]
        x_atc1, y_atc1,t1 = read_h5(file_1,['x_atc', 'y_atc', 't_year']) #get along and across-track coordiantes
        for j in range(i+1, len(files)):
            file_2 = files[j]
            x_atc2, y_atc2, t2 = read_h5(file_2,['x_atc', 'y_atc', 't_year'])
            
            #select tracks of the same rgt and beam but different dates
            if((get_rgt(file_1) == get_rgt(file_2)) &(get_date(file_1) != get_date(file_2)) & (get_beam(file_1) == get_beam(file_2))):
                x_atc = list(set(x_atc1) & set(x_atc2)) #get list of along-track coordinates common to both data sets
                y1 = []
                for x in range(0, len(x_atc1)):
                    if(x_atc1[x] in x_atc): #save points in the common list of rgts
                        y1.append(y_atc1[x])
                y2 = []
                for x in range(0, len(x_atc2)):
                    if(x_atc2[x] in x_atc):
                        y2.append(y_atc2[x])
                if ((len(y1)==0) or (len(y2) ==0)):
                    continue
                if (np.max(abs(np.array(y2) - np.array(y1))) < 45): #only keep beams with across-track distance < 45 m (could be modified to be stricter)
                    #save file name, date, and rgt
                    file1.append(file_1)
                    file2.append(file_2)
                    rgt.append(get_rgt(file_1))
                    date_1.append(get_date(file_1))
                    date_1_decimal.append(t1[0])
                    date_2.append(get_date(file_2))
                    date_2_decimal.append(t2[0])
                    distance = np.sqrt((np.array(y1) - np.array(y2))**2)
        if (i%50 ==0): #update progress every 50 files
            print('progress: ', i ,'files checked')
    #convert output to dataframe
    data_dict = {'file_1':file1, 'file_2': file2, 'rgt':rgt, 'date_1': date_1, 'date_1_decimal': date_1_decimal, \
                 'date_2_decimal': date_2_decimal, 'date_2': date_2}
    data = pd.DataFrame(data_dict)
    end = time.time()
    print('time elapsed: ', (end - start))
    os.chdir('..')
    return data        

In [53]:
#find all potetial repeats from our ATL06 data
repeat_list = find_repeat_tracks('Anaktuvuk_v2_ATL06_reduced_all_slopes')

progress:  50 files checked
progress:  100 files checked
progress:  150 files checked
progress:  200 files checked
progress:  250 files checked
progress:  300 files checked
progress:  350 files checked
progress:  400 files checked
progress:  450 files checked
progress:  500 files checked
progress:  550 files checked
progress:  600 files checked
progress:  650 files checked
progress:  700 files checked
progress:  750 files checked
progress:  800 files checked
progress:  850 files checked
progress:  900 files checked
progress:  950 files checked
progress:  1000 files checked
time elapsed:  959.6594512462616


In [54]:
repeat_list.to_csv('repeat_track_list.csv', index = False) #save list for future reference

In [56]:
#subset to 2019 thaw season - defined here as March 8th to October 22nd
repeats_summer_19 = repeat_list_v3[(repeat_list_v3['date_1_decimal'] > 2019 + 2/12 + 8/365) & \
                             (repeat_list_v3['date_2_decimal'] < 2019 + 9/12 +22/365)]
repeats_summer_19.reset_index(inplace = True)

In [57]:
repeats_summer_19.to_csv('repeat_track_list_s_2019.csv', index = False)

# Estimate elevation change at valid repeat points

In [78]:
def repeat_diffv2(repeat_tracks, data_dir):
    "Searches through a given list of potential repeat tracks for points that < 5m apart, and estaimtes the height difference. \
    The resulting height differences and relevant metrics are recorded and returned in a pandas dataframe"
    
    #import needed packages
    import numpy as np
    import pandas as pd
    import time
    import geopy.distance
    from pyproj import Transformer
    
    #define some helper functions
    def read_h5(fname, vnames=[]):
        """ Simple HDF5 reader. """
        with h5py.File(fname, 'r') as f:
            return [f[v][:] for v in vnames] 

    def get_rgt(fname):
        return int(fname[31:35])

    def get_spot(fname):
        return(fname[47:51])
    
    start = time.time() #start timer
    os.chdir(data_dir)
    #intilize list for storing variables
    #elevation change
    dh = [] #elevation change (m)
    sigma_dh = [] #uncertainty in elevation change (m)
    #time of first and second pass
    time_1 = []
    time_2 = []
    dt = [] #time interval between repeats (years)
    #elevation on each date
    height_1 = []
    height_2 = []
    #uncertainties on each height
    s_h1 = []
    s_h2 = []
    #across-track position of each point
    y_1 = []
    y_2 = []
    #coordinates of each point
    lon_1 = []
    lat_1 = []
    lon_2 = []
    lat_2 = []
    #along and across-track slopes at each point
    slope_across_1 = []
    slope_across_2 = []
    slope_along_1 = []
    slope_along_2 = []
    #source files
    source_1 = []
    source_2 = []
    #along and across-track distances between each repeat point
    distance_x = []
    distance_y = []
    #ATL06 fitting window (m)
    window1 = []
    window2 = []
    
    #for conversion from lat/lon to UTM zone 6
    transformer = Transformer.from_crs("epsg:4326", "epsg:32606") 
    for i in range(0, len(repeat_tracks)): #loop through repeat track list
        #read in potential repeat files
        file_1 = repeat_tracks['file_1'][i]
        file_2 = repeat_tracks['file_2'][i]
        #for r=each track, read in: coordinates, height, height uncertainty, time, along and across-trac slope, azimuth,
        #and ATL06 fitting window
        lon1, lat1, h1, sh_1, t1, dh_dx1, dh_dy1,az1, window_1 = read_h5(file_1, ['lon', 'lat', 'h_elv', \
                                                                                     's_elv','t_year', 'dh_dx', 'dh_dy', \
                                                                                  'azimuth', 'window_width'])
        
        lon2, lat2, h2, sh_2, t2, dh_dx2, dh_dy2,az2, window_2 = read_h5(file_2, ['lon', 'lat', 'h_elv', \
        's_elv','t_year', 'dh_dx', 'dh_dy', 'azimuth', 'window_width'])
        
        #convert from lat/lon to UTM zone 6
        lon_1p, lat_1p = transformer.transform(lat1, lon1)
        lon_2p, lat_2p = transformer.transform(lat2, lon2)
        
        for j in range(0, len(lon1)): #loop through points in first track 
            lon_a = lon_1p[j]
            lat_a = lat_1p[j]
            #get ditance between point a and  all points on second track           
            distance = np.sqrt((lon_2p-lon_1p[j])**2 + (lat_2p-lat_1p[j])**2)
            
            #find closest point on second track
            index = distance == min(distance)
            lon_b = lon_2p[index]
            lat_b = lat_2p[index]
            
            if(np.sqrt((lat_a-lat_b)**2 + (lon_a-lon_b)**2) > 5): #keep as a valid repeat point if points are within < 5m of each other
                continue
            
            
            #get along and across-track distance between the points by projecting lat/lon (in UTM) to along/across track 
            #coordinates using the azimuth
            az = az1[j] #azimuth of one track (we'll assume the two tracks rough approximately equal azimuth)
            delta_Nc = lat_b - lat_a
            delta_Ec = lon_b - lon_a
            delta_x_c = delta_Nc*np.cos(np.radians(az)) + delta_Ec*np.sin(np.radians(az))
            delta_y_c = delta_Nc*np.sin(np.radians(az)) - delta_Ec*np.cos(np.radians(az))
            

            
            #pull out needed variables for both points
            #segment elevation
            h_1 = h1[j] 
            h_2 = h2[index][0]
            #segment height uncertainty
            sigma_h1 = sh_1[j] 
            sigma_h2 = sh_2[index][0]
            #time
            t_1 = t1[j]
            t_2 = t2[index][0]
            #along-track slope
            dh_dx_1 = dh_dx1[j]
            dh_dx_2 = dh_dx2[index][0]
            #across-track slope
            dh_dy_1 = dh_dy1[j]
            dh_dy_2 = dh_dy2[index][0]
            #fitting window
            w_1 = window_1[j]
            w_2 = window_2[index][0]
            
            
            #filter by along-track slope slope:
            if(abs((dh_dx_1)) > .05 or abs((dh_dx_2)) > .05):
                continue
            
            #calculate elevation change
            delta_h = h_2 - h_1
            #propogate uncertainty
            dh_sigma = np.sqrt(sigma_h1**2 + sigma_h2**2)

            #save all variables
            source_1.append(file_1)
            lat_1.append(lat1[j])
            lon_1.append(lon1[j])
            height_1.append(h_1)
            s_h1.append(sigma_h1)
            slope_along_1.append(dh_dx_1)
            slope_across_1.append(dh_dy_1)
            dh.append(delta_h)
            time_1.append(t_1)
            distance_x.append(delta_x_c[0])
            distance_y.append(delta_y_c[0])            
            source_2.append(file_2)           
            lon_2.append(lon2[index][0])
            lat_2.append(lat2[index][0])
            height_2.append(h_2)
            s_h2.append(sigma_h2)
            slope_along_2.append(dh_dx_2)
            slope_across_2.append(dh_dy_2)
            time_2.append(t_2)           
            sigma_dh.append(dh_sigma)
            dt.append(t_2 - t_1)
            window1.append(w_1)
            window2.append(w_2)
        
        if (i%10 == 0):  #progress check
            print('pairs_checked:', i)
    
    #compile varaibles into a dictionary, then a dataframe
    data_dict = {'source_1': source_1, 'source_2': source_2, 'lon_1': lon_1, 'lat_1': lat_1, 'lon_2': lon_2, 'lat_2': lat_2,\
                'h_1': height_1, 'sigma_h1': s_h1, 'h_2': height_2, 'sigma_h2': s_h2, 'dh': dh, \
                  'sigma_dh': sigma_dh, 'time_1': time_1, 'time_2': time_2, 'dt': dt,
                'dh_dx_1': slope_along_1, 'dh_dx_2': slope_along_2, 'dh_dy_1': slope_across_1, 'dh_dy_2': slope_across_2, \
                 'distance_x': distance_x, 'window_1': window1, 'window_2': window2, 'distance_y': distance_y}
    data = pd.DataFrame(data_dict)
     #remove infinty values (which can occur for across-track slope)
    data.replace([np.inf, -np.inf], np.nan, inplace = True) 
    data.dropna(inplace = True)
    data.reset_index(drop=True, inplace=True)
    #add rgt and spot information into data frame
    rgts = []
    spot_n = [] #integer code for spot number (note: not the same as the beam number)
    for i in range(0, len(data)):
        rgts.append(get_rgt(data['source_1'][i]))
        spot = get_spot(data['source_1'][i])
        if spot == 'gt1l':
            spot_n.append(1)
        elif spot == 'gt1r':
            spot_n.append(2)
        elif spot == 'gt2l':
            spot_n.append(3)
        elif spot == 'gt2r':
            spot_n.append(4)
        elif spot == 'gt3l':
            spot_n.append(5)
        elif spot == 'gt3r':
            spot_n.append(6)
        else:
            print('invalid beam')
            spot_n.append(float('nan'))

    data['rgt'] = rgts
    data['spot_n'] = spot_n
    end = time.time() #record total time elapsed
    print('elapsed time: ', end - start)
    os.chdir("..")
    return data

In [79]:
#estimate repeat elevation elevation change from potenetial 2019 summer tracks
repeat_dh = repeat_diffv2(repeats_summer_19, 'Anaktuvuk_v2_ATL06_reduced_all_slopes')

pairs_checked: 0
pairs_checked: 10
pairs_checked: 20
pairs_checked: 30
pairs_checked: 40
pairs_checked: 50
elapsed time:  57.371814489364624


In [80]:
#save output
repeat_dh.to_csv('repeat_dh_filtered_slope_05_final.csv')