# Grab glacier centerline speeds and plot the speed evolution time series

_Last modified by jukesliu@u.boisestate.edu on 2022-05-02._

In [1]:
import pandas as pd
import os
import numpy as np
from ordered_set import OrderedSet # pip install ordered-set
import cmocean
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.colors import LogNorm
import datetime

from additional_functions import mytomd, unique_date_df

In [2]:
# Root folder for the velocity data; other cells build file paths by appending to it with `+`.
basepath = '/Users/jukesliu/Documents/TURNER/DATA/VELOCITY_MAPS/' # SET WORKING DIRECTORY
# the root folder that holds the centerline data (note the trailing slash)

In [7]:
# Build the reference list of centerline distances (dist_km column) from a data file.
sar_csv = basepath+'ASF_autoRIFT/centerline_data_n.csv' # PATH TO THE SAR CSV
sorted_df = pd.read_csv(sar_csv, usecols=list(range(2, 12))) # may need to adjust columns

# unique distances in order of first appearance, minus the first one, sorted ascending
dists = sorted(list(OrderedSet(sorted_df.dist_km))[1:])
print(dists)

[0.6621358210688768, 1.1809038467853512, 1.96913473157004, 2.6019051815086787, 3.4028123782370128, 3.968339390980005, 4.487107416696439, 5.059990864284815, 5.800090100751969, 6.280106596531697, 6.764581908855604, 7.502383884003746, 8.585703615784784, 9.277921319256537, 9.961512766630737, 10.532482627862397, 11.133169686797553, 12.659516601584729, 13.58041624731048, 14.51964045447002, 15.117263301936958, 15.965774636580152, 17.173794530096174, 17.655295102578453, 18.200551279194777, 18.8973339580466, 19.35440784028735, 20.06910451852252, 20.97977811303844, 21.561178724745403, 22.211724307818333, 23.04869915438657, 23.8292214713847, 24.90490075727111, 25.919781117759506, 26.71240617924131, 27.34837656823172, 28.03341304380494, 28.751095878644183, 29.54666033803075]


# 1A) Read in individual centerline profiles and combine

Use this option when the speeds along the centerline are stored in a folder containing one CSV file with the data for each date.

In [4]:
cfilespath = basepath+'/centerline_data_figure/' # ENTER PATH TO THE FOLDER CONTAINING THE PROFILES

full_length_km = 29.54666 # distance should go out to 29.54 km

# Read every profile CSV in the folder, down-sample it to one speed per reference
# distance in `dists`, and collect one small dataframe per file.
interp_dfs = []
for file in os.listdir(cfilespath):
    # only profile CSVs whose filename does not contain an 'S'
    if not (file.startswith('profile') and file.endswith('.csv') and 'S' not in file):
        continue

    # the two YYYYMMDD date strings are read from fixed positions in the filename
    ds1 = file[8:16]; ds2 = file[17:25]
    profile_df = pd.read_csv(cfilespath+file, names=['dist_km','vmag_md']) # read in data

    # correct distance values if not correct
    maxdist = np.nanmax(profile_df.dist_km) # grab the maximum distance value
    if maxdist < full_length_km:
        # rescale to 29.54 km (and reverse the order, as before)
        profile_df.dist_km = np.flip(np.array(profile_df.dist_km * full_length_km/maxdist))

    # down-sample to SAR dist values: evenly spaced *integer row positions*, so the
    # lookup below is positional and does not depend on the dataframe's index labels
    sample_indices = np.round(np.linspace(0, len(profile_df)-1, len(dists))).astype(int)
    distances = dists
#     sample_indices = np.round(np.linspace(0,len(profile_df)-1,63)).astype(int) # grab full profile
#     distances = np.flip(profile_df.dist_km.to_numpy()[sample_indices])

    # grab down-sampled speed values, reversed to line up with `distances`
    v_interp = profile_df.vmag_md.to_numpy()[sample_indices][::-1]

    # enter into dataframe; the scalar dates are broadcast down the ds1/ds2 columns
    interp_df = pd.DataFrame({'ds1': pd.to_datetime(ds1, format='%Y%m%d'),
                              'ds2': pd.to_datetime(ds2, format='%Y%m%d'),
                              'dist_km': distances,
                              'vmag': v_interp})
    interp_dfs.append(interp_df)

# enter into one dataframe
interp_total = pd.concat(interp_dfs).sort_values(by=['ds1','ds2','dist_km'])
interp_total = interp_total.drop_duplicates()
interp_total.head()

Unnamed: 0,ds1,ds2,dist_km,vmag
0,2013-06-10,2013-06-17,0.662136,0.0
1,2013-06-10,2013-06-17,1.180904,21.66988
2,2013-06-10,2013-06-17,1.969135,19.900368
3,2013-06-10,2013-06-17,2.601905,22.848957
4,2013-06-10,2013-06-17,3.402812,19.935375


In [5]:
# grab the unique (ds1, ds2) date pairs using the project helper unique_date_df
# NOTE(review): the gap-detection cells compare each row's ds2 with the next row's ds1,
# so they presumably rely on the helper returning the pairs in chronological order -- confirm
df2 = unique_date_df(interp_total,'ds1','ds2')
df2

Unnamed: 0,ds1,ds2
0,2013-06-10,2013-06-17
1,2013-06-17,2013-07-12
2,2013-07-12,2013-07-28
3,2013-07-28,2013-08-13
4,2013-08-13,2013-10-07
...,...,...
153,2021-08-15,2021-08-30
154,2021-08-30,2021-09-07
155,2021-09-07,2021-09-19
156,2021-09-19,2021-09-22


In [None]:
# initialize speed grid: one row per date pair in df2, one column per centerline distance
n_pairs, n_dists = len(df2), len(distances)
speed_grid = np.zeros((n_pairs, n_dists))

# fill each row with the along-centerline speeds of one date pair
for rown, (_, pair) in enumerate(df2.iterrows()):
    # rows of the combined dataframe that belong to this (ds1, ds2) pair
    in_pair = (interp_total.ds1 == pair.ds1) & (interp_total.ds2 == pair.ds2)
    date_df = interp_total[in_pair].reset_index(drop=True)
    date_df = date_df.drop_duplicates(subset=['dist_km'], keep='last') # drop duplications

    # the assignment fails if the pair does not have exactly one speed per distance
    speed_grid[rown, :] = date_df.vmag.tolist()
print(speed_grid.shape)

### Fill in temporal gaps:

In [None]:
# one (start, end) tuple per date pair, in the row order of df2
date_pairs = list(zip(df2.ds1, df2.ds2))

fill_dfs = []
counter = 0
# identify temporal gaps: compare each pair's end date with the next pair's start date
for (date1_start, date1_end), (date2_start, _) in zip(date_pairs, date_pairs[1:]):
    print(str(date1_start)[0:10],str(date1_end)[0:10]) # print the date start and end

    if date1_end != date2_start: # the end date and the next start date don't match
        print()
        print('Gap between', date1_end, 'and', date2_start) # Gap found
        print()

        # OPTION 1: fill in all gaps with NaNs
        # (one row per distance, spanning the gap, with NaN speed)
        fill_df = pd.DataFrame({'ds1': date1_end, 'ds2': date2_start,
                                'dist_km': distances, 'vmag': np.nan},
                               columns=interp_total.columns)
        fill_dfs.append(fill_df)

#         # OPTION 2: fill in the gaps with SAR data
#         sorted_df = pd.read_csv(basepath+'ASF_autoRIFT/centerline_data_n.csv', # PATH TO THE SAR CSV
#                                 usecols=[2,3,4,5,6,7,8,9,10,11]) # may need to adjust column
#         # convert dates to datetime objects using pd
#         sorted_df['mid_date'] = pd.to_datetime(sorted_df.mid_date, format='%Y%m%d')
#         sorted_df['ds1'] = pd.to_datetime(sorted_df.ds1, format='%Y-%m-%d')
#         sorted_df['ds2'] = pd.to_datetime(sorted_df.ds2, format='%Y-%m-%d')
#         sorted_df['vmag'] = mytomd(sorted_df.vmag) # convert velocities to m/day
#         sorted_df['v_error'] = mytomd(sorted_df.v_error)
#         sorted_df = sorted_df.drop_duplicates(subset=['ds1','lat'],keep='first') # drop overlapping dates

#         # grab each unique date pair
#         dates_df = unique_date_df(sorted_df,'ds1','ds2')
#         gap_df = dates_df[(dates_df.ds1 >= date1_end) & (dates_df.ds2 <= date2_start)] # find those in gap
#         if len(gap_df) > 0: # fill in gaps with SAR data
#             print('Fill with:')
#             for idx, row in gap_df.iterrows():
#                 print(row.ds1, row.ds2)
#                 df = sorted_df[(sorted_df.ds1 == row.ds1) & (sorted_df.ds2 == row.ds2)]
#                 fill_df = df[["ds1", "ds2","dist_km", "vmag"]]
#                 fill_dfs.append(fill_df)
#         else: # if no optical data found, fill in with nans
#             print("Fill with nans")
#             ds1s = np.full(np.size(dists),date1_end)
#             ds2s = np.full(np.size(dists),date2_start)
#             nans = np.empty(len(dists)); nans[:] = np.nan # create list of nans to fill
#             fill_df = pd.DataFrame(list(zip(ds1s, ds2s, dists,nans)),columns=interp_total.columns)
#             fill_dfs.append(fill_df)

        counter += 1 # count the gaps
print(counter, 'gaps')

# add gap-filling data back into the dataframe
# (concatenating the list directly also works when no gaps were found and fill_dfs is empty)
interp_total_filled = pd.concat([interp_total] + fill_dfs).sort_values(by=['ds1','ds2','dist_km'])

# 1B) Read in combined centerline data file

Use this option when the speeds along the centerline for all dates have been combined into one large file.

In [None]:
# ENTER PATH TO THE CSV FILE WITH ALL DATA COMBINED
# csvpath = basepath+'ASF_autoRIFT/centerline_data_n.csv'
csvpath = ('/Users/jukesliu/Documents/TURNER/DATA/VELOCITY_MAPS/its_live/notebooks/'
           'ITS_LIVE_vx_vy_optical_centerline_n.csv')

# read into a dataframe, keeping only the listed column positions
keep_cols = [1,2,3,4,6,7,8,9,12,13] # ADJUST COLUMNS AS NEEDED, DROP MID_DATE
sorted_df = pd.read_csv(csvpath, usecols=keep_cols)
sorted_df.head()

In [None]:
# convert the date-pair columns to datetime objects using pd
for date_col in ('ds1', 'ds2'):
    sorted_df[date_col] = pd.to_datetime(sorted_df[date_col], format='%Y-%m-%d')

# convert velocities (and their errors) to m/day with the project helper mytomd
# NOTE: this overwrites the columns in place, so running the cell twice converts twice
for speed_col in ('vmag', 'v_error'):
    sorted_df[speed_col] = mytomd(sorted_df[speed_col])

# drop overlapping dates! keep only the first row for each (ds1, lat) combination
sorted_df = sorted_df.drop_duplicates(subset=['ds1','lat'], keep='first')
sorted_df.head()

In [None]:
# grab each unique (ds1, ds2) date pair using the project helper unique_date_df
# NOTE(review): the gap-detection cell below compares consecutive rows of df2,
# so it presumably relies on the pairs being in chronological order -- confirm
df2 = unique_date_df(sorted_df,'ds1','ds2')
df2.head()

In [None]:
# initialize speed grid: one row per date pair in df2, one column per reference distance
n_pairs, n_dists = len(df2), len(dists)
speed_grid = np.zeros((n_pairs, n_dists))
print(speed_grid.shape)

# fill in speed grid with speed values for each date pair
for rown, (_, pair) in enumerate(df2.iterrows()):
    # rows of the combined dataframe that belong to this (ds1, ds2) pair
    in_pair = (sorted_df.ds1 == pair.ds1) & (sorted_df.ds2 == pair.ds2)
    date_df = sorted_df[in_pair].reset_index(drop=True)
    date_df = date_df.drop_duplicates(subset='dist_km', keep='first')

    # the assignment fails if the pair does not have exactly one speed per distance
    speed_grid[rown, :] = date_df.vmag.tolist()

### Fill in temporal gaps:

In [None]:
# one (start, end) tuple per date pair, in the row order of df2
date_pairs = list(zip(df2.ds1, df2.ds2))

fill_dfs = []
counter = 0
# identify the gaps between consecutive date pairs and fill them in with data
for (date1_start, date1_end), (date2_start, _) in zip(date_pairs, date_pairs[1:]):
    print(str(date1_start)[0:10],str(date1_end)[0:10]) # print the date start and end

    if date1_end != date2_start: # the end date and the next start date don't match
        print()
        print('Gap between', str(date1_end)[0:10], 'and', str(date2_start)[0:10])
        print()

#         # OPTION 1: fill in gaps with Nans
#         fill_df = pd.DataFrame({'ds1': date1_end, 'ds2': date2_start, 'dist_km': dists})
#         fill_df = fill_df.reindex(columns=sorted_df.columns)
#         fill_dfs.append(fill_df)

        # OPTION 2: fill in gaps with optical data
        # (cfilespath is the profile folder set in section 1A)
        for file in os.listdir(cfilespath): # path to optical data
            if not (file.startswith('profile') and file.endswith('.csv') and 'S' not in file):
                continue
            # the two YYYYMMDD date strings are read from fixed positions in the filename
            dstr1 = file[8:16]; dstr2 = file[17:25]
            ds1 = pd.to_datetime(dstr1,format='%Y%m%d')
            ds2 = pd.to_datetime(dstr2,format='%Y%m%d')

            # only use optical data that falls between the gap dates
            if not ((ds1 >= date1_end) and (ds2 <= date2_start)):
                continue
            print('Fill with optical data :',dstr1, dstr2)
            profile_df = pd.read_csv(cfilespath+file, names=['dist_km','vmag_md'])

            # rescale the distances if the profile falls short of the full length
            maxdist = np.nanmax(profile_df.dist_km)
            if maxdist < 29.54666: # distance should go out to 29.54 km
                newdistkm = np.flip(np.array(profile_df.dist_km * 29.54666/maxdist)) # rescale to 29.54 km
                profile_df.dist_km = newdistkm # replace in dataframe

            # down-sample the optical data to one speed per reference distance, using
            # integer row positions so the lookup does not depend on index labels
            sample_indices = np.round(np.linspace(0, len(profile_df)-1, len(dists))).astype(int)
            v_interp = profile_df.vmag_md.to_numpy()[sample_indices][::-1]

            # enter into dataframe by column *name*; every other column of sorted_df
            # is filled with NaN, whatever the column order of the CSV
            fill_df = pd.DataFrame({'ds1': ds1, 'ds2': ds2, 'dist_km': dists, 'vmag': v_interp})
            fill_df = fill_df.reindex(columns=sorted_df.columns)
            fill_dfs.append(fill_df)
        counter += 1 # count the gaps
print(counter, 'gaps')

# return the filled df
# (concatenating the list directly also works when nothing was filled and fill_dfs is empty)
interp_total_filled = pd.concat([sorted_df] + fill_dfs).sort_values(by=['ds1','ds2','dist_km'])
interp_total_filled.head()

In [None]:
# FILL ALL REMAINING GAPS IF OPTICAL DATA ARE INPUT
# unique (ds1, ds2) pairs in the order they appear in the already-sorted dataframe
df2 = pd.DataFrame(list(OrderedSet(zip(interp_total_filled.ds1, interp_total_filled.ds2))),
                   columns=['ds1','ds2'])
date_pairs = list(zip(df2.ds1, df2.ds2))

fill_dfs = []
counter = 0
# identify the gaps that are still left between consecutive date pairs
for (date1_start, date1_end), (date2_start, _) in zip(date_pairs, date_pairs[1:]):
    print(date1_start, date1_end)

    if date1_end != date2_start: # the end date and the next start date don't match
        print()
        print('Gap between', date1_end, 'and', date2_start)
        print()

        # fill in the gap with NaNs: one row per reference distance spanning the gap;
        # built by column *name*, so every other column of sorted_df becomes NaN
        fill_df = pd.DataFrame({'ds1': date1_end, 'ds2': date2_start, 'dist_km': dists})
        fill_df = fill_df.reindex(columns=sorted_df.columns)
        fill_dfs.append(fill_df)
        counter += 1
print(counter, 'gaps')

# (concatenating the list directly also works when no gaps remain and fill_dfs is empty)
interp_total_filled = pd.concat([interp_total_filled] + fill_dfs).sort_values(by=['ds1','ds2','dist_km'])
interp_total_filled.head()

## 2) Plot speed evolution time series

In [None]:
# grab the unique (ds1, ds2) date pairs of the gap-filled data as a dataframe
# NOTE(review): the grid spacing computed further down takes differences of consecutive ds1
# values, so it presumably relies on unique_date_df returning the pairs in date order -- confirm
df2_filled = unique_date_df(interp_total_filled,'ds1','ds2')
df2_filled

In [None]:
# Re-create speed grid with gap-filled data: one row per date pair, one column per
# reference distance. Sized with `dists` (defined at the top of the notebook) rather than
# `distances`, which only exists when section 1A has been run.
speed_grid_filled = np.zeros((len(df2_filled), len(dists)))

# speed grid with gap-filled data
for rown, (_, pair) in enumerate(df2_filled.iterrows()):
    # rows of the gap-filled dataframe that belong to this (ds1, ds2) pair
    in_pair = (interp_total_filled.ds1 == pair.ds1) & (interp_total_filled.ds2 == pair.ds2)
    date_df = interp_total_filled[in_pair].reset_index(drop=True)
    date_df = date_df.drop_duplicates(subset=['dist_km'], keep='last') # drop duplications

    # the assignment fails if the pair does not have exactly one speed per distance
    speed_grid_filled[rown, :] = date_df.vmag.tolist()
print(speed_grid_filled.shape)

In [None]:
# determine the grid spacing based on the time differences
spacing = np.round(np.diff(df2_filled.ds1)/np.min(np.diff(df2_filled.ds1)))
spacing = np.append(spacing, spacing[-1]) # add last spacing
spacing

In [None]:
# create spaced grid: repeat each row of the grid as many times as its spacing value
row_repeats = spacing.astype(int)
speed_spaced = np.repeat(speed_grid_filled, row_repeats, axis=0) # along rows
# speed_spaced[speed_spaced == 0] = np.NaN
speed_spaced

# Plot:

In [None]:
# User input to properly generate axis labels for the plot:
# if grid tick spacing is unknown, run the cell below with the tick labels turned off to see tick spacing
x_tick_step = 10 # INPUT GRID TICK SPACING FOR X (e.g. 10 or 5)
y_tick_step = 500 # INPUT GRID TICK SPACING (e.g. 250, 200, 500, or 20)
start_date, end_date = '2013-06-10', '2021-09-22' # INPUT START AND END DATE

# create centerline (x-axis) labels: every x_tick_step-th value, with an extra 0 in front
n_edges = speed_grid.shape[1] + 1
x = np.linspace(dists[0], dists[-1], n_edges)
x_labels = np.insert(x[::x_tick_step], 0, 0).astype(int)
print(x_labels)

# create y-axis labels: one evenly spaced timestamp per row of the spaced grid,
# thinned to every y_tick_step-th one, with the first timestamp repeated in front
t = pd.date_range(start=start_date, end=end_date, periods=len(speed_spaced))
y = np.insert(t[::y_tick_step], 0, t[0])
y_labels = [stamp[:7] for stamp in y.astype(str)] # first 7 characters, YYYY-MM, of each timestamp
print(y_labels)

In [None]:
# Plot the properly-spaced speed evolution map with imshow
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6,6))
ax.set_facecolor('black') # background colour behind the image

color_scale = LogNorm(vmin=1, vmax=25) # log norm coloring
# (for linear coloring, pass vmin=0, vmax=7 to imshow instead of norm)
grid = ax.imshow(speed_spaced,
                 aspect=0.03, # 0.03 for full 2013-2022
                 cmap=cmocean.cm.thermal,
                 norm=color_scale)
fig.colorbar(grid, orientation="vertical", label="Surface speed (m/day)")

# set tick labels (tick positions are left at matplotlib's defaults; only the text is replaced)
ax.set_xticklabels(x_labels)
ax.set_xlabel('Distance from terminus (km)')
ax.set_yticklabels(y_labels)

# plt.savefig(basepath+"figures/Optical_filled_with_SAR.png",dpi=300) # save figure
# plt.title('SAR and Optical') # add title
plt.show()