In [1]:
import xarray as xr
import pandas as pd
import glob
from datetime import datetime
import re
import numpy as np
import geopandas
import rioxarray
from shapely.geometry import mapping

In [2]:
def retrieve_timestamp(tiff_list):
    """Derive a mid-pair timestamp and a repeat time from each GeoTIFF filename.

    Every filename must contain ``S_<YYYYMMDD>_<YYYYMMDD>_250m``; the two dates
    are parsed as the start and end of the image pair.

    Parameters
    ----------
    tiff_list : iterable of str
        GeoTIFF filenames or full paths.

    Returns
    -------
    time : list of pandas.Timestamp
        Midpoint between pair start and pair end, one entry per file, in the
        same order as ``tiff_list`` (suitable as an xarray time axis).
    repeat_time_list : list of float
        Number of whole days between pair start and pair end, one per file.

    Raises
    ------
    ValueError
        If a filename does not contain the expected date pattern.
    """
    print('\nRetrieving timestamp\n')
    time = []
    repeat_time_list = []
    for f in tiff_list:
        # Search the filename and retrieve the pair start/end dates.
        match = re.search(r"S_((\d+)_(\d+))_250m", f)
        if match is None:
            # Fail with the offending name instead of an opaque
            # "'NoneType' object has no attribute 'group'".
            raise ValueError(
                "Cannot extract pair dates from filename %r; expected "
                "'S_<YYYYMMDD>_<YYYYMMDD>_250m' in the name" % (f,)
            )
        pair_start = pd.to_datetime(match.group(2), format='%Y%m%d')
        pair_end = pd.to_datetime(match.group(3), format='%Y%m%d')
        repeat = pair_end - pair_start

        time.append(pair_start + repeat / 2)  # midpoint of the pair
        repeat_time_list.append(float(repeat.days))
    return time, repeat_time_list

def ROI_select(DATA, ROI_dir, invert=None):
    """Clip a dataset to the polygons stored in a shapefile.

    Parameters
    ----------
    DATA : xarray object exposing the ``rio`` accessor
        Data with ``x`` and ``y`` spatial dimensions. NOTE: the spatial
        dimensions and the CRS are written onto this object in place, so the
        caller's object is modified as well.
    ROI_dir : str
        Path of the shapefile holding the region(s) of interest.
    invert : bool or None, optional
        When truthy, keep the data outside the shapes instead of inside.
        ``None`` (the default) and ``False`` clip to the inside of the shapes.

    Returns
    -------
    The clipped data, as returned by ``DATA.rio.clip``.
    """
    glacier_shape = geopandas.read_file(ROI_dir)
    DATA.rio.set_spatial_dims(x_dim="x", y_dim="y", inplace=True)
    DATA.rio.write_crs("epsg:3413", inplace=True)  # the data are tagged as EPSG:3413
    return DATA.rio.clip(
        glacier_shape.geometry.apply(mapping),
        glacier_shape.crs,
        drop=True,
        all_touched=True,
        # Previously the argument was accepted but never used.
        invert=bool(invert),
    )

def calculate_STD(data):
    """Attach a per-timestep velocity error estimate to ``data``.

    The error is the standard deviation of all off-ice (land) pixels of the
    ``S`` variable, computed over ``y`` and ``x`` for every timestep.

    Parameters
    ----------
    data : xarray.Dataset
        Dataset with an ``S`` variable; it is clipped to the GIMP land mask
        shapefile located under the module-level ``internal_data_dir``.

    Returns
    -------
    xarray.Dataset
        The same ``data`` object with a new ``STD`` variable along ``time``.
    """
    print('\nCalculating STD for velocities\n')
    # Raw string: in a normal literal '\v' is a vertical-tab escape, which
    # silently corrupted the shapefile path.
    land_mask_path = internal_data_dir + r'\velocity\ROI_shapefiles\GIMP_land_mask.shp'
    land_data = ROI_select(data, land_mask_path)
    land_data = land_data.where(land_data['S'] != -9999)  # mask -9999 values as NaN
    # Standard deviation of the remaining land pixels, one value per timestep.
    land_data_std = land_data['S'].std(dim=['y', 'x'], skipna=True).values

    data['STD'] = ('time', land_data_std)  # add the standard deviation to the dataset
    data['STD'].attrs['long_name'] = 'standard_deviation'
    return data

def extract_ROI_velocity(S_data, U_data, V_data, ROI, U_STD, V_STD):
    """Build a median-velocity time series with errors for one region of interest.

    The three datasets are clipped to the ROI shapefile located under the
    module-level ``internal_data_dir``; the spatial median of each clipped
    field is then taken per timestep. The error of the speed is propagated
    from the component errors as ``U_STD * |U| / S + V_STD * |V| / S``.

    Parameters
    ----------
    S_data, U_data, V_data : xarray.Dataset
        Datasets holding the ``S``, ``U`` and ``V`` variables respectively,
        each with ``time``, ``y`` and ``x`` dimensions.
    ROI : str
        Shapefile name (without extension) of the region of interest.
    U_STD, V_STD : array-like of float
        Per-timestep errors of the U and V components. They are matched to the
        timesteps by position, so they must be in the same order as ``time``.

    Returns
    -------
    velocity : numpy.ndarray
        Median speed per retained timestep.
    error : numpy.ndarray
        Propagated error per retained timestep.
    time_ROI : numpy.ndarray
        Timestamps of the retained timesteps.
    """
    print('Loading and extracting ROI....')
    # Raw string: in a normal literal '\v' is a vertical-tab escape, which
    # silently corrupted the shapefile path.
    roi_shapefile = internal_data_dir + r'\velocity\ROI_shapefiles\%s.shp' % ROI
    ROI_ice_velocity = ROI_select(S_data, roi_shapefile)
    U_ROI_ice_velocity = ROI_select(U_data, roi_shapefile)
    V_ROI_ice_velocity = ROI_select(V_data, roi_shapefile)

    print('Calculating medians...')
    speed_median = ROI_ice_velocity['S'].median(dim=['y', 'x'], skipna=True)
    u_median = U_ROI_ice_velocity['U'].median(dim=['y', 'x'], skipna=True)
    v_median = V_ROI_ice_velocity['V'].median(dim=['y', 'x'], skipna=True)

    time_ROI = np.asarray(pd.to_datetime(u_median['time'].values))
    velocity = np.asarray(speed_median.values)

    print('Calculating errors...')
    # Work on plain arrays so the errors pair with the timesteps by position,
    # independent of any pandas index carried by U_STD / V_STD.
    u_std = np.asarray(U_STD, dtype=float)
    v_std = np.asarray(V_STD, dtype=float)
    # np.abs(x) is the (x**2)**0.5 of the original formula.
    error = (u_std * (np.abs(u_median.values) / velocity)
             + v_std * (np.abs(v_median.values) / velocity))

    # Drop timesteps whose median speed is NaN or a no-data value. "< -10"
    # catches -999 / -9999 fill values that survive the median.
    # A single mask is applied to all three arrays: the previous chained
    # np.delete calls reused indices computed on the unfiltered array after the
    # first deletion had already shifted the elements.
    keep = ~(np.isnan(velocity) | (velocity < -10))

    return velocity[keep], error[keep], time_ROI[keep]

In [None]:
################# LOAD IN METADATA FOR STANDARD DEVIATION #########################
col_names = ['start_year', 'start_month', 'start_day', 'end_year', 'end_month', 'end_day', 'min_off-ice_vel', 'max_off-ice_vel', 'mean_off-ice_vel', 'median_off-ice_vel', 'std_off-ice_vel']


def _load_std_metadata(path):
    """Read one off-ice velocity metadata file into a DataFrame.

    Columns 0-2, 6-8 and 12-16 of the comma-separated file are read and named
    after ``col_names`` (per the original note, the skipped columns hold
    zeros). The year/month/day columns are replaced by ``start_date`` and
    ``end_date``, and ``time_between`` holds the midpoint of the two dates.
    """
    meta = pd.read_csv(path, header=None, delimiter=',',
                       usecols=[0, 1, 2, 6, 7, 8, 12, 13, 14, 15, 16], names=col_names)
    meta['start_date'] = pd.to_datetime(dict(year=meta.start_year, month=meta.start_month, day=meta.start_day))
    meta['end_date'] = pd.to_datetime(dict(year=meta.end_year, month=meta.end_month, day=meta.end_day))
    meta = meta.drop(columns=['start_year', 'start_month', 'start_day', 'end_year', 'end_month', 'end_day'])
    meta['time_between'] = meta['start_date'] + (meta['end_date'] - meta['start_date']) / 2  # midpoint of the pair
    return meta


# Raw strings are required here: in a normal literal '\U' starts an 8-digit
# unicode escape, so '...\U\U_metadata...' does not even compile.
# NOTE: external_data_dir must already be defined when this cell runs.
u_std_file = _load_std_metadata(external_data_dir + r'\AS_S1_velocities\U\U_metadata_250m_20160103_20230529.txt')
print(u_std_file)

v_std_file = _load_std_metadata(external_data_dir + r'\AS_S1_velocities\V\V_metadata_250m_20160103_20230529.txt')


In [None]:
# Raw strings are required: in a normal literal '\U' starts an 8-digit unicode
# escape, so '...\U\*timefiltered.tif' does not even compile.
# sorted(): glob returns files in arbitrary order, but the U and V stacks reuse
# the time axis derived from the S filenames, so the three lists must be in a
# consistent order (assumes the S/U/V filenames sort the same way - confirm).
# NOTE: external_data_dir must already be defined when this cell runs.
S_tiff_list = sorted(glob.glob(external_data_dir + r'\AS_S1_velocities\S\*timefiltered.tif'))
U_tiff_list = sorted(glob.glob(external_data_dir + r'\AS_S1_velocities\U\*timefiltered.tif'))
V_tiff_list = sorted(glob.glob(external_data_dir + r'\AS_S1_velocities\V\*timefiltered.tif'))

if not S_tiff_list:
    raise FileNotFoundError('No *timefiltered.tif files found for S under external_data_dir')
if not (len(S_tiff_list) == len(U_tiff_list) == len(V_tiff_list)):
    raise ValueError(
        'S, U and V must have the same number of GeoTIFFs to share one time axis '
        '(found %d, %d and %d)' % (len(S_tiff_list), len(U_tiff_list), len(V_tiff_list))
    )

# Create variable used for time axis
extracted_time, repeat_time = retrieve_timestamp(S_tiff_list)
time_var = xr.Variable('time', extracted_time)


def _stack_geotiffs(tiff_list, time_coord):
    """Open every GeoTIFF lazily, concatenate along ``time_coord`` and split bands into variables.

    rioxarray.open_rasterio replaces xr.open_rasterio, which is no longer
    available in current xarray releases.
    """
    stacked = xr.concat(
        [rioxarray.open_rasterio(path).chunk('auto') for path in tiff_list],
        dim=time_coord,
    )
    return stacked.to_dataset('band')  # one variable per band


geotiffs_ds = _stack_geotiffs(S_tiff_list, time_var)
ice_velocity = geotiffs_ds.rename({1: 'S'})  # rename band 1 to speed

ice_velocity['repeat_time'] = ('time', repeat_time)
ice_velocity['repeat_time'].attrs['long_name'] = 'Sentinel1_repeat_time'

########### U AND V  #################
geotiffs_ds_U = _stack_geotiffs(U_tiff_list, time_var)
U_velocity = geotiffs_ds_U.rename({1: 'U'})  # rename band 1 to the U component

geotiffs_ds_V = _stack_geotiffs(V_tiff_list, time_var)
V_velocity = geotiffs_ds_V.rename({1: 'V'})  # rename band 1 to the V component

In [None]:
# Data directories. Earlier cells and the helper functions read both names, so
# after a kernel restart they must be defined before those cells are run.
internal_data_dir = ''  # TODO: set to the data directory in the GitHub repo
external_data_dir = ''  # TODO: set to the data directory on the external hard drive
if not (internal_data_dir and external_data_dir):
    # Fail early with a clear message instead of building paths from empty strings.
    raise ValueError('Set internal_data_dir and external_data_dir before running the extraction.')

# Extract the median-velocity time series of every region of interest and
# write one CSV per region (columns: S = median speed, STD = propagated error).
ROI_list = ['IS_ROI-1', 'RUSSELL_ROI-2', 'KAN_L_ROI', 'NORTH_NUNATAK_ROI-3', 'SOUTH_NUNATAK_ROI-4', 'UNNAMED_SOUTH_ROI-5']
for ROI in ROI_list:
    print('\n=========================================')
    print('Extracting velocity for:', ROI)
    print('=========================================\n')

    velocity, error, time_ROI = extract_ROI_velocity(ice_velocity, U_velocity, V_velocity, ROI, u_std_file['std_off-ice_vel'], v_std_file['std_off-ice_vel'])

    # column_stack gives the same (n_times, 2) table as transposing
    # np.array([velocity, error]), including the common float upcast.
    ROI_df = pd.DataFrame(np.column_stack((velocity, error)), columns=['S', 'STD'], index=time_ROI)
    # Raw string: in a normal literal '\v' is a vertical-tab escape, which
    # silently corrupted the output path.
    ROI_df.to_csv(internal_data_dir + r'\velocity\%s_timeseries.csv' % ROI)
    print('\nCreated CSV for', ROI)