# Calculate some cross-correlations in time series

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
import pandas as pd
import geopandas as gpd
import xarray as xr
import rioxarray as rxr
from scipy.signal import correlate

In [None]:
# Root folder holding all inputs, and the folder where figures are written
data_path = '/Users/raineyaberle/Research/Hubbard/'
figures_out_path = os.path.join(data_path, 'figures')

# Load velocity
v_fn = os.path.join(data_path, 'velocity', 'Hubbard_5eminus5.nc')
v = xr.open_dataset(v_fn)
v = v.rio.write_crs('EPSG:3413')
v = v.rio.reproject('EPSG:3338') # reproject to Alaska Albers
# Speed (magnitude) from the x and y velocity components
v['v'] = np.sqrt(v['vx']**2 + v['vy']**2)
# Convert units from m/yr to m/d (the division applies to every data variable in the dataset)
v = v / 365

# Load terminus
term_fn = os.path.join(data_path, 'terminus', 'terminus_position_smooth.csv')
term = pd.read_csv(term_fn)
term['Date'] = pd.to_datetime(term['Date'])
term.set_index('Date', inplace=True)
# add mean column (row-wise mean across all remaining columns)
term['mean'] = term.mean(axis=1)

# Air temperature
air_fn = os.path.join(data_path, 'weather', 'wx_Haenke_2014_2022_nogap.csv')
air = pd.read_csv(air_fn)
# Remove rows with no dates: keep only rows whose 'GMT' entry is a string
has_date = air['GMT'].map(lambda gmt: isinstance(gmt, str)).astype(bool)
air = air.loc[has_date].reset_index(drop=True)
air.rename(columns={'GMT':'Date', 'Temp-AVG': 'AirTemp_Haenke'}, inplace=True)
air = air[['Date', 'AirTemp_Haenke']]
air['Date'] = pd.to_datetime(air['Date'])
# Resample to mean daily values
air.set_index('Date', inplace=True)
air = air.resample('1D').mean()
air.reset_index(inplace=True)

# Precipitation
pr_fn = os.path.join(data_path, 'weather', 'Yakutat_2000_2023.csv')
pr = pd.read_csv(pr_fn)
pr['Date_Time'] = pd.to_datetime(pr['Date_Time'])
pr.rename(columns={'Date_Time':'Date', 'precip_accum_24_hour_set_1':'Precip_Yakutat'}, inplace=True)
pr = pr[['Date', 'Precip_Yakutat']]
# Resample to mean daily values (same treatment as the air temperature above)
pr.set_index('Date', inplace=True)
pr = pr.resample('1D').mean()
pr.reset_index(inplace=True)


## Sample upglacier and downglacier velocities

In [None]:
# Pick a couple points (x, y in the coordinates of the velocity dataset `v`)
p1 = [808e3, 1212e3]  # plotted as the upglacier point
p2 = [800e3, 1202e3]  # plotted as the downglacier point

# Values above this threshold are treated as outliers and masked
# (m/d, given the unit conversion applied when the velocity was loaded)
v_outlier_thresh = 15

# Extract the time series at the grid cell nearest to each point and mask outliers
v1 = v.sel(x=p1[0], y=p1[1], method='nearest')
v1 = xr.where(v1 > v_outlier_thresh, np.nan, v1)
v2 = v.sel(x=p2[0], y=p2[1], method='nearest')
v2 = xr.where(v2 > v_outlier_thresh, np.nan, v2)

# Normalize from 0 to 1
v1_norm =  (v1 - np.min(v1)) / (np.max(v1) - np.min(v1))
v2_norm =  (v2 - np.min(v2)) / (np.max(v2) - np.min(v2))

# Plot: time-mean speed map with the two points (left), normalized time series (right)
fig, ax = plt.subplots(1, 2, figsize=(10,4), gridspec_kw={'width_ratios': [1, 2]})

v_im = ax[0].imshow(v.mean(dim='time').v.data, cmap='Greys_r', clim=(0,10),
                    extent=(np.min(v.x.data)/1e3, np.max(v.x.data)/1e3,
                            np.min(v.y.data)/1e3, np.max(v.y.data)/1e3))
ax[0].plot(p1[0]/1e3, p1[1]/1e3, '*b', markersize=10, label='Upglacier point')
ax[0].plot(p2[0]/1e3, p2[1]/1e3, '*c', markersize=10, label='Downglacier point')
ax[0].legend(loc='lower left', bbox_to_anchor=[0.1, -0.6, 0.2, 0.2])
fig.colorbar(v_im, ax=ax[0], shrink=0.6, label='Velocity [m/d]')
ax[0].set_xlabel('Easting [km]')
ax[0].set_ylabel('Northing [km]')
# Line colors match the point markers on the map
ax[1].plot(v1.time.data, v1_norm.v.data, '-b', label='Upglacier point')
ax[1].plot(v2.time.data, v2_norm.v.data, '-c', label='Downglacier point')
ax[1].set_ylabel('Normalized velocity')
ax[1].legend(loc='best')

plt.show()

## Combine all variables into dataframe

In [None]:
# velocity (time series at the first sampled point, v1)
v_df = pd.DataFrame({'Date': v1.time.data,
                     'Velocity': v1.v.data})
# terminus
# NOTE(review): drop_duplicates() here removes repeated *values* of the mean
# terminus position, not repeated dates — confirm this is intended.
term_df = term['mean'].drop_duplicates().reset_index()
term_df.rename(columns={'mean':'Terminus'}, inplace=True)

# add cumsum PDDs and precip for each year
air['Date'] = pd.to_datetime(air['Date'])
# Positive degree days: negative temperatures contribute zero. Only the PDD
# column is clipped; Date and AirTemp_Haenke must remain untouched.
air['PDD'] = air['AirTemp_Haenke'].clip(lower=0)
air['PDD_cumsum'] = air.groupby(air['Date'].dt.year)['PDD'].cumsum()
pr['Precip_Yakutat_cumsum'] = pr.groupby(pd.DatetimeIndex(pr['Date']).year)['Precip_Yakutat'].cumsum()

# combine (default inner joins: only dates present in every table are kept)
weather_df = air.merge(pr, on='Date')
data_df = v_df.merge(term_df.merge(weather_df, on='Date'), on='Date').set_index('Date')

# resample onto weekly time scale
data_resamp_df = data_df.resample(pd.Timedelta(1,'W')).mean()

# interpolate missing values
data_resamp_interp_df = data_resamp_df.interpolate(method='linear')

# plot one panel per variable: raw points over the resampled/interpolated line
var_names = list(data_df.columns)
fig, ax = plt.subplots(len(var_names), 1, figsize=(8, 4*len(var_names)))
for i, var in enumerate(var_names):
    ax[i].plot(data_resamp_interp_df[var], '-c', label='resampled, interpolated data')
    ax[i].plot(data_df[var], '.b', label='raw data')
    ax[i].set_title(str(var))
    if i==0:
        ax[i].legend(loc='upper center', ncols=2, bbox_to_anchor=[0.35, 1.1, 0.2, 0.2])
plt.show()

## Calculate cross-correlation between all variables

In [None]:
# Define variable combinations; each pair is passed to correlate() as (var1, var2)
combos = [['AirTemp_Haenke', 'Terminus'],
          ['AirTemp_Haenke', 'Velocity'],
          ['Terminus', 'Velocity']]

# Define dictionary of colors for plotting variables
colors_dict = {'Velocity': '#1b9e77',
               'Terminus': '#d95f02',
               'Precip_Yakutat': '#7570b3',
               'AirTemp_Haenke': '#e7298a'}

# Set up figure: one row per combination
# (left: both time series on twin y-axes, right: their cross-correlation)
plt.rcParams.update({'font.size': 12, 'font.sans-serif': 'Arial'})
fig, ax = plt.subplots(len(combos), 2, figsize=(12, 4*len(combos)))

# Iterate over each combination of data variables
for irow, (var1, var2) in enumerate(combos):
    # grab data variables
    x1 = data_resamp_interp_df[var1]
    x2 = data_resamp_interp_df[var2]

    # NOTE(review): the series are correlated as-is, without removing their
    # means first — confirm whether de-meaning (x - x.mean()) is wanted here.

    # calculate cross-correlation
    xcorr = correlate(x1, x2, mode='same')
    # Integer lag (in samples, i.e. weeks) of each element of the 'same'-mode
    # output. Zero lag sits at index len//2 for both odd and even lengths,
    # which matches scipy.signal.correlation_lags(..., mode='same').
    # With correlate(x1, x2), a peak at a positive lag should mean var1 lags
    # var2 — confirm against the scipy docs before interpreting.
    weeks = np.arange(len(xcorr)) - len(xcorr) // 2
    # index of the (first) maximum, ignoring NaNs
    Imax = np.nanargmax(xcorr)

    # grab colors for each
    x1_col = colors_dict[var1]
    x2_col = colors_dict[var2]

    # plot the two time series, each on its own color-coded y-axis
    ax_ts = ax[irow, 0]
    ax_ts.plot(x1.index, x1.values, '-', color=x1_col, label=var1)
    ax_ts.spines['left'].set_color(x1_col)
    ax_ts.tick_params(axis='y', colors=x1_col)
    ax_ts.set_ylabel(var1, color=x1_col)
    ax_ts.set_title(f'{var1} X {var2}')
    ax2 = ax_ts.twinx()
    ax2.plot(x2.index, x2.values, '-', color=x2_col, label=var2)
    ax2.spines['right'].set_color(x2_col)
    ax2.tick_params(axis='y', colors=x2_col)
    ax2.set_ylabel(var2, color=x2_col)

    # plot the cross-correlation against lag and mark its maximum
    ax_xc = ax[irow, 1]
    ax_xc.plot(weeks, xcorr, '-', color='grey')
    ax_xc.plot(weeks[Imax], xcorr[Imax], '*', color='c', markersize=10)
    ax_xc.set_xlabel('Weeks')
    ax_xc.axvline(0, linestyle='-', color='k', linewidth=1)
    ax_xc.axvline(weeks[Imax], linestyle='-', color='c', linewidth=2)
    ax_xc.set_title(f'Max X-corr at {weeks[Imax]} weeks')
    ax_xc.set_yticks([])
    ax_xc.grid()

fig.subplots_adjust(wspace=0.2, hspace=0.4)

# save figure before showing it, so the output does not depend on what the
# backend does to the figure once the window is closed
os.makedirs(figures_out_path, exist_ok=True)
fig_fn = os.path.join(figures_out_path, 'xcorr_velocity_terminus_weather.png')
fig.savefig(fig_fn, dpi=300, bbox_inches='tight')
print('Figure saved to file:', fig_fn)

plt.show()