<center><strong><font size=+3>Applications of robust 2D median estimators to HERA data</font></strong></center>
<br><br>
<center><strong><font size=+2>Matyas Molnar and Bojan Nikolic</font><br></strong></center>
<br><center><strong><font size=+1>Astrophysics Group, Cavendish Laboratory, University of Cambridge</font></strong></center>

In [None]:
import os
import shutil
import sys
import tempfile
import textwrap
from IPython.display import Video

import numpy as np
import seaborn as sns
from matplotlib import gridspec
from matplotlib import pyplot as plt
from matplotlib.animation import FuncAnimation
from mpl_toolkits.axes_grid1.inset_locator import mark_inset, zoomed_inset_axes
from scipy import signal
from scipy.stats import chi2, shapiro
from scipy.stats.mstats import gmean as geometric_mean
from statsmodels.nonparametric.kernel_regression import KernelReg

from hera_cal.io import HERAData
from hera_cal.redcal import get_reds
from hera_cal.utils import JD2LST

from robstat.ml import nan_interp1d, nan_interp2d
from robstat.plotting import grid_heatmaps, row_heatmaps
from robstat.robstat import Cmardia_median, geometric_median, mardia_median, mv_median, \
mv_normality, mv_outlier, tukey_median
from robstat.stdstat import mad_clip, rsc_mean
from robstat.utils import DATAPATH

In [None]:
# Default figure size (width, height) applied to the matplotlib figures below
plt.rcParams['figure.figsize'] = (12, 8)
# IPython magic: draw figures inline in the notebook output
%matplotlib inline

In [None]:
# Switch for the optional example figures further down; enabling it also raises the figure DPI
plot_figs = False
if plot_figs:
    import matplotlib as mpl
    # Higher resolution for every rendered figure
    mpl.rcParams['figure.dpi'] = 300

### Load HERA visibility data

In [None]:
# Example HERA visibility file located in the robstat data directory
sample_data = os.path.join(DATAPATH, 'zen.2458098.43869.HH.OCRSA.uvh5')

hd = HERAData(sample_data)
data, flags, _ = hd.read()

# Redundant baseline groups; keep only baselines that are present in the data
# and only the groups whose baselines are all present
reds = get_reds(hd.antpos, pols=hd.pols)
flat_bls = []
for grp in reds:
    flat_bls.extend(bl for bl in grp if bl in data.keys())
reds = [grp for grp in reds if set(grp).issubset(flat_bls)]
# Baseline key -> position along the last axis of the data cube built below
bl_dict = dict(zip(flat_bls, range(len(flat_bls))))

# Attach the flags as masks, then stack every baseline into one masked cube
data = dict((bl_key, np.ma.array(vis, mask=flags[bl_key], fill_value=np.nan))
            for bl_key, vis in data.items())
mdata = np.ma.empty((hd.Nfreqs, hd.Ntimes, hd.Nbls), dtype=complex,
                    fill_value=np.nan)
for i, bl in enumerate(flat_bls):
    mdata[:, :, i] = data[bl].T

# Flagged samples become NaN in the plain array; the mask is kept separately
flags = mdata.mask
data = mdata.filled() # dimensions (freqs, times, bls)

### Redundant averaging

In [None]:
# Work with the first redundant baseline group
slct_bls = reds[0]
slct_bl_idxs = np.array([bl_dict[bl_key] for bl_key in slct_bls])
slct_data, slct_flags = (cube[..., slct_bl_idxs] for cube in (data, flags))

# Every flagged sample must have been filled with NaN (and nothing else is NaN)
n_flagged = slct_flags.sum()
n_nan = np.isnan(slct_data).sum()
assert n_flagged == n_nan
print('Looking at baselines redundant to {}'.format(slct_bls[0]))

In [None]:
def flt_nan(x):
    """Return the entries of x that are not NaN (boolean indexing, so the result is 1-D)."""
    return x[~np.isnan(x)]


def marg_med(x):
    """Marginal median: NaN-ignoring medians of the real and imaginary parts, recombined."""
    return np.nanmedian(x.real) + np.nanmedian(x.imag)*1j


# Look at the frequency / time slice with the largest spread across the baseline group
bl_scatter = np.nanstd(slct_data, axis=-1)
idxs = np.unravel_index(np.nanargmax(bl_scatter), slct_data.shape[:2])
print('Selecting freq / time slice {}'.format(idxs))
slct_data_slice = slct_data[idxs[0], idxs[1], :]

# Location estimates of the selected slice
sample_gmean = geometric_mean(flt_nan(slct_data_slice))
sample_gmed = geometric_median(slct_data_slice, keep_res=True)
sample_tmed = tukey_median(slct_data_slice)['barycenter']
sample_mmed = Cmardia_median(slct_data_slice)
sample_bmed = marg_med(slct_data_slice)
sample_hmean = rsc_mean(slct_data_slice)

In [None]:
# (estimate, name) pairs, in the order used for printing and plotting
est_values = [sample_gmean, sample_gmed, sample_tmed, sample_mmed, sample_bmed,
              sample_hmean]
est_names = ['Geometric Mean', 'Geometric Median', 'Tukey Median', 'Mardia Median',
             'Marginal Median', 'HERA Mean']
med_ests = list(zip(est_values, est_names))
for est_val, est_name in med_ests:
    print('{:17s}: {:4f}'.format(est_name, est_val))

In [None]:
fig, ax = plt.subplots(figsize=(7, 7))

# Visibilities of the selected slice in the complex plane
ax.scatter(slct_data_slice.real, slct_data_slice.imag, alpha=0.5)

# Overlay each location estimate with its own marker format
est_markers = (
    (sample_gmean, 'co', 'Geometric Mean'),
    (sample_gmed, 'ro', 'Geometric Median'),
    (sample_tmed, 'yo', 'Tukey Median'),
    (sample_mmed, 'ko', 'Mardia Median'),
    (sample_bmed, 'bo', 'Marginal Median'),
    (sample_hmean, 'go', 'HERA Mean'),
)
for est, fmt, lbl in est_markers:
    ax.plot(est.real, est.imag, fmt, label=lbl)

ax.annotate(slct_bls[0], xy=(0.05, 0.05), xycoords='axes fraction')
ax.set_xlabel(r'$\mathfrak{Re} \; (V)$')
ax.set_ylabel(r'$\mathfrak{Im}(V)$')

plt.legend(loc='lower right')
plt.tight_layout()
plt.show()

In [None]:
# First time integration that is not entirely NaN over frequencies and baselines
time_int = np.where(~np.isnan(data).all(axis=(0, 2)))[0][0] # first non-nan index
# perhaps find index with fewest nans?

# One result per (frequency, redundant group) for every estimator
gmean_res = np.empty((data.shape[0], len(reds)), dtype=complex)
gmed_res, tmed_res, mmed_res, bmed_res, hmean_res = \
    [np.empty_like(gmean_res) for _ in range(5)]

# Last successful geometric median, used as the starting guess of the next solve.
# It is tracked separately from the stored result so that an all-NaN row never
# feeds NaN into the solver.
gmed_init = None
for bl, bl_grp in enumerate(reds):
    slct_bl_idxs = np.array([bl_dict[slct_bl] for slct_bl in bl_grp])
    for f, frow in enumerate(data[:, time_int, slct_bl_idxs]):
        if np.isnan(frow).all():
            # Nothing to estimate: store NaN for every estimator (the names must
            # match the ones written to the result arrays below)
            gmean_bf = gmed_bf = tmed_bf = mmed_bf = bmed_bf = hmean_bf = np.nan
        else:
            gmean_bf = geometric_mean(flt_nan(frow))
            gmed_bf = geometric_median(frow, init_guess=gmed_init, keep_res=True)
            gmed_init = gmed_bf
            tmed_bf = tukey_median(frow)['barycenter']
            mmed_bf = Cmardia_median(frow, init_guess=None)
            bmed_bf = marg_med(frow)
            hmean_bf = rsc_mean(frow)
        gmean_res[f, bl] = gmean_bf
        gmed_res[f, bl] = gmed_bf
        tmed_res[f, bl] = tmed_bf
        mmed_res[f, bl] = mmed_bf
        bmed_res[f, bl] = bmed_bf
        hmean_res[f, bl] = hmean_bf

# (estimator name, result array) pairs, in the same order as med_ests
med_est_res = list(zip([i[1] for i in med_ests], \
                  [gmean_res, gmed_res, tmed_res, mmed_res, bmed_res, hmean_res]))

In [None]:
fig = plt.figure(constrained_layout=True, figsize=(10, 20), dpi=100)
spec = gridspec.GridSpec(nrows=2*len(med_ests), figure=fig, ncols=2)

# One row of panels per estimator: a tall Cartesian panel on the left and
# stacked amplitude / phase panels on the right
axes = []
for i in range(len(med_ests)):
    ax1 = fig.add_subplot(spec[i*2:2+i*2, 0])
    ax2 = fig.add_subplot(spec[i*2, 1])
    ax3 = fig.add_subplot(spec[i*2+1, 1])
    axes.append([ax1, ax2, ax3])

for m, (est_name, est_res) in enumerate(med_est_res):
    for i, red_grp in enumerate(reds):
        # 'C<i>' is the i-th colour of the default property cycle, so every
        # baseline group gets one colour shared by all three panels without
        # touching the axes' private colour-cycler state
        c = 'C{}'.format(i)
        axes[m][0].plot(hd.freqs, est_res[:, i].real, color=c, \
                   label='{}'.format(red_grp[0]) + r'$\mathfrak{Re}$')
        axes[m][0].plot(hd.freqs, est_res[:, i].imag, color=c, \
                   label='{}'.format(red_grp[0]) + r'$\mathfrak{Im}$', ls='--')
        axes[m][1].plot(hd.freqs, np.abs(est_res[:, i]), color=c, \
                   label='{}'.format(red_grp[0]))
        axes[m][2].plot(hd.freqs, np.angle(est_res[:, i]), color=c, \
                   label='{}'.format(red_grp[0]), ls='--')
    # Name the estimator once per row (not once per baseline group)
    axes[m][0].text(x=0.05, y=0.5, s=est_name, transform=axes[m][0].transAxes, \
                    fontsize=10, style='normal', weight='light')

for ax in axes:
    ax[0].set_ylabel(r'$V$')
    ax[1].set_ylabel(r'$|V|$')
    ax[2].set_ylabel(r'$\varphi$')

# Only the bottom row carries the frequency axis label
for ax in axes[-1]:
    ax.set_xlabel(r'$\nu$')

axes[0][0].set_title('Cartesian')
axes[0][1].set_title('Polar')

for ax in axes[0]:
    ax.legend(framealpha=0.5, loc=1)

plt.suptitle('Median estimates for 14-m EW baselines')
plt.show()

### LST averaging

In [None]:
# Multi-day visibility archive (.npz) from the robstat data directory; its arrays are unpacked below
sample_xd_data = np.load(os.path.join(DATAPATH, 'xd_vis_rph.npz'))

In [None]:
# Visibilities are ordered (days, freqs, times, bls); flagged samples are set to NaN
xd_flags = sample_xd_data['flags']
xd_data = sample_xd_data['data'] # dimensions (days, freqs, times, bls)
xd_data[xd_flags] = np.nan

xd_redg, xd_times = sample_xd_data['redg'], sample_xd_data['times']
xd_pol = sample_xd_data['pol'].item()
xd_lsts = JD2LST(xd_times)*12/np.pi # in hours

freqs, chans = sample_xd_data['freqs'], sample_xd_data['chans']
# Add one extra channel label when the last channel number ends in 99
plt_chans = np.append(chans, chans[-1]+1) if chans[-1]%100 == 99 else chans

In [None]:
bl_grp = 0 # only look at 0th baseline group

# Rows of xd_redg whose first column equals the chosen group number
in_grp = xd_redg[:, 0] == bl_grp
slct_bl_idxs = np.flatnonzero(in_grp)
data, flags = xd_data[..., slct_bl_idxs], xd_flags[..., slct_bl_idxs]
# Remaining columns of the first matching row, printed below as the reference baseline
slct_red_bl = xd_redg[slct_bl_idxs[0], 1:]
print('Looking at baselines redundant to ({}, {}, \'{}\')'.format(*slct_red_bl, xd_pol))

In [None]:
no_bls = 4 # just pick the first four baselines from the selected baseline group

# use 2 time integrations for each median, as done in HERA LST-binning
new_no_tints = int(np.ceil(data.shape[2]/2))
res_shape = (xd_data.shape[1], new_no_tints, no_bls)
xd_gmed_res_t = np.empty(res_shape, dtype=complex)
xd_tmed_res_t = np.empty_like(xd_gmed_res_t)
xd_bmed_res_t = np.empty_like(xd_gmed_res_t)
xd_hmean_res_t = np.empty_like(xd_gmed_res_t)

gmed_ij = None
for bl in range(no_bls):
    xd_data_b = data[..., bl]
    for freq in range(xd_data_b.shape[1]):
        for tint in range(new_no_tints):
            # All days, one channel, the pair of integrations forming this time bin
            t_lo = 2*tint
            xd_data_bft = xd_data_b[:, freq, t_lo:t_lo+2].flatten()
            if not np.isnan(xd_data_bft).all():
                gmed_ft = geometric_median(xd_data_bft, init_guess=gmed_ij,
                                           keep_res=True)
                tmed_ft = tukey_median(xd_data_bft)['barycenter']
                bmed_ft = marg_med(xd_data_bft)
                hmean_ft = rsc_mean(xd_data_bft)
            else:
                # Fully flagged bin: nothing to estimate
                gmed_ft = tmed_ft = bmed_ft = hmean_ft = np.nan
            xd_gmed_res_t[freq, tint, bl] = gmed_ft
            xd_tmed_res_t[freq, tint, bl] = tmed_ft
            xd_bmed_res_t[freq, tint, bl] = bmed_ft
            xd_hmean_res_t[freq, tint, bl] = hmean_ft

In [None]:
def _drop_nan_baselines(arr):
    """Remove the baselines (last axis) of arr that are NaN at every frequency and time bin."""
    nan_bl = np.isnan(arr).all(axis=(0, 1))
    if not nan_bl.any():
        return arr
    return np.delete(arr, np.flatnonzero(nan_bl), axis=-1)


arrs = [xd_gmed_res_t, xd_tmed_res_t, xd_bmed_res_t, xd_hmean_res_t]
flt_arrs = [_drop_nan_baselines(arr) for arr in arrs]

In [None]:
# One row of heatmaps per baseline and one column per estimator
n_kept_bls = flt_arrs[0].shape[-1]
grid_arrs = [[est_arr[..., b] for b in range(n_kept_bls)] for est_arr in flt_arrs]
titles = ['Geometric Median', 'Tukey Median', 'Marginal Median', 'HERA Mean']

ylabels = ['{}\n\nFrequency channel'.format(ylab) for ylab in reds[bl_grp][:no_bls]]

# Amplitude of every estimate
abs_hm_kwargs = dict(apply_np_fn='abs', titles=titles, ybase=25, xlabels='Time bin',
                     ylabels=ylabels, clip_pctile=1, yticklabels=plt_chans,
                     figsize=(12, 10))
grid_heatmaps(grid_arrs, **abs_hm_kwargs)

In [None]:
# Same grid as the amplitude plot, with 'angle' as the applied numpy function
# (no clip_pctile is passed for this one)
grid_heatmaps(grid_arrs, apply_np_fn='angle', titles=titles, ybase=25, \
              xlabels='Time bin', ylabels=ylabels, yticklabels=plt_chans, figsize=(12, 10))

In [None]:
# Same grid again, with 'real' as the applied numpy function
grid_heatmaps(grid_arrs, apply_np_fn='real', titles=titles, ybase=25, \
              xlabels='Time bin', ylabels=ylabels, clip_pctile=1, yticklabels=plt_chans, \
              figsize=(12, 10))

In [None]:
# Same grid again, with 'imag' as the applied numpy function
grid_heatmaps(grid_arrs, apply_np_fn='imag', titles=titles, ybase=25, \
              xlabels='Time bin', ylabels=ylabels, clip_pctile=1, yticklabels=plt_chans, \
              figsize=(12, 10))

### LST + redundant averaging

In [None]:
# Look at 2 consecutive time integrations / 1 frequency slice with high variance.
# The last time integration is left out of the search so that the selected
# integration always has a successor.
slice_std = np.nanstd(data[..., :-1, :], axis=(0, -1))
# Unravel with the shape of the array that was searched: it has one time
# integration fewer than `data`, so using data.shape here would give wrong indices
idxs = np.unravel_index(np.nanargmax(slice_std), slice_std.shape)
print('Selecting freq / time slice: ({}, {}-{})'.format(idxs[0], idxs[1], idxs[1]+1))

# Have visibilities across days for the same baseline (2 time bins)
# flatten the data array and perform statistics on the whole dataset
data_slice = data[:, idxs[0], idxs[1]:idxs[1]+2, :].flatten()

# Location estimates of the aggregated slice
xd_sample_gmean = geometric_mean(flt_nan(data_slice))
xd_sample_gmed = geometric_median(data_slice, keep_res=True)
xd_sample_tmed = tukey_median(data_slice)['barycenter']
xd_sample_mmed = Cmardia_median(data_slice)
xd_sample_bmed = marg_med(data_slice)
xd_sample_hmean = rsc_mean(data_slice)

Alternatively, we could take the median of the visibility amplitude and the Mardia median of the phase. While this is an improvement on taking the medians of the Cartesian coordinates separately, it still does not treat the complex data as a whole. The geometric median or the Tukey median would be preferable methods.

In [None]:
# (estimate, name, marker format) triples for the aggregated slice
xd_est_values = [xd_sample_gmean, xd_sample_gmed, xd_sample_tmed, xd_sample_mmed,
                 xd_sample_bmed, xd_sample_hmean]
xd_est_names = ['Geometric Mean', 'Geometric Median', 'Tukey Median', 'Mardia Median',
                'Marginal Median', 'HERA Mean']
xd_est_fmts = ['co', 'ro', 'yo', 'ko', 'bo', 'go']
med_ests = list(zip(xd_est_values, xd_est_names, xd_est_fmts))
for est_val, est_name, est_fmt in med_ests:
    print('{:17s}: {:4f}'.format(est_name, est_val))

In [None]:
fig, ax = plt.subplots(figsize=(6, 6))

# Unflagged visibilities of the aggregated slice, with the estimates on top
vis_pts = flt_nan(data_slice)
ax.scatter(vis_pts.real, vis_pts.imag, alpha=0.5)
for est, lbl, fmt in med_ests:
    ax.plot(est.real, est.imag, fmt, label=lbl)

# zoomed in sub region of the original image
axins = zoomed_inset_axes(ax, zoom=6, loc=4)
axins.scatter(vis_pts.real, vis_pts.imag, alpha=0.5)
for est, lbl, fmt in med_ests:
    axins.plot(est.real, est.imag, fmt)

# Inset limits: integer bounds enclosing all but the last two estimates
zoom_re = [est.real for est, lbl, fmt in med_ests[:-2]]
zoom_im = [est.imag for est, lbl, fmt in med_ests[:-2]]
x1, x2 = np.floor(np.min(zoom_re)), np.ceil(np.max(zoom_re))
y1, y2 = np.floor(np.min(zoom_im)), np.ceil(np.max(zoom_im))
axins.set_xlim(x1, x2)
axins.set_ylim(y1, y2)

axins.tick_params(axis='x', direction='in', pad=-15)
mark_inset(ax, axins, loc1=1, loc2=3, fc='none', ec='0.5')

bl_label = tuple(slct_red_bl) + (str(xd_pol),)
ax.annotate(bl_label, xy=(0.05, 0.05), xycoords='axes fraction',
            bbox=dict(boxstyle='round', facecolor='white'))
ax.set_xlabel(r'$\mathfrak{Re} \; (V)$')
ax.set_ylabel(r'$\mathfrak{Im} \; (V)$')
plot_title = 'Bivariate location estimators for redundant ' \
    'visibilities aggregated across JDs'
ax.set_title(textwrap.fill(plot_title, 60))

ax.legend(loc=1, prop={'size': 8})

plt.show()

In [None]:
# Joint KDE of the aggregated slice with marginal densities
vis_pts = flt_nan(data_slice)
g = sns.jointplot(x=vis_pts.real, y=vis_pts.imag, kind='kde', height=8,
                  cmap='Blues', fill=True, space=0)
g.set_axis_labels(r'$\mathfrak{Re} \; (V)$', r'$\mathfrak{Im} \; (V)$', size=14)

# Location estimates on top of the density
for est, lbl, fmt in med_ests:
    g.ax_joint.plot(est.real, est.imag, fmt, label=lbl)
legend_properties = {'size': 10}
g.ax_joint.legend(prop=legend_properties, loc='upper right')

bl_label = tuple(slct_red_bl) + (str(xd_pol),)
g.ax_joint.annotate(bl_label, xy=(0.05, 0.05), xycoords='axes fraction',
                    bbox=dict(boxstyle='round', facecolor='white'), size=14)
plt.tight_layout()
plt.show()

In [None]:
# use 2 time integrations for each median, as done in HERA LST-binning
new_no_tints = int(np.ceil(xd_data.shape[2]/2))
xd_gmed_res = np.empty((xd_data.shape[1], new_no_tints), dtype=complex)
xd_tmed_res, xd_bmed_res, xd_hmean_res = [np.empty_like(xd_gmed_res) for _ in range(3)]

gmed_ij = None
# Select the baselines of group bl_grp from the multi-day dataset's own group
# table (xd_redg), exactly as done when `data` / `flags` were built. Looking the
# indices up through bl_dict / reds would tie this to the baseline ordering of
# the single-file dataset loaded at the top of the notebook.
slct_bl_idxs = np.where(xd_redg[:, 0] == bl_grp)[0]
xd_data_b = xd_data[..., slct_bl_idxs]
for freq in range(xd_data_b.shape[1]):
    for tint in range(new_no_tints):
        # All days and all redundant baselines of one channel and one time bin
        xd_data_bft = xd_data_b[:, freq, 2*tint:2*tint+2, :].flatten()
        if np.isnan(xd_data_bft).all():
            # Fully flagged bin: nothing to estimate
            gmed_ft = tmed_ft = bmed_ft = hmean_ft = np.nan
        else:
            gmed_ft = geometric_median(xd_data_bft, init_guess=gmed_ij, keep_res=True)
            tmed_ft = tukey_median(xd_data_bft)['barycenter']
            bmed_ft = marg_med(xd_data_bft)
            hmean_ft = rsc_mean(xd_data_bft)
        xd_gmed_res[freq, tint] = gmed_ft
        xd_tmed_res[freq, tint] = tmed_ft
        xd_bmed_res[freq, tint] = bmed_ft
        xd_hmean_res[freq, tint] = hmean_ft

In [None]:
arrs = [xd_gmed_res, xd_tmed_res, xd_bmed_res, xd_hmean_res]


def tr_arrs(x, np_fn):
    """Apply the numpy function named np_fn to every array in x."""
    return [getattr(np, np_fn)(i) for i in x]


# One row per estimator, holding its amplitude, phase, real and imaginary parts
np_fns = ('abs', 'angle', 'real', 'imag')
garrs = [[getattr(np, np_fn)(est_arr) for np_fn in np_fns] for est_arr in arrs]

titles = ['Geometric Median', 'Tukey Median', 'Marginal Median', 'HERA Mean']
ylabels = [ylab + '\n\nFrequency channel'
           for ylab in ('Amp', 'Phase', r'$\mathfrak{Re}$', r'$\mathfrak{Im}$')]

grid_heatmaps(garrs, titles=titles, figsize=(12, 10), ybase=25, clip_pctile=1,
              xlabels='Time bin', yticklabels=plt_chans, ylabels=ylabels)

#### Smoothness of median results

Calculate standard deviation of the distances between successive points in either frequency or time to get an idea of the smoothness of the location results.

##### Standard deviation of absolute distances

In [None]:
def _abs_step_std(arr, along_time):
    """Mean over rows of the std of absolute successive differences.

    Rows are frequency channels when along_time is True (differences taken in
    time) and time bins otherwise (differences taken in frequency).
    """
    rows = arr if along_time else arr.T
    stds = np.empty(rows.shape[0])
    for k, row in enumerate(rows):
        stds[k] = np.nanstd(np.abs(np.ediff1d(row)))
    return np.nanmean(stds)


# in time
t_smoothness = [_abs_step_std(arr, True) for arr in arrs]
print('Smoothness in time: \n{}\n{}\n'.format(titles, t_smoothness))

# in frequency
f_smoothness = [_abs_step_std(arr, False) for arr in arrs]
print('Smoothness in frequency: \n{}\n{}'.format(titles, f_smoothness))

##### Standard deviation of complex differences

In [None]:
def _cplx_step_std(arr, along_time):
    """Mean over rows of the std of complex successive differences.

    Rows are frequency channels when along_time is True (differences taken in
    time) and time bins otherwise (differences taken in frequency).
    """
    rows = arr if along_time else arr.T
    stds = np.empty(rows.shape[0])
    for k, row in enumerate(rows):
        stds[k] = np.nanstd(np.ediff1d(row))
    return np.nanmean(stds)


# in time
t_smoothness = [_cplx_step_std(arr, True) for arr in arrs]
print('Smoothness in time: \n{}\n{}\n'.format(titles, t_smoothness))

# in frequency
f_smoothness = [_cplx_step_std(arr, False) for arr in arrs]
print('Smoothness in frequency: \n{}\n{}'.format(titles, f_smoothness))

#### Biggest difference in geometric median and HERA mean

In [None]:
# Flagged fraction per (frequency, time integration), over days and baselines
ok_slice = flags.mean(axis=(0, 3))
n_tints = ok_slice.shape[1]
even_end = int(np.floor(n_tints/2))*2
# Average the two integrations that form each time bin
first_tint = ok_slice[:, :even_end:2]
second_tint = ok_slice[:, 1:even_end:2]
ok_slice_ = 0.5 * (first_tint + second_tint) # since 2 tints are used
# An odd number of integrations leaves a final bin with a single integration
odd_arr = even_end != flags.shape[2]
if odd_arr:
    ok_slice_ = np.append(ok_slice_, ok_slice[:, -1:], axis=1)
ok_slice = ok_slice_ < 0.5 # only bins where less than 50% of the data are flagged

In [None]:
# Distance between the two estimators, with bins outside ok_slice blanked out.
# The array keeps its 2-D shape so that the argmax below can be unravelled into
# a (frequency, time bin) index; indexing with the boolean mask would flatten
# it and the position would no longer map back onto xd_gmed_res.
est_dist = np.where(ok_slice, np.abs(xd_gmed_res - xd_hmean_res), np.nan)
bd_idx = np.unravel_index(np.nanargmax(est_dist), xd_gmed_res.shape)
print('Frequency/time slice {} shows strong deviation between the geometric median and the '\
      'HERA mean.'.format(bd_idx))
# All days and baselines of that channel and time bin (2 integrations)
bd_data = xd_data_b[:, bd_idx[0], 2*bd_idx[1]:2*bd_idx[1]+2, :].flatten()

bd_med_ests = list(zip([xd_gmed_res[bd_idx], xd_hmean_res[bd_idx]], \
                       ['Geometric Median', 'HERA Mean'], \
                       ['ro', 'go']))

g = sns.jointplot(x=flt_nan(bd_data).real, y=flt_nan(bd_data).imag, \
                  kind='kde', height=8, cmap='Blues', fill=True, space=0)
g.set_axis_labels(r'$\mathfrak{Re} \; (V)$', r'$\mathfrak{Im} \; (V)$', size=14)
for i, med_est in enumerate(bd_med_ests):
    g.ax_joint.plot(med_est[0].real, med_est[0].imag, med_est[2], label=med_est[1])
legend_properties = {'size': 10}
g.ax_joint.legend(prop=legend_properties, loc='upper right')
g.ax_joint.annotate(tuple(slct_red_bl) + (str(xd_pol),), xy=(0.05, 0.05), \
    xycoords='axes fraction', bbox= dict(boxstyle='round', facecolor='white'), \
    size=14)
plt.tight_layout()
plt.show()

#### More density plots

To help visualize the data.

In [None]:
if plot_figs:
    # pick ok data slices to plot as examples of redundant visibility distributions
    ok_idxs = np.where(ok_slice)
    index = np.random.choice(ok_idxs[0].shape[0], 2, replace=False)
    slice1, slice2 = [(ok_idxs[0][k], ok_idxs[1][k]) for k in index]

    print('Random slices: {} & {}'.format(slice1, slice2))

    for rnd_slice in (slice1, slice2):
        rnd_f, rnd_t = rnd_slice
        # All days and baselines of the chosen channel and time bin
        bd_data = xd_data_b[:, rnd_f, 2*rnd_t:2*rnd_t+2, :].flatten()
        vis_pts = flt_nan(bd_data)

        rnd_med_ests = [
            (xd_gmed_res[rnd_slice], 'Geometric Median', 'ro'),
            (xd_hmean_res[rnd_slice], 'HERA Mean', 'go'),
        ]

        g = sns.jointplot(x=vis_pts.real, y=vis_pts.imag, kind='kde', height=8,
                          cmap='Blues', fill=True, space=0)
        g.set_axis_labels(r'$\mathfrak{Re} \; (V)$', r'$\mathfrak{Im} \; (V)$', size=14)
        for est, lbl, fmt in rnd_med_ests:
            g.ax_joint.plot(est.real, est.imag, fmt, label=lbl)
        legend_properties = {'size': 10}
        g.ax_joint.legend(prop=legend_properties, loc='upper right')
        g.ax_joint.annotate(tuple(slct_red_bl) + (str(xd_pol),), xy=(0.05, 0.05),
                            xycoords='axes fraction',
                            bbox=dict(boxstyle='round', facecolor='white'), size=14)
        plt.tight_layout()
        plt.show()

In [None]:
# Time bin whose distribution is animated over frequency
slct_btint = 0

def get_data(freq):
    """Return the visibilities of time bin slct_btint (2 integrations) at channel index freq."""
    return xd_data_b[:, freq, 2*slct_btint:2*slct_btint+2, :]

initd = get_data(0)
g = sns.JointGrid(x=flt_nan(initd).real, y=flt_nan(initd).imag, height=8)
g.set_axis_labels(r'$\mathfrak{Re} \; (V)$', r'$\mathfrak{Im} \; (V)$', size=14, labelpad=-2)

# Fixed axis limits (integer bounds of the whole selection) so frames are comparable
re_lim = (np.floor(np.nanmin(xd_data_b.real)), np.ceil(np.nanmax(xd_data_b.real)))
im_lim = (np.floor(np.nanmin(xd_data_b.imag)), np.ceil(np.nanmax(xd_data_b.imag)))

def prep_axes(g, xlim, ylim):
    """Clear the joint and marginal axes of g, restore the limits and hide the marginal ticks."""
    g.ax_joint.clear()
    g.ax_joint.set_xlim(xlim)
    g.ax_joint.set_ylim(ylim)
    g.ax_marg_x.clear()
    g.ax_marg_x.set_xlim(xlim)
    g.ax_marg_y.clear()
    g.ax_marg_y.set_ylim(ylim)
    plt.setp(g.ax_marg_x.get_xticklabels(), visible=False)
    plt.setp(g.ax_marg_y.get_yticklabels(), visible=False)
    plt.setp(g.ax_marg_x.yaxis.get_majorticklines(), visible=False)
    plt.setp(g.ax_marg_x.yaxis.get_minorticklines(), visible=False)
    plt.setp(g.ax_marg_y.xaxis.get_majorticklines(), visible=False)
    plt.setp(g.ax_marg_y.xaxis.get_minorticklines(), visible=False)
    plt.setp(g.ax_marg_x.get_yticklabels(), visible=False)
    plt.setp(g.ax_marg_y.get_xticklabels(), visible=False)

def animate(freq):
    """Draw one animation frame: the KDEs and location estimates at channel index freq."""
    data_f = get_data(freq)
    g.x, g.y = flt_nan(data_f).real, flt_nan(data_f).imag
    prep_axes(g, re_lim, im_lim)
    g.plot_joint(sns.kdeplot, cmap='Blues', fill=True)
    # `fill` replaces the deprecated `shade` keyword and matches the joint plot above
    g.plot_marginals(sns.kdeplot, fill=True)
    g.set_axis_labels(r'$\mathfrak{Re} \; (V)$', r'$\mathfrak{Im} \; (V)$', size=14)

    med_ests_f = list(zip([xd_gmed_res[freq, slct_btint], xd_hmean_res[freq, slct_btint]], \
                          ['Geometric Median', 'HERA Mean'], \
                          ['ro', 'go']))
    for i, med_est in enumerate(med_ests_f):
        g.ax_joint.plot(med_est[0].real, med_est[0].imag, med_est[2], label=med_est[1])
    g.ax_joint.legend(prop={'size': 10}, loc='upper right')
    g.ax_joint.annotate(tuple(slct_red_bl) + (str(xd_pol),), xy=(0.05, 0.05), \
        xycoords='axes fraction', bbox= dict(boxstyle='round', facecolor='white'), \
        size=14)

# One frame per frequency channel
ani = FuncAnimation(g.fig, animate, frames=xd_data_b.shape[1], interval=750, repeat=False)

# The movie is written to a temporary directory and embedded in the output below
temp_dir = tempfile.mkdtemp()
ani.save(os.path.join(temp_dir, 'kdes.mp4'), writer='ffmpeg', dpi=200)

plt.close()

Video(os.path.join(temp_dir, 'kdes.mp4'), width=600, embed=True)

In [None]:
# Delete the temporary directory (and the movie file written into it)
shutil.rmtree(temp_dir)

### Test of normality

#### Shapiro-Wilk test

We test the aggregated visibility data (over days, redundant baselines and consecutive time integrations) for normality using the Shapiro-Wilk test, to see if the data is Gaussian distributed for the $\mathfrak{Re}$ and $\mathfrak{Im}$ components separately, thus justifying the use of the mean.

In [None]:
# W statistic and p value per (frequency, time bin), for Re and Im separately
shapiro_w_re = np.empty_like(xd_gmed_res, dtype=float)
shapiro_w_im, shapiro_p_re, shapiro_p_im = [np.empty_like(shapiro_w_re) for _ in range(3)]
for freq in range(xd_data_b.shape[1]):
    for tint in range(new_no_tints):
        # NaNs are removed here, so a fully flagged bin yields an empty sample
        xd_data_bft = flt_nan(xd_data_b[:, freq, 2*tint:2*tint+2, :].flatten())
        if xd_data_bft.size < 3:
            # scipy's shapiro needs at least 3 samples; this also covers empty bins
            re_shapiro_stat = im_shapiro_stat = np.nan
            re_shapiro_pval = im_shapiro_pval = np.nan
        else:
            re_shapiro = shapiro(xd_data_bft.real)
            im_shapiro = shapiro(xd_data_bft.imag)
            re_shapiro_stat = re_shapiro.statistic
            im_shapiro_stat = im_shapiro.statistic
            re_shapiro_pval = re_shapiro.pvalue
            im_shapiro_pval = im_shapiro.pvalue

        # Store the per-bin values (NaN for bins that could not be tested), not
        # the result objects, which may be stale from an earlier bin
        shapiro_w_re[freq, tint] = re_shapiro_stat
        shapiro_w_im[freq, tint] = im_shapiro_stat
        shapiro_p_re[freq, tint] = re_shapiro_pval
        shapiro_p_im[freq, tint] = im_shapiro_pval

In [None]:
# Panel titles laid out like the array grid below: rows are Re / Im, columns are W statistic / p value
titles = [[r'$W \; \mathrm{statistic} \; - \; \mathfrak{Re}(V)$', \
          r'$p \; \mathrm{value} \; - \; \mathfrak{Re}(V)$'], \
          [r'$W \; \mathrm{statistic} \; - \; \mathfrak{Im}(V)$', \
          r'$p \; \mathrm{value} \; - \; \mathfrak{Im}(V)$']]
grid_heatmaps([[shapiro_w_re, shapiro_p_re], [shapiro_w_im, shapiro_p_im]], \
             titles=titles, figsize=(14, 7), ybase=25, share_cbar=True, clip_pctile=1, \
             xlabels='Time bin', yticklabels=plt_chans, ylabels='Frequency channel')

In [None]:
# example histograms for aggregated visibility data

# picking frequency/time slice with the lowest Shapiro-Wilk p value for Re visibilities
re_shap_min = np.unravel_index(np.nanargmin(shapiro_p_re), shapiro_w_re.shape)
print('Slice {} has Shapiro-Wilk test p value {:.5f} for the Re component.\n'\
      .format(re_shap_min, shapiro_p_re[re_shap_min]))

print('If the p value < the chosen alpha level (usually taken to be 0.05), then the null hypothesis '\
      'is rejected and there is evidence that the data tested are not normally distributed')

# re_shap_min[1] is a time *bin*: take both integrations of that bin, i.e. the
# same sample the test was run on
hist_data = flt_nan(xd_data_b[:, re_shap_min[0], 2*re_shap_min[1]:2*re_shap_min[1]+2, :])

fig, ax = plt.subplots(ncols=2, figsize=(14, 7))

sns.histplot(hist_data.real, ax=ax[0], binwidth=2.5, kde=True)
sns.histplot(hist_data.imag, ax=ax[1], binwidth=2.5, kde=True)

ax[0].set_xlabel(r'$\mathfrak{Re}(V)$')
ax[1].set_xlabel(r'$\mathfrak{Im}(V)$')

plt.show()

#### Henze-Zirkler multivariate normality test

We use the HZ test as it considers the data in its entirety. Note that many alternative tests also exist, and that a single statistic cannot definitively establish whether or not the multivariate data are normally distributed.

In [None]:
# MAD-clipping about Re and Im separately, like HERA
# NaN entries are handed to mad_clip as pre-existing flags
nan_flags = np.isnan(xd_data_b)
# Element [1] of mad_clip's return value is used below as an index mask
re_clip_f = mad_clip(xd_data_b.real, axis=(0, 3), flags=nan_flags, verbose=True)[1]
im_clip_f = mad_clip(xd_data_b.imag, axis=(0, 3), flags=nan_flags, verbose=True)[1]

# Copy of the data in which every sample selected by either mask is turned into NaN
xd_data_bc = xd_data_b.copy()
xd_data_bc[re_clip_f + im_clip_f] *= np.nan

In [None]:
# HZ statistic, p value and normality verdict per (frequency, time bin)
hz_r = np.empty_like(shapiro_w_re)
hz_p = np.empty_like(hz_r)
hz_n = np.empty_like(hz_r, dtype=bool)

# Same three arrays for the MAD-clipped data
hz_r_c = np.empty_like(hz_r)
hz_p_c = np.empty_like(hz_r)
hz_n_c = np.empty_like(hz_n)

# Maps the 'MVN' entry of the test result onto a boolean
bool_dict = {'NO': False, 'YES': True, np.nan: False}

# (input data, statistic, p value, verdict) for the raw and the clipped run
hz_runs = ((xd_data_b, hz_r, hz_p, hz_n),
           (xd_data_bc, hz_r_c, hz_p_c, hz_n_c))

for freq in range(xd_data_b.shape[1]):
    for tint in range(new_no_tints):
        for src, stat_out, pval_out, mvn_out in hz_runs:
            sample = flt_nan(src[:, freq, 2*tint:2*tint+2, :].flatten())
            hz_out = mv_normality(sample, method='hz')
            stat_out[freq, tint] = hz_out['HZ']
            pval_out[freq, tint] = hz_out['p value']
            mvn_out[freq, tint] = bool_dict[hz_out['MVN']]

In [None]:
# HZ results for the data without MAD-clipping: statistic, p value and boolean normality verdict
titles = [r'$HZ \; \mathrm{statistic}$', r'$p \; \mathrm{value}$', 'Normality']
row_heatmaps([hz_r, hz_p, hz_n], titles=titles, figsize=(14, 7), share_cbar=False, \
             cbar_loc=None, clip_pctile=1, xlabels='Time bin', ylabel='Frequency channel', \
             yticklabels=plt_chans)

In [None]:
# MAD-clipped data: same three panels as for the unclipped data
titles = [r'$HZ \; \mathrm{statistic}$', r'$p \; \mathrm{value}$', 'Normality']
row_heatmaps([hz_r_c, hz_p_c, hz_n_c], titles=titles, figsize=(14, 7), share_cbar=False, \
             cbar_loc=None, clip_pctile=1, xlabels='Time bin', ylabel='Frequency channel', \
             yticklabels=plt_chans)

In [None]:
# picking frequency/time slice with the lowest HZ p value
hz_p_min = np.unravel_index(np.nanargmin(hz_p), hz_p.shape)
print('Slice {} has HZ test p value {:.5f}.\n'.format(hz_p_min, hz_p[hz_p_min]))

print('If the p value < the chosen alpha level (usually taken to be 0.05), then the null hypothesis '
      'is rejected and there is evidence that the data tested are not normally distributed')

# Unflagged visibilities of that channel and time bin (2 integrations)
worst_f, worst_t = hz_p_min
hz_data = flt_nan(xd_data_b[:, worst_f, 2*worst_t:2*worst_t+2, :].flatten())

bhz_med_ests = [
    (xd_gmed_res[hz_p_min], 'Geometric Median', 'ro'),
    (xd_hmean_res[hz_p_min], 'HERA Mean', 'go'),
]

g = sns.jointplot(x=hz_data.real, y=hz_data.imag, kind='kde', height=8,
                  cmap='Blues', fill=True, space=0)
for est, lbl, fmt in bhz_med_ests:
    g.ax_joint.plot(est.real, est.imag, fmt, label=lbl)
g.set_axis_labels(r'$\mathfrak{Re} \; (V)$', r'$\mathfrak{Im} \; (V)$', size=14)
legend_properties = {'size': 10}
g.ax_joint.legend(prop=legend_properties, loc='upper right')
g.ax_joint.annotate(tuple(slct_red_bl) + (str(xd_pol),), xy=(0.05, 0.05),
                    xycoords='axes fraction',
                    bbox=dict(boxstyle='round', facecolor='white'), size=14)
plt.tight_layout()
plt.show()

### Multivariate outlier detection

We use the robust Mahalanobis distance to detect outliers in the complex HERA data, as opposed to performing MAD-clipping on the $\mathfrak{Re}$ and $\mathfrak{Im}$ components separately.

#### Slice with worst HZ statistic

In [None]:
# Outlier analysis of the slice with the lowest HZ p value; the result's
# 'Mahalanobis Distance' and 'Outlier' columns are used in the cells below
mvo_res = mv_outlier(hz_data)
# Show the first five rows
mvo_res.head(5)

In [None]:
# MAD-clipping about Re and Im separately
# Element [1] of mad_clip's return value is kept for each component
re_clip_f_mvo = mad_clip(hz_data.real, verbose=True)[1]
im_clip_f_mvo = mad_clip(hz_data.imag, verbose=True)[1]

# New column combining the two results, for comparison with the 'Outlier' column
mvo_res['MAD-clip'] = re_clip_f_mvo + im_clip_f_mvo

In [None]:
# Same scatter three times, coloured by a different column of mvo_res each time
fig, axes = plt.subplots(ncols=3, figsize=(14, 6), sharey=True)
hue_columns = ('Mahalanobis Distance', 'Outlier', 'MAD-clip')
for panel, hue_col in zip(axes, hue_columns):
    sns.scatterplot(x=hz_data.real, y=hz_data.imag, hue=mvo_res[hue_col], ax=panel)
axes[0].set_ylabel(r'$\mathfrak{Im} \; (V)$')
for ax in axes:
    ax.set_xlabel(r'$\mathfrak{Re} \; (V)$')
plt.tight_layout()
plt.show()

In [None]:
# 97.5% quantile of the chi-square distribution is classically taken for outlier threshold
# let's look at a stricter threshold:
chi2_quantile = 0.99
# Chi-square quantile with 2 degrees of freedom
dist_threshold = chi2.ppf(chi2_quantile, 2)
mah_dists = mvo_res['Mahalanobis Distance'].values
strct_outliers = np.flatnonzero(mah_dists > dist_threshold)
print('Outliers when taking the chi-square quantile to be {}% are:'.format(chi2_quantile*100))
print('\n'.join(str(vis) for vis in hz_data[strct_outliers].tolist()))

#### Sifting through the entire dataset

In [None]:
# Number of data points in a full time bin: days x baselines x 2 integrations
no_dp = xd_data_b.shape[0]*xd_data_b.shape[-1]*2
mah_outliers = np.empty((xd_data.shape[1], new_no_tints, no_dp), dtype=bool)

# Freshly read data array (no NaN assignment applied in this cell), same baseline selection
uf_xd_data = sample_xd_data['data'][..., slct_bl_idxs]

for freq in range(xd_data_b.shape[1]):
    for tint in range(new_no_tints):
        xd_data_bft = uf_xd_data[:, freq, 2*tint:2*tint+2, :].flatten()
        if np.isnan(xd_data_bft).all():
            # No usable data: mark nothing as an outlier. A NaN placeholder
            # would be cast to True when stored in the boolean array.
            out_ft = np.zeros(xd_data_bft.size, dtype=bool)
        else:
            out_ft = np.asarray(mv_outlier(xd_data_bft)['Outlier'], dtype=bool)
        if out_ft.size < no_dp:
            # Short bin (e.g. the last bin of an odd number of integrations):
            # pad with False up to the common length
            out_ft = np.append(out_ft, np.zeros(no_dp - out_ft.size, dtype=bool))
        mah_outliers[freq, tint] = out_ft

In [None]:
# Percentage of the data points of each (frequency, time bin) marked as outliers
no_outliers = mah_outliers.sum(axis=-1)/mah_outliers.shape[-1]*100
row_heatmaps(no_outliers, clip_pctile=2, xlabels='Time bin', ylabel='Frequency channel', \
             titles=['Percentage of outliers found with the robust Mahalanobis distance '\
             'technique'], yticklabels=plt_chans)

In [None]:
# Number of time bins made of two integrations
tint_dim = new_no_tints - 1 if odd_arr else new_no_tints


def _binned_flag_stats(flag_mask):
    """Count the set entries of flag_mask per (frequency, time bin).

    Returns the counts per time integration (summed over the first and last
    axes), the counts per time bin, and the per-bin percentage relative to the
    number of data points in a full bin. With an odd number of integrations
    the last bin holds a single integration, so its percentage is doubled.
    """
    counts_xdbl = flag_mask.sum(axis=(0, -1))
    paired = counts_xdbl[:, :even_end].reshape((flags.shape[1], tint_dim, -1))
    binned = paired.sum(axis=-1)
    if odd_arr:
        binned = np.append(binned, np.expand_dims(counts_xdbl[:, -1], 1), axis=1)
    pct = binned / mah_outliers.shape[-1]*100
    if odd_arr:
        pct[:, -1] *= 2
    return counts_xdbl, binned, pct


# Calibration flags, MAD-clipping flags, and their combination
cal_flags_xdbl, cal_flags, cal_f_pct = _binned_flag_stats(flags)
mad_flags_xdbl, mad_flags, mad_f_pct = _binned_flag_stats(re_clip_f + im_clip_f)
comb_flags_xdbl, comb_flags, comb_f_pct = \
    _binned_flag_stats(flags + re_clip_f + im_clip_f)

In [None]:
# Titles of the three flag heatmaps (the duplicated word "of" has been removed),
# wrapped at 40 characters before being passed to row_heatmaps.
titles = [
    'Percentage of flagged data from calibration',
    'Percentage of flagged data from MAD-clipping',
    'Percentage of flagged data from calibration + MAD-clipping',
]
titles = [textwrap.fill(t, 40) for t in titles]

row_heatmaps([cal_f_pct, mad_f_pct, comb_f_pct], clip_pctile=2, figsize=(14, 6),
             titles=titles, xlabels='Time bin', ylabel='Frequency channel', yticklabels=plt_chans)

### Statistical properties of location estimates

#### Visualize data

In [None]:
# Amplitude (top row) and phase (bottom row) of the two location estimates:
# geometric median in the left column, HERA mean in the right column.
fig, axes = plt.subplots(ncols=2, nrows=2, figsize=(8, 8), sharey='row', sharex='col')

estimates = (('Geometric Median', xd_gmed_res), ('HERA Mean', xd_hmean_res))
for icol, (est_name, est_vals) in enumerate(estimates):
    axes[0][icol].plot(np.abs(est_vals), alpha=0.7)
    axes[1][icol].plot(np.angle(est_vals), alpha=0.7)
    axes[0][icol].set_title(est_name)
    axes[1][icol].set_xlabel('Frequency channel')

# y labels only on the left column, since the y axes are shared along rows
axes[0][0].set_ylabel(r'$|V|$')
axes[1][0].set_ylabel(r'$\varphi$')

# label every 25th position with the corresponding entry of plt_chans
tick_positions = np.arange(plt_chans.size)[::25]
tick_labels = plt_chans[::25]
for panel in axes.flat:
    panel.set_xticks(tick_positions)
    panel.set_xticklabels(tick_labels)

plt.tight_layout()
plt.show()

##### Fill in gaps

In [None]:
# grid interpolation to replace nan values
# Both location estimates (geometric median and HERA mean) are passed through
# robstat.ml.nan_interp2d; the interpolated arrays are what the later analysis cells use.
# NOTE(review): later cells pair these arrays with flt_chans along axis 0, so
# nan_interp2d presumably drops edge channels that it cannot interpolate - confirm.

gmed_interp2 = nan_interp2d(xd_gmed_res)
hmean_interp2 = nan_interp2d(xd_hmean_res)

In [None]:
# Row indices of xd_gmed_res (channels) that contain a nan in at least one column
chan_has_nan = np.isnan(xd_gmed_res).any(axis=1)
nan_chans = np.nonzero(chan_has_nan)[0]

# Channel list with the first and/or last channel removed when that channel is
# among the nan-affected ones; chans itself is left untouched.
first_kept = 1 if 0 in nan_chans else 0
last_kept = (chans.size - 1) if (chans.size - 1 in nan_chans) else chans.size
flt_chans = chans.copy()[first_kept:last_kept]

### Nonparametric kernel regression

In [None]:
# Local linear kernel regression of the visibility amplitudes against channel,
# done independently for every column (time bin) of the interpolated estimates.
# The phases could be treated in the same way; a 2D KDE, e.g. with
# statsmodels.nonparametric.kernel_density.KDEMultivariate, is an alternative.

kde_abs_gmed = np.empty_like(gmed_interp2, dtype=float)
kde_abs_hmean = np.empty_like(kde_abs_gmed)

for btint in range(gmed_interp2.shape[1]):
    gmed_amp = np.abs(gmed_interp2[:, btint])
    hmean_amp = np.abs(hmean_interp2[:, btint])

    # continuous regressor, local linear estimator, fixed bandwidth of 3
    kde_gmed = KernelReg(endog=gmed_amp, exog=flt_chans, reg_type='ll',
                         var_type='c', bw=[3])
    kde_hmean = KernelReg(endog=hmean_amp, exog=flt_chans, reg_type='ll',
                          var_type='c', bw=[3])

    # fit() returns several outputs; only the first (the fitted values) is kept
    gmed_fit = kde_gmed.fit(flt_chans)
    kde_abs_gmed[:, btint] = gmed_fit[0]

    hmean_fit = kde_hmean.fit(flt_chans)
    kde_abs_hmean[:, btint] = hmean_fit[0]

In [None]:
# Top row: kernel regression fits of the amplitudes; bottom row: residuals of
# the amplitudes about those fits, with their mean over columns drawn on top.
fig, axes = plt.subplots(ncols=2, nrows=2, figsize=(8, 6), sharex='col', sharey='row')

# the residuals are kept as notebook-level variables for the cells that follow
gmed_residual = np.abs(gmed_interp2) - kde_abs_gmed
hmean_residual = np.abs(hmean_interp2) - kde_abs_hmean

fit_columns = (
    ('Geometric Median KDE', kde_abs_gmed, gmed_residual),
    ('HERA Mean KDE', kde_abs_hmean, hmean_residual),
)
for fit_ax, res_ax, (fit_name, fit_vals, res_vals) in zip(axes[0], axes[1], fit_columns):
    fit_ax.plot(flt_chans, fit_vals, alpha=0.7)
    fit_ax.set_title(fit_name)

    res_ax.plot(flt_chans, res_vals, alpha=0.3)
    res_ax.plot(flt_chans, res_vals.mean(axis=1))
    res_ax.set_xlabel('Frequency channel')

axes[0][0].set_ylabel(r'$|V|$')
axes[1][0].set_ylabel('Residual')

plt.tight_layout()
plt.show()

#### Allan deviation

In [None]:
# Median channel spacing and median integration time. These are defined outside
# of the optional-dependency guard because the power spectrum cells further down
# use f_resolution too, and must keep working when allantools is unavailable.
f_resolution = np.median(np.ediff1d(freqs))
t_resolution = np.median(hd.integration_time)

# allantools is an optional dependency: only its import is guarded, so that an
# unrelated error in the analysis below is not mistaken for a missing package
try:
    import allantools
except ImportError:
    # get AllanTools package here https://github.com/aewallin/allantools
    # or do pip install allantools
    allantools = None
    print('AllanTools package not installed - skipping.')

if allantools is not None:
    # smallest tau: the power of ten just above the channel spacing (in magnitude)
    rate_exp = np.log10(f_resolution)
    tau_min = np.ceil(np.abs(rate_exp))*np.sign(rate_exp)

    # log-spaced taus spanning enough decades to cover the number of channels
    taus = np.logspace(tau_min, tau_min+np.ceil(np.log10(gmed_interp2.shape[0])), 1000)

    # One column of deviations per time bin. The columns are collected and stacked
    # afterwards, so the number of rows is whatever allantools returns for the
    # usable taus rather than an assumed floor(N/2).
    gmed_ad_cols = []
    hmean_ad_cols = []

    for btint in range(gmed_interp2.shape[1]):
        # do OADEV on residuals rather than on signal with structure
        gmed_taus2, gmed_ad, gmed_ade, gmed_ns = allantools.oadev(
            gmed_residual[:, btint], rate=1/f_resolution, data_type='freq', taus=taus)

        hmean_taus2, hmean_ad, hmean_ade, hmean_ns = allantools.oadev(
            hmean_residual[:, btint], rate=1/f_resolution, data_type='freq', taus=taus)

        gmed_ad_cols.append(gmed_ad)
        hmean_ad_cols.append(hmean_ad)

    gmed_ads = np.stack(gmed_ad_cols, axis=1)
    hmean_ads = np.stack(hmean_ad_cols, axis=1)

    fig, ax = plt.subplots(ncols=2, figsize=(10, 6), sharey=True)

    ax[0].loglog(gmed_taus2, gmed_ads, alpha=0.5)
    ax[1].loglog(hmean_taus2, hmean_ads, alpha=0.5)

    ax[0].set_title('Geometric Median')
    ax[1].set_title('HERA Mean')

    ax[0].set_ylabel('Overlapping Allan deviation')
    ax[0].set_xlabel(r'$\tau$')
    ax[1].set_xlabel(r'$\tau$')

    plt.suptitle('Allan deviation')

    plt.show()

#### Power spectrum

##### Single time integration

In [None]:
# Two-sided periodogram along the channel axis of the first column (time bin)
# of each interpolated estimate, with a Hann window and no detrending.
pgram_kwargs = dict(fs=1/f_resolution, window='hann', scaling='spectrum', nfft=None,
                    detrend=False, return_onesided=False)

gmed_delay, gmed_pspec = signal.periodogram(gmed_interp2[:, 0], **pgram_kwargs)

# reorder so that the delays increase monotonically
delay_sort = np.argsort(gmed_delay)
gmed_delay, gmed_pspec = gmed_delay[delay_sort], gmed_pspec[delay_sort]

hmean_delay, hmean_pspec = signal.periodogram(hmean_interp2[:, 0], **pgram_kwargs)

delay_sort = np.argsort(hmean_delay)
hmean_delay, hmean_pspec = hmean_delay[delay_sort], hmean_pspec[delay_sort]

In [None]:
# Power spectra of the two estimates for a single time bin, on a log scale
fig, ax = plt.subplots(figsize=(10, 6))

spectra = (
    (gmed_delay, gmed_pspec, 'Geometric Median'),
    (hmean_delay, hmean_pspec, 'HERA Mean'),
)
for delays, pspec, est_name in spectra:
    ax.plot(delays, pspec, label=est_name, alpha=0.8)

ax.set(yscale='log', ylabel='Power spectrum', xlabel='Delay')
ax.legend(loc='upper right')

plt.show()

##### All time integrations

In [None]:
# Same periodogram as for the single time bin, now evaluated along axis 0 for
# every column (time bin) of the interpolated estimates at once.
pgram_kwargs = dict(fs=1/f_resolution, window='hann', scaling='spectrum', nfft=None,
                    detrend=False, return_onesided=False, axis=0)

gmed_delay, gmed_pspec = signal.periodogram(gmed_interp2, **pgram_kwargs)

# reorder the rows so that the delays increase monotonically
delay_sort = np.argsort(gmed_delay)
gmed_delay, gmed_pspec = gmed_delay[delay_sort], gmed_pspec[delay_sort, :]

hmean_delay, hmean_pspec = signal.periodogram(hmean_interp2, **pgram_kwargs)

delay_sort = np.argsort(hmean_delay)
hmean_delay, hmean_pspec = hmean_delay[delay_sort], hmean_pspec[delay_sort, :]

In [None]:
# Power spectra of all time bins: one panel per estimator (individual spectra
# plus their mean over columns) and a third panel comparing the two means.
fig, axes = plt.subplots(ncols=3, figsize=(10, 6), sharey=True)
gmed_panel, hmean_panel, comp_panel = axes

estimators = (
    (gmed_panel, 'Geometric Median', gmed_delay, gmed_pspec, 'orange'),
    (hmean_panel, 'HERA Mean', hmean_delay, hmean_pspec, 'purple'),
)
for panel, est_name, delays, pspec, colour in estimators:
    pspec_mean = pspec.mean(axis=1)
    panel.plot(delays, pspec, alpha=0.3)
    panel.plot(delays, pspec_mean, alpha=1, color=colour)
    panel.set_title(est_name)
    # the same mean is drawn again in the comparison panel
    comp_panel.plot(delays, pspec_mean, alpha=0.8, color=colour, label=est_name)

gmed_panel.set_ylabel('Power spectrum')

for ax in axes:
    ax.set_yscale('log')
    ax.set_xlabel('Delay')

comp_panel.set_title('Comparison')
comp_panel.legend(loc='best')

plt.suptitle('Power spectra')

plt.show()