<center><strong><font size=+3>Outlier Detection of HERA Data with Robust Mahalanobis Distances</font></strong></center>
<br><br>
<center><strong><font size=+2>Matyas Molnar and Bojan Nikolic</font><br></strong></center>
<br><center><strong><font size=+1>Astrophysics Group, Cavendish Laboratory, University of Cambridge</font></strong></center>

In [None]:
import itertools
import os

import matplotlib as mpl
import matplotlib.patches as patches
import numpy as np
import seaborn as sns
from astropy.stats import mad_std
from matplotlib import pyplot as plt
from scipy import signal, special, stats
from sklearn.covariance import MinCovDet

from robstat.stdstat import mad_clip
from robstat.utils import DATAPATH, decomposeCArray, flt_nan

In [None]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
# Default figure settings: large, high-dpi figures when `plot_figs` is set,
# otherwise small, low-dpi figures.
plot_figs = True
mpl.rcParams.update({
    'figure.figsize': (12, 8) if plot_figs else (5, 3),
    'figure.dpi': 300 if plot_figs else 125,
})

In [None]:
# parameters shared by the MAD-clipping and RMD outlier detection in this notebook
sigma = 5.0  # clipping threshold, expressed as a number of normal standard deviations
min_N = 5  # minimum length of array to clip; arrays with fewer samples are not clipped

### Load HERA visibility data

In [None]:
# Path of the visibility archive, relative to the package data directory
vis_rel_path = 'lstb_no_avg/idr2_lstb_14m_ee_1.40949.npz'
vis_file = os.path.join(DATAPATH, vis_rel_path)
vis_data = np.load(vis_file)

In [None]:
# Unpack the arrays stored in the archive
data, redg = vis_data['data'], vis_data['redg']
pol = vis_data['pol'].item()

# NaN entries of the data are used as the flag mask throughout the notebook
flags = np.isnan(data)
no_chans = data.shape[1]
chans = np.arange(no_chans)
freqs = np.linspace(1e8, 2e8, 1025)[:-1]

# for cleaner file naming: store a whole-number sigma as an int (e.g. 5.0 -> 5)
if not isinstance(sigma, int) and sigma.is_integer():
    sigma = int(sigma)

### Visualize outlier detection

In [None]:
# example data
# good e.g. [:, 514, 34, 31] & [:, 766, 13, 4] bad e.g. [:, 782, 21, 60] & [:, 244, 38, 32]
eg_idx = (slice(None), 782, 21, 60)
eg_data = data[eg_idx]
# presumably flt_nan drops the NaN samples and decomposeCArray splits the complex
# values into (real, imag) columns -- confirm against robstat.utils
eg_finite = flt_nan(eg_data.flatten())
points = decomposeCArray(eg_finite)

In [None]:
# Fit the Minimum Covariance Determinant estimator to the example points.
# The fit is seeded so that the example is reproducible and consistent with
# the seeded estimator used for the full dataset in `mp_iter`.
robust_cov = MinCovDet(random_state=0).fit(points)

# relate in terms of probabilities:
# the probability that a normal deviate lies in the range between  \mu - n*\sigma and \mu + n*\sigma:
chi2_p = special.erf(sigma/np.sqrt(2))
# transform this probability to chi^2 quantile
chi2_q = stats.chi2.ppf(chi2_p, df=points.shape[1])

# # Fisher approximation:
# chi2_q = 0.5 * (sigma + np.sqrt(2*points.shape[1] - 1))**2
# chi2_p = stats.chi2.cdf(chi2_q, points.shape[1])

# # if want to set probability first instead
# chi2_p = 0.975
# chi2_q = stats.chi2.ppf(chi2_p, df=points.shape[1])

print('χ^2 quantile corresponding to {}σ (p = {:.7f}) is {:.7f}'.format(sigma, chi2_p, chi2_q))

# the distances returned by `mahalanobis` are compared directly with the chi^2
# quantile (their square root is what is contoured in the figures below)
rmd_outliers = np.where(robust_cov.mahalanobis(points) > chi2_q)[0]

In [None]:
# Axis labels for the real and imaginary visibility components,
# built from a single template
lab_tmpl = r'$\mathfrak{{{}}} \; (V)$'
real_lab = lab_tmpl.format('Re')
imag_lab = lab_tmpl.format('Im')

#### RMD

In [None]:
# get RMD ellipse parameters from covariance matrix
eig_vals, eig_vecs = np.linalg.eig(robust_cov.covariance_)
# square roots of the eigenvalues, used as the ellipse semi-axes before scaling
radii = np.sqrt(eig_vals)
# np.linalg.eig returns the eigenvectors as the *columns* of eig_vecs, so the
# eigenvector of the largest eigenvalue must be selected along axis 1
lrg_ev = eig_vecs[:, np.argmax(eig_vals)]
# orientation of the eigenvector belonging to eig_vals[0] (the axis drawn with
# radii[0]): for a symmetric matrix [[a, b], [b, c]] with eigenvalue lam,
# (b, lam - a) is an eigenvector
alpha = np.arctan2(eig_vals[0] - robust_cov.covariance_[0][0], robust_cov.covariance_[0][1])

In [None]:
# Figure: example points with the RMD inlier region and distance contours
fig, ax = plt.subplots(figsize=(7, 5))

# translucent red background; the white ellipse added below covers the region
# inside the RMD threshold
ax.set_facecolor('red')
ax.patch.set_alpha(0.25)

# threshold expressed as a distance (square root of the chi^2 quantile)
z = np.sqrt(chi2_q)
ellipse = patches.Ellipse(xy=robust_cov.location_, width=2*z*radii[0], height=2*z*radii[1], \
                          angle=alpha*180/np.pi, edgecolor='None', fc='white', lw=2, ls='--', 
                          zorder=0)
ax.add_patch(ellipse)

# points not flagged by the RMD threshold
inliers = np.delete(points, rmd_outliers, axis=0)
sns.scatterplot(x=inliers[:, 0], y=inliers[:, 1], ax=ax, label='Inliers', alpha=0.8)
sns.scatterplot(x=points[rmd_outliers, 0], y=points[rmd_outliers, 1], color='red', ax=ax, \
                label='Outliers', alpha=0.8, zorder=2)
sns.scatterplot(x=[robust_cov.location_[0]], y=[robust_cov.location_[1]], color='darkorange', \
                ax=ax, label='MCD location', marker='+', zorder=2)

# Create meshgrid of feature values
# (spans the current axis limits, i.e. the limits set by the scatter plots above)
xx, yy = np.meshgrid(np.linspace(plt.xlim()[0], plt.xlim()[1], 1001),
                     np.linspace(plt.ylim()[0], plt.ylim()[1], 1001))
zz = np.c_[xx.ravel(), yy.ravel()]

# Calculate the MCD based Mahalanobis distances
mahal_robust_cov = robust_cov.mahalanobis(zz)
mahal_robust_cov = mahal_robust_cov.reshape(xx.shape)
# labelled dashed contours of the square-rooted distances
robust_contour = ax.contour(xx, yy, np.sqrt(mahal_robust_cov), cmap=plt.cm.YlOrBr_r, \
                            linestyles='--', zorder=0)
ax.clabel(robust_contour, robust_contour.levels, inline=True, fontsize=10)
# solid red contour at the clipping threshold
thresh_contour = ax.contour(xx, yy, np.sqrt(mahal_robust_cov), [np.sqrt(chi2_q)], colors='red', \
                            linewidths=2, zorder=0)

ax.annotate('Robust Mahalanobis Distance', xy=(0.62, 0.10), xycoords='axes fraction', \
            bbox=dict(boxstyle='round', facecolor='white'), size=10, color='darkorange')

ax.annotate(r'$\chi_{\mathrm{thresh}}$ = '+'{0:.3f}'.format(np.sqrt(chi2_q)), xy=(0.62, 0.03), \
            xycoords='axes fraction', bbox=dict(boxstyle='round', facecolor='white'), size=10, color='red')

ax.set_xlabel(real_lab)
ax.set_ylabel(imag_lab)

# keep the axis limits of this figure (only referenced by commented-out
# set_xlim/set_ylim lines in later cells)
rmd_lims = [ax.get_xlim(), ax.get_ylim()]

ax.legend(loc='upper right')
plt.tight_layout()
# plt.savefig('/Users/matyasmolnar/Dropbox/PhD/Papers/memo_mvo/Figures/rmd_eg_plot.pdf')
plt.show()

#### MAD-clipping 

In [None]:
# MAD-clip the two columns of `points` independently, keeping only the
# flag arrays returned by mad_clip
f_r, f_i = (mad_clip(points[:, col], sigma=sigma, min_N=min_N)[1] for col in (0, 1))

# indices of points flagged in either column
mad_outliers = np.nonzero(f_r + f_i)[0]

In [None]:
# Figure: example points with the rectangular MAD-clipping bounds.
# `gap` is the width, in units of MAD, of the shaded band drawn outside the bounds.
gap = 5

fig, ax = plt.subplots(figsize=(7, 5))

# points not flagged by MAD-clipping in either component
inliers = np.delete(points, mad_outliers, axis=0)
sns.scatterplot(x=inliers[:, 0], y=inliers[:, 1], ax=ax, label='Inliers', alpha=0.8)
sns.scatterplot(x=points[mad_outliers, 0], y=points[mad_outliers, 1], color='red', ax=ax, \
                label='Outliers', alpha=0.8)
sns.scatterplot(x=[np.median(points[:, 0])], y=[np.median(points[:, 1])], color='darkgreen', \
                ax=ax, label='Marginal median', marker='+')

# per-column MAD-based standard deviation and median of the points
mads = mad_std(points, axis=0)
meds = np.median(points, axis=0)

# rectangle with sides median +/- sigma*MAD in each component
ax.vlines(x=meds[0]-sigma*mads[0], color='red', \
          ymin=meds[1]-sigma*mads[1], ymax=meds[1]+sigma*mads[1], lw=2)
ax.vlines(x=meds[0]+sigma*mads[0], color='red', \
          ymin=meds[1]-sigma*mads[1], ymax=meds[1]+sigma*mads[1], lw=2)
ax.hlines(y=meds[1]-sigma*mads[1], color='red', \
          xmin=meds[0]-sigma*mads[0], xmax=meds[0]+sigma*mads[0], lw=2)
ax.hlines(y=meds[1]+sigma*mads[1], color='red', \
          xmin=meds[0]-sigma*mads[0], xmax=meds[0]+sigma*mads[0], lw=2, \
          label='MAD-clip')

# shade a band of width gap*MAD outside the rectangle: full-height spans to the
# left and right, and rectangles below and above
ax.axvspan(meds[0]-(sigma+gap)*mads[0], meds[0]-sigma*mads[0], alpha=0.25, color='red', lw=0)
ax.axvspan(meds[0]+sigma*mads[0], meds[0]+(sigma+gap)*mads[0], alpha=0.25, color='red', lw=0)

rect = patches.Rectangle((meds[0]-sigma*mads[0], meds[1]-(sigma+gap)*mads[1]), 2*sigma*mads[0], \
                         gap*mads[1], edgecolor='r', facecolor='red', \
                         alpha=0.25, lw=0)
ax.add_patch(rect)

rect = patches.Rectangle((meds[0]-sigma*mads[0], meds[1]+sigma*mads[1]), 2*sigma*mads[0], \
                         gap*mads[1], edgecolor='r', facecolor='red', \
                         alpha=0.25, lw=0)
ax.add_patch(rect)

ax.annotate(r'$\mathrm{MAD}_{\mathfrak{Re}} \;$ = '+'{0:.3f}'.format(mads[0]), xy=(0.02, 0.10), \
            xycoords='axes fraction', bbox=dict(boxstyle='round', facecolor='white'), size=10, \
            color='darkgreen')

ax.annotate(r'$\mathrm{MAD}_{\mathfrak{Im}}$ = '+'{0:.3f}'.format(mads[1]), xy=(0.02, 0.03), \
            xycoords='axes fraction', bbox=dict(boxstyle='round', facecolor='white'), size=10, \
            color='darkgreen')

# the trailing fragment is a raw string: it contains the LaTeX sequences `\;`
# and `\m`, which are invalid escapes in a normal string literal
ax.annotate(r'$\mathrm{marg \; med} \pm$' + '{}'.format(sigma) + r'$\; \mathrm{MAD} $', xy=(0.74, 0.065), \
            xycoords='axes fraction', bbox=dict(boxstyle='round', facecolor='white'), size=10, \
            color='red')

ax.set_xlabel(real_lab)
ax.set_ylabel(imag_lab)

ax.legend(loc='upper right')

# view limits: the clipping rectangle plus a 20% margin
pad = 1.2
ax.set_xlim(left=meds[0]-pad*sigma*mads[0], right=meds[0]+pad*sigma*mads[0])
ax.set_ylim(bottom=meds[1]-pad*sigma*mads[1], top=meds[1]+pad*sigma*mads[1])
# ax.set_xlim(left=rmd_lims[0][0], right=rmd_lims[0][1])
# ax.set_ylim(bottom=rmd_lims[1][0], top=rmd_lims[1][1])

plt.tight_layout()
# plt.savefig('/Users/matyasmolnar/Dropbox/PhD/Papers/memo_mvo/Figures/mad_plot.pdf')
plt.show()

In [None]:
# Figure: MAD-clip rectangle, RMD ellipse and an axis-aligned elliptical bound
# overlaid on the same example points.
# Relies on `meds`, `mads`, `z`, `radii` and `alpha` computed in earlier cells.
# NOTE: `gap` is not referenced anywhere else in this cell.
gap = 5

fig, ax = plt.subplots(figsize=(7, 5))

sns.scatterplot(x=points[:, 0], y=points[:, 1], ax=ax, alpha=0.8)
sns.scatterplot(x=[np.median(points[:, 0])], y=[np.median(points[:, 1])], color='darkgreen', \
                ax=ax, label='Marginal median', marker='+', zorder=2)
sns.scatterplot(x=[robust_cov.location_[0]], y=[robust_cov.location_[1]], color='darkorange', \
                ax=ax, label='MCD location', marker='+', zorder=2)

# MAD-clip rectangle: median +/- sigma*MAD in each component
ax.vlines(x=meds[0]-sigma*mads[0], color='darkgreen', linestyle='--', 
          ymin=meds[1]-sigma*mads[1], ymax=meds[1]+sigma*mads[1], zorder=0)
ax.vlines(x=meds[0]+sigma*mads[0], color='darkgreen', linestyle='--', 
          ymin=meds[1]-sigma*mads[1], ymax=meds[1]+sigma*mads[1], zorder=0)
ax.hlines(y=meds[1]-sigma*mads[1], color='darkgreen', linestyle='--', 
          xmin=meds[0]-sigma*mads[0], xmax=meds[0]+sigma*mads[0], zorder=0)
ax.hlines(y=meds[1]+sigma*mads[1], color='darkgreen', linestyle='--', 
          xmin=meds[0]-sigma*mads[0], xmax=meds[0]+sigma*mads[0], \
          label='MAD-clip', zorder=0)

# RMD threshold ellipse, centred on the MCD location
ellipse = patches.Ellipse(xy=robust_cov.location_, width=2*z*radii[0], height=2*z*radii[1], \
                          angle=alpha*180/np.pi, edgecolor='darkorange', fc='None', lw=2, ls='--', \
                          zorder=0)
ax.add_patch(ellipse)
# for label, create dummy point out of view
sns.lineplot(x=[-1000], y=[-1000], color='darkorange', ax=ax, lw=2, ls='--', label='RMD bound')

# axis-aligned ellipse centred on the marginal medians with semi-axes sigma*MAD
ellipse = patches.Ellipse(xy=meds, width=2*sigma*mads[0], height=2*sigma*mads[1], 
                  edgecolor='purple', fc='None', lw=1, ls='--', alpha=0.3, zorder=0)
ax.add_patch(ellipse)
# for label, create dummy point out of view
sns.lineplot(x=[-1000], y=[-1000], color='purple', ax=ax, lw=1, ls='--', label='Elliptical bound', alpha=0.3)

ax.annotate(r'$\mathrm{MAD}_{\mathfrak{Re}} \;$ = '+'{0:.3f}'.format(mads[0]), xy=(0.02, 0.10), \
            xycoords='axes fraction', bbox=dict(boxstyle='round', facecolor='white'), size=10, \
            color='darkgreen')

ax.annotate(r'$\mathrm{MAD}_{\mathfrak{Im}}$ = '+'{0:.3f}'.format(mads[1]), xy=(0.02, 0.03), \
            xycoords='axes fraction', bbox=dict(boxstyle='round', facecolor='white'), size=10, \
            color='darkgreen')

ax.annotate(r'$\chi_{\mathrm{thresh}}$ = '+'{0:.3f}'.format(np.sqrt(chi2_q)), xy=(0.8, 0.065), \
            xycoords='axes fraction', bbox=dict(boxstyle='round', facecolor='white'), size=10, \
            color='darkorange')

ax.set_xlabel(real_lab)
ax.set_ylabel(imag_lab)

ax.legend(loc='upper right', prop={'size': 9}, framealpha=0.9)

# view limits: the MAD-clip rectangle plus a 20% margin (this also keeps the
# dummy legend points at -1000 out of view)
pad = 1.2
ax.set_xlim(left=meds[0]-pad*sigma*mads[0], right=meds[0]+pad*sigma*mads[0])
ax.set_ylim(bottom=meds[1]-pad*sigma*mads[1], top=meds[1]+pad*sigma*mads[1])
# ax.set_xlim(left=rmd_lims[0][0], right=rmd_lims[0][1])
# ax.set_ylim(bottom=rmd_lims[1][0], top=rmd_lims[1][1])

plt.tight_layout()
# plt.savefig('/Users/matyasmolnar/Dropbox/PhD/Papers/memo_mvo/Figures/rmd_mad_comp_bad.pdf')
plt.show()

#### Comparison of non-contaminated areas

In [None]:
# Area of the MAD-clip rectangle (sides 2*sigma*MAD in each component)
mad_area = 4 * sigma**2 * mads[0] * mads[1]
# Area of the RMD ellipse (semi-axes z*radii)
rmd_area = np.pi * z*radii[0] * z*radii[1]
area_ratio = 100 * rmd_area / mad_area

print('MAD-clip area: {:.2f}'.format(mad_area))
print('RMD-bound area: {:.2f}'.format(rmd_area))
print('Area ratio: {:.2f}%'.format(area_ratio))

### Outlier detection on unaveraged LST-binned HERA data

#### MAD-clipping

In [None]:
# Cache file for the MAD-clipping flags, named after the visibility file and sigma
mad_clip_f_name = os.path.basename(vis_file).replace('.npz', '.mad_clip_f_{}sig.npz'.format(sigma))
mad_clip_f_fn = os.path.join(DATAPATH, 'loc_res_nrao', mad_clip_f_name)

if os.path.exists(mad_clip_f_fn):
    # reuse previously computed flags
    mad_clip_f = np.load(mad_clip_f_fn)['flags']
else:
    # clip the real and imaginary parts independently along axis 0
    _, f_r = mad_clip(data.real, axis=0, sigma=sigma)
    _, f_i = mad_clip(data.imag, axis=0, sigma=sigma)

    # combine the two flag arrays, then XOR with the NaN mask
    # (presumably to drop samples that were already NaN -- confirm mad_clip flags NaNs)
    mad_clip_f = (f_r + f_i) ^ flags

    np.savez(mad_clip_f_fn, flags=mad_clip_f)

# apply min_N condition: unflag every slice along axis 0 that has fewer than
# min_N non-NaN samples
mad_f_min_n = (~flags).sum(axis=0) < min_N
mad_f_min_n = np.repeat(mad_f_min_n[np.newaxis, ...], flags.shape[0], axis=0)
mad_clip_f[mad_f_min_n] = False

print('Number of data point flagged from MAD-clipping: {:,}'.format(mad_clip_f.sum()))

#### RMD

In [None]:
import multiprocess as multiprocessing

# require a shared ctype array in order to fill in a numpy array in parallel

def create_mp_array(arr):
    """Copy `arr` into a newly allocated multiprocessing RawArray.

    Parameters
    ----------
    arr : numpy.ndarray
        Array whose dtype, shape and contents are replicated.

    Returns
    -------
    shared_arr : RawArray
        Flat ctypes buffer holding the data.
    new_arr : numpy.ndarray
        View onto `shared_arr` with the dtype and shape of `arr`; writing to
        it writes to the buffer.
    """
    c_type = np.ctypeslib.as_ctypes_type(arr.dtype)
    shared_arr = multiprocessing.RawArray(c_type, arr.size)
    # numpy view backed by the same memory as shared_arr
    new_arr = np.frombuffer(shared_arr, dtype=arr.dtype).reshape(arr.shape)
    np.copyto(new_arr, arr)
    return shared_arr, new_arr

def mp_init(shared_arr_, sharred_arr_shape_, sharred_arr_dtype_):
    """Pool initializer: publish the shared flag buffer to the worker's globals.

    Parameters
    ----------
    shared_arr_ : buffer
        Shared buffer that `mp_iter` writes its flags into.
    sharred_arr_shape_ : tuple of int
        Shape of the array stored in the buffer.
    sharred_arr_dtype_ : numpy.dtype
        dtype of the array stored in the buffer.

    Notes
    -----
    For backward compatibility the shape and dtype may also be given in the
    opposite order, (dtype, shape); a shape is never a numpy dtype, so the
    swap can be detected unambiguously.
    """
    global shared_arr, sharred_arr_shape, sharred_arr_dtype
    if isinstance(sharred_arr_shape_, np.dtype):
        sharred_arr_shape_, sharred_arr_dtype_ = sharred_arr_dtype_, sharred_arr_shape_
    shared_arr = shared_arr_
    sharred_arr_shape = sharred_arr_shape_
    sharred_arr_dtype = sharred_arr_dtype_

def mp_iter(s):
    """Flag RMD outliers along axis 0 of `data` for one index of the other axes.

    Parameters
    ----------
    s : sequence of 3 ints
        Index into axes 1, 2 and 3 of the global `data` array.

    The result is written into the shared buffer registered by `mp_init`;
    nothing is returned. Slices that are entirely NaN are left untouched, as
    are the entries of a slice that are not finite.
    """
    d = data[:, s[0], s[1], s[2]]
    if np.isnan(d).all():
        return

    # positions along axis 0 that receive a flag
    isfinite = np.isfinite(d).nonzero()[0]
    d = decomposeCArray(flt_nan(d))
    # seeded so that repeated runs give identical flags
    robust_cov = MinCovDet(random_state=0).fit(d)
    outliers = robust_cov.mahalanobis(d) > chi2_q

    # view the shared buffer using the shape/dtype installed by mp_init rather
    # than relying on identically named variables in the notebook namespace
    rmd_clip_f = np.frombuffer(shared_arr, sharred_arr_dtype).reshape(sharred_arr_shape)
    rmd_clip_f[isfinite, s[0], s[1], s[2]] = outliers

In [None]:
# Cache file for the RMD-clipping flags, named after the visibility file and sigma
rmd_clip_f_fn = os.path.join(DATAPATH, 'loc_res_nrao', os.path.basename(vis_file).\
                             replace('.npz', '.rmd_clip_f_{}sig.npz'.format(sigma)))

if not os.path.exists(rmd_clip_f_fn):

    # Start with every sample flagged; mp_iter overwrites only the finite
    # samples, so NaN samples stay True and are cleared by the XOR with
    # `flags` below.
    rmd_clip_f = np.ones_like(data, dtype=bool)
    d_shared, rmd_clip_f = create_mp_array(rmd_clip_f)
    dtype = rmd_clip_f.dtype
    shape = rmd_clip_f.shape

    # initargs follow the mp_init signature: (buffer, shape, dtype)
    m_pool = multiprocessing.Pool(multiprocessing.cpu_count(), initializer=mp_init, \
                                  initargs=(d_shared, shape, dtype))
    try:
        # one task per index of the trailing three axes of `data`
        _ = m_pool.map(mp_iter, np.ndindex(data.shape[1:]))
    finally:
        # always release the worker processes, even if a task raised
        m_pool.close()
        m_pool.join()

    rmd_clip_f = rmd_clip_f ^ flags

    np.savez(rmd_clip_f_fn, flags=rmd_clip_f)

else:
    rmd_clip_f = np.load(rmd_clip_f_fn)['flags']

# apply min_N condition (same mask as used for the MAD-clipping flags)
rmd_clip_f[mad_f_min_n] = False

print('Number of data point flagged from RMD-clipping: {:,}'.format(rmd_clip_f.sum()))

In [None]:
# Figure: number of flags summed over axes 0 and 3, MAD-clipping (top) vs
# RMD-clipping (bottom), on a common colour scale
fig, ax = plt.subplots(figsize=(7, 6), nrows=2, sharex=True)

mad_im = mad_clip_f.sum(axis=(0, 3)).T
rmd_im = rmd_clip_f.sum(axis=(0, 3)).T
tot_arr = np.concatenate((mad_im, rmd_im))
vmin, vmax = np.min(tot_arr), np.max(tot_arr)

# # for vectorization of plot
# ax[0].pcolormesh(mad_im, vmin=vmin, vmax=vmax)
# ax[0].invert_yaxis()
# im = ax[1].pcolormesh(rmd_im, vmin=vmin, vmax=vmax)
# ax[1].invert_yaxis()

for panel, flag_im in zip(ax, (mad_im, rmd_im)):
    # `im` ends up referring to the bottom panel, which feeds the colour bar
    im = panel.imshow(flag_im, aspect='auto', interpolation='nearest', vmin=vmin, vmax=vmax)
    panel.set_ylabel('Time integration')
ax[1].set_xlabel('Frequency channel')

fig.tight_layout()

# add color bar
fig.subplots_adjust(right=0.9)
cbar_ax = fig.add_axes([0.925, 0.125, 0.025, 0.815])
fig.colorbar(im, cax=cbar_ax)

# plt.savefig('/Users/matyasmolnar/Dropbox/PhD/Papers/memo_mvo/Figures/no_flags.pdf')
plt.show()

In [None]:
# Ratio of the total number of RMD flags to MAD flags
flag_ratio = rmd_clip_f.sum() / mad_clip_f.sum()
print('RMD-clipping flags {:.2f} times more data point than MAD-clipping'.format(flag_ratio))

In [None]:
# # to find indices for plotting
# rmd_idxs = np.logical_xor(rmd_clip_f, mad_clip_f).nonzero()
# rnd_idx = np.random.randint(low=0, high=rmd_idxs[0].size)
# r = [rmd_idxs[i][rnd_idx] for i in range(len(rmd_idxs))]

### Visibility & power spectrum results for clipped data

In [None]:
# Copy of the data with the MAD-flagged samples turned into NaN
mad_data = data.copy()
mad_data[mad_clip_f] = mad_data[mad_clip_f] * np.nan

In [None]:
# Copy of the data with the RMD-flagged samples turned into NaN
rmd_data = data.copy()
rmd_data[rmd_clip_f] = rmd_data[rmd_clip_f] * np.nan

In [None]:
bl_grp = 0 # only look at 0th baseline group

# rows of `redg` whose first column matches the selected group
in_grp = redg[:, 0] == bl_grp
slct_bl_idxs = np.nonzero(in_grp)[0]
# remaining columns of the first matching row
slct_red_bl = redg[slct_bl_idxs[0], 1:]
print('Looking at baselines redundant to ({}, {}, \'{}\')'.format(*slct_red_bl, pol))

In [None]:
# NaN-ignoring mean over axes 0 and 3 of the selected baselines, for both clipped datasets
mad_mean, rmd_mean = (np.nanmean(clipped[..., slct_bl_idxs], axis=(0, 3))
                      for clipped in (mad_data, rmd_data))

In [None]:
# Figure: amplitude of the averaged visibilities, MAD-clipped (top) vs
# RMD-clipped (bottom), on a common colour scale
fig, ax = plt.subplots(figsize=(7, 5), nrows=2, sharex=True)

mad_im, rmd_im = np.abs(mad_mean).T, np.abs(rmd_mean).T
tot_arr = np.concatenate((mad_im, rmd_im))
vmin, vmax = np.nanmin(tot_arr), np.nanmax(tot_arr)

for panel, amp_im in zip(ax, (mad_im, rmd_im)):
    # `im` ends up referring to the bottom panel, which feeds the colour bar
    im = panel.imshow(amp_im, aspect='auto', interpolation='nearest', vmin=vmin, vmax=vmax)
    panel.set_ylabel('Time integration')
ax[1].set_xlabel('Frequency channel')

fig.tight_layout()

# add color bar
fig.subplots_adjust(right=0.9)
cbar_ax = fig.add_axes([0.925, 0.15, 0.025, 0.75])
fig.colorbar(im, cax=cbar_ax)
cbar_ax.set_title(r'$\left| V \right|$')

plt.show()

In [None]:
# Figure: phase of the averaged visibilities, MAD-clipped (top) vs
# RMD-clipped (bottom), on a fixed [-pi, pi] colour scale
fig, ax = plt.subplots(figsize=(7, 5), nrows=2, sharex=True)

mad_im, rmd_im = np.angle(mad_mean).T, np.angle(rmd_mean).T
vmin, vmax = -np.pi, np.pi

for panel, phase_im in zip(ax, (mad_im, rmd_im)):
    # `im` ends up referring to the bottom panel, which feeds the colour bar
    im = panel.imshow(phase_im, aspect='auto', interpolation='nearest', vmin=vmin, vmax=vmax)
    panel.set_ylabel('Time integration')
ax[1].set_xlabel('Frequency channel')

fig.tight_layout()

# add color bar
fig.subplots_adjust(right=0.9)
cbar_ax = fig.add_axes([0.925, 0.15, 0.025, 0.75])
fig.colorbar(im, cax=cbar_ax)
cbar_ax.set_title(r'$\varphi$')

plt.show()

In [None]:
# Channel ranges [first, last] of the two bands; the last channel is included
# when the band is sliced later on
band_1, band_2 = [175, 334], [515, 694]

band_i = band_1 # select band here
# channel width: median spacing of the frequency grid
f_resolution = np.median(np.diff(freqs))

In [None]:
# Mean over axis 0 of the selected band and baselines, for both clipped datasets
jd_mad_mean = np.nanmean(mad_data[:, band_i[0]:band_i[1]+1, :, slct_bl_idxs], axis=0)
jd_rmd_mean = np.nanmean(rmd_data[:, band_i[0]:band_i[1]+1, :, slct_bl_idxs], axis=0)

# Drop baselines that are entirely NaN in either dataset. The same baselines
# are removed from both arrays so that they keep identical shapes and the
# baseline-pair indices used for the cross power spectra refer to the same
# baselines in each.
nan_bls = np.where(np.isnan(jd_mad_mean).all(axis=(0, 1)) |
                   np.isnan(jd_rmd_mean).all(axis=(0, 1)))[0]
jd_mad_mean = np.delete(jd_mad_mean, nan_bls, axis=2)
jd_rmd_mean = np.delete(jd_rmd_mean, nan_bls, axis=2)

In [None]:
# cross-PS between all baseline pairs
no_bls = jd_mad_mean.shape[2]
bl_pairs = list(itertools.permutations(np.arange(no_bls), r=2))
bls1, bls2 = [pair[0] for pair in bl_pairs], [pair[1] for pair in bl_pairs]

# settings shared by both cross-spectral-density estimates
csd_kwargs = dict(fs=1/f_resolution, window='hann', scaling='spectrum', nfft=None,
                  detrend=False, return_onesided=False, axis=0)

mad_delay, mad_pspec = signal.csd(jd_mad_mean[..., bls1], jd_mad_mean[..., bls2],
                                  nperseg=jd_mad_mean.shape[0], **csd_kwargs)

# two-sided output: reorder so that the delays are ascending
delay_sort = np.argsort(mad_delay)
mad_delay, mad_pspec = mad_delay[delay_sort], mad_pspec[delay_sort, :]

rmd_delay, rmd_pspec = signal.csd(jd_rmd_mean[..., bls1], jd_rmd_mean[..., bls2],
                                  nperseg=jd_rmd_mean.shape[0], **csd_kwargs)

delay_sort = np.argsort(rmd_delay)
rmd_delay, rmd_pspec = rmd_delay[delay_sort], rmd_pspec[delay_sort, :]

# average over the baseline pairs (last axis)
mad_pspec = np.nanmean(mad_pspec, axis=2)
rmd_pspec = np.nanmean(rmd_pspec, axis=2)

In [None]:
# Figure: power spectra per method (left, middle) and their averages compared (right)
fig, axes = plt.subplots(ncols=3, figsize=(7.5, 5), sharey=True)

panels = (('MAD-clipped', mad_delay, mad_pspec, 'orange'),
          ('RMD-clipped', rmd_delay, rmd_pspec, 'purple'))
for ax, (method, delay, pspec, colour) in zip(axes, panels):
    # all columns of the spectrum, with their average overlaid
    ax.plot(delay, np.abs(pspec), alpha=0.3)
    ax.plot(delay, np.abs(pspec.mean(axis=1)), alpha=1, color=colour)
    ax.set_title(method)
    # average over times
    axes[2].plot(delay, np.abs(pspec.mean(axis=1)), alpha=0.6, color=colour, label=method)

axes[0].set_ylabel('Power spectrum')

for ax in axes:
    ax.set_yscale('log')
    ax.set_xlabel('Delay')

axes[2].set_title('Comparison')
axes[2].legend(loc='best', prop={'size': 8})

plt.tight_layout()
plt.show()

In [None]:
# Compare the averaged MAD- and RMD-clipped power spectra at delays with
# absolute value of at least `dly_lim`
dly_lim = 1.25e-6
high_dlys = np.where(np.abs(mad_delay) >= dly_lim)

# amplitude of each averaged power spectrum
mad_avg = np.abs(mad_pspec.mean(axis=1))
rmd_avg = np.abs(rmd_pspec.mean(axis=1))

resid = (mad_avg - rmd_avg)[high_dlys]
log_resid = (np.log10(mad_avg) - np.log10(rmd_avg))[high_dlys]
# scientific notation with 4 decimal places
print('Mean residual: {:.4e}'.format(resid.mean()))
print('Mean log residual: {:.4e}'.format(log_resid.mean()))

fig, ax = plt.subplots(figsize=(7.5, 5))
ax.scatter(mad_delay[high_dlys], resid, s=4, alpha=0.8, label='MAD - RMD residual')
ax.axhline(np.mean(resid), ls='--', color='orange', label='Mean residual')
# grey band marks the delays excluded from the residuals
ax.axvspan(-dly_lim, dly_lim, alpha=0.4, color='grey')
ax.ticklabel_format(axis='both', style='sci', scilimits=(0, 0))
ax.set_xlabel('Delay')
ax.set_ylabel('PS residual')
ax.legend(loc='upper right', prop={'size': 8})

plt.tight_layout()
plt.show()