<center><strong><font size=+3>Wavelet Power Spectrum Analysis in Napari</font></strong></center>
<br><br>
<center><strong><font size=+2>Matyas Molnar and Bojan Nikolic</font><br></strong></center>
<br><center><strong><font size=+1>Astrophysics Group, Cavendish Laboratory, University of Cambridge</font></strong></center>

### View CWT products in napari

In [None]:
import os

import numpy as np
import pandas as pd
from astropy.stats import mad_std, sigma_clip
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap, LogNorm
from mpl_toolkits.axes_grid1 import make_axes_locatable

from scipy import signal

import pywt
import scaleogram as scg

from hera_cal.io import HERAData

In [None]:
from matplotlib import rc

# Serif (Computer Modern) fonts, with all figure text rendered through LaTeX
rc('font', family='serif', serif=['cm'])
rc('text', usetex=True)
# Load the AMS packages in the LaTeX preamble used for figure labels
rc('text.latex', preamble=r'\usepackage{amssymb} \usepackage{amsmath}')

In [None]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
# Open the two saved CWT power archives (used below as Band 1 and Band 2)
npz_f1, npz_f2 = np.load('cwt_power_b1.npz'), np.load('cwt_power_b2.npz')
# Power arrays for each band
data1, data2 = npz_f1['power'], npz_f2['power']

In [None]:
# Quantities read from the first archive only
lsts, redg, scales = npz_f1['lsts'], npz_f1['redg'], npz_f1['scales']
wavelet = npz_f1['wavelet'].item()  # unwrap the stored array to a plain Python object

# Per-band channel indices and frequencies
chans1, freqs1 = npz_f1['chans'], npz_f1['freqs']
chans2, freqs2 = npz_f2['chans'], npz_f2['freqs']

In [None]:
# Use the cluster directory when it exists, otherwise fall back to the local sample data
cluster_lstb_dir = '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/LSTBIN/one_group/grp1'
if os.path.exists(cluster_lstb_dir):
    lstb_dir = cluster_lstb_dir
else:
    lstb_dir = '/Users/matyasmolnar/Downloads/HERA_Data/sample_data/'

# LST-binned file whose metadata (frequencies, antenna positions) is used below
zen_lstb = os.path.join(lstb_dir, 'zen.grp1.of1.LST.1.31552.HH.OCRSL.uvh5')
hd = HERAData(zen_lstb)

In [None]:
# Frequency axis and channel width are taken from the data file's metadata.
# Alternative kept for reference (synthetic grid instead of file metadata):
# freqs = np.linspace(1e8, 2e8, 1024+1)[:-1]
# freq_resolution = np.median(np.ediff1d(freqs))
freqs, freq_resolution = hd.freqs, hd.channel_width

In [None]:
# # ant filt to make more manageable
# data1 = data1[..., 100:200]
# data2 = data2[..., 100:200]

In [None]:
# [first, last] channel index of each band (used inclusively when slicing the visibilities)
band_1, band_2 = [175, 334], [515, 694]

# [lower, upper] LST bounds of each field, compared against `lsts`
field_1, field_2, field_3 = [1.25, 2.70], [4.50, 6.50], [8.50, 10.75]

In [None]:
# Indices into `lsts` that fall strictly inside each field's LST range
f1, f2, f3 = (
    np.where((lsts > lo) & (lsts < hi))[0]
    for lo, hi in (field_1, field_2, field_3)
)

In [None]:
# Show the time indices selected for each field, separated by blank lines
print(f1, f2, f3, sep='\n\n')

In [None]:
# Absolute antenna separation vector per baseline, keyed by the antenna pair
# held in columns 1 and 2 of each `redg` row.
antsep = {}
for row in redg:
    antsep[tuple(row[1:])] = np.abs(hd.antpos[row[2]] - hd.antpos[row[1]])

proj_ew = 14
# Projected East-West length (first position component) for every row of `redg`.
# The masks are built per row rather than by enumerating `antsep`, so the
# indices are guaranteed to line up with the baseline axis of the data even
# if an antenna pair were to appear more than once in `redg`.
proj_ew_len = np.array([np.abs(hd.antpos[row[2]] - hd.antpos[row[1]])[0] for row in redg])

# baselines with projected EW length < 14 m
nan_bls = np.flatnonzero(proj_ew_len < proj_ew).tolist()
# everything else, including a baseline of exactly `proj_ew`, is kept
ok_bls = np.flatnonzero(proj_ew_len >= proj_ew).tolist()

In [None]:
# Blank out (set to NaN) the short-EW baselines along the last axis of both bands, in place
for band_power in (data1, data2):
    band_power[..., nan_bls] *= np.nan

In [None]:
# Interactive inspection in napari; disabled by default (flip the guard to run it)
if False:
    
    import napari

    # get two bands side by side
    # careful because B1 and B2 will have different scales..
    data_m = np.concatenate((data1, data2), axis=1)
    # data_m = data2

    # only look at times from Field 2
    # in napari - set auto-contrast to "once", and adjust contrast limits & gamma
    # found that FPS 8 works well, with play mode "back and forth" (for time axis)

    # log of the power is displayed; `order` permutes the array axes for display
    viewer = napari.view_image(np.log(data_m[:, :, :, :]), colormap='turbo', ndisplay=2, order=(2, 3, 0, 1), \
                               gamma=1, interpolation='nearest', scale=(8, 1, 1, 1))

### Automatic detection

In [None]:
# TODO
# same vlims for saved figs
# convolve abs_mod_z images as don't just want a single bad point, want a bad region

In [None]:
SELECT_BAND = 'Band 2' # choose band: the cells below recognise 'Band 1' and 'Band 2'

In [None]:
# Pick the per-band inputs for the automatic detection:
#   sb              - band index (0 or 1)
#   b_freqs         - frequencies of the selected band
#   cwt_data        - CWT power of the selected band
#   mad_clip_thresh - MAD value above which scaleogram pixels are flagged
if SELECT_BAND == 'Band 1':
    sb = 0
    b_freqs = freqs1
    cwt_data = data1
    mad_clip_thresh = 2e-1
elif SELECT_BAND == 'Band 2':
    sb = 1
    b_freqs = freqs2
    cwt_data = data2
    mad_clip_thresh = 7e-2
else:
    # Fail here rather than with a NameError (or stale values) in a later cell
    raise ValueError(f"SELECT_BAND must be 'Band 1' or 'Band 2', got {SELECT_BAND!r}")

In [None]:
axis = (2, 3)  # times and baselines
# compute median and MAD-based standard deviation across times and baselines
med = np.nanmedian(cwt_data, axis=axis)  # shape (scales, freqs)
mad = mad_std(cwt_data, axis=axis, ignore_nan=True)  # shape (scales, freqs)

# calculate modified Z score
# The reduced axes are re-inserted with length 1 so that `med` and `mad`
# broadcast against `cwt_data`; this gives the same values as tiling them to
# the full shape, without materialising two extra full-size arrays.
modz = (cwt_data - np.expand_dims(med, axis=axis)) / np.expand_dims(mad, axis=axis)

In [None]:
fig, axes = plt.subplots(ncols=2, figsize=(7.5, 4), dpi=125, sharey=True)

# x: band frequencies in MHz; y: scale rows centred on 1..N, with row 1 at the top
extent = [b_freqs[0]/1e6, b_freqs[-1]/1e6, med.shape[0]+0.5, 0.5]

# Each panel gets its own LogNorm. A single Normalize instance shared between
# the two images is autoscaled by the first image only, so the MAD panel would
# be drawn (and its colorbar labelled) with the median's colour limits.
for ax, img, title in zip(axes, (med, mad), ('Median', 'MAD')):
    im = ax.imshow(img, aspect='auto', interpolation='none', cmap='jet', norm=LogNorm(), \
                   extent=extent)
    cax = make_axes_locatable(ax).append_axes('right', size='5%', pad=0.05)
    plt.colorbar(im, cax=cax)
    ax.set_title(title)
    ax.set_xlabel('Frequency [MHz]')

axes[0].set_ylabel('Wavelet scale')

plt.tight_layout()
plt.show()

In [None]:
# Median and MAD scaleograms drawn with pcolormesh on a logarithmic scale axis
fig, axes = plt.subplots(ncols=2, figsize=(8, 4), dpi=125)

im1 = axes[0].pcolormesh(b_freqs/1e6, scales+0.5, med, norm=LogNorm(), cmap='jet')
axes[0].invert_yaxis()
axes[0].set_yscale('log')

# Tick positions: 10 evenly spaced values between the band edges rounded to
# the nearest MHz, dropping the two end points and converting to integer MHz
xtk = np.linspace(round(b_freqs[0], -6), round(b_freqs[-1], -6), 10)
intticks = (xtk[1:-1]//1e6).astype(int)

axes[0].set_xticks(intticks)
axes[0].set_xticklabels(intticks)

divider = make_axes_locatable(axes[0])
cax1 = divider.append_axes('right', size='5%', pad=0.1)
plt.colorbar(im1, cax=cax1)

im2 = axes[1].pcolormesh(b_freqs/1e6, scales+0.5, mad, norm=LogNorm(), cmap='jet')
axes[1].invert_yaxis()
axes[1].set_yscale('log')
axes[1].set_xticks(intticks)
axes[1].set_xticklabels(intticks)

divider = make_axes_locatable(axes[1])
cax2 = divider.append_axes('right', size='5%', pad=0.1)
plt.colorbar(im2, cax=cax2, label=r'')

axes[0].set_title('Median')
axes[0].set_xlabel('Frequency [MHz]')
axes[1].set_title('MAD')
axes[1].set_xlabel('Frequency [MHz]')
axes[0].set_ylabel('Scale')
axes[1].set_ylabel('Scale')
# The right-hand label is drawn in white, i.e. kept for layout but not visible
# on a white background
axes[1].yaxis.label.set_color('white')

# axes[0].plot(scales*hd.channel_width*np.sqrt(2)/1e6+b_freqs[0]/1e6, scales, c='black')

fig.tight_layout()

# save_fig_dir = '/lustre/aoc/projects/hera/mmolnar/figs'
# plt.savefig(os.path.join(save_fig_dir, 'med_mad_b2_2.pdf'), bbox_inches='tight')

plt.show()

In [None]:
# Scaleogram pixels whose MAD exceeds the clipping threshold of the selected band
hp = mad > mad_clip_thresh
cmap_bool = ListedColormap(['green','red'])

fig, ax = plt.subplots(figsize=(5, 4), dpi=125)

im = ax.imshow(hp, extent=extent, cmap=cmap_bool, vmin=0, vmax=1, \
               aspect='auto', interpolation='None')

cax = make_axes_locatable(ax).append_axes('right', size='5%', pad=0.1)
cbar = plt.colorbar(im, cax=cax)

# one tick in the middle of each half of the two-colour bar
cbar.set_ticks([0.25, 0.75])
cbar.set_ticklabels(['False', 'True'])

ax.set(ylabel='Wavelet scale', xlabel='Frequency [MHz]')

plt.tight_layout()
plt.show()

In [None]:
# Baselines / time integrations for which every coefficient is NaN
cwt_is_nan = np.isnan(cwt_data)
flged_bls = cwt_is_nan.all(axis=(0, 1, 2))    # one entry per baseline (last axis)
flged_tints = cwt_is_nan.all(axis=(0, 1, 3))  # one entry per time (axis 2)
del cwt_is_nan

In [None]:
# Apply flags (all in place on `modz`)
# scaleogram pixels with high MAD, attributed to the cone of influence (edge-effect artifacts)
modz[hp, ...] = np.nan
# flagged baselines (from calibration & from proj EW < 14 m)
modz[..., flged_bls] = np.nan
# fully flagged times
modz[..., flged_tints, :] = np.nan

# Only look at relevant data: blank every time integration outside the three fields
in_field = np.zeros(modz.shape[2], dtype=bool)
in_field[np.concatenate((f1, f2, f3))] = True
deslct_tints = np.flatnonzero(~in_field)
modz[..., deslct_tints, :] = np.nan

# Look at absolute value of modified Z-score
abs_modz = np.abs(modz)

In [None]:
# Example (scale, freq) slice: 11th time integration of Field 2, baseline index 150
sample_abs_modz = abs_modz[..., f2[10], 150]

# NaN counts as non-zero for ndarray.nonzero(), so a fully flagged (all-NaN)
# slice has to be detected by excluding NaNs explicitly; otherwise the
# "flagged" branch below is never reached.
if np.any(~np.isnan(sample_abs_modz) & (sample_abs_modz != 0)):
    fig, ax = plt.subplots(figsize=(4, 4), dpi=125)
    
    norm = None  # LogNorm()
    im = ax.imshow(sample_abs_modz, aspect='auto', interpolation='None', norm=norm, extent=extent)

    divider = make_axes_locatable(ax)
    cax = divider.append_axes('right', size='5%', pad=0.05)
    
    ax.set_title(r'$|\mathrm{mod} Z|$')
    ax.set_ylabel('Wavelet scale')
    ax.set_xlabel('Frequency [MHz]')

    plt.colorbar(im, cax=cax)
    plt.tight_layout()
    plt.show()
    
else:
    print('Flagged baseline or time.')

In [None]:
# Distribution of |modified Z| over all coefficients (counts on a log axis)
fig, ax = plt.subplots(figsize=(6, 4), dpi=125)

all_abs_modz = abs_modz.ravel()
ax.hist(all_abs_modz, bins=100, density=False, log=True)
ax.set(xlabel=r'$|Z^{\mathrm{mod}}|$', ylabel='Number of Coefficients')

plt.tight_layout()
plt.show()

In [None]:
# Locate the single largest |modified Z| (NaNs ignored) and report where it occurs.
# Index order is (scale, freq, time, baseline).
worst_idx_flat = np.nanargmax(abs_modz)
worst_idx = np.unravel_index(worst_idx_flat, abs_modz.shape)
worst_msgs = (
    f'Worst slice is for LST {lsts[worst_idx[2]]:.2f} h and baseline {redg[worst_idx[3]][1:]}',
    f'Modified Z-score of worst slice is {modz[worst_idx]:.2f}',
)
for msg in worst_msgs:
    print(msg)

In [None]:
# Flat indices of `abs_modz` ordered from largest to smallest score.
# NaNs are replaced by 0 in a temporary copy so they sort to the end.
sorted_modz_idx = np.argsort(np.where(np.isnan(abs_modz), 0, abs_modz).ravel())[::-1]

In [None]:
# Rank (0 = worst) of the coefficient to inspect in the sorted |mod-Z| list
offender = 0

# (scale, freq, time, baseline) index of that coefficient and its full scaleogram slice
bad_idx = np.unravel_index(sorted_modz_idx[offender], abs_modz.shape)
sample_abs_modz = abs_modz[..., bad_idx[-2], bad_idx[-1]]

# lower colour limit for the |mod-Z| images (also used by later cells)
abs_modz_vmin = 1e-1

print(f'Bad slice is for LST {lsts[bad_idx[2]]:.2f} h and baseline {redg[bad_idx[3]][1:]}')
print(f'Modified Z-score of bad slice is {modz[bad_idx]:.2f}')

# NaN counts as non-zero for ndarray.nonzero(), so an all-NaN (flagged) slice
# must be detected by excluding NaNs explicitly.
if np.any(~np.isnan(sample_abs_modz) & (sample_abs_modz != 0)):
    
    fig, axes = plt.subplots(ncols=2, figsize=(7.5, 4), dpi=125)
    
    im1 = axes[0].imshow(sample_abs_modz, aspect='auto', interpolation='None', \
                         norm=LogNorm(vmin=abs_modz_vmin), cmap='jet', extent=extent)
    # circle the selected coefficient
    axes[0].scatter(b_freqs[bad_idx[1]]/1e6, bad_idx[0]+1, s=1000, fc='None', edgecolors='cyan', \
                    lw=2, ls='--')
    divider = make_axes_locatable(axes[0])
    cax1 = divider.append_axes('right', size='5%', pad=0.05)
    plt.colorbar(im1, cax=cax1)
    axes[0].set_title(r'Modified $Z$-score')
    
    im2 = axes[1].imshow(cwt_data[..., bad_idx[-2], bad_idx[-1]], aspect='auto', interpolation='None', \
                         norm=LogNorm(), cmap='jet', extent=extent)
    axes[1].scatter(b_freqs[bad_idx[1]]/1e6, bad_idx[0]+1, s=1000, fc='None', edgecolors='cyan', \
                    lw=2, ls='--')
    divider = make_axes_locatable(axes[1])
    cax2 = divider.append_axes('right', size='5%', pad=0.05)
    plt.colorbar(im2, cax=cax2)
    axes[1].set_title('Scaleogram')
    
    axes[0].set_ylabel('Wavelet scale')
    axes[0].set_xlabel('Frequency [MHz]')
    axes[1].set_xlabel('Frequency [MHz]')
    
    plt.tight_layout()
    plt.show()
    
else:
    print('Flagged baseline or time.')

In [None]:
abs_modz_thresh = 20

# `sorted_modz_idx` is in descending |mod-Z| order, so its first `n_bad`
# entries are exactly the coefficients above the threshold (comparisons with
# NaN are False, so flagged coefficients are never counted).
n_bad = int((abs_modz > abs_modz_thresh).sum())
bad_modz_rav_idxs = sorted_modz_idx[:n_bad]

# (n_bad, 4) array of (scale, freq, time, baseline) indices, unravelled in one
# vectorised call; it keeps its 2-D shape even when nothing exceeds the threshold.
bad_modz_idxs_arr = np.stack(np.unravel_index(bad_modz_rav_idxs, abs_modz.shape), axis=1)
bad_modz_idxs = [tuple(row) for row in bad_modz_idxs_arr]
bad_modz_t_bl_idxs = [i[2:] for i in bad_modz_idxs]
bad_modz_scores = abs_modz.ravel()[bad_modz_rav_idxs]

# Unique (time, baseline) pairs, keeping first-seen (i.e. worst-first) order
bad_modz_t_bl = list(dict.fromkeys(bad_modz_t_bl_idxs))

In [None]:
# Number of distinct (time, baseline) slices with at least one coefficient above threshold
len(bad_modz_t_bl)

In [None]:
# Position in `bad_modz_t_bl` (0 = worst) of the (time, baseline) slice to plot
bad_slice = 0

sample_abs_modz = abs_modz[..., bad_modz_t_bl[bad_slice][0], bad_modz_t_bl[bad_slice][1]]

blst = lsts[bad_modz_t_bl[bad_slice][0]]
bbl = redg[bad_modz_t_bl[bad_slice][1]][1:]
print(f'Bad slice is for LST {blst:.2f} h and baseline '\
      f'{bbl}')

# NaN counts as non-zero for ndarray.nonzero(), so an all-NaN (flagged) slice
# must be detected by excluding NaNs explicitly.
if np.any(~np.isnan(sample_abs_modz) & (sample_abs_modz != 0)):
    
    fig, axes = plt.subplots(ncols=2, figsize=(7.5, 4), dpi=125)
    
    # `bad_modz_idxs` is sorted worst-first, so the first match is the worst
    # coefficient of this (time, baseline) slice
    for i in bad_modz_idxs:
        if i[2:] == bad_modz_t_bl[bad_slice]:
            sf_idx = i
            break
    
    im1 = axes[0].imshow(sample_abs_modz, aspect='auto', interpolation='None', \
                         norm=LogNorm(vmin=abs_modz_vmin), cmap='jet', extent=extent)
    # circle worst pixel in scaleogram
    axes[0].scatter(b_freqs[sf_idx[1]]/1e6, sf_idx[0]+1, s=1000, fc='None', edgecolors='cyan', \
                    lw=2, ls='--')
    divider = make_axes_locatable(axes[0])
    cax1 = divider.append_axes('right', size='5%', pad=0.05)
    plt.colorbar(im1, cax=cax1)
    axes[0].set_title(r'Modified $Z$-score')
    
    
    im2 = axes[1].imshow(cwt_data[..., bad_modz_t_bl[bad_slice][0], bad_modz_t_bl[bad_slice][1]], \
                         aspect='auto', interpolation='None', norm=LogNorm(), cmap='jet', extent=extent)
    axes[1].scatter(b_freqs[sf_idx[1]]/1e6, sf_idx[0]+1, s=1000, fc='None', edgecolors='cyan', \
                    lw=2, ls='--')
    divider = make_axes_locatable(axes[1])
    cax2 = divider.append_axes('right', size='5%', pad=0.05)
    plt.colorbar(im2, cax=cax2)
    axes[1].set_title('Scaleogram')
    
    axes[0].annotate(SELECT_BAND + f'\n LST: {blst:.2f} h \n bl:{bbl} \n mod-Z:{modz[sf_idx]:.1f}', \
                     xycoords='axes fraction', xy=(0.5, 0.03), ha='center', va='bottom', fontsize=8, \
                     bbox=dict(facecolor='white', edgecolor='black', boxstyle='round, pad=0.3', alpha=0.5))
    
    axes[0].set_ylabel('Wavelet scale')
    axes[0].set_xlabel('Frequency [MHz]')
    axes[1].set_xlabel('Frequency [MHz]')
    
    plt.tight_layout()
    plt.show()
    
else:
    print('Flagged baseline or time.')

In [None]:
# Plotting & saving bad slices
# Disabled by default (flip the guard to run): writes one PNG per bad
# (time, baseline) slice, in parallel.

if False:
    
    import multiprocess as multiprocessing
    
    save_dir = os.path.join('/lustre/aoc/projects/hera/mmolnar/wavelets/figures/scg_modz', \
                            SELECT_BAND.replace(' ', '_').lower())
    # savefig does not create missing directories
    os.makedirs(save_dir, exist_ok=True)
    
    def bl_iter(bad_slice):
        """Plot and save the |mod-Z| map and scaleogram of one bad slice.

        Parameters
        ----------
        bad_slice : int
            Position in `bad_modz_t_bl` of the (time, baseline) pair to plot.

        The figure is written to `save_dir` as a PNG and then closed; nothing
        is returned.
        """

#         if bad_slice % 20 == 0:
#             print(bad_slice)

        sample_abs_modz = abs_modz[..., bad_modz_t_bl[bad_slice][0], bad_modz_t_bl[bad_slice][1]]

        blst = lsts[bad_modz_t_bl[bad_slice][0]]
        bbl = redg[bad_modz_t_bl[bad_slice][1]][1:]

        fig, axes = plt.subplots(ncols=2, figsize=(7.5, 4), dpi=300)

        # first match in the worst-first list = worst coefficient of this slice
        for i in bad_modz_idxs:
            if i[2:] == bad_modz_t_bl[bad_slice]:
                sf_idx = i
                break

        im1 = axes[0].imshow(sample_abs_modz, aspect='auto', interpolation='None', norm=LogNorm(), \
                             cmap='jet', extent=extent)
        # circle worst pixel in scaleogram
        axes[0].scatter(b_freqs[sf_idx[1]]/1e6, sf_idx[0]+1, s=1000, fc='None', edgecolors='cyan', \
                        lw=2, ls='--')
        divider = make_axes_locatable(axes[0])
        cax1 = divider.append_axes('right', size='5%', pad=0.05)
        plt.colorbar(im1, cax=cax1)
        axes[0].set_title(r'Modified $Z$-score')


        im2 = axes[1].imshow(cwt_data[..., bad_modz_t_bl[bad_slice][0], bad_modz_t_bl[bad_slice][1]], \
                             aspect='auto', interpolation='None', norm=LogNorm(), cmap='jet', extent=extent)
        axes[1].scatter(b_freqs[sf_idx[1]]/1e6, sf_idx[0]+1, s=1000, fc='None', edgecolors='cyan', \
                        lw=2, ls='--')
        divider = make_axes_locatable(axes[1])
        cax2 = divider.append_axes('right', size='5%', pad=0.05)
        plt.colorbar(im2, cax=cax2)
        axes[1].set_title('Scaleogram')

        axes[0].annotate(SELECT_BAND + f'\n LST: {blst:.2f} h \n bl:{bbl} \n mod-Z:{modz[sf_idx]:.1f}', \
                         xycoords='axes fraction', xy=(0.5, 0.03), ha='center', va='bottom', fontsize=8, \
                         bbox=dict(facecolor='white', edgecolor='black', boxstyle='round, pad=0.3', alpha=0.5))

        axes[0].set_ylabel('Wavelet scale')
        axes[0].set_xlabel('Frequency [MHz]')
        axes[1].set_xlabel('Frequency [MHz]')

        fig.tight_layout()

        save_fn = 'scg_modz_' + str(bad_modz_t_bl[bad_slice]).replace(', ', '_') + '.png'
        fig.savefig(os.path.join(save_dir, save_fn), bbox_inches='tight')

        # close this specific figure so worker memory does not grow
        plt.close(fig)
        
    m_pool = multiprocessing.Pool(multiprocessing.cpu_count())
    try:
        _ = m_pool.map(bl_iter, range(len(bad_modz_t_bl)))
    finally:
        # release the workers even if a task raised
        m_pool.close()
        m_pool.join()

#### Affected antennas & baselines

In [None]:
# One row per above-threshold coefficient: where it sits and which antennas form its baseline
bbl_idxs = bad_modz_idxs_arr[:, 3]
rbls = redg[bbl_idxs]

df = pd.DataFrame(data={
    'time': bad_modz_idxs_arr[:, 2],
    'bl_idx': bbl_idxs,
    'scale': bad_modz_idxs_arr[:, 0],
    'freq': bad_modz_idxs_arr[:, 1],
    'abs_Z': bad_modz_scores,
    'ant1': rbls[:, 1],
    'ant2': rbls[:, 2],
})

In [None]:
# Stack the two antenna columns so every coefficient is attributed to both of its antennas
a1, a2 = (
    df[['abs_Z', ant_col]].rename(columns={ant_col: 'ant'})
    for ant_col in ('ant1', 'ant2')
)

dfc = pd.concat([a1, a2], ignore_index=True)

In [None]:
# Mean |Z| per antenna, expressed relative to the smallest antenna mean
ant_mean = dfc.groupby('ant').mean()
df_ant = ant_mean - ant_mean.min()

In [None]:
# Bar chart of the per-antenna values in `df_ant`
fig, ax = plt.subplots(figsize=(8, 5), dpi=100)
df_ant.plot.bar(ax=ax)
fig.tight_layout()
plt.show()

In [None]:
df_bl = df.groupby(by='bl_idx')['abs_Z'].mean()  # mean |Z| per baseline; the excess over the minimum is only taken later, when plotting

In [None]:
# Line plot of mean |Z| against baseline index
fig, ax = plt.subplots(figsize=(8, 5), dpi=100)
df_bl.plot(ax=ax)
fig.tight_layout()
plt.show()

In [None]:
# Get bad antennae

hera_pkgs = '/Users/matyasmolnar/Downloads/HERA_Data/hera_packages'
if not os.path.exists(hera_pkgs):
    hera_pkgs = '/lustre/aoc/projects/hera/mmolnar/hera_packages'

bad_ants_folder = os.path.join(hera_pkgs, 'hera_pipelines/pipelines/h1c/idr2/v2/bad_ants/')

idr2_jds = [2458098, 2458099, 2458101, 2458102, 2458103, 2458104, 2458105, \
            2458106, 2458107, 2458108, 2458109, 2458110, 2458111, 2458112, \
            2458113, 2458114, 2458115, 2458116]

# One array of antenna numbers per day. `ndmin=1` keeps the result 1-D even for
# a file with a single entry; without it np.loadtxt returns a 0-d array, which
# set() cannot iterate over.
h1c_idr2_bad_ants = [np.loadtxt(os.path.join(bad_ants_folder, f'{jd}.txt'), ndmin=1).astype(int) \
                                              for jd in idr2_jds]
# Antennas listed as bad on every one of the days, sorted
h1c_idr2_bad_ants = np.array(sorted(list(set.intersection(*map(set, h1c_idr2_bad_ants)))))

# Antenna positions with the always-bad antennas removed
antpos = hd.antpos
antpos = {k:v for k, v in antpos.items() if k not in h1c_idr2_bad_ants}

# Plot coordinates: x relative to antenna 1, y relative to antenna 65
antpos_arr = np.array(list(antpos.values()))
x_pos = antpos_arr[:, 0] - antpos[1][0]  # subtract x pos of antenna 1
y_pos = antpos_arr[:, 1] - antpos[65][1]
ants = antpos.keys()

In [None]:
fig, ax = plt.subplots(figsize=(6, 6), dpi=100)


# Plot antennae
ax.tick_params(labelsize=14, size=5)
ax.scatter(x_pos, y_pos, s=600, edgecolors='black', c='w', alpha=0.5)
# x_pos / y_pos are in the iteration order of `antpos`
for i, ant in enumerate(antpos):
    ax.text(x_pos[i], y_pos[i], str(ant), va='center', ha='center', color='black', \
            fontsize=12, weight='bold')

# Plot baselines
# Line width encodes the excess of each baseline's mean |Z| over the smallest one
ap_dict = {k: i for i, k in enumerate(antpos.keys())}
lws = df_bl.values - df_bl.values.min()
lws /= 4
# Guard the normalisation: if every baseline has the same mean (e.g. only one
# baseline is present) the maximum excess is 0, and dividing by it would give
# a NaN alpha that matplotlib rejects.
lw_max = lws.max()
for i, bl in enumerate(redg[df_bl.index.values, 1:]):
    rel_lw = lws[i]/lw_max if lw_max > 0 else 0.
    alpha = 0.4 + 0.4*rel_lw
    ax.plot((x_pos[ap_dict[bl[0]]], x_pos[ap_dict[bl[1]]]), \
            (y_pos[ap_dict[bl[0]]], y_pos[ap_dict[bl[1]]]), lw=lws[i], alpha=alpha)

ax.set_xlabel("East-West [m]", fontsize=14)
ax.set_ylabel("North-South [m]", fontsize=14)
ax.set_title('Baselines with Increased WPS', fontsize=14)
ax.set_xlim(-65, 57)
ax.set_ylim(-77, 65)
ax.set_aspect('equal')
plt.tight_layout()

# save_fig_dir = '/Users/matyasmolnar/Desktop/Thesis/CHAP-5/FIGS/'
# plt.savefig(os.path.join(save_fig_dir, 'modz_bls.pdf'), bbox_inches='tight')

plt.show()

#### Affected times

In [None]:
# Largest |Z| per time integration, expressed as the excess over the detection threshold
max_abs_z_per_time = df.groupby(by='time')['abs_Z'].max()
df_time = max_abs_z_per_time - abs_modz_thresh

In [None]:
# Bar chart of the per-time excess |Z|
fig, ax = plt.subplots(figsize=(9, 4), dpi=100)
df_time.plot.bar(ax=ax)
fig.tight_layout()
plt.show()

In [None]:
# Largest |mod-Z| over the scale and frequency axes (NaNs ignored); result is indexed (time, baseline)
max_abs_modz = np.nanmax(abs_modz, axis=(0, 1))

In [None]:
# Map of the maximum |mod-Z| for every (LST, baseline) pair
fig, ax = plt.subplots(figsize=(7, 7), dpi=125)

# log colour scale with a fixed lower limit of 10
im = ax.pcolormesh(np.arange(redg.shape[0]), lsts, max_abs_modz, cmap='viridis', \
                   norm=LogNorm(vmin=1e1))

# horizontal colorbar in a thin axes appended below the map
divider = make_axes_locatable(ax)
cax = divider.append_axes('bottom', size='2.5%', pad=0.6)   
plt.colorbar(im, cax=cax, orientation='horizontal', label=r'$|Z^{\mathrm{mod}}|$', extend='min')

ax.set_xlabel('Baseline Index')
ax.set_ylabel('LST [h]')
# y limits are passed as (ceil of last LST, floor of first LST)
ax.set_ylim(np.array([np.ceil(lsts[-1]), np.floor(lsts[0])]))

fig.tight_layout()

# save_fig_dir = '/lustre/aoc/projects/hera/mmolnar/figs'
# plt.savefig(os.path.join(save_fig_dir, 'bad_bl_modz.pdf'), bbox_inches='tight')

plt.show()

In [None]:
from mpl_toolkits.axes_grid1 import AxesGrid

In [None]:
from mpl_toolkits.axes_grid1 import AxesGrid

# One panel per field, stacked vertically, sharing a single colorbar.
# constrained_layout is not requested here: AxesGrid positions its own axes.
fig = plt.figure(figsize=(7, 7), dpi=125)

# rect=111 makes the grid fill the figure. A 3-tuple such as (311, 312, 313)
# is read as (nrows, ncols, index), i.e. one cell of a 311 x 312 subplot grid.
# aspect=False lets the panels stretch instead of forcing equal data aspect.
grid = AxesGrid(fig, rect=111, nrows_ncols=(3, 1), axes_pad=0.05, aspect=False, cbar_mode='single', \
                cbar_location='bottom', cbar_pad=0.1)
    
for i, field in enumerate([f1, f2, f3]):
    # identical colour limits in every panel so the single colorbar applies to all
    im = grid[i].pcolormesh(np.arange(redg.shape[0]), lsts[field], max_abs_modz[field, :], cmap='viridis', \
                            norm=LogNorm(vmin=1e1, vmax=round(np.nanmax(max_abs_modz), -1)))
    grid[i].set_ylabel('LST [h]')
    grid[i].invert_yaxis()

grid[-1].set_xlabel('Baseline Index')

# when cbar_mode is 'single', all panels share grid.cbar_axes[0]
fig.colorbar(im, cax=grid.cbar_axes[0], orientation='horizontal', \
             label=r'$|Z^{\mathrm{mod}}|$', extend='min')

plt.show()

In [None]:
# Maximum |mod-Z| per (LST, baseline), one panel per field, with a shared colorbar
fig, axes = plt.subplots(nrows=3, figsize=(7, 7), dpi=125, sharex=True, constrained_layout=True)

for i, field in enumerate([f1, f2, f3]):

    # same colour limits in every panel: lower limit 10, upper limit the
    # global maximum rounded to the nearest 10
    im = axes[i].pcolormesh(np.arange(redg.shape[0]), lsts[field], max_abs_modz[field, :], cmap='viridis', \
                            norm=LogNorm(vmin=1e1, vmax=round(np.nanmax(max_abs_modz), -1)))
    
    axes[i].set_ylabel('LST [h]')
    axes[i].invert_yaxis()

# colorbar is built from the last panel's mesh and attached to all three axes
fig.colorbar(im, ax=axes.flat, orientation='horizontal', label=r'$|Z^{\mathrm{mod}}|$', extend='min')

axes[-1].set_xlabel('Baseline Index')

# ax.set_ylim(np.array([np.ceil(lsts[-1]), np.floor(lsts[0])]))

# fig.tight_layout()

# save_fig_dir = '/lustre/aoc/projects/hera/mmolnar/figs'
# plt.savefig(os.path.join(save_fig_dir, 'bad_bl_modz.pdf'), bbox_inches='tight')

plt.show()

In [None]:
# Per-antenna maximum |mod-Z| at every LST: for each antenna, take the maximum
# over all baselines (rows of `redg`) that contain it. Antennas that appear in
# no baseline keep a NaN column.
max_abs_modz_ant = np.full((lsts.size, hd.ants.size), np.nan)
for col, ant in enumerate(hd.ants):
    bl_rows = np.nonzero(redg[:, 1:] == ant)[0]
    if bl_rows.size == 0:
        continue
    max_abs_modz_ant[:, col] = np.nanmax(max_abs_modz[:, bl_rows], axis=1)

In [None]:
# Map of the maximum |mod-Z| for every (LST, antenna) pair
fig, ax = plt.subplots(figsize=(7, 7), dpi=125)

# log colour scale with a fixed lower limit of 10
im = ax.pcolormesh(np.arange(hd.ants.size), lsts, max_abs_modz_ant, cmap='viridis', \
                   norm=LogNorm(vmin=1e1))

# horizontal colorbar in a thin axes appended below the map
divider = make_axes_locatable(ax)
cax = divider.append_axes('bottom', size='2.5%', pad=0.6)   
plt.colorbar(im, cax=cax, orientation='horizontal', label=r'$|Z^{\mathrm{mod}}|$', extend='min')

ax.set_xlabel('Antenna')
ax.set_ylabel('LST [h]')
# y limits are passed as (ceil of last LST, floor of first LST)
ax.set_ylim(np.array([np.ceil(lsts[-1]), np.floor(lsts[0])]))

# one tick per antenna column, labelled with the antenna number
ax.set_xticks(np.arange(hd.ants.size))
ax.set_xticklabels(hd.ants, rotation='vertical')

fig.tight_layout()

# save_fig_dir = '/lustre/aoc/projects/hera/mmolnar/figs'
# plt.savefig(os.path.join(save_fig_dir, 'bad_ant_modz.pdf'), bbox_inches='tight')

plt.show()

### Notes v1

**These notes were taken when analysing all baselines that do not contain flagged antennas. It was then noted that baselines with a projected EW length < 14 metres are discarded in the power spectrum computation, so the analysis was repeated, with comments written in Notes v2.**

In the below by "delay" we mean delay of the wavelet, with wavelets of small scale -> compressed wavelet -> rapidly changing details -> higher delay (since the signal is in frequency space to start off with)

Redundant analysis:
 - B1 baseline group 1, 3 power at high-ish delays
 - B2F2 baseline group 7 power at low delays
 - B1 & B2 baseline group 12 and 13 has more power at mid delays and high
 - B1 & B2 baseline group 20, 30, 31, 45 higher power at mid delays
 - B1 baseline group 67, 68, 72, 74, 82, 83, 88, 101 localized power at mid delays
 - B1 & B2 baseline group 77, 78, 81, 99, 105, 106 localized power at mid delays
 
All baselines analysis:

 - B1 baseline 12, 16, 69, 74 (bad), 212, 230, 324, 540, 655, 657, 660, 731 power at mid delays
 - B2 baseline 23, 27, 71, 196, 198 power at mid delays
 - B1 & B2 baseline 28, 29, 30, 33 (v bad), 37 (bad), 75, 77, 200, 201 (bad), 202, 203, 206, 208 (bad), 209, 210, 218 (bad), 220, 221, 227, 228, 315 (bad), 319, 321 (bad), 322, 323, 423, 426, 430, 439, 543, 544, 546, 659  power at mid delays
 - B1 baseline 1, 6, 8, 9, 20, 21, 25, 31, 34, 73, 79, 216, 223, 224, 225, 424, 440, 662, 696 localized power at mid delays
 - B2 baseline 3, 19, 35, 195, 212, 313, 432 localized power at mid delays
 - B1 & B2 baseline 14, 17, 204, 217, 219, 222, 316, 317, 433, 434, 436 localized power at mid delays
 
 
Other notes:
 - For B1 get localized power at mid delays at higher end of frequency band - recurring spot for a few baselines
 - Bls 33, 37, 74 bad, with lots of power at mid delays, especially in Band 1
 - Features do not appear transient in time - high power seems to be present across times for specific baselines
 - B1 seems worse than B2 for Field 2? looking at H1C limits, expect B1 to be worse, so effects will be more noticeable, especially when comparing on the same scale

### Notes v2

**Repeating the notes and looking at Band 2 alone (so that scales are not distorted) and not looking at baselines that have projected EW < 14 m**

### Look at some example slices where CWT looks bad

In [None]:
hr_full_fn = 'h1c_idr2.OCRSLP2XTK.npz'
# Read the visibility array and close the archive straight away; np.load on an
# .npz returns a lazily-read NpzFile that otherwise keeps the file handle open.
with np.load(hr_full_fn) as vis_npz:
    vis_data = vis_npz['arr_0']

In [None]:
# Choose which bad (time, baseline) slice to examine (0 = worst)
bad_slice = 0

sample_tint, sample_bl = bad_modz_t_bl[bad_slice][0], bad_modz_t_bl[bad_slice][1]
print(f'Examining baseline {redg[sample_bl, :][1:]} at LST {lsts[sample_tint]:.3f}')

In [None]:
# log10 CWT power of the selected (time, baseline) slice, one panel per band
fig, axes = plt.subplots(ncols=2, figsize=(7.5, 4), dpi=125, sharey=True)
for panel, band_power, band_title in zip(axes, (data1, data2), ('Band 1', 'Band 2')):
    panel.imshow(np.log10(band_power[..., sample_tint, sample_bl]), aspect='auto',
                 interpolation='none', cmap='jet')
    panel.set_title(band_title)
plt.tight_layout()
plt.show()

In [None]:
# Recompute and draw the wavelet power spectrum of the selected visibility
# slice with scaleogram, one panel per band
fig, axes = plt.subplots(ncols=2, figsize=(7.5, 4), dpi=125, sharey=True)

# # set same values scale
# sdata = np.concatenate((data1[..., sample_tint, sample_bl], data2[..., sample_tint, sample_bl]), axis=1)
# vmin = sdata.min()
# vmax = sdata.max()
# vlims = (vmin, vmax)
vlims = None
# styling passed to scaleogram for the cone-of-influence overlay
coikw = {'alpha':0.1, 'hatch':'/'}

# visibilities of the selected time and baseline, restricted to each band's
# channel range (upper channel inclusive)
sample_data1 = vis_data[sample_tint, band_1[0]:band_1[1]+1, sample_bl]
sample_data2 = vis_data[sample_tint, band_2[0]:band_2[1]+1, sample_bl]

r = scg.cws(freqs[chans1], sample_data1, scales=scales, wavelet=wavelet, cscale='log', coi=True, \
            ax=axes[0], spectrum='power', yaxis='scale', title='WPS B1', \
            xlabel='Frequency', ylabel='Delay', yscale='log', cwt_fun='pywt', vlims=vlims, coikw=coikw)

_ = scg.cws(freqs[chans2], sample_data2, scales=scales, wavelet=wavelet, cscale='log', coi=True, \
            ax=axes[1], spectrum='power', yaxis='scale', title='WPS B2', \
            xlabel='Frequency', ylabel='Delay', yscale='log', cwt_fun='pywt', vlims=vlims, coikw=coikw)

plt.tight_layout()
plt.show()

In [None]:
# Left: |mod-Z| map of the bad slice. Right: its wavelet power spectrum.
# Relies on `sf_idx`, `sample_abs_modz`, `xtk` and `intticks` from earlier cells.
fig, axes = plt.subplots(ncols=2, figsize=(8, 4), dpi=125)

# WPS Scaleogram
ax, qmesh, values = scg.cws(b_freqs, sample_data2, scales=scales, wavelet=wavelet, cscale='log', \
    coi=True, ax=axes[1], spectrum='power', yaxis='frequency', \
    title='WPS', xlabel='Frequency [MHz]', ylabel=r'Delay [$\mu$s]', \
    yscale='log', cwt_fun='pywt', vlims=vlims, cbar=False, coikw=coikw)

# circle the worst coefficient; its scale index is converted with
# pywt.scale2frequency and divided by the channel width
axes[1].scatter(b_freqs[sf_idx[1]], pywt.scale2frequency(wavelet, sf_idx[0]+1)/hd.channel_width, \
                s=1000, fc='None', edgecolors='cyan', lw=2, ls='--')

divider = make_axes_locatable(axes[1])
cax1 = divider.append_axes('right', size='5%', pad=0.1)
plt.colorbar(qmesh, cax=cax1)

# x axis is in Hz here; ticks are placed in Hz and labelled in integer MHz
axes[1].set_xticks(xtk[1:-1])
axes[1].set_xticklabels(intticks)

# ticks at 1e-6 and 1e-5 are relabelled as 10^0 and 10^1 to match the microsecond axis label
axes[1].set_yticks([1e-6, 1e-5])
axes[1].set_yticklabels([r'$10^0$', r'$10^1$'])
# axes[1].set_yticklabels([r'$10^{-6}$', r'$10^{-5}$'])

# modZ plot
# im1 = axes[0].imshow(sample_abs_modz, aspect='auto', interpolation='None', \
#                      norm=LogNorm(vmin=abs_modz_vmin), cmap='jet', \
#                      extent=[extent[0], extent[1], scales[-1]+1, scales[0]])
im1 = axes[0].pcolormesh(b_freqs/1e6, scales+0.5, sample_abs_modz, \
                         norm=LogNorm(vmin=abs_modz_vmin), cmap='jet')
# circle worst pixel in scaleogram
axes[0].scatter(b_freqs[sf_idx[1]]/1e6, sf_idx[0]+1, s=1000, fc='None', edgecolors='cyan', \
                lw=2, ls='--')
axes[0].set_ylim((1, 18))
axes[0].invert_yaxis()
axes[0].set_yscale('log')
axes[0].set_xticks(intticks)
axes[0].set_xticklabels(intticks)

divider = make_axes_locatable(axes[0])
cax1 = divider.append_axes('right', size='5%', pad=0.1)
plt.colorbar(im1, cax=cax1)

axes[0].set_title(r'Modified $Z$-score')
axes[0].set_xlabel('Frequency [MHz]')
axes[0].set_ylabel('Scale')

fig.tight_layout()

# save_fig_dir = '/lustre/aoc/projects/hera/mmolnar/figs'
# plt.savefig(os.path.join(save_fig_dir, 'modz_cwtps.pdf'), bbox_inches='tight')

plt.show()

In [None]:
# Real and imaginary parts of the selected visibility slice, one panel per band
fig, axes = plt.subplots(ncols=2, figsize=(7.5, 4), dpi=125)

c = [chans1, chans2]
s = [sample_data1, sample_data2]
t = ['Band 1 Field 2', 'Band 2 Field 2']

for ax, band_chans, band_vis, band_title in zip(axes, c, s, t):
    freq_mhz = freqs[band_chans]/1e6
    ax.plot(freq_mhz, band_vis.real, label=r'$\mathfrak{Re}(V)$')
    ax.plot(freq_mhz, band_vis.imag, label=r'$\mathfrak{Im}(V)$')
    ax.set_xlabel('Frequency [MHz]')
    ax.set_title(band_title)

axes[0].set_ylabel('Visibility')
axes[0].legend(loc='best')

plt.tight_layout()
plt.show()

In [None]:
# Gather every baseline in the same redundant group as the selected one, so
# their visibilities can be compared. Column 0 of `redg` is the group label.
bl_grp = redg[sample_bl][0]
same_group = redg[:, 0] == bl_grp
red_grp = np.where(same_group)[0]

In [None]:
# Visibilities of the selected baseline (red) against the other baselines of
# its redundant group. Columns: Band 1 / Band 2. Rows: real / imaginary part.
fig, axes = plt.subplots(ncols=2, nrows=2, figsize=(7.5, 7.5), dpi=125, sharex='col')

# sample_data3 = np.nanmean(vis_data[sample_tint, band_1[0]:band_1[1]+1, red_grp], axis=0)
# sample_data4 = np.nanmean(vis_data[sample_tint, band_2[0]:band_2[1]+1, red_grp], axis=0)

# group members other than the selected baseline
red_grp_min = [i for i in red_grp if i != sample_bl]

# channel ranges of the two bands (upper channel inclusive)
band_slices = [slice(band_1[0], band_1[1]+1), slice(band_2[0], band_2[1]+1)]

# transposed so that each column is one baseline, as expected by ax.plot
sample_data3 = vis_data[sample_tint, band_slices[0], red_grp_min].T
sample_data4 = vis_data[sample_tint, band_slices[1], red_grp_min].T

c = [chans1, chans2]
s = [sample_data3, sample_data4]
t = ['Band 1 Field 2', 'Band 2 Field 2']

for col in range(2):
    freq_mhz = freqs[c[col]]/1e6
    anom_vis = vis_data[sample_tint, band_slices[col], sample_bl]
    for row, part in enumerate((np.real, np.imag)):
        # rest of the group first, then the selected baseline on top
        axes[row][col].plot(freq_mhz, part(s[col]), alpha=0.5)
        axes[row][col].plot(freq_mhz, part(anom_vis), lw=1.5, c='red')
    axes[1][col].set_xlabel('Frequency [MHz]')
    axes[0][col].set_title(t[col])

axes[0][0].set_ylabel(r'$\mathfrak{Re}(V)$')
axes[1][0].set_ylabel(r'$\mathfrak{Im}(V)$')
# axes[1].legend(loc='best')

plt.tight_layout()
plt.show()

In [None]:
# Four-panel summary for the selected bad slice (Band 2 data):
#   top-left     |mod-Z| map              top-right     group-mean WPS
#   bottom-left  visibilities             bottom-right  delay power spectra
# Relies on `sf_idx`, `sample_abs_modz`, `xtk` and `intticks` from earlier cells.
fig, axes = plt.subplots(ncols=2, nrows=2, figsize=(8, 7), dpi=125)

# NOTE(review): `cmap_def` is not defined anywhere else in this notebook;
# the 'tab10' colormap is assumed here - confirm the intended colours.
cmap_def = plt.get_cmap('tab10')

# group members other than the selected baseline: as indices into `redg` ...
red_grp_min = [i for i in red_grp if i != sample_bl]
# ... and as positions within `red_grp` (rows of the periodogram output below)
red_grp_min_pos = [j for j, i in enumerate(red_grp) if i != sample_bl]
sample_data4 = vis_data[sample_tint, band_2[0]:band_2[1]+1, red_grp_min].T

# Location of the worst coefficient, defined up-front because several panels use it
anom_freq = b_freqs[sf_idx[1]]/1e6
anom_dly = pywt.scale2frequency(wavelet, sf_idx[0]+1)/hd.channel_width * 1e6


# mean WPS

# averaged over the redundant group at the time integration being examined
# (`sample_tint`, the same one used for the visibilities below)
qmesh = axes[0][1].pcolormesh(b_freqs/1e6, scales+0.5, \
    np.nanmean(data2[..., sample_tint, red_grp], axis=-1), norm=LogNorm(), cmap='jet')

# axes[0][1].scatter(b_freqs[sf_idx[1]]/1e6, anom_dly, \
#                    s=1000, fc='None', edgecolors='cyan', lw=2, ls='--')
axes[0][1].scatter(anom_freq, sf_idx[0]+1, s=1000, fc='None', edgecolors='red', \
                lw=2, ls='--')

divider = make_axes_locatable(axes[0][1])
cax1 = divider.append_axes('right', size='5%', pad=0.1)
plt.colorbar(qmesh, cax=cax1, extend=None)

axes[0][1].invert_yaxis()
axes[0][1].set_yscale('log')
axes[0][1].set_title(r'$\overline{\mathrm{WPS}}$')
axes[0][1].set_xlabel('Frequency [MHz]')
axes[0][1].set_ylabel(r'Delay [$\mu$s]')

axes[0][1].set_xticks(xtk[1:-1]/1e6)
axes[0][1].set_xticklabels(intticks)

# hack to get delay ylabel
axes[0][1].set_yticks([1/(hd.channel_width*1e-5), 1/(hd.channel_width*1e-6)])
axes[0][1].set_yticklabels([r'$10^1$', r'$10^0$'])
# for correct log
min_ticks = np.concatenate((1/(hd.channel_width*np.linspace(1e-5, 1e-6, 11)), \
                            1/(hd.channel_width*np.linspace(1e-6, 1e-7, 11))))[:16]
axes[0][1].set_yticks(min_ticks, minor=True)
axes[0][1].set_yticklabels([], minor=True)


# modZ plot

im1 = axes[0][0].pcolormesh(b_freqs/1e6, scales+0.5, sample_abs_modz, \
                            norm=LogNorm(vmin=1), cmap='YlOrRd')
# circle worst pixel in scaleogram
axes[0][0].scatter(anom_freq, sf_idx[0]+1, s=1000, fc='None', edgecolors='cyan', \
                lw=2, ls='--')
axes[0][0].set_ylim((1, 18))
axes[0][0].invert_yaxis()
axes[0][0].set_yscale('log')
axes[0][0].set_xticks(intticks)
axes[0][0].set_xticklabels(intticks)

divider = make_axes_locatable(axes[0][0])
cax2 = divider.append_axes('right', size='5%', pad=0.1)
plt.colorbar(im1, cax=cax2, extend='min')

axes[0][0].set_title(r'$Z^{\mathrm{mod}}$')
axes[0][0].set_xlabel('Frequency [MHz]')
axes[0][0].set_ylabel('Scale')


# Visibilities

# faint: other baselines of the group; bold: the selected baseline
axes[1][0].plot(freqs[chans2]/1e6, sample_data4.real, alpha=0.25, c=cmap_def(0), zorder=2)
axes[1][0].plot(freqs[chans2]/1e6, sample_data4.imag, alpha=0.25, c=cmap_def(1), zorder=2)

anom_vis = vis_data[sample_tint, band_2[0]:band_2[1]+1, sample_bl]
axes[1][0].plot(freqs[chans2]/1e6, anom_vis.real, lw=1.5, c=cmap_def(0), label=r'$\mathfrak{Re}$', zorder=3)
axes[1][0].plot(freqs[chans2]/1e6, anom_vis.imag, lw=1.5, c=cmap_def(1), label=r'$\mathfrak{Im}$', zorder=3)

axes[1][0].set_xlabel('Frequency [MHz]')
axes[1][0].set_ylabel(r'$V$ [Jy]')
axes[1][0].legend(loc='best')
axes[1][0].set_xlim(*axes[0][0].get_xlim())
axes[1][0].set_xticks(intticks)
axes[1][0].set_xticklabels(intticks)

# shade anomalous vis area
axes[1][0].axvspan(anom_freq-1.25, anom_freq+1.25, alpha=0.1, color='red', zorder=1)


# Power spectra

# one periodogram per group baseline; rows of `pspec` follow the order of `red_grp`
delay, pspec = signal.periodogram(vis_data[sample_tint, band_2[0]:band_2[1]+1, red_grp], \
    fs=1/ hd.channel_width, window='blackmanharris', scaling='spectrum', nfft=chans2.size, \
    detrend=False, return_onesided=False)

# two-sided output is reordered so that delay increases monotonically
delay_sort = np.argsort(delay)
delay = delay[delay_sort]
pspec = pspec[:, delay_sort]

# Rows are selected by position within `red_grp`, not by baseline index
anom_idx = red_grp.tolist().index(sample_bl)
axes[1][1].plot(delay*1e6, pspec[red_grp_min_pos, :].T, c='purple', alpha=0.25, zorder=2)
axes[1][1].plot(delay*1e6, pspec[anom_idx, :], c='deeppink', lw=1.5, zorder=3)

axes[1][1].set_yscale('log')
axes[1][1].set_xlabel(r'Delay [$\mu$s]')
axes[1][1].set_ylabel(r'PS [Jy$^2$ Hz$^2$]')

# shade anomalous ps area
dly_spr = 0.2
axes[1][1].axvspan(-anom_dly-dly_spr, -anom_dly+dly_spr, alpha=0.1, color='red', zorder=1)
axes[1][1].axvspan(anom_dly-dly_spr, anom_dly+dly_spr, alpha=0.1, color='red', zorder=1)

axes[1][1].set_xlim(delay[0]*1e6, delay[-1]*1e6)

fig.tight_layout()

# save_fig_dir = '/lustre/aoc/projects/hera/mmolnar/figs'
# plt.savefig(os.path.join(save_fig_dir, 'modz_wps_vis_ps.pdf'), bbox_inches='tight')

plt.show()

In [None]:
# Two-sided periodogram of the selected baseline's Band 1 visibilities
# (Blackman-Harris window, no detrending), reordered to ascending delay
periodogram_kwargs = dict(fs=1/freq_resolution, window='blackmanharris', scaling='spectrum',
                          nfft=None, detrend=False, return_onesided=False)
delay, pspec = signal.periodogram(sample_data1, **periodogram_kwargs)

delay_sort = np.argsort(delay)
delay, pspec = delay[delay_sort], pspec[delay_sort]

In [None]:
# Power spectrum against delay, on a logarithmic power axis
fig, ax = plt.subplots(figsize=(6, 6), dpi=125)

ax.plot(delay, pspec, alpha=1)
ax.set_yscale('log')
ax.set(ylabel='Power spectrum', xlabel='Delay')

plt.tight_layout()
plt.show()