<center><strong><font size=+3>CWT PS Analysis in Napari</font></strong></center>
<br><br>
<center><strong><font size=+2>Matyas Molnar and Bojan Nikolic</font><br></strong></center>
<br><center><strong><font size=+1>Astrophysics Group, Cavendish Laboratory, University of Cambridge</font></strong></center>

### View CWT products in napari

In [None]:
import numpy as np
from astropy.stats import mad_std, sigma_clip
from matplotlib import pyplot as plt
from matplotlib.colors import LogNorm
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy import signal

import pywt
import scaleogram as scg

from hera_cal.io import HERAData

In [None]:
# Open the saved CWT power products of the two bands. The archive handles are
# kept around because other cells read further arrays from them.
npz_f1, npz_f2 = (np.load(fn) for fn in ('cwt_power_b1.npz', 'cwt_power_b2.npz'))
data1, data2 = npz_f1['power'], npz_f2['power']

In [None]:
# Metadata arrays are taken from the first archive; only the 'freqs' entry is
# read separately for each band.
lsts, redg, chans1 = (npz_f1[key] for key in ('lsts', 'redg', 'freqs'))
chans2 = npz_f2['freqs']

In [None]:
# 1024 evenly spaced values starting at 1e8, with the 2e8 endpoint excluded
freqs = np.linspace(1e8, 2e8, 1024, endpoint=False)
# Typical spacing between neighbouring entries of freqs
freq_resolution = np.median(np.diff(freqs))

In [None]:
# # ant filt to make more manageable
# data1 = data1[..., 100:200]
# data2 = data2[..., 100:200]

In [None]:
# [first, last] channel of each band (the last channel is included when slicing)
band_1, band_2 = [175, 334], [515, 694]

# [lower, upper] LST limits of each field
field_1, field_2, field_3 = [1.25, 2.70], [4.50, 6.50], [8.50, 10.75]

In [None]:
# Indices of the LST samples lying strictly inside the Field 1 limits
f1 = np.flatnonzero((field_1[0] < lsts) & (lsts < field_1[1]))
print(f1)

In [None]:
# Indices of the LST samples lying strictly inside the Field 2 limits
f2 = np.flatnonzero((field_2[0] < lsts) & (lsts < field_2[1]))
print(f2)

In [None]:
zen_lstb = '/Users/matyasmolnar/Downloads/HERA_Data/sample_data/zen.grp1.of1.LST.1.31552.HH.OCRSL.uvh5'
hd = HERAData(zen_lstb)

# Absolute per-component separation of the two antennas of every baseline.
# Columns 1 and 2 of each redg row are used as antenna keys into hd.antpos
# (column 0 is not needed here).
antsep = {}
for row in redg:
    antsep[tuple(row[1:])] = np.abs(hd.antpos[row[2]] - hd.antpos[row[1]])

proj_ew = 14
# baselines with projected EW length < 14 m
# (the first separation component is taken to be the EW one -- presumably in
# metres; confirm against the antpos convention)
# NOTE(review): the indices are positions within antsep; they coincide with
# redg row numbers only if no antenna pair is repeated in redg.
nan_bls = [i for i, v in enumerate(antsep.values()) if v[0] < proj_ew]
# Complement of nan_bls: a baseline sitting exactly on the threshold is kept
# instead of falling into neither list.
ok_bls = [i for i, v in enumerate(antsep.values()) if v[0] >= proj_ew]

In [None]:
# Blank out the discarded baselines (last axis) in both bands: multiplying by
# NaN turns every selected entry into NaN, in place.
for band_power in (data1, data2):
    band_power[..., nan_bls] *= np.nan

In [None]:
if False:
    # Disabled by default; change the condition to open the interactive viewer.

    import napari

    # Place the two bands side by side along axis 1.
    # Careful: B1 and B2 will have different scales.
    data_m = np.concatenate((data1, data2), axis=1)
    # data_m = data2

    # only look at times from Field 2
    # in napari - set auto-contrast to "once", and adjust contrast limits & gamma
    # found that FPS 8 works well, with play mode "back and forth" (for time axis)

    log_power = np.log(data_m[:, :, :, :])
    viewer = napari.view_image(
        log_power,
        colormap='turbo',
        ndisplay=2,
        order=(2, 3, 0, 1),
        gamma=1,
        interpolation='nearest',
        scale=(8, 1, 1, 1),
    )

### Automatic detection

In [None]:
# NaN-ignoring statistics of the band-1 power, collapsed over axes 2 and 3
# (the axes indexed elsewhere in this file by time sample and baseline).
mad = mad_std(data1, ignore_nan=True, axis=(2, 3))

med = np.nanmedian(data1, axis=(2, 3))

In [None]:
def mad_std_(d, axis=None):
    """NaN-ignoring wrapper around astropy's ``mad_std``.

    Fixes ``ignore_nan=True`` (and ``func=None``) so that the function can be
    handed to a caller which only supplies the data and an ``axis``.
    """
    fixed_kwargs = {'func': None, 'ignore_nan': True}
    return mad_std(d, axis=axis, **fixed_kwargs)

In [None]:
# Single-iteration 5-sigma clip of the band-1 power over axes 2 and 3, centred
# on the NaN-aware median and scaled by the NaN-aware MAD standard deviation.
sc = sigma_clip(
    data1,
    sigma=5,
    maxiters=1,
    cenfunc=np.nanmedian,
    stdfunc=mad_std_,
    axis=(2, 3),
)
# Clipped entries become NaN whenever the masked array is filled
sc.set_fill_value(np.nan)

In [None]:
# Side-by-side maps of the median and MAD on logarithmic colour scales, each
# with its own colourbar attached on the right.
fig, axes = plt.subplots(ncols=2, figsize=(7.5, 4), dpi=125, sharey=True)

imshow_kw = dict(aspect='auto', interpolation='none', cmap='jet')
im1 = axes[0].imshow(med, norm=LogNorm(), **imshow_kw)
im2 = axes[1].imshow(mad, norm=LogNorm(), **imshow_kw)

divider = make_axes_locatable(axes[0])
cax1 = divider.append_axes('right', size='5%', pad=0.05)
divider = make_axes_locatable(axes[1])
cax2 = divider.append_axes('right', size='5%', pad=0.05)

for image, cax in ((im1, cax1), (im2, cax2)):
    plt.colorbar(image, cax=cax)

for ax, label in zip(axes, ('Median', 'MAD')):
    ax.set_title(label)

fig.tight_layout()
plt.show()

In [None]:
# Mark the positions whose MAD exceeds the threshold and display the
# resulting boolean map.
clip_thresh = 1.0

hp = mad > clip_thresh

fig, ax = plt.subplots(figsize=(4, 4), dpi=125, sharey=True)
ax.imshow(hp, interpolation='None', aspect='auto')
fig.tight_layout()
plt.show()

In [None]:
# Boolean mask of the sigma-clipped array (True where an entry was clipped)
clipped = sc.mask

In [None]:
# Un-flag every entry at a leading-axes position whose MAD exceeds clip_thresh
clipped[hp, ...] = False

In [None]:
# Indices (one array per axis) of the entries that remain flagged
clipped.nonzero()

### Notes v1

**These notes were taken when analysing all baselines that do not contain flagged antennas. It was later noted that baselines with a projected EW length < 14 metres are discarded in the power spectrum computation, so the analysis was repeated, with the comments written in Notes v2.**

In the notes below, "delay" means the delay of the wavelet: a wavelet of small scale is a compressed wavelet that picks up rapidly changing details, which corresponds to a higher delay (since the signal is in frequency space to start off with).

Redundant analysis:
 - B1 baseline group 1, 3 power at high-ish delays
 - B2F2 baseline group 7 power at low delays
 - B1 & B2 baseline group 12 and 13 has more power at mid delays and high
 - B1 & B2 baseline group 20, 30, 31, 45 higher power at mid delays
 - B1 baseline group 67, 68, 72, 74, 82, 83, 88, 101 localized power at mid delays
 - B1 & B2 baseline group 77, 78, 81, 99, 105, 106 localized power at mid delays
 
All baselines analysis:

 - B1 baseline 12, 16, 69, 74 (bad), 212, 230, 324, 540, 655, 657, 660, 731 power at mid delays
 - B2 baseline 23, 27, 71, 196, 198 power at mid delays
 - B1 & B2 baseline 28, 29, 30, 33 (v bad), 37 (bad), 75, 77, 200, 201 (bad), 202, 203, 206, 208 (bad), 209, 210, 218 (bad), 220, 221, 227, 228, 315 (bad), 319, 321 (bad), 322, 323, 423, 426, 430, 439, 543, 544, 546, 659  power at mid delays
 - B1 baseline 1, 6, 8, 9, 20, 21, 25, 31, 34, 73, 79, 216, 223, 224, 225, 424, 440, 662, 696 localized power at mid delays
 - B2 baseline 3, 19, 35, 195, 212, 313, 432 localized power at mid delays
 - B1 & B2 baseline 14, 17, 204, 217, 219, 222, 316, 317, 433, 434, 436 localized power at mid delays
 
 
Other notes:
 - For B1 get localized power at mid delays at higher end of frequency band - recurring spot for a few baselines
 - Bls 33, 37, 74 bad, with lots of power at mid delays, especially in Band 1
 - Features do not appear transient in time - high power seems to be present across times for specific baselines
 - B1 seems worse than B2 for Field 2? looking at H1C limits, expect B1 to be worse, so effects will be more noticeable, especially when comparing on the same scale

### Notes v2

### Look at some example slices where CWT looks bad

In [None]:
hr_full_fn = 'h1c_idr2.OCRSLP2XTK.npz'
# Read the array inside a context manager so that the archive's file handle
# is closed as soon as the data have been loaded into memory.
with np.load(hr_full_fn) as vis_npz:
    vis_data = vis_npz['arr_0']

In [None]:
# Time-sample index, baseline index (row of redg) and channel bounds to inspect.
# NOTE(review): sample_tint indexes lsts directly here (and vis_data in a later
# cell), but is applied as f2[sample_tint] when slicing data1/data2 -- confirm
# which time axis it is meant to refer to.
sample_tint = 15
sample_bl = 50
band = band_1
print(f'Examining baseline {redg[sample_bl, :][1:]} at LST {lsts[sample_tint]:.3f}')

In [None]:
# log10 CWT power of each band for the selected Field 2 time sample and baseline
fig, axes = plt.subplots(ncols=2, figsize=(7.5, 4), dpi=125, sharey=True)
for ax, band_power, band_title in zip(axes, (data1, data2), ('Band 1', 'Band 2')):
    power_slice = band_power[..., f2[sample_tint], sample_bl]
    ax.imshow(np.log10(power_slice), aspect='auto', interpolation='none', cmap='jet')
    ax.set_title(band_title)
fig.tight_layout()
plt.show()

In [None]:
# Recompute and draw the CWT power scaleogram of one visibility spectrum per
# band, using the wavelet and scales stored with the saved CWT products.
fig, axes = plt.subplots(ncols=2, figsize=(7.5, 4), dpi=125, sharey=True)

wavelet = npz_f1['wavelet'].item()
scales = npz_f1['scales']

# # set same values scale
# sdata = np.concatenate((data1[..., sample_tint, sample_bl], data2[..., sample_tint, sample_bl]), axis=1)
# vmin = sdata.min()
# vmax = sdata.max()
# vlims = (vmin, vmax)
vlims = None

# Visibility spectra of the chosen time sample and baseline; the upper band
# channel is included in each slice.
sample_data1 = vis_data[sample_tint, band_1[0]:band_1[1]+1, sample_bl]
sample_data2 = vis_data[sample_tint, band_2[0]:band_2[1]+1, sample_bl]

# Options shared by both panels
cws_kw = dict(
    scales=scales,
    wavelet=wavelet,
    cscale='log',
    coi=True,
    spectrum='power',
    yaxis='frequency',
    xlabel='Frequency',
    ylabel='Delay',
    yscale='log',
    cwt_fun='pywt',
    vlims=vlims,
)

r = scg.cws(freqs[chans1], sample_data1, ax=axes[0], title='CWT PS B1', **cws_kw)

_ = scg.cws(freqs[chans2], sample_data2, ax=axes[1], title='CWT PS B2', **cws_kw)

fig.tight_layout()
plt.show()

In [None]:
# Real and imaginary parts of the sampled visibilities against frequency in
# MHz, one panel per band.
fig, axes = plt.subplots(ncols=2, figsize=(7.5, 4), dpi=125)

c = [chans1, chans2]
s = [sample_data1, sample_data2]
t = ['Band 1 Field 2', 'Band 2 Field 2']

for ax, band_chans, vis, panel_title in zip(axes, c, s, t):
    freq_mhz = freqs[band_chans]/1e6
    ax.plot(freq_mhz, vis.real, label=r'$\mathfrak{Re}(V)$')
    ax.plot(freq_mhz, vis.imag, label=r'$\mathfrak{Im}(V)$')
    ax.set_xlabel('Frequency [MHz]')
    ax.set_title(panel_title)

axes[0].set_ylabel('Visibility')
axes[1].legend(loc='best')

fig.tight_layout()
plt.show()

In [None]:
# Pick another baseline from the same redundant group to check whether its
# visibilities look similar.
# Column 0 of redg holds the group label of each baseline row.
bl_grp = redg[sample_bl, 0]
red_grp = np.flatnonzero(redg[:, 0] == bl_grp)
# Position, within the group, of the baseline to compare against
slct_bl = 1

In [None]:
# Same visibility plot as for the sample baseline, but for the baseline
# selected from its redundant group.
fig, axes = plt.subplots(ncols=2, figsize=(7.5, 4), dpi=125)

sample_data3 = vis_data[sample_tint, band_1[0]:band_1[1]+1, red_grp[slct_bl]]
sample_data4 = vis_data[sample_tint, band_2[0]:band_2[1]+1, red_grp[slct_bl]]

c = [chans1, chans2]
s = [sample_data3, sample_data4]
t = ['Band 1 Field 2', 'Band 2 Field 2']

for ax, band_chans, vis, panel_title in zip(axes, c, s, t):
    freq_mhz = freqs[band_chans]/1e6
    ax.plot(freq_mhz, vis.real, label=r'$\mathfrak{Re}(V)$')
    ax.plot(freq_mhz, vis.imag, label=r'$\mathfrak{Im}(V)$')
    ax.set_xlabel('Frequency [MHz]')
    ax.set_title(panel_title)

axes[0].set_ylabel('Visibility')
axes[1].legend(loc='best')

fig.tight_layout()
plt.show()

In [None]:
# check auto-PS for the baseline
# Two-sided periodogram of the band-1 sample spectrum; the sampling rate is the
# inverse of the frequency resolution, so the returned axis is in delay units.
delay, pspec = signal.periodogram(
    sample_data1,
    fs=1/freq_resolution,
    window='blackmanharris',
    nfft=None,
    detrend=False,
    return_onesided=False,
    scaling='spectrum',
)

# Reorder both arrays so that the delays are ascending
delay_sort = np.argsort(delay)
delay, pspec = delay[delay_sort], pspec[delay_sort]

In [None]:
# Power spectrum against delay on a logarithmic vertical axis
fig, ax = plt.subplots(figsize=(6, 6), dpi=125)

ax.plot(delay, pspec, alpha=1)
ax.set(yscale='log', ylabel='Power spectrum', xlabel='Delay')

fig.tight_layout()
plt.show()

## Statisticall