<center><strong><font size=+3>KDE plot videos of aggregated HERA visibilities</font></strong></center>
<br><br>
<center><strong><font size=+2>Matyas Molnar and Bojan Nikolic</font><br></strong></center>
<br><center><strong><font size=+1>Astrophysics Group, Cavendish Laboratory, University of Cambridge</font></strong></center>

This notebook produces videos of kernel density estimate plots of HERA visibilities that are aggregated in time across Julian dates and over redundant baseline groups. The density plots indicate the quality/normality of the data and can reveal any instrumental/calibration effects that may have crept into the data.

In [None]:
import glob
import os
import shutil
import subprocess
import tempfile
from pathlib import Path

import numpy as np
import seaborn as sns
from IPython.display import Video
from matplotlib import gridspec
from matplotlib import pyplot as plt
from matplotlib.animation import FuncAnimation
from pdf2image import convert_from_path

from robstat.hera_vis import agg_tint_rephase
from robstat.plotting import SeabornFig2Grid
from robstat.robstat import geometric_median
from robstat.stdstat import rsc_mean
from robstat.utils import DATAPATH, FIGSPATH, flt_nan

In [None]:
# --- settings a reader may want to tune ---
bl_grp = 0 # only look at 0th baseline group
no_bins_agg = 2 # averaging over n consecutive time bins in LST averaging
                # (reset to 1 further down when an 'lstb_no_avg' file is loaded)
overwrite = False # rebuild videos even when the output file already exists
save_vids = True # False -> videos are written to a temporary directory instead
save_dir = os.path.join(FIGSPATH, 'kde_videos_idr2_lstb_14m_ee_1.40949')

In [None]:
# npz archive holding the visibilities; the 'lstb_no_avg' part of the path is what
# later selects the LST-binned code path when the arrays are unpacked
xd_vis_file = os.path.join(DATAPATH, 'lstb_no_avg/idr2_lstb_14m_ee_1.40949.npz')
# archive handle; individual arrays are read from it by key in the next cell
sample_xd_data = np.load(xd_vis_file)

In [None]:
# Pull the individual arrays out of the loaded archive
xd_data = sample_xd_data['data']
xd_redg = sample_xd_data['redg']
xd_pol = sample_xd_data['pol'].item()
xd_rad_lsts = sample_xd_data['lsts']
xd_hr_lsts = xd_rad_lsts*12/np.pi # in hours
JDs = sample_xd_data['JDs']
no_days = JDs.size

# the file path decides which of the two on-disk layouts is being read
lstb_format = 'lstb_no_avg' in xd_vis_file

if lstb_format:
    band_1 = [175, 334]
    band_2 = [515, 694]

    band_i = band_2 # select band here
    chans = np.arange(band_i[0], band_i[1]+1)
    plt_chans = chans

    # data dimensions (2xdays, freqs, times, bls)
    xd_data = xd_data[:, chans, ...]

    # no separate flag array is read in this branch: NaN entries are the flags
    xd_flags = np.isnan(xd_data)
    new_no_tints = xd_data.shape[2]
    # no aggregation over consecutive time bins in this branch
    no_bins_agg = 1
    avg_hr_lsts = xd_hr_lsts

else:
    # dimensions (days, freqs, times, bls)
    xd_flags = sample_xd_data['flags']
    # multiply (rather than assign) so flagged samples become NaN in place
    xd_data[xd_flags] *= np.nan

    # mean LST of each group of no_bins_agg consecutive time bins
    avg_hr_lsts = xd_hr_lsts.reshape(-1, no_bins_agg).mean(axis=1)

    freqs = sample_xd_data['freqs']
    chans = sample_xd_data['chans']
    # add one trailing channel when the last channel number ends in 99
    plt_chans = np.append(chans, chans[-1]+1) if chans[-1]%100 == 99 else chans

    # rephase if averaging over consecutive time bins
    if no_bins_agg > 1 and 'rph' in os.path.basename(xd_vis_file):
        print('Rephasing visibilities such that every {} rows in time have the same phase centre.'.format(no_bins_agg))
        # antenna positions are stored as a pickled object, hence the separate load
        xd_antpos = np.load(xd_vis_file, allow_pickle=True)['antpos'].item()
        xd_data = agg_tint_rephase(xd_data, xd_redg, freqs, xd_pol, xd_rad_lsts,
                                   xd_antpos, no_bins_agg=no_bins_agg)

In [None]:
# rows of xd_redg whose first column matches the requested redundant group
slct_bl_idxs = np.flatnonzero(xd_redg[:, 0] == bl_grp)

# restrict data and flags to those baselines (baselines are the last axis)
data = xd_data[..., slct_bl_idxs]
flags = xd_flags[..., slct_bl_idxs]
xd_data_b = xd_data[..., slct_bl_idxs]

# remaining columns of the first matching row identify the reference baseline
slct_red_bl = xd_redg[slct_bl_idxs[0], 1:]

bl_msg = 'Looking at baselines redundant to ({}, {}, \'{}\')'
print(bl_msg.format(*slct_red_bl, xd_pol))

### Aggregated across baselines, iterating through frequency

KDE plots across the H1C_IDR2 JDs for the same baseline group, showing the variability of the visibility density for different days.

In [None]:
# safety check to avoid overwriting good videos
if os.path.basename(save_dir) == 'kde_videos':
    save_vids = overwrite = False

if not save_vids:
    # nothing is to be kept: work in a throwaway directory instead
    save_dir = tempfile.mkdtemp()
elif not os.path.exists(save_dir):
    os.makedirs(save_dir, exist_ok=True)

In [None]:
# Per-JD KDE video: one frame per frequency channel, one panel per Julian date.
indiv_plt_prfx = 'kde_jd_'
kde_jd_vid_fn = 'kde_jd_final.mp4'
jd_vid_exists = os.path.exists(os.path.join(save_dir, kde_jd_vid_fn))

# only (re)build the video if it is missing or an overwrite was requested
if not jd_vid_exists or overwrite:

    # panel grid: fixed number of columns, as many rows as needed for all JDs
    no_cols = 5
    no_rows = int(np.ceil(JDs.size / no_cols))

    # select sample time that has the fewest flags
    slct_t_idx = np.argmin(np.isnan(xd_data_b).sum(axis=(0, 1, 3)).\
                           reshape((-1, no_bins_agg)).sum(axis=1))*no_bins_agg
    t_slice = slice(slct_t_idx, slct_t_idx+no_bins_agg)

    # common axis limits for every panel: percentile clip plus some padding
    pctc = 99
    pad = 5

    clip_data = xd_data_b[:, :, t_slice, :]
    re_lim = (np.floor(np.nanpercentile(clip_data.real, 100-pctc)) - pad, \
              np.nanpercentile(clip_data.real, pctc) + pad)
    im_lim = (np.floor(np.nanpercentile(clip_data.imag, 100-pctc)) - pad, \
              np.nanpercentile(clip_data.imag, pctc) + pad)

    # directory for the individual frames
    plt_dir = os.path.join(save_dir, '{}plots'.format(indiv_plt_prfx))
    if not os.path.exists(plt_dir):
        Path(plt_dir).mkdir(parents=True, exist_ok=True)

    for freq in range(xd_data_b.shape[1]):
        gplots = []
        legend_drawn = False
        for count in range(JDs.size):
            jd_data = flt_nan(xd_data_b[count, freq, t_slice, :].flatten())

            # same guard as the per-baseline video: only attempt a KDE when there
            # are more than 2 samples, otherwise fall back to a scatter plot
            if jd_data.size > 2:
                g = sns.jointplot(x=jd_data.real, y=jd_data.imag, \
                                  kind='kde', height=8, cmap='Blues', fill=True, space=0, \
                                  xlim=re_lim, ylim=im_lim)
            else:
                g = sns.jointplot(x=jd_data.real, y=jd_data.imag, \
                                  kind='scatter', height=8, space=0, \
                                  xlim=re_lim, ylim=im_lim)

            # overlay the location estimates whenever there is data for this day
            if not np.isnan(jd_data).all():
                jd_gmed = geometric_median(jd_data)
                jd_hmean = rsc_mean(jd_data)
                g.ax_joint.plot(jd_gmed.real, jd_gmed.imag, 'ro', label='Geometric Median')
                g.ax_joint.plot(jd_hmean.real, jd_hmean.imag, 'go', label='HERA Mean')
                # legend only on the first panel that shows the estimates
                if not legend_drawn:
                    g.ax_joint.legend(prop={'size': 7}, loc='upper right')
                    legend_drawn = True

            # baseline, channel and LST labels go on the first panel only
            if count == 0:
                g.ax_joint.annotate(tuple(slct_red_bl) + (str(xd_pol),), xy=(0.05, 0.05), \
                    xycoords='axes fraction', bbox= dict(boxstyle='round', facecolor='white'), \
                    size=8)
                g.ax_joint.annotate('Chan: {}, LST: {:.3f}'.format(chans[freq], \
                    np.mean(xd_hr_lsts[[slct_t_idx, slct_t_idx+no_bins_agg-1]])), \
                    xy=(0.05, 0.95), xycoords='axes fraction', bbox= dict(boxstyle='round', \
                    facecolor='white'), size=8)
            g.ax_joint.annotate(str(JDs[count]), xy=(0.8, 0.05), \
                xycoords='axes fraction', bbox= dict(boxstyle='round', facecolor='white'), \
                size=8)

            g.set_axis_labels(r'$\mathfrak{Re} \; (V)$', r'$\mathfrak{Im} \; (V)$', size=8, labelpad=-2)

            gplots.append(g)
            plt.close(g.fig) # suppress individual plots from showing in notebook

        # move every joint plot onto one combined figure for this channel
        fig = plt.figure(figsize=(20, 16))
        gs = gridspec.GridSpec(no_rows, no_cols)

        for i, gplot in enumerate(gplots):
            _ = SeabornFig2Grid(gplot, fig, gs[i])

        gs.tight_layout(fig)

        fig.savefig(os.path.join(plt_dir, '{}{:04}.pdf'.format(indiv_plt_prfx, freq)))
        plt.close(fig)

    # make videos from pdf plots
    pdf_fns = sorted(glob.glob(os.path.join(plt_dir, '{}*.pdf'.format(indiv_plt_prfx))))

    # converting plot pdfs to pngs
    for pdf_fn in pdf_fns:
        p = convert_from_path(pdf_fn, dpi=200)
        p[0].save(pdf_fn.replace('.pdf', '.png'), 'png')

    # make video with ffmpeg; arguments are passed as a list (no shell) and both
    # paths are made absolute, so no change of working directory is needed and
    # paths containing spaces or a relative save_dir are handled correctly
    frame_pattern = os.path.abspath(os.path.join(plt_dir, indiv_plt_prfx + '%04d.png'))
    vid_path = os.path.abspath(os.path.join(save_dir, kde_jd_vid_fn))
    ffmpeg_cmd = ['ffmpeg', '-hide_banner', '-loglevel', 'error', '-y', '-r', '1.5', \
                  '-i', frame_pattern, '-c:v', 'libx264', '-r', '30', \
                  '-pix_fmt', 'yuv420p', vid_path]
    try:
        ffmpeg_status = subprocess.run(ffmpeg_cmd).returncode
    except FileNotFoundError:
        ffmpeg_status = None
        print('ffmpeg executable not found; {} was not created.'.format(vid_path))
    if ffmpeg_status:
        print('ffmpeg exited with status {}; {} may be missing or incomplete.'.\
              format(ffmpeg_status, vid_path))

In [None]:
# embed the per-JD video in the notebook output
kde_jd_vid_path = os.path.join(save_dir, kde_jd_vid_fn)
Video(kde_jd_vid_path, embed=True, width=600)

### Aggregated across days, iterating through frequency

KDE plots across days for each baseline in the selected redundant group, showing the variability of the visibility density for each baseline.

In [None]:
# Per-baseline KDE video: one frame per frequency channel, one panel per baseline
# of the selected redundant group, aggregating over days.
indiv_plt_prfx = 'kde_bl_'
kde_bl_vid_fn = 'kde_bl_final.mp4'
bl_vid_exists = os.path.exists(os.path.join(save_dir, kde_bl_vid_fn))

# only (re)build the video if it is missing or an overwrite was requested
if not bl_vid_exists or overwrite:

    # antenna pairs of the selected baselines (first column of xd_redg is the group)
    bls = xd_redg[:, 1:][slct_bl_idxs]
    no_bls = bls.shape[0]

    # panel grid: fixed number of columns, as many rows as needed for all baselines
    no_cols = 5
    no_rows = int(np.ceil(no_bls / no_cols))

    # select sample time that has the fewest flags
    slct_t_idx = np.argmin(np.isnan(xd_data_b).sum(axis=(0, 1, 3)).\
                           reshape((-1, no_bins_agg)).sum(axis=1))*no_bins_agg
    t_slice = slice(slct_t_idx, slct_t_idx+no_bins_agg)

    # common axis limits for every panel: percentile clip plus some padding
    pctc = 99
    pad = 5

    clip_data = xd_data_b[:, :, t_slice, :]
    re_lim = (np.floor(np.nanpercentile(clip_data.real, 100-pctc)) - pad, \
              np.nanpercentile(clip_data.real, pctc) + pad)
    im_lim = (np.floor(np.nanpercentile(clip_data.imag, 100-pctc)) - pad, \
              np.nanpercentile(clip_data.imag, pctc) + pad)

    # directory for the individual frames
    plt_dir = os.path.join(save_dir, '{}plots'.format(indiv_plt_prfx))
    if not os.path.exists(plt_dir):
        Path(plt_dir).mkdir(parents=True, exist_ok=True)

    for freq in range(xd_data_b.shape[1]):
        gplots = []
        legend_drawn = False
        for count in range(no_bls):

            bl_data = flt_nan(xd_data_b[:, freq, t_slice, count].flatten())

            # only attempt a KDE when there are more than 2 samples,
            # otherwise fall back to a scatter plot
            if bl_data.size > 2:
                g = sns.jointplot(x=bl_data.real, y=bl_data.imag, \
                                  kind='kde', height=8, cmap='Blues', fill=True, space=0, \
                                  xlim=re_lim, ylim=im_lim)
            else:
                # no cmap here: the points are not colour-mapped, so it had no effect
                g = sns.jointplot(x=bl_data.real, y=bl_data.imag, \
                                  kind='scatter', height=8, space=0, \
                                  xlim=re_lim, ylim=im_lim)

            # overlay the location estimates whenever there is data for this baseline
            if not np.isnan(bl_data).all():
                bl_gmed = geometric_median(bl_data)
                bl_hmean = rsc_mean(bl_data)
                g.ax_joint.plot(bl_gmed.real, bl_gmed.imag, 'ro', label='Geometric Median')
                g.ax_joint.plot(bl_hmean.real, bl_hmean.imag, 'go', label='HERA Mean')
                # legend only on the first panel that shows the estimates
                if not legend_drawn:
                    g.ax_joint.legend(prop={'size': 7}, loc='upper right')
                    legend_drawn = True

            # every panel is labelled with channel/LST and its own baseline
            g.ax_joint.annotate('Chan: {}, LST: {:.3f}'.format(chans[freq], \
                np.mean(xd_hr_lsts[[slct_t_idx, slct_t_idx+no_bins_agg-1]])), \
                xy=(0.05, 0.95), xycoords='axes fraction', bbox= dict(boxstyle='round', \
                facecolor='white'), size=8)
            g.ax_joint.annotate(tuple(bls[count, :]) + (str(xd_pol),), xy=(0.05, 0.05), \
                xycoords='axes fraction', bbox= dict(boxstyle='round', facecolor='white'), \
                size=8)

            g.set_axis_labels(r'$\mathfrak{Re} \; (V)$', r'$\mathfrak{Im} \; (V)$', size=8, labelpad=-2)

            gplots.append(g)
            plt.close(g.fig) # suppress individual plots from showing in notebook

        # move every joint plot onto one combined figure for this channel
        fig = plt.figure(figsize=(20, 16))
        gs = gridspec.GridSpec(no_rows, no_cols)

        for i, gplot in enumerate(gplots):
            _ = SeabornFig2Grid(gplot, fig, gs[i])

        gs.tight_layout(fig)

        fig.savefig(os.path.join(plt_dir, '{}{:04}.pdf'.format(indiv_plt_prfx, freq)))
        plt.close(fig)

    # make videos from pdf plots
    pdf_fns = sorted(glob.glob(os.path.join(plt_dir, '{}*.pdf'.format(indiv_plt_prfx))))

    # converting plot pdfs to pngs
    for pdf_fn in pdf_fns:
        p = convert_from_path(pdf_fn, dpi=200)
        p[0].save(pdf_fn.replace('.pdf', '.png'), 'png')

    # make video with ffmpeg; arguments are passed as a list (no shell) and both
    # paths are made absolute, so no change of working directory is needed and
    # paths containing spaces or a relative save_dir are handled correctly
    frame_pattern = os.path.abspath(os.path.join(plt_dir, indiv_plt_prfx + '%04d.png'))
    vid_path = os.path.abspath(os.path.join(save_dir, kde_bl_vid_fn))
    ffmpeg_cmd = ['ffmpeg', '-hide_banner', '-loglevel', 'error', '-y', '-r', '1.5', \
                  '-i', frame_pattern, '-c:v', 'libx264', '-r', '30', \
                  '-pix_fmt', 'yuv420p', vid_path]
    try:
        ffmpeg_status = subprocess.run(ffmpeg_cmd).returncode
    except FileNotFoundError:
        ffmpeg_status = None
        print('ffmpeg executable not found; {} was not created.'.format(vid_path))
    if ffmpeg_status:
        print('ffmpeg exited with status {}; {} may be missing or incomplete.'.\
              format(ffmpeg_status, vid_path))

In [None]:
# embed the per-baseline video in the notebook output
kde_bl_vid_path = os.path.join(save_dir, kde_bl_vid_fn)
Video(kde_bl_vid_path, embed=True, width=600)

### Aggregated across days and baselines, iterating through either frequency or LST

Aggregation over both redundant baselines and JDs at the same LST (the greatest amount of coherent averaging).

In [None]:
# KDE animation of the data aggregated over days and baselines, stepping through
# either frequency channels or (binned) time integrations.
kde_slice_vid_prfx = 'kdes_'

slct_btint = 0 # for fixed time
slct_freq_idx = 0 # for fixed frequency
slice_dim = 'freq' # dimension along which video shows KDEs
vid_dn = os.path.join(save_dir, '{}{}.mp4'.format(kde_slice_vid_prfx, slice_dim))
pctc = 99.95 # percentile clip to condense plots

slice_vid_exists = os.path.exists(vid_dn)

# only (re)build the video if it is missing or an overwrite was requested
if not slice_vid_exists or overwrite:

    new_no_tints = int(xd_data_b.shape[2] / no_bins_agg)

    # per slicing dimension: (number of frames, fixed index along the other
    # dimension, axis of xd_data_b that this fixed index refers to)
    s_dict = {'freq': (xd_data_b.shape[1], slct_btint, 2), \
              'tint': (new_no_tints, slct_freq_idx, 1)}

    if slice_dim not in s_dict:
        raise ValueError('slice_dim must be one of {}, got {!r}'.format(sorted(s_dict), slice_dim))

    def get_idxs(d_idx):
        """Map a frame index to (binned time index, frequency index)."""
        if slice_dim == 'tint':
            return d_idx, slct_freq_idx
        return slct_btint, d_idx

    def get_data(d_idx):
        """Return the flattened visibilities (all days and baselines) for frame d_idx."""
        btint_idx, freq_idx = get_idxs(d_idx)
        t_start = no_bins_agg*btint_idx
        return xd_data_b[:, freq_idx, t_start:t_start+no_bins_agg, :].flatten()

    initd = get_data(0)
    g = sns.JointGrid(x=flt_nan(initd).real, y=flt_nan(initd).imag, height=8)
    g.set_axis_labels(r'$\mathfrak{Re} \; (V)$', r'$\mathfrak{Im} \; (V)$', size=14, labelpad=-2)

    # Axis limits are taken from all the data shown over the course of the video.
    # For a fixed time bin this must cover every raw integration in that bin,
    # i.e. the same window that get_data uses, not a single raw time index.
    if slice_dim == 'freq':
        clip_start = no_bins_agg*slct_btint
        clip_data = xd_data_b[:, :, clip_start:clip_start+no_bins_agg, :]
    else:
        clip_data = xd_data_b[:, slct_freq_idx, ...]
    re_lim = (np.floor(np.nanpercentile(clip_data.real, 100-pctc)), np.nanpercentile(clip_data.real, pctc))
    im_lim = (np.floor(np.nanpercentile(clip_data.imag, 100-pctc)), np.nanpercentile(clip_data.imag, pctc))

    def prep_axes(g, xlim, ylim):
        """Clear the joint and marginal axes, reapply limits and hide marginal ticks."""
        g.ax_joint.clear()
        g.ax_joint.set_xlim(xlim)
        g.ax_joint.set_ylim(ylim)
        g.ax_marg_x.clear()
        g.ax_marg_x.set_xlim(xlim)
        g.ax_marg_y.clear()
        g.ax_marg_y.set_ylim(ylim)
        # clearing the axes resets tick visibility, so hide them again
        plt.setp(g.ax_marg_x.get_xticklabels(), visible=False)
        plt.setp(g.ax_marg_y.get_yticklabels(), visible=False)
        plt.setp(g.ax_marg_x.yaxis.get_majorticklines(), visible=False)
        plt.setp(g.ax_marg_x.yaxis.get_minorticklines(), visible=False)
        plt.setp(g.ax_marg_y.xaxis.get_majorticklines(), visible=False)
        plt.setp(g.ax_marg_y.xaxis.get_minorticklines(), visible=False)
        plt.setp(g.ax_marg_x.get_yticklabels(), visible=False)
        plt.setp(g.ax_marg_y.get_xticklabels(), visible=False)

    def animate(d_idx):
        """Draw frame d_idx: joint and marginal KDEs, location estimates and labels."""
        data_slice = get_data(d_idx)
        g.x, g.y = flt_nan(data_slice).real, flt_nan(data_slice).imag
        prep_axes(g, re_lim, im_lim)
        g.plot_joint(sns.kdeplot, cmap='Blues', fill=True)
        # `fill` replaces the deprecated `shade` keyword and matches the joint plot
        g.plot_marginals(sns.kdeplot, fill=True)
        g.set_axis_labels(r'$\mathfrak{Re} \; (V)$', r'$\mathfrak{Im} \; (V)$', size=14)

        btint_idx, freq_idx = get_idxs(d_idx)

        if not np.isnan(data_slice).all():
            # NOTE(review): the unfiltered slice (NaNs included) is passed here, whereas
            # the other videos pass flt_nan-filtered data - confirm both estimators
            # ignore NaNs
            s_gmed = geometric_median(data_slice)
            s_hmean = rsc_mean(data_slice)
            med_ests_f = list(zip([s_gmed, s_hmean], \
                                  ['Geometric Median', 'HERA Mean'], \
                                  ['ro', 'go']))
            for est, est_label, est_fmt in med_ests_f:
                g.ax_joint.plot(est.real, est.imag, est_fmt, label=est_label)
            g.ax_joint.legend(prop={'size': 10}, loc='upper right')

        g.ax_joint.annotate(tuple(slct_red_bl) + (str(xd_pol),), xy=(0.05, 0.05), \
            xycoords='axes fraction', bbox= dict(boxstyle='round', facecolor='white'), \
            size=12)
        g.ax_joint.annotate('Chan: {}, Tint: {}'.format(chans[freq_idx], np.arange(new_no_tints)[btint_idx]), \
            xy=(0.05, 0.95), xycoords='axes fraction', bbox= dict(boxstyle='round', facecolor='white'), \
            size=12)

    ani = FuncAnimation(g.fig, animate, frames=s_dict[slice_dim][0], \
                        interval=600, repeat=False)

    ani.save(vid_dn, writer='ffmpeg', dpi=200)

    plt.close()

In [None]:
# embed the aggregated-slice video in the notebook output
Video(vid_dn, embed=True, width=600)

In [None]:
# Remove the scratch directory used when the videos are not being kept.
# save_dir is only a temporary directory if the directory-setup cell ran with
# save_vids False, so refuse to delete anything outside the system temp root:
# this protects real figures when cells are run out of order.
if not save_vids:
    tmp_root = os.path.realpath(tempfile.gettempdir())
    scratch_dir = os.path.realpath(save_dir)
    if scratch_dir.startswith(tmp_root + os.sep):
        # ignore_errors keeps a rerun of this cell from failing once the
        # directory has already been removed
        shutil.rmtree(scratch_dir, ignore_errors=True)
    else:
        print('Not removing {}: it is not inside the temporary directory {}.'.\
              format(save_dir, tmp_root))