# RR-LL delay tests

[L. Blackburn, Sep 2018; rewritten for Python 3.9, Dec 2022]

The purpose of this test is to check the stability of the RR-LL delay alignment for baselines in the array. We expect the R-L delay to be stable at each antenna, so the baseline RR-LL delay should also be stable. RR-LL is a cleaner signal than the RL or LR products needed for a direct measurement of R-L at a single site, because it has less relative contamination from leakage. In addition, because the ALMA X and Y feeds are aligned, ALMA can be used as a reference by assuming that the R-L delay at ALMA is exactly zero.

In [None]:
# basic import and helper functions
import pandas as pd
from eat.io import hops, util
from eat.hops import util as hu
from eat.plots import util as pu
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import seaborn as sns

# Restore matplotlib's original rc settings in case seaborn changed them
# (see seaborn's reset_orig documentation).
sns.reset_orig()
# sns.set_palette(sns.color_palette(sns.hls_palette(16, l=.6, s=.6)))
# IPython magics: draw figures inline in the notebook output, using the
# 'retina' figure format for the inline backend.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# %config InlineBackend.figure_formats=['svg']

# Keep a reference to the stdout object in use when this cell runs.
# NOTE(review): not used in the cells visible here -- presumably meant for
# restoring sys.stdout after a later redirect; confirm.
nb_stdout = sys.stdout # grab for later

def wide(w=8, h=3):
    """Set the current figure's width to ``w`` and height to ``h``, then tighten the layout."""
    current_fig = plt.gcf()
    plt.setp(current_fig, figwidth=w, figheight=h)
    plt.tight_layout()

def tightx():
    """Autoscale only the x-axis of the current axes, with tight limits."""
    plt.autoscale(axis='x', tight=True, enable=True)

def multline(xs, fun=plt.axvline):
    """Call ``fun`` once per entry of ``xs`` to draw faint dashed black guide lines.

    ``fun`` defaults to ``plt.axvline``; any callable accepting a position plus
    ``alpha``, ``ls`` and ``color`` keyword arguments can be passed instead.
    """
    guide_style = dict(alpha=0.25, ls='--', color='k')
    for position in xs:
        fun(position, **guide_style)

def toiter(x):
    """Return ``x`` itself if it is a non-string iterable, else ``[x]``.

    Strings and bytes are treated as single values: in Python 3 they define
    ``__iter__``, so without the explicit check a lone string (e.g. one
    baseline name) would be handed back as-is and iterated character by
    character by the caller.
    """
    if isinstance(x, (str, bytes)):
        return [x]
    return x if hasattr(x, '__iter__') else [x]

# pd.options.display.float_format = '{:,.6f}'.format
from IPython.display import display, HTML

# CSS overrides injected into the notebook page: zero top padding on
# #notebook, full-width .container, and no minimum height on .end_space.
css_rules = (
    "#notebook { padding-top:0px !important; } ",
    ".container { width:100% !important; } ",
    ".end_space { min-height:0px !important; } ",
)
display(HTML("<style>" + "".join(css_rules) + "</style>"))

In [None]:
# define and load data: the alist file is looked up under $DATADIR
alistf = 'alist.v6'
datadir = os.environ['DATADIR']
alist_path = os.path.join(datadir, alistf)

# read the alist and pass it through util.noauto
# (presumably drops autocorrelations -- confirm against eat.io.util)
a = util.noauto(hops.read_alist(alist_path))

# Pre-process alist dataframe: each helper is called on `a` in this order and
# its return value is discarded
for prep in (util.fix, util.unwrap_mbd, util.add_days,
             util.add_delayerr, util.add_path, util.add_scanno):
    prep(a)

Apply data filters and modify the polarization column to handle mixed-polarization data. In the mixed-polarization (mixedpol) basis, all four correlations can be used in computing the RR-LL difference, since all four Stokes components are present in each correlation product. To enable this, we relabel XL and YL as LL, and XR and YR as RR.

In [None]:
# data filters -- keep rows above the SNR threshold and remove SMAR-SMAW
# baselines (only applicable to EHT2017 data)
thres = 7.0
above_thres = a.snr > thres
is_smar_smaw = a.baseline.isin({'RS', 'SR'})
a = a[above_thres & ~is_smar_smaw].copy()

# Relabel polarizations if mixedpol visibilities are present
mixedpol_labels = {'XL': 'LL', 'YL': 'LL', 'XR': 'RR', 'YR': 'RR'}
a['polarization'] = a.polarization.replace(mixedpol_labels)

In [None]:
# Compute the boundaries between expt_nos
sorted_a = a.sort_values(by=['expt_no', 'scan_no'])

# Highest scan_no found within each expt_no
last_scans = sorted_a.groupby('expt_no')['scan_no'].max()

# One boundary per expt_no except the final one, placed 0.5 past its last scan
elines = last_scans.iloc[:-1].add(0.5).to_numpy()

In [None]:
# calculate the segmented statistics
p, stats = hu.rrll_segmented(a, restarts=hu.restarts)

# split the baselines present in p into those whose first station code is 'A'
# (ALMA) and all the others; both lists are sorted
baseline_names = sorted(set(p.index.get_level_values('baseline')))
wa = [bl for bl in baseline_names if bl[0] == 'A']
na = [bl for bl in baseline_names if bl[0] != 'A']

In [None]:
# show the segment statistics restricted to the ALMA baselines
# (third index level selected by wa, first two levels unrestricted)
stats.loc[pd.IndexSlice[:, :, wa], :]

In [None]:
# Make rrll plots for ALMA and non-ALMA baselines in different subplots:
# one panel for the ALMA baselines, then the non-ALMA baselines split in half
half = len(na) // 2
panels = [wa, na[:half], na[half:]]

for row, group in enumerate(panels, start=1):
    plt.subplot(len(panels), 1, row)
    hu.rrllplot(p, baselines=group, vlines=elines)
    # start the x-axis at 0 and leave 5% of headroom on the right
    plt.xlim(0, 1.05*plt.xlim()[1])
    if row == 1:
        # only the top panel carries the title
        plt.title('RR-LL delay after subtracting mean value [%.0f MHz]' % (p.iloc[0].ref_freq))

wide(12, 10)

In [None]:
# table of outliers: rows where both |LLRR_offset| and |LLRR_std| exceed
# their cut
offset_cut = 0.000050
sigma_cut = 5
large_offset = p.LLRR_offset.abs() > offset_cut
large_sigma = p.LLRR_std.abs() > sigma_cut
outliers = large_offset & large_sigma

shown_cols = ['expt_no', 'scan_id', 'source', 'timetag',
              'mbd_unwrap', 'LLRR_offset', 'LLRR_std']
p.loc[outliers, shown_cols]

In [None]:
# scatter plot shows balancing between systematic error and bandwidth inflation factor
# we want to see relatively well-behaved distribution across multiple SNR
predicted_err = p['LLRR_err'].values
n_sigma = p['LLRR_std'].values
plt.semilogx(predicted_err, n_sigma, '.')

plt.xlabel('predicted LL-RR error')
plt.ylabel('sigmas away from mean')
plt.ylim(-5, 5)

# dashed horizontal grid lines only
y_axis = plt.gca().yaxis
y_axis.grid(ls='--', alpha=0.5)
wide(10, 5)

Plot histograms of deviation from mean for ALMA and non-ALMA baselines separately.

In [None]:
# histogram of sigmas deviation: one stacked histogram of LLRR_std per
# baseline group (ALMA baselines first, then non-ALMA), with a unit normal
# curve overlaid for comparison
from scipy.stats import norm

lim = 8 # np.ceil(np.max(np.abs(p.LLRR_std)))
xx = np.linspace(-lim, lim, 200)    # sample points for the reference curve
bins = np.linspace(-lim, lim, 161)  # 160 equal-width bins across [-lim, lim]

for baselines in [wa, na]:
    # Nothing to draw for an empty group; the zip(*...) unpacking below
    # would otherwise raise ValueError.
    if not baselines:
        continue

    q = p.loc[(slice(None),slice(None),baselines),:]
    (names, vals) = zip(*[(bl, rows.LLRR_std) for (bl, rows) in q.groupby('baseline')])
    # Legend entry per baseline: its name followed by the RMS of its LLRR_std values
    names2 = [name + ': %.1f' % np.sqrt(np.mean(val**2)) for (name, val) in zip(names, vals)]
    plt.hist(vals, bins=bins, histtype='barstacked', alpha=1.0, label=names2, density=True)

    # Plot the normal distribution for comparison
    plt.plot(xx, norm.pdf(xx, loc=0, scale=1.0), 'k--', alpha=0.5)

    plt.xlabel('std away from mean')
    plt.ylabel('distribution of scans')
    plt.title('RR-LL delay offsets after subtracting mean value [%.0f MHz]' % (p.iloc[0].ref_freq))
    plt.legend(loc='upper right')
    plt.grid(alpha=0.25)

    # RMS of LLRR_std over the whole group. The square root is required so
    # that the tagged "std" matches the per-baseline RMS values in the legend
    # instead of reporting the mean square.
    std = np.sqrt(np.mean(q.LLRR_std**2))
    pu.tag('N = %d, std=%.1f' % (len(q), std), loc='upper left')
    plt.xlim(-lim, lim) # only show bulk distribution

    wide(12, 4.5)
    plt.show()