In [1]:
import numpy as np
import pyuvdata

In [2]:
# --- Run configuration --------------------------------------------------------
# Set use_autos to True to keep zero-length (autocorrelation) baselines.
use_autos = False

model_path = "/Users/ruby/Astro/FHD_outputs/fhd_rlb_model_GLEAM_Aug2021"
model_use_model = True
data_path = "/Users/ruby/Astro/FHD_outputs/fhd_rlb_model_GLEAM_Aug2021"
data_use_model = True
obsid = "1061316296"
pol = "XX"

# Files handed to read_fhd, relative to an FHD output directory. The same set
# is used for the model and the data; str.format ignores the unused `pol`
# argument for templates that only contain the obsid placeholder.
fhd_file_templates = (
    "vis_data/{}_vis_{}.sav",
    "vis_data/{}_vis_model_{}.sav",
    "vis_data/{}_flags.sav",
    "metadata/{}_params.sav",
    "metadata/{}_settings.txt",
    "metadata/{}_layout.sav",
)
fhd_relative_files = [
    template.format(obsid, pol) for template in fhd_file_templates
]
model_filelist = [
    "{}/{}".format(model_path, relative_file)
    for relative_file in fhd_relative_files
]
data_filelist = [
    "{}/{}".format(data_path, relative_file)
    for relative_file in fhd_relative_files
]

# --- Read the model visibilities ---------------------------------------------
model = pyuvdata.UVData()
print("Reading model...")
model.read_fhd(model_filelist, use_model=model_use_model)

# For testing, keep only (up to) three times starting at the middle of the
# observation and ten frequency channels (indices 100-109).
all_times = np.unique(model.time_array)
first_time_ind = model.Ntimes // 2
use_times = all_times[first_time_ind:first_time_ind + 3]
use_frequencies = model.freq_array[0, 100:110]
model.select(times=use_times, frequencies=use_frequencies)

if not use_autos:
    # Autocorrelations are identified as baselines shorter than 0.01 (in the
    # units of uvw_array) and dropped.
    bl_lengths = np.sqrt(np.sum(model.uvw_array ** 2.0, axis=1))
    non_autos = np.where(bl_lengths > 0.01)[0]
    model.select(blt_inds=non_autos)

# --- Read the data visibilities, or reuse the model ---------------------------
if data_path == model_path and model_use_model == data_use_model:
    # Identical source and settings: no need to read the same files twice.
    print("Using model for data")
    data = model.copy()
else:
    data = pyuvdata.UVData()
    print("Reading data...")
    data.read_fhd(data_filelist, use_model=data_use_model)
    print("Done.")
    data.select(times=use_times, frequencies=use_frequencies)
    if not use_autos:
        bl_lengths = np.sqrt(np.sum(data.uvw_array ** 2.0, axis=1))
        non_autos = np.where(bl_lengths > 0.01)[0]
        data.select(blt_inds=non_autos)

# --- Put data and model in a common ordering ----------------------------------
# Reordering is only done when the baseline or frequency axes actually differ.
if np.max(np.abs(data.baseline_array - model.baseline_array)) > 0.0:
    data.reorder_blts()
    model.reorder_blts()
if np.max(np.abs(data.freq_array - model.freq_array)) > 0.0:
    data.reorder_freqs(channel_order="freq")
    model.reorder_freqs(channel_order="freq")

Reading model...


Telescope location derived from obs lat/lon/alt values does not match the location in the layout file. Using the value from known_telescopes.
tile_names from obs structure does not match antenna_names from layout


Using model for data


In [3]:
# Format visibilities as arrays indexed (time, baseline, frequency)
vis_shape = (data.Ntimes, data.Nbls, data.Nfreqs)
data_visibilities = np.zeros(vis_shape, dtype=complex)
model_visibilities = np.zeros(vis_shape, dtype=complex)
for time_ind, time_val in enumerate(np.unique(data.time_array)):
    # Work on copies so the full data/model objects stay intact, and give both
    # the same baseline and frequency ordering before extracting the arrays.
    data_copy = data.copy()
    model_copy = model.copy()
    for uv_copy in (data_copy, model_copy):
        uv_copy.select(times=time_val)
        uv_copy.reorder_blts()
        uv_copy.reorder_freqs(channel_order="freq")
    if time_ind == 0:
        # Metadata of the first time step defines the baseline/antenna layout
        # used for everything below.
        metadata_reference = data_copy.copy(metadata_only=True)
    # Axes 1 and 3 of data_array must have length one; np.squeeze raises
    # otherwise.
    model_visibilities[time_ind, :, :] = np.squeeze(
        model_copy.data_array, axis=(1, 3)
    )
    data_visibilities[time_ind, :, :] = np.squeeze(
        data_copy.data_array, axis=(1, 3)
    )

# Create gains expand matrices: row b has a single 1 in the column of the
# first (matrix 1) or second (matrix 2) antenna of baseline b, so that
# matmul(matrix, per-antenna values) yields per-baseline values.
expand_shape = (metadata_reference.Nbls, metadata_reference.Nants_data)
gains_exp_mat_1 = np.zeros(expand_shape, dtype=int)
gains_exp_mat_2 = np.zeros(expand_shape, dtype=int)
antenna_list = np.unique(
    [metadata_reference.ant_1_array, metadata_reference.ant_2_array]
)
for baseline in range(metadata_reference.Nbls):
    ant1_columns = np.where(
        antenna_list == metadata_reference.ant_1_array[baseline]
    )[0]
    ant2_columns = np.where(
        antenna_list == metadata_reference.ant_2_array[baseline]
    )[0]
    gains_exp_mat_1[baseline, ant1_columns] = 1
    gains_exp_mat_2[baseline, ant2_columns] = 1

# Define covariance matrix: one (Nfreqs, Nfreqs) identity per baseline
cov_mat = np.repeat(
    np.identity(metadata_reference.Nfreqs)[np.newaxis, :, :],
    metadata_reference.Nbls,
    axis=0,
)

In [4]:
def cost_function_dw_cal(
    x,
    Nants,
    Nfreqs,
    Nbls,
    model_visibilities,
    gains_exp_mat_1,
    gains_exp_mat_2,
    cov_mat,
    data_visibilities,
):
    """Evaluate the calibration cost for a flattened set of gains.

    The cost is Re(sum(conj(r) * (r @ cov_mat))) with residual
    r = data - g_ant1 * conj(g_ant2) * model, summed over every element.

    Parameters
    ----------
    x : array_like
        Real-valued gains; reshaped to (2, Nants, Nfreqs) where index 0 holds
        the real parts and index 1 the imaginary parts.
    Nants, Nfreqs : int
        Number of antennas and frequency channels encoded in ``x``.
    Nbls : int
        Not used in the computation; kept so that all dw_cal functions share
        one argument list.
    model_visibilities, data_visibilities : ndarray
        Visibilities, built in this notebook as (Ntimes, Nbls, Nfreqs).
    gains_exp_mat_1, gains_exp_mat_2 : ndarray
        (Nbls, Nants) matrices mapping per-antenna gains to the first and
        second antenna of each baseline.
    cov_mat : ndarray
        Per-baseline frequency weighting, (Nbls, Nfreqs, Nfreqs).

    Returns
    -------
    cost : float
        The real-valued cost. A progress message is printed on every call.
    """
    real_part, imag_part = np.reshape(x, (2, Nants, Nfreqs,))
    complex_gains = real_part + 1.0j * imag_part

    # Per-baseline gain product g_ant1 * conj(g_ant2)
    ant1_gains = np.matmul(gains_exp_mat_1, complex_gains)
    ant2_gains_conj = np.matmul(gains_exp_mat_2, np.conj(complex_gains))
    baseline_gains = ant1_gains * ant2_gains_conj

    residuals = (
        data_visibilities - baseline_gains[np.newaxis, :, :] * model_visibilities
    )
    # Row-vector times matrix along frequency. np.squeeze removes every
    # length-one axis, so the same squeeze is applied to the residuals below
    # to keep both factors aligned.
    weighted_residuals = np.squeeze(
        np.matmul(residuals[:, :, np.newaxis, :], cov_mat)
    )
    cost = np.real(np.sum(np.conj(np.squeeze(residuals)) * weighted_residuals))

    print("Cost func. eval.")

    return cost


def jac_dw_cal(
    x,
    Nants,
    Nfreqs,
    Nbls,
    model_visibilities,
    gains_exp_mat_1,
    gains_exp_mat_2,
    cov_mat,
    data_visibilities,
):
    """Gradient of the calibration cost with respect to the flattened gains.

    Parameters
    ----------
    x : array_like
        Real-valued gains; reshaped to (2, Nants, Nfreqs) where index 0 holds
        the real parts and index 1 the imaginary parts.
    Nants, Nfreqs : int
        Number of antennas and frequency channels encoded in ``x``.
    Nbls : int
        Not used in the computation; kept so that all dw_cal functions share
        one argument list.
    model_visibilities, data_visibilities : ndarray
        Visibilities with shape (Ntimes, Nbls, Nfreqs).
    gains_exp_mat_1, gains_exp_mat_2 : ndarray
        (Nbls, Nants) matrices mapping per-antenna gains to the first and
        second antenna of each baseline.
    cov_mat : ndarray
        Per-baseline frequency weighting, (Nbls, Nfreqs, Nfreqs).
        NOTE(review): the conjugated weighted residual used below matches the
        derivative of Re(sum(conj(r) * (r @ cov_mat))) only when every
        cov_mat[b] is Hermitian -- confirm this holds for all callers.

    Returns
    -------
    grad : ndarray
        Vector of length 2 * Nants * Nfreqs laid out like ``x``: derivatives
        with respect to the real parts first, then the imaginary parts.
    """
    gains = np.reshape(x, (2, Nants, Nfreqs))
    gains = gains[0] + 1.0j * gains[1]

    gains1_expanded = np.matmul(gains_exp_mat_1, gains)
    gains2_expanded = np.matmul(gains_exp_mat_2, gains)
    term1_part1 = gains1_expanded[np.newaxis, :, :] * model_visibilities
    term2_part1 = gains2_expanded[np.newaxis, :, :] * np.conj(model_visibilities)
    cost_term = (
        data_visibilities
        - (gains1_expanded * np.conj(gains2_expanded))[np.newaxis, :, :]
        * model_visibilities
    )
    # Row-vector times matrix along frequency. Only the helper axis inserted
    # for the matmul is removed: an axis-less squeeze would also drop a time,
    # baseline or frequency axis of length one and break the broadcasting
    # against the three-dimensional terms below.
    weighted_part2 = np.squeeze(
        np.matmul(cost_term[:, :, np.newaxis, :], cov_mat), axis=2
    )
    # Accumulate baseline contributions onto antennas and sum over time
    term1 = np.sum(
        np.matmul(gains_exp_mat_2.T, term1_part1 * np.conj(weighted_part2)), axis=0
    )
    term2 = np.sum(np.matmul(gains_exp_mat_1.T, term2_part1 * weighted_part2), axis=0)
    grad = -2 * (term1 + term2)

    # Real part holds d/d(Re g), imaginary part holds d/d(Im g)
    grad = np.stack((np.real(grad), np.imag(grad)), axis=0).flatten()

    return grad


def reformat_baselines_to_antenna_matrix(bl_array, gains_exp_mat_1, gains_exp_mat_2):
    """Scatter a baseline-indexed array into an antenna-by-antenna array.

    Parameters
    ----------
    bl_array : ndarray
        Array whose first axis runs over baselines.
    gains_exp_mat_1, gains_exp_mat_2 : ndarray
        (Nbls, Nants) matrices; multiplying each by arange(Nants) gives the
        index of the first and second antenna of every baseline.

    Returns
    -------
    antenna_matrix : ndarray
        Shape (Nants, Nants) + bl_array.shape[1:], same dtype as ``bl_array``.
        Entry [ant1, ant2] holds the values of the baseline formed by that
        antenna pair; pairs that do not occur are left at zero.
    """
    n_baselines, n_antennas = np.shape(gains_exp_mat_1)

    # Zero-filled output with one slot per ordered antenna pair
    per_baseline_shape = np.shape(bl_array[0,])
    antenna_matrix = np.zeros(
        (n_antennas, n_antennas) + per_baseline_shape, dtype=bl_array.dtype
    )

    # Antenna indices of each baseline, recovered from the expansion matrices
    antenna_index = np.arange(n_antennas)
    first_antennas = np.matmul(gains_exp_mat_1, antenna_index)
    second_antennas = np.matmul(gains_exp_mat_2, antenna_index)

    for bl_ind in range(n_baselines):
        row = first_antennas[bl_ind]
        col = second_antennas[bl_ind]
        antenna_matrix[row, col] = bl_array[bl_ind]
    return antenna_matrix


def hess_dw_cal(
    x,
    Nants,
    Nfreqs,
    Nbls,
    model_visibilities,
    gains_exp_mat_1,
    gains_exp_mat_2,
    cov_mat,
    data_visibilities,
):
    """Hessian of the calibration cost with respect to the flattened gains.

    The cost is Re(sum(conj(r) * (r @ cov_mat))) with residual
    r = data - g_ant1 * conj(g_ant2) * model. Writing G = d(cost)/d(conj(g)),
    the real Hessian is assembled from the two complex second-derivative
    arrays P = dG/dg and Q = dG/d(conj(g)):

        d2/dRe dRe = 2 Re(P + Q)      d2/dRe dIm = 2 Im(Q - P)
        d2/dIm dRe = 2 Im(P + Q)      d2/dIm dIm = 2 Re(P - Q)

    All contributions are included: the cross-antenna terms, the
    same-antenna (block-diagonal) terms, and off-diagonal frequency coupling
    from a non-diagonal ``cov_mat``.

    Parameters
    ----------
    x : array_like
        Real-valued gains; reshaped to (2, Nants, Nfreqs) where index 0 holds
        the real parts and index 1 the imaginary parts.
    Nants, Nfreqs : int
        Number of antennas and frequency channels encoded in ``x``.
    Nbls : int
        Not used in the computation; kept so that all dw_cal functions share
        one argument list.
    model_visibilities, data_visibilities : ndarray
        Visibilities with shape (Ntimes, Nbls, Nfreqs).
    gains_exp_mat_1, gains_exp_mat_2 : ndarray
        (Nbls, Nants) matrices with a single 1 per row marking the first and
        second antenna of each baseline.
    cov_mat : ndarray
        Per-baseline frequency weighting, (Nbls, Nfreqs, Nfreqs). Each
        cov_mat[b] is assumed to be Hermitian.

    Returns
    -------
    hess : ndarray
        (2 * Nants * Nfreqs, 2 * Nants * Nfreqs) real matrix. Rows and columns
        are ordered like ``x``: (real/imag, antenna, frequency).
    """
    gains = np.reshape(x, (2, Nants, Nfreqs))
    gains = gains[0] + 1.0j * gains[1]

    gains1_expanded = np.matmul(gains_exp_mat_1, gains)
    gains2_expanded = np.matmul(gains_exp_mat_2, gains)

    # Antenna index of the first/second antenna of every baseline
    antenna_numbers = np.arange(Nants)
    ant1_inds = np.matmul(gains_exp_mat_1, antenna_numbers).astype(int)
    ant2_inds = np.matmul(gains_exp_mat_2, antenna_numbers).astype(int)

    gains1_times_model = gains1_expanded[np.newaxis, :, :] * model_visibilities
    gains2_times_conj_model = gains2_expanded[np.newaxis, :, :] * np.conj(
        model_visibilities
    )

    # Residual and its covariance-weighted form, w[f] = sum_f' r[f'] cov[f', f]
    # (the same row-vector-times-matrix weighting as in the cost function).
    residual = (
        data_visibilities
        - (gains1_expanded * np.conj(gains2_expanded))[np.newaxis, :, :]
        * model_visibilities
    )
    weighted_residual = np.matmul(residual[:, :, np.newaxis, :], cov_mat)[:, :, 0, :]

    cov_transposed = np.transpose(cov_mat, (0, 2, 1))
    block_shape = (Nants, Nants, Nfreqs, Nfreqs)

    # Q[a, d, f0, f1] = d2 cost / d conj(g[a, f0]) d conj(g[d, f1]).
    # Only antenna pairs forming a baseline contribute.
    # cross[b, i, j] = sum_t (g1 m)[i] (g2 conj(m))[j] cov[b, i, j]
    cross = (
        np.einsum("tbi,tbj->bij", gains1_times_model, gains2_times_conj_model)
        * cov_mat
    )
    mixed_conj = np.zeros(block_shape, dtype=complex)
    # np.add.at accumulates (rather than overwrites) on repeated index pairs
    np.add.at(mixed_conj, (ant1_inds, ant2_inds), np.transpose(cross, (0, 2, 1)))
    np.add.at(mixed_conj, (ant2_inds, ant1_inds), cross)

    # P[a, d, f0, f1] = d2 cost / d conj(g[a, f0]) d g[d, f1].
    mixed = np.zeros(block_shape, dtype=complex)
    # (i) Cross-antenna part from the second derivative of the model term
    #     itself; non-zero only at equal frequencies.
    second_deriv = np.sum(np.conj(model_visibilities) * weighted_residual, axis=0)
    second_deriv = (
        second_deriv[:, :, np.newaxis] * np.identity(Nfreqs)[np.newaxis, :, :]
    )
    np.add.at(mixed, (ant1_inds, ant2_inds), -second_deriv)
    np.add.at(mixed, (ant2_inds, ant1_inds), -np.conj(second_deriv))
    # (ii) Same-antenna part: every baseline contributes to the diagonal
    #      antenna blocks of both of its antennas.
    same_ant1 = (
        np.einsum(
            "tbi,tbj->bij", gains2_times_conj_model, np.conj(gains2_times_conj_model)
        )
        * cov_transposed
    )
    same_ant2 = (
        np.einsum("tbi,tbj->bij", gains1_times_model, np.conj(gains1_times_model))
        * cov_mat
    )
    np.add.at(mixed, (ant1_inds, ant1_inds), same_ant1)
    np.add.at(mixed, (ant2_inds, ant2_inds), same_ant2)

    # Assemble the real Hessian; blocks are reordered from
    # (ant, ant, freq, freq) to (ant, freq, ant, freq) to match the layout of x.
    order = (0, 2, 1, 3)
    hess = np.zeros((2, Nants, Nfreqs, 2, Nants, Nfreqs), dtype=float)
    hess[0, :, :, 0, :, :] = np.transpose(2 * np.real(mixed + mixed_conj), order)
    hess[0, :, :, 1, :, :] = np.transpose(2 * np.imag(mixed_conj - mixed), order)
    hess[1, :, :, 0, :, :] = np.transpose(2 * np.imag(mixed + mixed_conj), order)
    hess[1, :, :, 1, :, :] = np.transpose(2 * np.real(mixed - mixed_conj), order)

    return hess.reshape(2 * Nants * Nfreqs, 2 * Nants * Nfreqs)

In [5]:
# Initialize gains: one complex gain per (antenna, frequency), scattered
# around 1 + 0j with Gaussian noise of width gain_init_noise in both the real
# and the imaginary part. The draws use the global np.random state, which is
# not seeded here, so the values differ from run to run.
gain_init_noise = 0.01
gains_shape = (metadata_reference.Nants_data, metadata_reference.Nfreqs)
gains_init_real = np.random.normal(1.0, gain_init_noise, size=gains_shape)
gains_init_imag = np.random.normal(0.0, gain_init_noise, size=gains_shape)
gains_init = gains_init_real + 1.0j * gains_init_imag

In [6]:
# Test gradient calculation, real part: a central finite difference of the
# cost in the real part of one gain is compared with the matching entry of
# jac_dw_cal. test_ant indexes the antenna axis of gains_init, so that axis
# must hold more than 100 antennas.
test_ant = 100
test_freq = 1
delta_gains = 0.0001

# Gains displaced by -/+ half a step in the real part of the tested element
gains0 = np.copy(gains_init)
gains0[test_ant, test_freq] -= delta_gains/2.
gains1 = np.copy(gains_init)
gains1[test_ant, test_freq] += delta_gains/2.

# Flatten to the real-valued (real parts, then imaginary parts) layout
gains_init_flattened, gains0_flattened, gains1_flattened = (
    np.stack((np.real(gain_set), np.imag(gain_set)), axis=0).flatten()
    for gain_set in (gains_init, gains0, gains1)
)

# Arguments that follow the gains vector in every dw_cal call
dw_cal_args = (
    metadata_reference.Nants_data,
    metadata_reference.Nfreqs,
    metadata_reference.Nbls,
    model_visibilities,
    gains_exp_mat_1,
    gains_exp_mat_2,
    cov_mat,
    data_visibilities,
)

cost_func0 = cost_function_dw_cal(gains0_flattened, *dw_cal_args)
cost_func1 = cost_function_dw_cal(gains1_flattened, *dw_cal_args)

grad = jac_dw_cal(gains_init_flattened, *dw_cal_args)
grad = np.reshape(grad, (2, metadata_reference.Nants_data, metadata_reference.Nfreqs))

# Finite-difference estimate, then the analytic value (index 0 = real part)
print((cost_func1-cost_func0)/delta_gains)
print(grad[0, test_ant, test_freq])

Cost func. eval.
Cost func. eval.
-431.9267143000616
-431.9267143026311


In [7]:
# Test gradient calculation, imaginary part: a central finite difference of
# the cost in the imaginary part of one gain is compared with the matching
# entry of jac_dw_cal. test_ant indexes the antenna axis of gains_init, so
# that axis must hold more than 100 antennas.
test_ant = 100
test_freq = 1
delta_gains = 0.0001

# Gains displaced by -/+ half a step in the imaginary part of the tested element
gains0 = np.copy(gains_init)
gains0[test_ant, test_freq] -= 1j*delta_gains/2.
gains1 = np.copy(gains_init)
gains1[test_ant, test_freq] += 1j*delta_gains/2.

# Flatten to the real-valued (real parts, then imaginary parts) layout
gains_init_flattened, gains0_flattened, gains1_flattened = (
    np.stack((np.real(gain_set), np.imag(gain_set)), axis=0).flatten()
    for gain_set in (gains_init, gains0, gains1)
)

# Arguments that follow the gains vector in every dw_cal call
dw_cal_args = (
    metadata_reference.Nants_data,
    metadata_reference.Nfreqs,
    metadata_reference.Nbls,
    model_visibilities,
    gains_exp_mat_1,
    gains_exp_mat_2,
    cov_mat,
    data_visibilities,
)

cost_func0 = cost_function_dw_cal(gains0_flattened, *dw_cal_args)
cost_func1 = cost_function_dw_cal(gains1_flattened, *dw_cal_args)

grad = jac_dw_cal(gains_init_flattened, *dw_cal_args)
grad = np.reshape(grad, (2, metadata_reference.Nants_data, metadata_reference.Nfreqs))

# Finite-difference estimate, then the analytic value (index 1 = imaginary part)
print((cost_func1-cost_func0)/delta_gains)
print(grad[1, test_ant, test_freq])

Cost func. eval.
Cost func. eval.
-368.55574893706944
-368.5557488726153


In [8]:
# Test Hessian calculation, real-real part: the real part of one gain
# (test_ant, test_freq) is perturbed and the resulting change of the
# real-part gradient at (readout_ant, readout_freq) is compared with the
# matching entry of hess_dw_cal.
test_ant = 100
test_freq = 1
readout_ant = 5
readout_freq = 1
delta_gains = 0.0001

# Gains displaced by -/+ half a step in the real part of the tested element
gains0 = np.copy(gains_init)
gains0[test_ant, test_freq] -= delta_gains/2.
gains1 = np.copy(gains_init)
gains1[test_ant, test_freq] += delta_gains/2.

# Flatten to the real-valued (real parts, then imaginary parts) layout
gains_init_flattened, gains0_flattened, gains1_flattened = (
    np.stack((np.real(gain_set), np.imag(gain_set)), axis=0).flatten()
    for gain_set in (gains_init, gains0, gains1)
)

# Arguments that follow the gains vector in every dw_cal call
dw_cal_args = (
    metadata_reference.Nants_data,
    metadata_reference.Nfreqs,
    metadata_reference.Nbls,
    model_visibilities,
    gains_exp_mat_1,
    gains_exp_mat_2,
    cov_mat,
    data_visibilities,
)
gains_shape_split = (2, metadata_reference.Nants_data, metadata_reference.Nfreqs)

grad0 = jac_dw_cal(gains0_flattened, *dw_cal_args)
grad1 = jac_dw_cal(gains1_flattened, *dw_cal_args)
grad0 = np.reshape(grad0, gains_shape_split)
grad1 = np.reshape(grad1, gains_shape_split)

hess = hess_dw_cal(gains_init_flattened, *dw_cal_args)
# Split both Hessian axes into (real/imag, antenna, frequency)
hess = np.reshape(hess, gains_shape_split + gains_shape_split)

# Finite-difference estimate, then the analytic value
print((grad1[0, readout_ant, readout_freq]-grad0[0, readout_ant, readout_freq])/delta_gains)
print(hess[0, test_ant, test_freq, 0, readout_ant, readout_freq])

2112.2418759205175
2112.2418759248203


In [9]:
# Test Hessian calculation, real-imaginary part: the real part of one gain
# (test_ant, test_freq) is perturbed and the resulting change of the
# imaginary-part gradient at (readout_ant, readout_freq) is compared with the
# matching entry of hess_dw_cal.
test_ant = 100
test_freq = 1
readout_ant = 5
readout_freq = 1
delta_gains = 0.0001

# Gains displaced by -/+ half a step in the real part of the tested element
gains0 = np.copy(gains_init)
gains0[test_ant, test_freq] -= delta_gains/2.
gains1 = np.copy(gains_init)
gains1[test_ant, test_freq] += delta_gains/2.

# Flatten to the real-valued (real parts, then imaginary parts) layout
gains_init_flattened, gains0_flattened, gains1_flattened = (
    np.stack((np.real(gain_set), np.imag(gain_set)), axis=0).flatten()
    for gain_set in (gains_init, gains0, gains1)
)

# Arguments that follow the gains vector in every dw_cal call
dw_cal_args = (
    metadata_reference.Nants_data,
    metadata_reference.Nfreqs,
    metadata_reference.Nbls,
    model_visibilities,
    gains_exp_mat_1,
    gains_exp_mat_2,
    cov_mat,
    data_visibilities,
)
gains_shape_split = (2, metadata_reference.Nants_data, metadata_reference.Nfreqs)

grad0 = jac_dw_cal(gains0_flattened, *dw_cal_args)
grad1 = jac_dw_cal(gains1_flattened, *dw_cal_args)
grad0 = np.reshape(grad0, gains_shape_split)
grad1 = np.reshape(grad1, gains_shape_split)

hess = hess_dw_cal(gains_init_flattened, *dw_cal_args)
# Split both Hessian axes into (real/imag, antenna, frequency)
hess = np.reshape(hess, gains_shape_split + gains_shape_split)

# Finite-difference estimate, then the analytic value
print((grad1[1, readout_ant, readout_freq]-grad0[1, readout_ant, readout_freq])/delta_gains)
print(hess[0, test_ant, test_freq, 1, readout_ant, readout_freq])

-39.046753281581914
-39.046753286453125


In [10]:
# Test Hessian calculation, imaginary-imaginary part: the imaginary part of
# one gain (test_ant, test_freq) is perturbed and the resulting change of the
# imaginary-part gradient at (readout_ant, readout_freq) is compared with the
# matching entry of hess_dw_cal.
test_ant = 100
test_freq = 1
readout_ant = 5
readout_freq = 1
delta_gains = 0.0001

# Gains displaced by -/+ half a step in the imaginary part of the tested element
gains0 = np.copy(gains_init)
gains0[test_ant, test_freq] -= 1j*delta_gains/2.
gains1 = np.copy(gains_init)
gains1[test_ant, test_freq] += 1j*delta_gains/2.

# Flatten to the real-valued (real parts, then imaginary parts) layout
gains_init_flattened, gains0_flattened, gains1_flattened = (
    np.stack((np.real(gain_set), np.imag(gain_set)), axis=0).flatten()
    for gain_set in (gains_init, gains0, gains1)
)

# Arguments that follow the gains vector in every dw_cal call
dw_cal_args = (
    metadata_reference.Nants_data,
    metadata_reference.Nfreqs,
    metadata_reference.Nbls,
    model_visibilities,
    gains_exp_mat_1,
    gains_exp_mat_2,
    cov_mat,
    data_visibilities,
)
gains_shape_split = (2, metadata_reference.Nants_data, metadata_reference.Nfreqs)

grad0 = jac_dw_cal(gains0_flattened, *dw_cal_args)
grad1 = jac_dw_cal(gains1_flattened, *dw_cal_args)
grad0 = np.reshape(grad0, gains_shape_split)
grad1 = np.reshape(grad1, gains_shape_split)

hess = hess_dw_cal(gains_init_flattened, *dw_cal_args)
# Split both Hessian axes into (real/imag, antenna, frequency)
hess = np.reshape(hess, gains_shape_split + gains_shape_split)

# Finite-difference estimate, then the analytic value
print((grad1[1, readout_ant, readout_freq]-grad0[1, readout_ant, readout_freq])/delta_gains)
print(hess[1, test_ant, test_freq, 1, readout_ant, readout_freq])

-2138.332124968656
-2138.3321249661176


NameError: name 'freq_array' is not defined