In [12]:
import numpy as np
import pyuvdata
import sys
sys.path.append("/opt/devel/rbyrne/rlb_LWA/LWA_data_preprocessing")
import LWA_preprocessing
import os
from calico import (
    calibration_wrappers,
    calibration_optimization,
    cost_function_calculations,
    calibration_qa,
    caldata,
)
import importlib
# Re-execute each calico submodule in place so the names imported above refer to
# the current on-disk source without restarting the kernel. The value of the last
# call is what the notebook displays as this cell's output.
importlib.reload(calibration_wrappers)
importlib.reload(calibration_optimization)
importlib.reload(cost_function_calculations)
importlib.reload(calibration_qa)
importlib.reload(caldata)


<module 'calico.caldata' from '/opt/devel/rbyrne/envs/py310/lib/python3.10/site-packages/calico/caldata.py'>

In [13]:
# Frequency band label; interpolated into the measurement-set file name.
freq_band = "41"

# Numbers of the antennas to flag. Each one is expanded below to the
# "LWA" + zero-padded three-digit form (e.g. 9 -> "LWA009").
_flagged_antenna_numbers = (
    9, 41, 44, 52, 58, 76, 95,
    105, 111, 120, 124, 138, 150, 159, 191,
    204, 208, 209, 232, 234, 255, 267, 280, 288, 292,
    302, 307, 309, 310, 314, 325, 341, 352, 364, 365,
)
flag_ants = [f"LWA{ant_number:03d}" for ant_number in _flagged_antenna_numbers]

# Load the measured visibilities and the matching model visibilities, reduce both
# to a single frequency channel, and expose them as `data` and `model`.
data_filepath = f"/lustre/rbyrne/2024-03-03/20240303_093000-093151_{freq_band}MHz.ms"  # Created with concatenate_ms_files.py

# Convert to uvdata object
uv = pyuvdata.UVData()
print(f"Reading file {data_filepath}.")
uv.read(data_filepath, data_column="DATA")
# Keep only the lowest-frequency channel to make the debugging runs small
uv.select(frequencies = np.min(uv.freq_array))  # Added for debugging
# Recompute uvw_array from the antenna positions; update_vis=False leaves the
# visibility phases untouched
uv.set_uvws_from_antenna_positions(update_vis=False)
# Complex-conjugate the visibilities
# NOTE(review): presumably this matches the model's conjugation convention -- confirm
uv.data_array = np.conj(uv.data_array)
# Phase to the mean of the observation times
uv.phase_to_time(np.mean(uv.time_array))

# Flag antennas
# inplace=True: `uv` itself is modified rather than a copy being returned
LWA_preprocessing.flag_antennas(
    uv,
    antenna_names=flag_ants,
    flag_pol="all",  # Options are "all", "X", "Y", "XX", "YY", "XY", or "YX"
    inplace=True,
)

# Model file name is derived from the data file name. Note that str.replace
# substitutes every occurrence of ".ms" in the path, not only the extension.
model_file_name = data_filepath.replace(".ms", "_model.ms")
combined_model_uv = pyuvdata.UVData()
print(f"Reading file {model_file_name}.")
combined_model_uv.read(model_file_name)
# Select the same single channel as the data (the frequency is taken from `uv`)
combined_model_uv.select(frequencies = np.min(uv.freq_array))  # Added for debugging

# Aliases used by the cells below; these are the same objects, not copies
data = uv
model = combined_model_uv

Reading file /lustre/rbyrne/2024-03-03/20240303_093000-093151_41MHz.ms.


The uvw_array does not match the expected values given the antenna positions. The largest discrepancy is 4024.7789143058576 meters. This is a fairly common situation but might indicate an error in the antenna positions, the uvws or the phasing.
The uvw_array does not match the expected values given the antenna positions. The largest discrepancy is 4024.7789143058576 meters. This is a fairly common situation but might indicate an error in the antenna positions, the uvws or the phasing.
Recalculating uvw_array without adjusting visibility phases -- this can introduce significant errors if used incorrectly.


Reading file /lustre/rbyrne/2024-03-03/20240303_093000-093151_41MHz_model.ms.


The uvw_array does not match the expected values given the antenna positions. The largest discrepancy is 1.9095988568870794 meters. This is a fairly common situation but might indicate an error in the antenna positions, the uvws or the phasing.
The uvw_array does not match the expected values given the antenna positions. The largest discrepancy is 1.9095988568870794 meters. This is a fairly common situation but might indicate an error in the antenna positions, the uvws or the phasing.


In [14]:
# Save the single-frequency data and model as UVFITS files inside the calico
# package's data directory; a later cell reads these two files back in.
data.write_uvfits("/opt/devel/rbyrne/calico/calico/data/ovro-lwa_data_1freq.uvfits")
model.write_uvfits("/opt/devel/rbyrne/calico/calico/data/ovro-lwa_model_1freq.uvfits")

antnums_to_baseline: found antenna numbers > 255, using 2048 baseline indexing.
Found antenna numbers > 255 in this data set. This is permitted by UVFITS standards, but may cause the `importuvfits` utility within CASA to crash. If attempting to use this data set in CASA, consider using the measurement set writer method (`write_ms`) instead.
The uvw_array does not match the expected values given the antenna positions. The largest discrepancy is 1.9095988568870794 meters. This is a fairly common situation but might indicate an error in the antenna positions, the uvws or the phasing.
antnums_to_baseline: found antenna numbers > 255, using 2048 baseline indexing.
Found antenna numbers > 255 in this data set. This is permitted by UVFITS standards, but may cause the `importuvfits` utility within CASA to crash. If attempting to use this data set in CASA, consider using the measurement set writer method (`write_ms`) instead.


In [15]:
# Calibration settings. Only the baseline limits, gains_multiply_model and
# lambda_val are passed to load_data below; the remaining names are not used
# anywhere else in this cell.
min_cal_baseline_lambda = 10
max_cal_baseline_lambda = 125
gains_multiply_model = False
verbose = True
get_crosspol_phase = False
log_file_path = None
xtol = 1e-4
maxiter = 5
antenna_flagging_iterations = 0
parallel = False
lambda_val = 100

# Ensure data and model are phased the same: both are phased to the mean of the
# data's time_array
data.phase_to_time(np.mean(data.time_array))
model.phase_to_time(np.mean(data.time_array))

# Keyword options forwarded to CalData.load_data
load_data_options = dict(
    gain_init_calfile=None,
    gain_init_to_vis_ratio=True,
    gains_multiply_model=gains_multiply_model,
    gain_init_stddev=0,
    N_feed_pols=2,
    feed_polarization_array=None,
    min_cal_baseline_m=None,
    max_cal_baseline_m=None,
    min_cal_baseline_lambda=min_cal_baseline_lambda,
    max_cal_baseline_lambda=max_cal_baseline_lambda,
    lambda_val=lambda_val,
)

caldata_obj = caldata.CalData()
caldata_obj.load_data(data, model, **load_data_options)

The entry name zenith_at_jd2460372.896482 is not unique inside the phase center catalog, adding anyways.
divide by zero encountered in divide


In [16]:
# Evaluate the calibration cost at the initial gains for a single frequency and
# a single polarization.
freq_ind = 0
feed_pol_ind = 0
feed_pol = -5  # NOTE(review): presumably the pyuvdata polarization code for XX -- confirm

# Position(s) of the requested polarization in the visibility polarization axis
vis_pol_ind = np.where(caldata_obj.vis_polarization_array == feed_pol)[0]

# Sum over times
vis_weights_summed = caldata_obj.visibility_weights[
    :, :, freq_ind, feed_pol_ind
].sum(axis=0)

# Total weight per antenna: each baseline contributes to both of its antennas
weight_from_ant1 = np.bincount(
    caldata_obj.ant1_inds, weights=vis_weights_summed, minlength=caldata_obj.Nants
)
weight_from_ant2 = np.bincount(
    caldata_obj.ant2_inds, weights=vis_weights_summed, minlength=caldata_obj.Nants
)
weight_per_ant = weight_from_ant1 + weight_from_ant2

# Only antennas that carry nonzero weight are kept as parameters
ant_inds = np.flatnonzero(weight_per_ant > 0.0)

# Flatten the complex gains to a real vector laid out as
# [re_0, im_0, re_1, im_1, ...]
selected_gains = caldata_obj.gains[ant_inds, freq_ind, feed_pol_ind]
gains_init_flattened = np.stack(
    (np.real(selected_gains), np.imag(selected_gains)), axis=1
).flatten()

cost = calibration_optimization.cost_skycal_wrapper(
    gains_init_flattened, caldata_obj, ant_inds, freq_ind, vis_pol_ind
)
print(cost)

903851196927.4274


In [17]:
len(gains_init_flattened)

618

In [25]:
# Finite-difference check of the analytic Jacobian.
#
# For every flattened gain parameter, the cost is evaluated at +/- delta_gain/2
# around the initial gains, and the resulting central-difference slope is
# compared with the matching element of jacobian_skycal_wrapper's output.
# np.testing.assert_allclose raises AssertionError on the first mismatch.

# Central-difference step size; it is the same for every parameter
delta_gain = 1e-4

# The analytic Jacobian is evaluated at the unperturbed gains and does not depend
# on which parameter is perturbed, so compute it once instead of per iteration
jac = calibration_optimization.jacobian_skycal_wrapper(
    gains_init_flattened,
    caldata_obj,
    ant_inds,
    freq_ind,
    vis_pol_ind,
)

for use_ind in range(len(gains_init_flattened)):
    # Perturb only parameter use_ind, symmetrically about the initial value
    gains_init_0 = np.copy(gains_init_flattened)
    gains_init_1 = np.copy(gains_init_flattened)
    gains_init_0[use_ind] -= delta_gain/2
    gains_init_1[use_ind] += delta_gain/2
    cost0 = calibration_optimization.cost_skycal_wrapper(
        gains_init_0,
        caldata_obj,
        ant_inds,
        freq_ind,
        vis_pol_ind,
    )
    cost1 = calibration_optimization.cost_skycal_wrapper(
        gains_init_1,
        caldata_obj,
        ant_inds,
        freq_ind,
        vis_pol_ind,
    )
    approx_jac = (cost1-cost0)/delta_gain
    print(approx_jac)
    print(jac[use_ind])
    print("")
    # assert_allclose returns None on success, so there is nothing to print
    np.testing.assert_allclose(approx_jac, jac[use_ind], rtol=1e-3)

32823540.0390625
32823540.833335333

None
-13218717.041015625
-13218715.858841492

None
9440179.443359375
9440179.295155535

None
-2073081.0546875
-2073080.715417299

None
39891744.384765625
39891744.03045926

None
-6586734.619140625
-6586735.558741173

None
17094584.9609375
17094588.120795786

None
-10041964.111328125
-10041965.670986416

None
45676214.599609375
45676214.32518894

None
6218745.1171875
6218743.948383548

None
27130329.58984375
27130331.386211023

None
-4156705.322265625
-4156705.084506588

None
9904241.943359375
9904242.961076645

None
5929150.390625
5929148.963023839

None
20974515.380859375
20974516.073714983

None
12216668.701171875
12216668.237807617

None
39740153.80859375
39740153.70453465

None
412786.865234375
412787.640378527

None
25447192.3828125
25447190.81645989

None
-14675634.765625
-14675633.918358952

None
16992136.23046875
16992135.760584425

None
-14525084.228515625
-14525085.365598662

None
25001236.572265625
25001235.751264367

None
7167048.3398437

In [None]:
# Finite-difference check of one column of the analytic Hessian: the Jacobian is
# evaluated at +/- delta_gain/2 along flattened parameter `use_ind`, and the
# central difference is printed next to hess[:, use_ind].
use_ind = 200
delta_gain = 1e-4

# Positional arguments shared by the Jacobian and Hessian wrappers
wrapper_args = (caldata_obj, ant_inds, freq_ind, vis_pol_ind)

gains_init_0 = gains_init_flattened.copy()
gains_init_0[use_ind] = gains_init_0[use_ind] - delta_gain/2
gains_init_1 = gains_init_flattened.copy()
gains_init_1[use_ind] = gains_init_1[use_ind] + delta_gain/2

jac0 = calibration_optimization.jacobian_skycal_wrapper(gains_init_0, *wrapper_args)
jac1 = calibration_optimization.jacobian_skycal_wrapper(gains_init_1, *wrapper_args)
approx_hess = (jac1-jac0)/delta_gain

# Analytic Hessian at the unperturbed gains
hess = calibration_optimization.hessian_skycal_wrapper(
    gains_init_flattened, *wrapper_args
)

# Approximate value first, analytic value second, then a blank separator line
for ind, approx_value in enumerate(approx_hess):
    print(approx_value)
    print(hess[ind, use_ind])
    print("")


951.0701894760132
951.0701879250157

-407.10337460041046
-407.10339180999324

0.0
0.0

0.0
0.0

2081.068381667137
2081.0683393471863

-718.2207610458136
-718.2207564343321

1416.071318089962
1416.0713159133088

-749.1075247526169
-749.1075386483953

1487.7060055732727
1487.7060265234975

462.8576524555683
462.8576551422259

1786.2337455153465
1786.2337481850225

-948.3838360756636
-948.3838394218816

599.0470573306084
599.0470525906511

579.658281058073
579.6582834755134

391.5177658200264
391.51775250534604

-981.392040848732
-981.3920622012439

2068.464607000351
2068.4646178046423

4.100073128938675
4.100077121475238

2159.56661850214
2159.5666093101845

-816.0276710987091
-816.0276785030069

-55.13288080692291
-55.132869518056395

311.10212206840515
311.1021407947062

2806.2883764505386
2806.2883991043673

-65.87658077478409
-65.87659260945293

3330.8840915560722
3330.884101007003

2787.293903529644
2787.2939255404062

3394.986540079117
3394.986523425833

1015.0177218019962
1015.017

In [20]:
# Finite-difference check of the real-imaginary block of the analytic Hessian,
# calling cost_function_calculations directly instead of going through the
# wrappers.
#
# The gain of antenna `use_ind` is perturbed by +/- delta_gain/2 along the
# imaginary axis; the real part of the resulting Jacobian difference is printed
# next to hess_real_imag[:, use_ind].

# Rebuild the complex per-antenna gain vector from the flattened
# [re_0, im_0, re_1, im_1, ...] parameters. Antennas not listed in ant_inds keep
# a gain of 1.
Nants_unflagged = len(ant_inds)
gains_reshaped = np.reshape(gains_init_flattened, (Nants_unflagged, 2))
gains_reshaped = gains_reshaped[:, 0] + 1.0j * gains_reshaped[:, 1]
gains = np.ones((caldata_obj.Nants), dtype=complex)
gains[ant_inds] = gains_reshaped

use_ind = 100
# Define the step here rather than relying on a value left over from another
# cell; another cell in this notebook sets delta_gain to a different value.
delta_gain = 1e-4
gains0 = np.copy(gains)
gains1 = np.copy(gains)
gains0[use_ind] -= 1j*delta_gain/2
gains1[use_ind] += 1j*delta_gain/2

# The visibility arrays for the selected frequency and polarization are the same
# for all three calls below, so reshape them to (Ntimes, Nbls) once.
vis_shape = (caldata_obj.Ntimes, caldata_obj.Nbls)
model_vis = np.reshape(
    caldata_obj.model_visibilities[:, :, freq_ind, vis_pol_ind], vis_shape
)
data_vis = np.reshape(
    caldata_obj.data_visibilities[:, :, freq_ind, vis_pol_ind], vis_shape
)
vis_weights = np.reshape(
    caldata_obj.visibility_weights[:, :, freq_ind, vis_pol_ind], vis_shape
)
# Trailing positional arguments common to jacobian_skycal and hessian_skycal
skycal_args = (
    model_vis,
    data_vis,
    vis_weights,
    caldata_obj.ant1_inds,
    caldata_obj.ant2_inds,
    caldata_obj.lambda_val,
)

jac0 = cost_function_calculations.jacobian_skycal(gains0, *skycal_args)
jac1 = cost_function_calculations.jacobian_skycal(gains1, *skycal_args)
approx_hess = (jac1-jac0)/delta_gain

(
    hess_real_real,
    hess_real_imag,
    hess_imag_imag,
) = cost_function_calculations.hessian_skycal(
    gains,
    caldata_obj.Nants,
    caldata_obj.Nbls,
    *skycal_args,
)

# Approximate value first, analytic value second, then a blank separator line
for ind in range(len(approx_hess)):
    print(np.real(approx_hess[ind]))
    print(hess_real_imag[ind, use_ind])
    print("")

555.9375137090683
555.9374944634518

0.0
0.0

-1541.769653558731
-1541.7696857166347

0.0
0.0

902.3316949605942
902.3317092749708

-5564.789101481438
-5564.78904972631

49.76235330104828
49.76233050999679

0.0
0.0

-313.09690326452255
-313.0969220683912

-2644.398957490921
-2644.398817907041

-703.3253461122513
-703.3253283406136

898.1026709079742
898.1026713384244

0.0
0.0

-29.020532965660095
-29.02052926644808

0.0
0.0

680.0198182463646
680.019824181353

-303.1868487596512
-303.1868610330117

0.0
0.0

-2125.3741160035133
-2125.374112743629

-1064.5154118537903
-1064.5153982384252

0.0
0.0

0.0
0.0

1355.3745113313198
1355.3745109251959

0.0
0.0

-294.174887239933
-294.1748692393223

-582.026019692421
-582.026020089563

-6.414912641048431
-6.414921442694563

446.6921929270029
446.692196450296

0.0
0.0

0.0
0.0

-761.6849988698959
-761.6850182942483

0.0
0.0

-894.7416394948959
-894.741651479218

0.0
0.0

1021.4522480964661
1021.452265074948

0.0
0.0

-419.1574454307556
-419.157421

In [21]:
# Sanity check of the flattening layout: placing two equal-length sequences side
# by side as columns and flattening row by row interleaves them element by element.
np.column_stack(([1, 1, 1], [2, 2, 2])).ravel()

array([1, 2, 1, 2, 1, 2])

In [None]:
# Read back the single-frequency UVFITS fixtures. This rebinds `data` and `model`
# to freshly read objects, replacing whatever those names referred to before.
# The paths are plain string literals: they contain no placeholders, so no
# f-string prefix is needed.
data = pyuvdata.UVData()
data.read("/opt/devel/rbyrne/calico/calico/data/ovro-lwa_data_1freq.uvfits")  # Use data with fully flagged antennas
model = pyuvdata.UVData()
model.read("/opt/devel/rbyrne/calico/calico/data/ovro-lwa_model_1freq.uvfits")

In [41]:
# Gradient check with randomized gains: compare a central-difference estimate of
# d(cost)/d(Re gain) for one antenna against the real part of the matching
# element returned by jacobian_skycal.
test_ant_ind, test_freq_ind, test_pol_ind = 10, 0, 0
delta_gain = 1e-2
lambda_val = 0  # Don't test regularization
gain_stddev = 0.1

caldata_obj = caldata.CalData()
caldata_obj.load_data(data, model, lambda_val=lambda_val)

# Random complex gains scattered around 1 + 0j.
# NOTE(review): the generator is reseeded with the same seed before each draw, so
# the imaginary offsets presumably equal the real offsets -- confirm this is intended.
gains_shape = (caldata_obj.Nants, caldata_obj.Nfreqs)
np.random.seed(0)
gains_init_real = np.random.normal(1.0, gain_stddev, size=gains_shape)
np.random.seed(0)
gains_init_imag = 1.0j * np.random.normal(0.0, gain_stddev, size=gains_shape)
gains_init = gains_init_real + gains_init_imag

# Arguments that are the same for every cost/Jacobian evaluation below
skycal_args = (
    caldata_obj.model_visibilities[:, :, test_freq_ind, test_pol_ind],
    caldata_obj.data_visibilities[:, :, test_freq_ind, test_pol_ind],
    caldata_obj.visibility_weights[:, :, test_freq_ind, test_pol_ind],
    caldata_obj.ant1_inds,
    caldata_obj.ant2_inds,
    caldata_obj.lambda_val,
)

# Perturb the test antenna's gain by -/+ delta_gain/2 along the real axis
gains_init0 = gains_init[:, test_freq_ind].copy()
gains_init0[test_ant_ind] = gains_init0[test_ant_ind] - delta_gain / 2
gains_init1 = gains_init[:, test_freq_ind].copy()
gains_init1[test_ant_ind] = gains_init1[test_ant_ind] + delta_gain / 2

cost0 = cost_function_calculations.cost_skycal(gains_init0, *skycal_args)
cost1 = cost_function_calculations.cost_skycal(gains_init1, *skycal_args)
jac = cost_function_calculations.jacobian_skycal(
    gains_init[:, test_freq_ind], *skycal_args
)

grad_approx = (cost1 - cost0) / delta_gain
jac_value = np.real(jac[test_ant_ind])
print(f"Gradient approximation value: {grad_approx}")
print(f"Jacobian value: {jac_value}")

The uvw_array does not match the expected values given the antenna positions. The largest discrepancy is 1.9095992243307833 meters. This is a fairly common situation but might indicate an error in the antenna positions, the uvws or the phasing.
The uvw_array does not match the expected values given the antenna positions. The largest discrepancy is 1.9095992243307833 meters. This is a fairly common situation but might indicate an error in the antenna positions, the uvws or the phasing.
The uvw_array does not match the expected values given the antenna positions. The largest discrepancy is 1.9090858720540052 meters. This is a fairly common situation but might indicate an error in the antenna positions, the uvws or the phasing.
The uvw_array does not match the expected values given the antenna positions. The largest discrepancy is 1.9090858720540052 meters. This is a fairly common situation but might indicate an error in the antenna positions, the uvws or the phasing.
The uvw_array does n

Gradient approximation value: 2123.92578125
Jacobian value: 2123.989589503435


divide by zero encountered in divide


In [38]:
# Show the two perturbed cost values, one per line
print(cost0, cost1, sep="\n")

3370154986101.6914
3370154988225.681


In [32]:
# Total visibility weight attached to each antenna for the frequency and
# polarization used by the gradient test. The test_* indices are used here so
# that this cell does not depend on freq_ind / feed_pol_ind left over from an
# unrelated earlier cell.
vis_weights_summed = np.sum(
    caldata_obj.visibility_weights[:, :, test_freq_ind, test_pol_ind], axis=0
)  # Sum over times
# Each baseline's weight is counted once for each of its two antennas
weight_per_ant = np.bincount(
    caldata_obj.ant1_inds,
    weights=vis_weights_summed,
    minlength=caldata_obj.Nants,
) + np.bincount(
    caldata_obj.ant2_inds,
    weights=vis_weights_summed,
    minlength=caldata_obj.Nants,
)

In [36]:
# Summed weight of the antenna whose gain is perturbed in the gradient test
test_ant_weight = weight_per_ant[test_ant_ind]
print(test_ant_weight)

3694.0
