In [1]:
import os
import glob
import csv
import datetime
from pathlib import Path
from multiprocess import Pool, cpu_count

import numpy as np
from scipy import stats
from scipy.stats import chi2
from scipy.optimize import curve_fit
from scipy.interpolate import splrep, splev
from tqdm.rich import tqdm

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

from astropy.io import fits
from astropy.wcs import WCS  # may be used for future extensions
from sherpa.astro.ui import *  # Sherpa UI shortcuts
from ciao_contrib.runtool import *  # CIAO tools

In [2]:
def refined_centroid(x, y, x0, y0,
                     iterations=3,
                     size_list=None,
                     binsize_list=None,
                     pixel_scale=0.1318,
                     mask_arcsec=None,
                     min_points=20):
    """
    Iteratively refine centroid by Gaussian-fitting 1D histograms.
    """
    # defaults for window sizes and bins
    span = max(np.ptp(x), np.ptp(y)) or 1.0
    if size_list is None:
        size_list = [span * 0.5**i for i in range(iterations)]
    if binsize_list is None:
        binsize_list = [w / 50.0 for w in size_list]

    def _gauss(z, A, mu, sig, off):
        return A * np.exp(-0.5 * ((z - mu)/sig)**2) + off

    cx, cy = x0, y0
    sig_cx = sig_cy = 0.0

    for size, binw in zip(size_list, binsize_list):
        # rectangular mask
        mask = (np.abs(x - cx) < size) & (np.abs(y - cy) < size)
        # optional radial mask
        if mask_arcsec is not None:
            r = np.hypot(x - cx, y - cy) * pixel_scale
            mask &= (r < mask_arcsec)

        xi, yi = x[mask], y[mask]
        if xi.size < min_points:
            break

        # fit X projection
        bins_x = np.arange(cx - size, cx + size + binw, binw)
        hist_x, _ = np.histogram(xi, bins=bins_x)
        centers_x = 0.5 * (bins_x[:-1] + bins_x[1:])
        p0x = [hist_x.max(), centers_x[hist_x.argmax()], binw, np.median(hist_x)]
        bounds_x = ([0, centers_x.min(), binw/10, 0],
                    [np.inf, centers_x.max(), size, hist_x.max()*2])
        try:
            popx, covx = curve_fit(_gauss, centers_x, hist_x, p0=p0x,
                                   bounds=bounds_x, maxfev=10000)
            cx, sig_cx = popx[1], np.sqrt(covx[1, 1])
        except Exception:
            cx, sig_cx = np.median(xi), 0.0

        # fit Y projection
        bins_y = np.arange(cy - size, cy + size + binw, binw)
        hist_y, _ = np.histogram(yi, bins=bins_y)
        centers_y = 0.5 * (bins_y[:-1] + bins_y[1:])
        p0y = [hist_y.max(), centers_y[hist_y.argmax()], binw, np.median(hist_y)]
        bounds_y = ([0, centers_y.min(), binw/10, 0],
                    [np.inf, centers_y.max(), size, hist_y.max()*2])
        try:
            popy, covy = curve_fit(_gauss, centers_y, hist_y, p0=p0y,
                                   bounds=bounds_y, maxfev=10000)
            cy, sig_cy = popy[1], np.sqrt(covy[1, 1])
        except Exception:
            cy, sig_cy = np.median(yi), 0.0

    return cx, cy, sig_cx, sig_cy


def load_core_events(path, r_arcsec=20.):
    """Load events within r_arcsec of the median position."""
    data = fits.getdata(path, 1)
    t, x, y = data['time'], data['x'], data['y']
    t -= t.min()
    x0, y0 = np.median(x), np.median(y)
    mask = np.hypot(x - x0, y - y0) * PIXEL_SCALE < r_arcsec
    return t[mask], x[mask], y[mask]


def per_bin_centroids(x, y, edges, x0, y0):
    """Compute centroids for each time bin."""
    cx = np.full(len(edges)-1, np.nan)
    cy = np.full(len(edges)-1, np.nan)
    for i, (lo, hi) in enumerate(zip(edges[:-1], edges[1:])):
        if hi - lo < 5:
            continue
        cx[i], cy[i], *_ = refined_centroid(
            x[lo:hi], y[lo:hi], x0, y0,
            iterations=3,
            pixel_scale=PIXEL_SCALE,
            mask_arcsec=20.0
        )
    return cx, cy


def _boot(args):
    """Helper for bootstrap centroiding."""
    x, y, x0, y0, seed = args
    np.random.seed(seed)
    idx = np.random.randint(0, len(x), len(x))
    return refined_centroid(x[idx], y[idx], x0, y0,
                            iterations=3, pixel_scale=PIXEL_SCALE,
                            mask_arcsec=20.0)[:2]


def bootstrap_sigmas(segments, nboot=200):
    """Bootstrap σ_x, σ_y for each segment."""
    seeds = np.random.randint(0, 2**32-1, (len(segments), nboot))
    jobs = [(x, y, x0, y0, s)
            for (x, y, x0, y0), srow in zip(segments, seeds)
            for s in srow if len(x)]
    with Pool(cpu_count()) as pool:
        results = list(tqdm(pool.imap_unordered(_boot, jobs), total=len(jobs), desc='Boot'))
    sig = []
    k = 0
    for seg in segments:
        if len(seg[0]):
            cx_vals = [r[0] for r in results[k:k+nboot]]
            cy_vals = [r[1] for r in results[k:k+nboot]]
            sig.append((np.std(cx_vals), np.std(cy_vals)))
            k += nboot
        else:
            sig.append((np.nan, np.nan))
    return sig[1:]  # skip global segment

In [7]:
DATA_DIR = '/Users/leodrake/Documents/MIT/ss433/HRC_2024/'  # base data folder
PIXEL_SCALE = 0.1318  # arcsec per pixel
MODE = 'obs'  # 'obs' or 'psf'
N_GROUPS = 100  # number of time bins
SMOOTH = 99  # spline smoothing parameter
RECOMPUTE = True  # force re-run of centroiding

os.chdir(DATA_DIR)
if MODE == 'obs':
    filelist = sorted(glob.glob('*/repro/*_repro_evt2.fits'))
    tag, plot_tag = '', ''
else:
    filelist = sorted(glob.glob('*/raytrace_projrays.fits'))
    tag, plot_tag = '-psf', 'Simulated PSF'

pdf_name = f'Jitter/hrc{tag}-jitter-{N_GROUPS}bins-raw.pdf'
tsv_name = f'Jitter/chisq{tag}-jitter-{N_GROUPS}bins-raw.tsv'

results_log = []
with PdfPages(pdf_name) as pdf:
    for f in filelist:
        obsid = Path(f).parts[-3] if MODE == 'obs' else Path(f).parts[-2]
        cache = Path('Jitter/cache')
        cache.mkdir(parents=True, exist_ok=True)
        cache_file = cache / f'{obsid}_{MODE}_{N_GROUPS}.npz'

        t, x, y = load_core_events(f)
        idx_sorted = np.argsort(t)
        t, x, y = t[idx_sorted], x[idx_sorted], y[idx_sorted]

        N = len(x)
        cts_bin = N // N_GROUPS
        edges = np.linspace(0, N, N_GROUPS + 1, dtype=int)

        print(f'Processing {obsid}: N={N}, per-bin={cts_bin}')

        if RECOMPUTE or not cache_file.exists():
            # global centroid
            x0, y0, *_ = refined_centroid(x, y, np.median(x), np.median(y),
                                         iterations=3, pixel_scale=PIXEL_SCALE, mask_arcsec=20.0)
            # per-bin centroids
            cx, cy = per_bin_centroids(x, y, edges, x0, y0)
            # bootstrap errors
            segments = [(x, y, x0, y0)] + [(x[lo:hi], y[lo:hi], x0, y0) for lo, hi in zip(edges[:-1], edges[1:])]
            sigx_px, sigy_px = zip(*bootstrap_sigmas(segments))
            sigx = np.array(sigx_px) * PIXEL_SCALE
            sigy = np.array(sigy_px) * PIXEL_SCALE

            # offsets and errors
            dX = (cx - x0) * PIXEL_SCALE
            dY = (cy - y0) * PIXEL_SCALE
            dR = np.hypot(dX, dY)
            sigR = np.hypot(sigx * np.cos(np.arctan2(dY, dX)),
                            sigy * np.sin(np.arctan2(dY, dX)))
            t_mid = np.array([t[lo:hi].mean() if hi-lo else np.nan for lo, hi in zip(edges[:-1], edges[1:])])

            np.savez_compressed(cache_file, obsid=obsid, N=N, cts_bin=cts_bin,
                                t=t_mid, dX=dX, dY=dY, dR=dR,
                                sigx=sigx, sigy=sigy, sigR=sigR)
        else:
            d = np.load(cache_file)
            N, cts_bin = int(d['N']), int(d['cts_bin'])
            t_mid = d['t']
            dX, dY, dR = d['dX'], d['dY'], d['dR']
            sigx, sigy, sigR = d['sigx'], d['sigy'], d['sigR']

        # spline fit and statistics
        trendX = splev(t_mid, splrep(t_mid, dX, w=1/sigx, s=SMOOTH, k=3))
        trendY = splev(t_mid, splrep(t_mid, dY, w=1/sigy, s=SMOOTH, k=3))
        chisq_spline = (((dX - trendX)/sigx)**2).sum() + (((dY - trendY)/sigy)**2).sum()
        m = len(t_mid)
        chi_norm = chisq_spline / m
        chisq_raw = ((dX/sigx)**2).sum() + ((dY/sigy)**2).sum()
        dof = 2*m - 2
        pval = 1 - chi2.cdf(chisq_raw, dof)
        rms = np.sqrt((dR**2).mean())
        print(f'χ²/dof={chisq_raw:.0f}/{dof}, p={pval:.4f}, RMS={rms:.2f}″, spline/dof={chi_norm:.4f}')

        results_log.append((obsid, N, cts_bin, f'{chisq_raw:.2f}', dof, f'{pval:.4f}', f'{rms:.2f}', f'{chisq_spline:.2f}'))

        # plotting raw offsets
        fig, axs = plt.subplots(3, 1, figsize=(6, 8), constrained_layout=True)
        for ax, raw, trend, err, label, color in zip(
                axs, [dX, dY, dR], [trendX, trendY, None], [sigx, sigy, sigR],
                ['ΔX', 'ΔY', 'ΔRadial'], ['dodgerblue', 'limegreen', 'k']):
            ax.axhline(0, ls=':', c='k')
            ax.errorbar(t_mid, raw, yerr=err, fmt='.', c=color, capsize=3)
            ax.set(title=f'{obsid} {plot_tag} {label}', ylabel='arcsec')
            ax.set_ylim((-0.05, 0.05) if MODE=='psf' else (-0.5, 0.5))
        axs[-1].set(xlabel='Time (s)')
        pdf.savefig(fig)
        plt.close(fig)

print(f'PDF saved to {pdf_name}')

# write TSV summary
with open(tsv_name, 'w', newline='') as fh:
    writer = csv.writer(fh, delimiter='\t')
    header = [f'# summary ({datetime.datetime.now():%Y-%m-%d %H:%M}), mode={MODE}']
    writer.writerow(header)
    writer.writerow(['ObsID', 'Core Counts', 'Counts/bin', 'χ²', 'dof', 'p-value', 'RMS', 'χ²_spline'])
    writer.writerows(results_log)

print(f'TSV saved to {tsv_name}')

Processing 26568: N=16325, per-bin=163


Output()

  results = list(tqdm(pool.imap_unordered(_boot, jobs), total=len(jobs), desc='Boot'))


χ²/dof=787/198, p=0.0000, RMS=0.13″, spline/dof=1.9796
Processing 26569: N=16843, per-bin=168


Output()

  results = list(tqdm(pool.imap_unordered(_boot, jobs), total=len(jobs), desc='Boot'))


χ²/dof=844/198, p=0.0000, RMS=0.13″, spline/dof=1.9803
Processing 26570: N=15302, per-bin=153


Output()

  results = list(tqdm(pool.imap_unordered(_boot, jobs), total=len(jobs), desc='Boot'))


χ²/dof=613/198, p=0.0000, RMS=0.13″, spline/dof=1.9793
Processing 26571: N=14250, per-bin=142


Output()

  results = list(tqdm(pool.imap_unordered(_boot, jobs), total=len(jobs), desc='Boot'))


χ²/dof=509/198, p=0.0000, RMS=0.13″, spline/dof=1.9802
Processing 26572: N=13456, per-bin=134


Output()

  results = list(tqdm(pool.imap_unordered(_boot, jobs), total=len(jobs), desc='Boot'))


χ²/dof=558/198, p=0.0000, RMS=0.14″, spline/dof=1.9797
Processing 26573: N=12729, per-bin=127


Output()

  results = list(tqdm(pool.imap_unordered(_boot, jobs), total=len(jobs), desc='Boot'))


χ²/dof=582/198, p=0.0000, RMS=0.15″, spline/dof=1.9805
Processing 26574: N=16376, per-bin=163


Output()

  results = list(tqdm(pool.imap_unordered(_boot, jobs), total=len(jobs), desc='Boot'))


χ²/dof=749/198, p=0.0000, RMS=0.13″, spline/dof=1.9805
Processing 26575: N=14061, per-bin=140


Output()

  results = list(tqdm(pool.imap_unordered(_boot, jobs), total=len(jobs), desc='Boot'))


χ²/dof=467/198, p=0.0000, RMS=0.14″, spline/dof=1.9810
Processing 26576: N=14095, per-bin=140


Output()

  results = list(tqdm(pool.imap_unordered(_boot, jobs), total=len(jobs), desc='Boot'))


χ²/dof=644/198, p=0.0000, RMS=0.14″, spline/dof=1.9798
Processing 26577: N=12527, per-bin=125


Output()

  results = list(tqdm(pool.imap_unordered(_boot, jobs), total=len(jobs), desc='Boot'))


χ²/dof=531/198, p=0.0000, RMS=0.15″, spline/dof=1.9801
Processing 26578: N=15655, per-bin=156


Output()

  results = list(tqdm(pool.imap_unordered(_boot, jobs), total=len(jobs), desc='Boot'))


χ²/dof=764/198, p=0.0000, RMS=0.16″, spline/dof=1.9802
Processing 26579: N=15993, per-bin=159


Output()

  results = list(tqdm(pool.imap_unordered(_boot, jobs), total=len(jobs), desc='Boot'))


χ²/dof=740/198, p=0.0000, RMS=0.13″, spline/dof=1.9797
PDF saved to Jitter/hrc-jitter-100bins-raw.pdf
TSV saved to Jitter/chisq-jitter-100bins-raw.tsv


In [8]:
quad_pdf = PdfPages(f'Jitter/hrc{tag}-jitter-{N_GROUPS}bins-splinefit.pdf')
for f in filelist:
    obsid = Path(f).parts[-3] if MODE=='obs' else Path(f).parts[-2]
    cache_file = Path('Jitter/cache') / f'{obsid}_{MODE}_{N_GROUPS}.npz'
    d = np.load(cache_file)
    t_mid, dX, dY = d['t'], d['dX'], d['dY']
    sigx, sigy = d['sigx'], d['sigy']

    # spline residuals
    trendX = splev(t_mid, splrep(t_mid, dX, w=1/sigx, s=SMOOTH, k=3))
    trendY = splev(t_mid, splrep(t_mid, dY, w=1/sigy, s=SMOOTH, k=3))
    resX, resY = dX - trendX, dY - trendY
    chisq_res = ((resX/sigx)**2).sum() + ((resY/sigy)**2).sum()
    dof_res = 2*len(resX) - 2
    pval_res = 1 - chi2.cdf(chisq_res, dof_res)
    rms_res = np.sqrt((resX**2 + resY**2).mean())
    print(f'{obsid}: χ²_res/dof={chisq_res:.0f}/{dof_res}, p={pval_res:.4f}, RMS={rms_res:.2f}″')

    # 1×4 panel: raw and residual
    fig, axes = plt.subplots(4, 1, figsize=(6, 12), constrained_layout=True)
    ylim = (-0.05,0.05) if MODE=='psf' else (-0.5,0.5)
    panels = [
        (dX, 'dodgerblue', f'{obsid} {plot_tag} ΔX', True, trendX, sigx),
        (resX, 'dodgerblue', 'ΔX residual', False, None, sigx),
        (dY, 'limegreen', f'{obsid} {plot_tag} ΔY', True, trendY, sigy),
        (resY, 'limegreen', 'ΔY residual', False, None, sigy),
    ]
    for ax, (raw, c, title, show_trend, tr, err) in zip(axes, panels):
        ax.axhline(0, ls=':', c='k')
        ax.errorbar(t_mid, raw, yerr=err, fmt='.', c=c, capsize=3)
        if show_trend:
            ax.plot(t_mid, tr, '-', c='red')
        ax.set(title=title, ylabel='arcsec', ylim=ylim)
    axes[-1].set(xlabel='Time (s)')
    quad_pdf.savefig(fig)
    plt.close(fig)
quad_pdf.close()
print(f'Saved spline-fit PDF to Jitter/hrc{tag}-jitter-{N_GROUPS}bins-splinefit.pdf')

26568: χ²_res/dof=198/198, p=0.4874, RMS=0.08″
26569: χ²_res/dof=198/198, p=0.4859, RMS=0.07″
26570: χ²_res/dof=198/198, p=0.4880, RMS=0.08″
26571: χ²_res/dof=198/198, p=0.4863, RMS=0.09″
26572: χ²_res/dof=198/198, p=0.4873, RMS=0.10″
26573: χ²_res/dof=198/198, p=0.4856, RMS=0.11″
26574: χ²_res/dof=198/198, p=0.4857, RMS=0.08″
26575: χ²_res/dof=198/198, p=0.4847, RMS=0.10″
26576: χ²_res/dof=198/198, p=0.4870, RMS=0.09″
26577: χ²_res/dof=198/198, p=0.4865, RMS=0.11″
26578: χ²_res/dof=198/198, p=0.4861, RMS=0.10″
26579: χ²_res/dof=198/198, p=0.4873, RMS=0.08″
Saved spline-fit PDF to Jitter/hrc-jitter-100bins-splinefit.pdf


In [9]:
for f in filelist:
    # load original events
    hdul = fits.open(f, mode='readonly')
    data = hdul[1].data
    times = data['time'] - data['time'].min()
    # load spline model from cache
    obsid = Path(f).parts[-3] if MODE=='obs' else Path(f).parts[-2]
    cache = np.load(Path('Jitter/cache')/f'{obsid}_{MODE}_{N_GROUPS}.npz')
    t_mid, dX, dY, sigx, sigy = cache['t'], cache['dX'], cache['dY'], cache['sigx'], cache['sigy']
    splX = splrep(t_mid, dX, w=1/sigx, s=SMOOTH, k=3)
    splY = splrep(t_mid, dY, w=1/sigy, s=SMOOTH, k=3)
    # evaluate trend at each event time
    trend_x = splev(times, splX)
    trend_y = splev(times, splY)
    # convert arcsec corrections to pixels
    dx_pix = trend_x / PIXEL_SCALE
    dy_pix = trend_y / PIXEL_SCALE
    # subtract spline drift from event coordinates
    data['x'] = data['x'] - dx_pix
    data['y'] = data['y'] - dy_pix
    # write out corrected event file
    outname = f.replace('.fits', '_splinecorr.fits')
    hdul.writeto(outname, overwrite=True)
    hdul.close()
    print(f'Corrected events written to {outname}')

Corrected events written to 26568/repro/hrcf26568_repro_evt2_splinecorr.fits
Corrected events written to 26569/repro/hrcf26569_repro_evt2_splinecorr.fits
Corrected events written to 26570/repro/hrcf26570_repro_evt2_splinecorr.fits
Corrected events written to 26571/repro/hrcf26571_repro_evt2_splinecorr.fits
Corrected events written to 26572/repro/hrcf26572_repro_evt2_splinecorr.fits
Corrected events written to 26573/repro/hrcf26573_repro_evt2_splinecorr.fits
Corrected events written to 26574/repro/hrcf26574_repro_evt2_splinecorr.fits
Corrected events written to 26575/repro/hrcf26575_repro_evt2_splinecorr.fits
Corrected events written to 26576/repro/hrcf26576_repro_evt2_splinecorr.fits
Corrected events written to 26577/repro/hrcf26577_repro_evt2_splinecorr.fits
Corrected events written to 26578/repro/hrcf26578_repro_evt2_splinecorr.fits
Corrected events written to 26579/repro/hrcf26579_repro_evt2_splinecorr.fits
