# Fit NuSTAR FPMA and FPMB simultaneously: single thermal component

In [None]:
import os
os.environ["OMP_NUM_THREADS"] = "1"

import astropy.units as u
import matplotlib.pyplot as plt
%matplotlib qt
plt.style.use('nice.mplstyle')
import numpy as np
import scipy.stats as st

from nustar_tools.spectra.grade_spectra import GradeCollection
from nustar_tools.spectra.response import ResponseHandler

import yaff.fitting as fitting
import importlib
importlib.reload(fitting)
from yaff import plotting as yap

In [None]:
def thermal(arg_dict: dict[str, object]):
    """Evaluate the single-temperature thermal emission model.

    Parameters
    ----------
    arg_dict
        Mapping supplied by the fitter. Two entries are read:
        ``'photon_energy_edges'`` (tagged as keV before use) and
        ``'parameters'``, a mapping whose ``'temperature'`` and
        ``'emission_measure'`` entries provide ``as_quantity()``.

    Returns
    -------
    The output of ``thermal_emission`` expressed as plain values in
    ph / s / keV / cm^2.
    """
    # The import lives inside the function because the model function has
    # to survive pickling/multiprocessing. It is aliased so it does not
    # shadow this function's own name.
    from sunkit_spex.legacy import thermal as legacy_thermal

    edges = arg_dict['photon_energy_edges'] << u.keV
    parameters = arg_dict['parameters']
    temperature = parameters['temperature'].as_quantity()
    emission_measure = parameters['emission_measure'].as_quantity()

    flux = legacy_thermal.thermal_emission(
        energy_edges=edges,
        temperature=temperature,
        emission_measure=emission_measure,
    )
    return flux.to_value(u.ph / u.s / u.keV / u.cm**2)

In [None]:
# Directory holding the NuSTAR products, and the naming pattern of the
# spectrum files. `{fpm}` and `{grade}` are left unformatted here and
# passed through to GradeCollection as-is.
data_dir = 'nustar-data/'
file_format = 'fpm{fpm}_g{grade}.pha'

# data_dir already ends in a separator, so every path below is built with
# os.path.join instead of inserting another '/' by hand (which would give
# 'nustar-data//...').
collection = GradeCollection(
    os.path.join(data_dir, file_format),
    grades=['0-4'],
    fpms=['A', 'B']
)
collection.prepare_data()

# One response handler per focal plane module, keyed by detector letter
handlers = dict()
for detector in ('A', 'B'):
    rmf_file = os.path.join(data_dir, f'fpm{detector}_g0-4.rmf')
    arf_file = os.path.join(data_dir, f'fpm{detector}_g0-4.arf')
    handlers[detector] = ResponseHandler(rmf_file, arf_file)

In [None]:
def log_likelihood(data: "fitting.DataPacket", model: np.ndarray):
    """Poisson log-likelihood of the observed counts given the model.

    Only count bins whose midpoint lies in [3, 5] (in the units of
    ``data.count_energy_edges``, presumably keV) contribute.

    Parameters
    ----------
    data
        Object exposing ``counts`` and ``count_energy_edges``. The
        annotation is quoted so that defining this function does not
        require the ``fitting`` name to be resolvable at definition time.
    model
        Expected counts per count-energy bin; same length as
        ``data.counts``. Fractional values are used as-is.

    Returns
    -------
    The summed log-probability ``sum(ln P(counts_i | mu=model_i))`` over
    the selected bins, or ``-inf`` if the model is invalid (negative) in
    any selected bin.
    """
    edges = np.asarray(data.count_energy_edges, dtype=float)
    mids = edges[:-1] + np.diff(edges) / 2
    # Restrict the likelihood to the energy band we care about
    in_band = (mids >= 3) & (mids <= 5)

    # np.asarray yields plain float arrays even if the inputs are array
    # subclasses, so scipy sees ordinary numbers.
    observed = np.asarray(data.counts, dtype=float)[in_band]
    expected = np.asarray(model, dtype=float)[in_band]

    # The data are the Poisson outcome and the model is the Poisson mean:
    #   ln P(k | mu) = k ln(mu) - mu - ln(k!)
    # The mean may be any non-negative real number, so the model is not
    # rounded. Zero-count bins are kept because they still constrain the
    # model (each contributes -mu).
    total = st.poisson.logpmf(observed, expected).sum()

    # scipy reports NaN for an invalid (negative) mean; treat that as an
    # impossible model rather than handing NaN to the sampler.
    return -np.inf if np.isnan(total) else total

# Initial guesses for the two model parameters. Both are created frozen;
# they are unfrozen later, once the composite fitter exists.
starting_parameters = dict(
    temperature=fitting.Parameter(4 << u.MK, frozen=True),
    emission_measure=fitting.Parameter(100 << (1e42 * u.cm**-3), frozen=True),
)

# Priors expressed as simple lower/upper bounds on each parameter value
# (presumably in the same units as the starting values above). Something
# more informative, e.g. a truncated normal, could be used instead.
log_priors = dict(
    temperature=fitting.simple_bounds(0, 100),
    emission_measure=fitting.simple_bounds(0, 10000),
)

# Names of the thermal parameter group, in definition order, so they can
# be looped over later
thermal_names = list(starting_parameters)

In [None]:
# FPMA spectrum on its own; these two names are not used again in the
# cells shown here
counts_a, edges_a = collection.data['A']['0-4'].spectrum

# Build one BayesFitter per focal plane module. Every fitter is handed the
# same model function, starting parameters, priors and likelihood; only
# the data packet differs.
fitters = []
for detector in ('A', 'B'):
    counts, edges = collection.data[detector]['0-4'].spectrum
    handler = handlers[detector]
    srm = handler.srm

    packet_fields = dict(
        counts=counts,
        # Square root of the bare count values, re-tagged with the count unit
        counts_error=np.sqrt(counts.value) << u.count,
        effective_exposure=collection.data[detector]['0-4'].exposure,
        # No background is supplied: zeros shaped like `counts`
        background_counts=0*counts,
        background_counts_error=0*counts,
        count_energy_edges=edges,
        photon_energy_edges=handler.energy_edges,
        response_matrix=srm,
    )
    dp = fitting.DataPacket(**packet_fields)

    fitter = fitting.BayesFitter(
        data=dp,
        model_function=thermal,
        parameters=starting_parameters,
        log_priors=log_priors,
        log_likelihood=log_likelihood,
    )
    fitters.append(fitter)

In [None]:
# Combine the per-detector fitters into one composite fitter in which
# both thermal parameters are shared between the models
shared_names = ['temperature', 'emission_measure']
composite = fitting.CompositeBayesFitter(
    individual_fitters=fitters,
    shared_param_names=shared_names,
)

# Unfreeze each shared parameter so that it is free to vary in the fit
for name in shared_names:
    composite.shared_params[name].frozen = False

In [None]:
# Display the composite fitter's parameters before any fitting is run
composite.parameters

In [None]:
# Run yaff's `normal_minimize` on the composite fitter before sampling
# (presumably a point-estimate minimization that updates the parameters
# in place -- confirm against the yaff documentation)
fitting.normal_minimize(composite)

In [None]:
# Display the parameters again, now that the minimization cell has run
composite.parameters

In [None]:
# The two positional dicts are passed through by `run_emcee`; the first
# presumably configures the emcee sampler and the second the sampling run
# (confirm against the yaff documentation).
sampler_options = {'nwalkers': 16}
run_options = {'nsteps': 300, 'progress': True}
composite.run_emcee(sampler_options, run_options)

In [None]:
# Trace plots of the flattened chain: transposing puts one parameter per
# row, and the first two rows are drawn on separate stacked axes.
flat_samples = composite.emcee_sampler.flatchain
chain = flat_samples.T
fig, axs = plt.subplots(nrows=2)
upper_ax, lower_ax = axs
upper_ax.plot(chain[0])
lower_ax.plot(chain[1])

In [None]:
# Take the mean of every flattened-chain column as the best-fit value.
# NOTE(review): the unpacking assumes the columns are ordered
# (temperature, emission_measure) -- confirm against the composite fitter.
posterior_mean = np.mean(composite.emcee_sampler.flatchain, axis=0)
best_t, best_em = posterior_mean

# Side-by-side panels, one per detector
fig = plt.figure(figsize=(16, 6))
figa, figb = fig.subfigures(nrows=1, ncols=2)

# Write the best-fit values into each individual fitter so the plotted
# model reflects them
best_values = {'temperature': best_t, 'emission_measure': best_em}
for fitter in fitters:
    for name, value in best_values.items():
        fitter.parameters[name].value = value

fita, fitb = fitters

yap.plot_data_model(fita, fig=figa)
yap.plot_data_model(fitb, fig=figb)

In [None]:
# Display the chain-mean best-fit values computed in the previous cell
best_t, best_em