# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category reserved for affiliation-related notices."""

class AffiliationError(RuntimeError):
    """Error signalling that a paper failed the affiliation verification."""

def validation(source: str):
    """Reject a source that fails the MPIA affiliation verification.

    Meant to be used as a check that runs before the TeX code is parsed.

    :param source: source text handed to ``mpia.affiliation_verifications``
    :raises AffiliationError: when the verification returns anything other
        than ``True``; the returned value is appended to the error message
    """
    outcome = mpia.affiliation_verifications(source, verbose=True)
    if outcome is True:
        return
    raise AffiliationError("mpia.affiliation_verifications: " + outcome)

        
# Always emit AffiliationWarning, even when the same warning repeats
# (the default filters would show it only once per location).
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g. ``2509.04559``)
    :returns: HTML snippet, an ``<img>`` wrapped in ``<div id="qrcode">``
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # Quote the complete src value. Quoting only the arXiv link left the
    # attribute unquoted with stray '"' characters inside the image URL.
    txt = f"""<img src="{url}https://arxiv.org/abs/{paper_id}">"""
    return '<div id="qrcode">' + txt + '</div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoding commands from a string

    Strips the LaTeX ``CJK`` / ``CJK*`` environment wrappers (UTF8 encoding,
    any font family such as ``gbsn``, ``gkai``, ``bsmi``...) and keeps the
    wrapped characters. List may need to grow.

    :param text: the text to clean
    :return: the cleaned text
    """
    # group 1: optional '*' of the starred environment (must match at \end)
    # group 2: the wrapped text, which is all that is kept
    pattern = r"\\begin\{CJK(\*?)\}\{UTF8\}\{[^{}]*\}(.*?)\\end\{CJK\1\}"
    return re.sub(pattern, r"\2", text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last is reduced to its initial(s); hyphenated given
    names keep their hyphen (``Anne-Marie`` -> ``A.-M.``). A parenthesised
    part (often the non-western spelling) is moved to the end unchanged.

    :param name: full name
    :returns: initials followed by the family name; an empty string when
        `name` holds no usable token
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.search(r"\((.*?)\)", name)
        # An unmatched '(' has no group to extract: leave the name untouched.
        if found:
            suffix = found.group(1)
            name = name.replace(f"({suffix})", '')

    tokens = name.split()
    if not tokens:
        # Nothing left besides (possibly) the parenthesised part.
        return f"({suffix})" if suffix else ''

    initials = []
    for token in tokens[:-1]:
        # Drop empty pieces produced by a leading/trailing/double hyphen.
        pieces = [piece for piece in token.split('-') if piece]
        if pieces:
            initials.append('-'.join(piece[0] + '.' for piece in pieces))
    initials.append(tokens[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff (Mitarbeiter) list from mpia.de to get the author names.
We then fetch all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Drops IT, administration and technical staff by looking for known
    family-name fragments anywhere in the name (substring match).

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    excluded_fragments = (
        'Licht', 'Binroth', 'Witzel', 'Jordan',
        'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
        'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
        'Dehen', 'Beckmann', 'Jager', 'Jäger',
    )
    return not any(fragment in name for fragment in excluded_fragments)

def add_author_to_list(author_list: list) -> list:
    """ Append the hard-coded extra authors that are not yet in the list

    The list is modified in place and also returned.

    :param author_list: list of authors
    :returns: updated list of authors (the same object as `author_list`)
    """
    extra_authors = ('T. Henning',)

    for candidate in extra_authors:
        if candidate in author_list:
            continue
        author_list.append(candidate)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
# Element [1] of each entry is used as the author name
# (presumably the entries are tuples holding the name second -- TODO confirm).
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
# (add_author_to_list appends in place and returns the same list)
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
# Fetch the new arXiv papers to screen.
new_papers = get_new_papers()
# add manual references
# Every identifier listed in add_paper_refs is fetched individually and
# appended to new_papers; leave the list empty for a regular run.
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    """Apply `fn` to `value`, falling back to `value` itself on failure.

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of `fn`, also the fallback result
    :returns: the result of `fn`, or `value` unchanged if `fn` raised an
        ``Exception`` (best effort: the error is deliberately swallowed)
    """
    try:
        outcome = fn(value, *args, **kwargs)
    except Exception:
        outcome = value
    return outcome

# Keep only the papers with at least one author matching the MPIA list.
candidates = []
for paperk in new_papers:
    # Check author list with their initials; robust_call keeps the original
    # name whenever mpia.get_initials raises on it.
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # Matched names come back wrapped in <mark>...</mark> (see the rendered
    # author lists). Test for the tag itself: the bare substring 'mark' would
    # also select any author whose name merely contains "mark".
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if '<mark' in hl]
    # The paper record keeps the highlighted, normalised author list.
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

L. Xie  ->  Z.-L. Xie  |  ['L. Xie']
K. El-Badry  ->  K. El-Badry  |  ['K. El-Badry']
M. Zhang  ->  M. Zhang  |  ['M. Zhang']
S. Belladitta  ->  S. Belladitta  |  ['S. Belladitta']
K. Kreckel  ->  K. Kreckel  |  ['K. Kreckel']


B. Maity  ->  B. Maity  |  ['B. Maity']
Arxiv has 71 new papers today
          6 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Parse every candidate paper and build its markdown summary.
# documents: (paper_id, markdown text) for each paper that was processed
# failed:    (paper, reason string) for each paper that was rejected or crashed
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalise the identifier: lowercase, drop the 'arxiv:' prefix, trim whitespace.
    # NOTE(review): r'\n' is a raw string, so this removes a literal backslash
    # followed by 'n', not a newline character -- confirm that is intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']

    folder = f'tmp_{paper_id}'

    try:
        # Only download the source when the folder does not already exist.
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')

        try:
            # `validation` raises AffiliationError when the affiliation check fails.
            doc = latex.LatexDocument(folder, validation=validation)
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is assigned but never used in this cell.
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            # Recorded as failed; move on to the next candidate.
            continue

        # Hack because sometimes author parsing does not work well
        # When the parsed author count differs from the arXiv record, use the
        # arXiv author list (already highlighted during candidate selection).
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors],
                mpia_authors, verbose=True)
        # Fall back to the arXiv abstract when none was parsed from the source.
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']

        # Header line: arXiv badge, appearance date, then the optional arXiv comments.
        doc.comment = (get_markdown_badge(paper_id) +
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"

        full_md = doc.generate_markdown_text()

        full_md += get_markdown_qrcode(paper_id)

        # replace citations
        # Best effort: a bibliography failure is printed and the document is
        # kept with its citations left as generated.
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)

        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure (download, parsing, rendering) is reported as a
        # LatexWarning and recorded so that the loop carries on.
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/6 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2509.04553
extracting tarball to tmp_2509.04553...

 done.
Retrieving document from  https://arxiv.org/e-print/2509.04555
extracting tarball to tmp_2509.04555... done.
Retrieving document from  https://arxiv.org/e-print/2509.04558


extracting tarball to tmp_2509.04558... done.
Retrieving document from  https://arxiv.org/e-print/2509.04559


extracting tarball to tmp_2509.04559... done.


S. Belladitta  ->  S. Belladitta  |  ['S. Belladitta']


Found 132 bibliographic references in tmp_2509.04559/main_letter.bbl.
Retrieving document from  https://arxiv.org/e-print/2509.04569


 item = \bibitem[{ {HI4PI Collaboration} {et~al.}(2016){HI4PI Collaboration}, {Ben Bekhti}, {Fl{ö}er}, {Keller}, {Kerp}, {Lenz}, {Winkel}, {Bailin}, {Calabretta}, {Dedes}, {Ford}, {Gibson}, {Haud}, {Janowiecki}, {Kalberla}, {Lockman}, {McClure-Griffiths}, {Murphy}, {Nakanishi}, {Pisano},  {Staveley-Smith}}]{HI4PI2016}{HI4PI Collaboration}, {Ben Bekhti}, N., {Fl{ö}er}, L., {et~al.} 2016, \bibinfo{title}{{HI4PI: A full-sky H I survey based on EBHIS and GASS},} \aap, 594, A116, \dodoi{10.1051/0004-6361/201629178}
 regex = 
        \\bibitem(\[[^\[\]]*?\]){(?P<bibkey>[a-zA-Z0-9\-\+\.\S]+?)}(?P<authors>|([\D]*?))(?P<year>[12][0-9]{3})(?P<rest>.*)
        


extracting tarball to tmp_2509.04569... done.
Retrieving document from  https://arxiv.org/e-print/2509.05096


extracting tarball to tmp_2509.05096... done.


B. Maity  ->  B. Maity  |  ['B. Maity']


Found 63 bibliographic references in tmp_2509.05096/script_emu.bbl.
Error retrieving bib data for ghara2025: 'author'
Error retrieving bib data for ghara2025: 'author'


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime

# ISO date (YYYY-MM-DD), used in both the log file name and its title.
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# Create the log directory on first use so that open() cannot fail on a
# build tree that does not have it yet.
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# Titles, author names and abstracts contain non-ASCII characters: write
# UTF-8 explicitly instead of relying on the platform default encoding.
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # Identifiers of the papers for which a document was generated.
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # Sort by reason so that identical reasons share a single heading below.
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # Affiliation rejections are expected (green); anything else is a
        # processing error (red).
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() +
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')

        # Every failure is displayed as well as logged; to show only the
        # processing errors, guard the call with `if color in ('red',):`.
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2509.04559-b31b1b.svg)](https://arxiv.org/abs/2509.04559) | **X-ray investigation of possible super-Eddington accretion in a radio-loud quasar at $z=6.13$**  |
|| L. Ighina, et al. -- incl., <mark>S. Belladitta</mark> |
|*Appeared on*| *2025-09-08*|
|*Comments*| *Accepted for publication on ApJL on the 8th July 2025. 18 pages with 6 figures and 2 tables*|
|**Abstract**|            We present radio and X-ray observations of the recently discovered $z=6.13$ radio-powerful quasar RACS J032021.44$-$352104.1 using uGMRT, ATCA, LBA, and Chandra. The observed radio properties are in line with what is typically observed in high-$z$ radio quasars ($\alpha_{\rm r}=0.72\pm 0.02$ and L$_{\rm 1.4GHz}=5.8 \pm 0.9 \times 10^{26}$ W Hz$^{-1}$). Despite the relatively low X-ray flux observed $F_{\rm 0.5-7.0 keV}=2.3\pm0.5 \times 10^{-14}$ erg sec$^{-1}$ cm$^{-2}$, the intrinsic luminosity in the 2-10 keV rest frame is markedly high, $L_{\rm 2-10 keV}=1.8^{+1.1}_{-0.7} \times 10^{46}$ erg sec$^{-1}$, making RACS J032021.44$-$352104.1 one of the most luminous quasars currently known at $z>5.5$. The high X-ray luminosity is largely driven by an extrapolation to energies below the observable X-ray window with Chandra and the slope derived in the 0.5-7 keV band (or 3.5--50 keV in the rest-frame; $\Gamma_{\rm X}=3.3\pm0.4$). By analysing the overall spectral energy distribution of the quasar we found that the remarkably soft X-ray emission: (1) cannot be produced by relativistic jets, even when relativistic boosting is considered; and (2) is consistent with expectations for a super-Eddington accreting SMBH. If such a high accretion rate was confirmed, this source would be a unique laboratory to study high accretion in the early Universe and could help resolve some challenges inherent in early black hole growth paradigms.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2509.05096-b31b1b.svg)](https://arxiv.org/abs/2509.05096) | **An emulator-based forecasting on astrophysics and cosmology with 21 cm and density cross-correlations during EoR**  |
|| <mark>B. Maity</mark> |
|*Appeared on*| *2025-09-08*|
|*Comments*| *Accepted for publication in A&A, 12 pages, 9 figures*|
|**Abstract**|            The 21 cm signal arising from fluctuations in the neutral hydrogen field, and its cross-correlation with other tracers of cosmic density, are promising probes of the high-redshift Universe. In this study, we assess the potential of the 21 cm power spectrum, along with its cross power spectrum with dark matter density and associated bias, to constrain both astrophysics during the reionization era and the underlying cosmology. Our methodology involves emulating these estimators using an Artificial Neural Network (ANN), enabling efficient exploration of the parameter space. Utilizing a photon-conserving semi-numerical reionization model, we construct emulators at a fixed redshift ($z = 7.0$) for $k$-modes relevant to upcoming telescopes such as SKA-Low. We generate $\sim7000$ training samples by varying both cosmological and astrophysical parameters along with initial conditions, achieving high accuracy when compared to true simulation outputs. While forecasting, the model involves five free parameters: three cosmological ($\Omega_m$, $h$, $\sigma_8$) and two astrophysical (ionizing efficiency, $\zeta$, and minimum halo mass, $M_{\mathrm{min}}$). Using a fiducial model at the mid-reionization stage, we create a mock dataset and perform forecasting with the trained emulators. Assuming a 5% observational uncertainty combined with emulator error, we find that the 21 cm and 21 cm-density cross power spectra can constrain the Hubble parameter ($h$) to better than 6% at a confidence interval of 95%, with tight constraints on the global neutral fraction ($Q_{\mathrm{HI}}$). The inclusion of bias information further improves constraints on $\sigma_8$ (< 10% at 95% confidence). Finally, robustness tests with two alternate ionization states and a variant with higher observational uncertainty show that the ionization fractions are still reliably recovered, even when cosmological constraints weaken.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2509.04569-b31b1b.svg)](https://arxiv.org/abs/2509.04569) | **SDSS-V LVM: Detectability of Wolf-Rayet stars and their He II ionizing flux in low-metallicity environments I. The weak-lined, early-type WN3 stars in the SMC**  |
|| G. González-Torà, et al. -- incl., <mark>K. Kreckel</mark> |
|*Appeared on*| *2025-09-08*|
|*Comments*| *Submitted to A&A*|
|**Abstract**|            The Small Magellanic Cloud (SMC) is the nearest low-metallicity dwarf galaxy. Its proximity and low reddening has enabled us to detect its Wolf-Rayet (WR) star population with 12 known objects. Quantitative spectroscopy of the stars revealed half of these WR stars to be strong sources of He ii ionizing flux, but the average metallicity of the SMC is below where WR bumps are usually detected in integrated galaxy spectra showing nebular He ii emission. Utilizing the Local Volume Mapper (LVM), we investigate regions around the six SMC WN3h stars, whose winds are thin enough to avoid He recombination and allow photons with > 54 eV to escape. Focusing on He ii 4686 Å, we show that the broad stellar wind component, the strongest optical diagnostic of the WN3h stars, is diluted within 24 pc in the integrated light from LVM, making the WR stars hard to detect in low-metallicity integrated regions. In addition, we compare the He ii ionizing flux from LVM with the values inferred from the stellar atmosphere code PoWR and find that the nebular emission around them only in some cases reflects the high amounts emitted by the stars. We conclude that early-type WN stars with comparably weak winds are viable sources to produce the observed He ii ionizing flux in low-metallicity galaxies. The easy dilution of the stellar signatures can explain the rareness of WR bump detections at 12 + log O/H < 8.0, while at the same time providing major candidates for the observed excess of nebular He ii emission. This constitutes a challenge for population synthesis models across all redshifts as the evolutionary path towards this observed WR population at low metallicity remains enigmatic.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: '69117' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2509.04553-b31b1b.svg)](https://arxiv.org/abs/2509.04553) | **Characterizing the roles of transitory obscured phases and inner torus in shaping the fractions of obscured AGN at cosmic noon**  |
|| A. V. Alonso-Tetilla, et al. -- incl., <mark>L. Xie</mark> |
|*Appeared on*| *2025-09-08*|
|*Comments*| *MNRAS, accepted, 22 pages, 11 figures*|
|**Abstract**|            The origin of obscuration in Active Galactic Nuclei (AGN) is still a matter of contention. It is unclear whether obscured AGN are primarily due to line-of-sight effects, a transitory, dust-enshrouded phase in galaxy evolution, or a combination of both. The role of an inner torus around the central SMBH also remains unclear in pure Evolution models. We use cosmological semi-analytic models and semi-empirical prescriptions to explore obscuration effects in AGN at 1<z<3. We consider a realistic object-by-object modelling of AGN evolution including different light curves (LCs) composed of phases of varying levels of obscuration, mimicking the possible clearing effects of strong AGN feedback. Evolution models characterized by AGN LCs with relatively short pre-peak obscured phases followed by more extended optical/UV visible post-peak phases, struggle to reproduce the high fraction of obscured AGN at z~2-3 inferred from X-ray surveys. Evolution models characterised by LCs with sharp post-peak declines or persistent or multiple obscuration phases are more successful, although they still face challenges in reproducing the steady drop in the fractions of obscured AGN with increasing luminosity measured by some groups. Invoking a fine-tuning in the input LCs, with more luminous AGN defined by longer optical/UV visible windows, can improve the match to the decreasing fractions of obscured AGN with luminosity. Alternatively, a long-lived central torus-like component, with thickness decreasing with increasing AGN power, naturally boosts the luminosity-dependent fractions of obscured AGN, suggesting that small-scale orientation effects may still represent a key component even in Evolution models. We also find that in our models major mergers and starbursts, when considered in isolation, fall short in accounting for the large fractions of highly obscured faint AGN detected at cosmic noon.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2509.04555-b31b1b.svg)](https://arxiv.org/abs/2509.04555) | **Wide binaries in an ultra-faint dwarf galaxy: discovery, population modeling, and a nail in the coffin of primordial black hole dark matter**  |
|| C. Shariat, et al. -- incl., <mark>K. El-Badry</mark> |
|*Appeared on*| *2025-09-08*|
|*Comments*| *Submitted to PASP. All comments are welcome*|
|**Abstract**|            We report the discovery and characterization of a wide binary population in the ultrafaint dwarf galaxy Boötes I using deep JWST/NIRCam imaging. Our sample consists of 52 candidate binaries with projected separations of 7,000 - 16,000 au and stellar masses from near the hydrogen-burning limit to the main-sequence turnoff ($\sim0.1$ - $0.8~{\rm M_\odot}$). By forward-modeling selection biases and chance alignments, we find that $1.25\pm0.25\%$ of Boötes I stars are members of wide binaries with separations beyond 5,000 au. This fraction, along with the distributions of separations and mass ratios, matches that in the Solar neighborhood, suggesting that wide binary formation is largely insensitive to metallicity, even down to [Fe/H] $\approx -2.5$. The observed truncation in the separation distribution near 16,000 au is well explained by stellar flyby disruptions. We also discuss how the binaries can be used to constrain the galaxy's dark matter properties. We show that our detection places new limits on primordial black hole dark matter, finding that compact objects with $M \gtrsim 5~{\rm M_\odot}$ cannot constitute more than $\sim1\%$ of the dark matter content. In contrast to previous work, we find that wide binaries are unlikely to provide robust constraints on the dark matter profile of ultrafaint galaxies given the uncertainties in the initial binary population, flyby disruptions, and contamination from chance alignments. These findings represent the most robust detection of wide binaries in an external galaxy to date, opening a new avenue for studying binary star formation and survival in extreme environments.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2509.04558-b31b1b.svg)](https://arxiv.org/abs/2509.04558) | **A carbon-rich atmosphere on a windy pulsar planet**  |
|| <mark>M. Zhang</mark>, et al. |
|*Appeared on*| *2025-09-08*|
|*Comments*| *Submitted to ApJL*|
|**Abstract**|            A handful of enigmatic Jupiter-mass objects have been discovered orbiting pulsars. One such object, PSR J2322-2650b, uniquely resembles a hot Jupiter exoplanet due to its minimum density of 1.8 g/cm^3 and its ~1900 K equilibrium temperature. We use JWST to observe its emission spectrum across an entire orbit. In stark contrast to every known exoplanet orbiting a main-sequence star, we find an atmosphere rich in molecular carbon (C3, C2) with strong westward winds. Our observations open up a new exoplanetary chemical regime (ultra-high C/O and C/N ratios of >100 and >10,000 respectively) and dynamical regime (ultra-fast rotation with external irradiation) to observational study. The extreme carbon enrichment poses a severe challenge to the current understanding of "black widow" companions, which were expected to consist of a wider range of elements due to their origins as stripped stellar cores.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Local figures referenced in `md` are copied below `directory`, keeping
    their relative path, and `md` itself is written to `directory/md_fname`.
    Online figures (``http`` in the path) and missing files are skipped.

    :param md: markdown text of the document
    :param md_fname: file name of the exported markdown document
    :param directory: destination directory, created if it does not exist
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if os.path.exists(directory) and not os.path.isdir(directory):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if not os.path.exists(directory):
        print(f"creating directory {directory:s}")
        # makedirs rather than mkdir: nested targets such as '_build/html/'
        # must also work when the parent directory is missing.
        os.makedirs(directory)

    # Figures come either as markdown links "[Fig...](path)" or as
    # self-closing HTML "<img src="path" ... />" tags.
    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) +
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # Mirror the figure's relative path below the export directory.
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)

    md_path = os.path.join(directory, md_fname)
    # The markdown contains non-ASCII characters (names, units): write UTF-8
    # explicitly instead of relying on the platform default encoding.
    with open(md_path, 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", md_path)
    # Every detected figure is listed, including the ones that were skipped.
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write one "<paper_id>.md" file per generated document (with its local
# figures) into the HTML build directory.
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2509.04559/./figures/LumX_highzQua.png', 'tmp_2509.04559/./figures/SED_DESJ03-35.png', 'tmp_2509.04559/./figures/DESJ35_200.png', 'tmp_2509.04559/./figures/DES35_400.png', 'tmp_2509.04559/./figures/DES35_650.png', 'tmp_2509.04559/./figures/DES35_21.png', 'tmp_2509.04559/./figures/DES35_55.png', 'tmp_2509.04559/./figures/DES35_90.png', 'tmp_2509.04559/./figures/Chandra_DESJ03-35.png', 'tmp_2509.04559/./figures/X-ray_cont_desJ0320-35.png']
copying  tmp_2509.04559/./figures/LumX_highzQua.png to _build/html/
copying  tmp_2509.04559/./figures/SED_DESJ03-35.png to _build/html/
copying  tmp_2509.04559/./figures/DESJ35_200.png to _build/html/
copying  tmp_2509.04559/./figures/DES35_400.png to _build/html/
copying  tmp_2509.04559/./figures/DES35_650.png to _build/html/
copying  tmp_2509.04559/./figures/DES35_21.png to _build/html/
copying  tmp_2509.04559/./figures/DES35_55.png to _build/html/
copying  tmp_2509.04559/./figures/DES35_90.png to _build/html/
copying  tmp_2509.04

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render the markdown text (element [1]) of every generated document; the
# trailing ';' keeps the list of display() return values out of the cell output.
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$
$\newcommand{\longname}{RACS~J032021.44-352104.1 }$
$\newcommand{\shortname}{RACS~J0320-35}$
$\newcommand$
$\newcommand$
$\newcommand$</div>



<div id="title">

# X-ray investigation of possible super-Eddington accretion in a radio-loud quasar at $z=6.13$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2509.04559-b31b1b.svg)](https://arxiv.org/abs/2509.04559)<mark>Appeared on: 2025-09-08</mark> -  _Accepted for publication on ApJL on the 8th July 2025. 18 pages with 6 figures and 2 tables_

</div>
<div id="authors">

L. Ighina, et al. -- incl., <mark>S. Belladitta</mark>

</div>
<div id="abstract">

**Abstract:** We present radio and X-ray observations of the recently discovered $z=6.13$ radio-powerful quasar RACS J032021.44 $-$ 352104.1 using uGMRT, ATCA, LBA, and _Chandra_ .The observed radio properties are in line with what is typically observed in high- $z$ radio quasars ( $\alpha_{\rm r}=0.72\pm 0.02$ and L $_{\rm 1.4GHz}=5.8 \pm 0.9 \times 10^{26}$ W Hz $^{-1}$ ). Despite the relatively low X-ray flux observed $F_{\rm 0.5-7.0 keV}=2.3\pm0.5 \times 10^{-14}$ erg sec $^{-1}$ cm $^{-2}$ , the intrinsic luminosity in the 2--10 keV rest frame is markedly high, $L_{\rm 2-10 keV}=1.8^{+1.1}_{-0.7} \times 10^{46}$ erg sec $^{-1}$ , making RACS J032021.44 $-$ 352104.1 one of the most luminous quasars currently known at $z>5.5$ . The high X-ray luminosity is largely driven by an extrapolation to energies below the observable X-ray window with $_ Chandra_$ and the slope derived in the 0.5-7 keV band (or 3.5--50 keV in the rest-frame; $\Gamma_{\rm X}=3.3\pm0.4$ ).By analysing the overall spectral energy distribution of the quasar we found that the remarkably soft X-ray emission: (1) cannot be produced by relativistic jets, even when relativistic boosting is considered; and (2) is consistent with expectations for a super-Eddington accreting SMBH.If such a high accretion rate was confirmed, this source would be a unique laboratory to study high accretion in the early Universe and could help resolve some challenges inherent in early black hole growth paradigms.

</div>

<div id="div_fig1">

<img src="tmp_2509.04559/./figures/LumX_highzQua.png" alt="Fig3.1" width="50%"/><img src="tmp_2509.04559/./figures/SED_DESJ03-35.png" alt="Fig3.2" width="50%"/>

**Figure 3. -** ** Left panel:** X-ray luminosity in the 2--10 keV energy band (rest frame) as a function of redshift for the $z>5.5$ quasars with X-ray observations from either _ Chandra_ or _ XMM-Newton_ available in the literature. We highlight the following objects: $\shortname$(red square; this work), HSC J092120.56+000722.9 (pink diamond;  ([Wolf, Nandra and Salvato 2023]()) ), CFHQS J142952+544717 (yellow triangles;  ([Migliori, Siemiginowska and Sobolewska 2023](), [Marcotulli, Connor and Bañados 2025]()) ) and PSO J030947+271757 (purple pentagon;  ([Moretti, Ghisellini and Caccianiga 2021]()) ). Since these last two sources present variable/flaring emission, we show two data-points representing the quiescent and the flaring state respectively. All the other quasars are reported with different colours based if they are also detected in the radio band (purple) or not (green). ** Right panel:** Rest-frame, multi-wavelength spectral energy distribution of $\shortname$. The X-ray weak super-Eddington SED from [Pacucci and Narayan (2024)]() is shown in magenta. The solid black line is a quasar template  ([Polletta, Tajer and Maraschi 2007]())  matched to the optical-UV data-points. The dashed black line is the X-ray emission expected from the UV-X-ray relation derived by [Lusso and Risaliti (2016)]() and assuming $\Gamma_{\rm X}=2.0$. The gray regions show the 1,2$\sigma$ dispersion of the relation. The vertical yellow region indicates frequencies heavily affected by the absorption of the intergalactic medium. (*fig:Lum_comp*)

</div>
<div id="div_fig2">

<img src="tmp_2509.04559/./figures/DESJ35_200.png" alt="Fig4.1" width="16%"/><img src="tmp_2509.04559/./figures/DES35_400.png" alt="Fig4.2" width="16%"/><img src="tmp_2509.04559/./figures/DES35_650.png" alt="Fig4.3" width="16%"/><img src="tmp_2509.04559/./figures/DES35_21.png" alt="Fig4.4" width="16%"/><img src="tmp_2509.04559/./figures/DES35_55.png" alt="Fig4.5" width="16%"/><img src="tmp_2509.04559/./figures/DES35_90.png" alt="Fig4.6" width="16%"/>

**Figure 4. -** MWA (200 MHz), uGMRT (400 and 650 MHz) and ATCA (2.1, 5.5 and 9 GHz) images centred on the optical position of RACS J0320$-$35. Contours start at $\pm$3$\times$RMS and increase by factors of $\sqrt{2}$. (*fig:radio_images*)

</div>
<div id="div_fig3">

<img src="tmp_2509.04559/./figures/Chandra_DESJ03-35.png" alt="Fig1.1" width="50%"/><img src="tmp_2509.04559/./figures/X-ray_cont_desJ0320-35.png" alt="Fig1.2" width="50%"/>

**Figure 1. -** ** Left:**_ Chandra_ images (20$"\times$20$"$) of RACS J0320$-$35 in the energy band 0.5--7 keV. The red cross indicates the optical position of the quasar, consistent with the X-ray source detected in the _ Chandra_ image. ** Right:** Contour levels of the photon index and normalisation parameter derived from the fit of the _ Chandra_ observations. Different colours indicate different observation segments, with the filled ellipses showing the contours from all the observations combined. Dashed (solid) lines indicate the 90\%(68\%) confidence regions. (*fig:X_cont*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2509.04559"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\be}{\begin{equation}}$
$\newcommand{\ee}{\end{equation}}$
$\def\bear#1\ear{\begin{align}#1\end{align}}$
$\newcommand{\nline}{\notag \\}$
$\newcommand{\f}{\frac}$
$\newcommand{\de}{\mathrm{d}}$
$\newcommand{\del}{\partial}$
$\newcommand{\half}{\frac{1}{2}}$
$\newcommand{\im}{\mathrm{i}}$
$\newcommand{\e}{\mathrm{e}}$
$\newcommand{\Msun}{\mathrm{M}_{\odot}}$
$\newcommand{\eqn}[1]{equation~(\ref{#1})}$
$\newcommand{\eqns}[2]{equations~(\ref{#1}) and~(\ref{#2})}$
$\newcommand{\secn}[1]{Section~\ref{#1}}$
$\newcommand{\appndx}[1]{Appendix~\ref{#1}}$
$\newcommand{\fig}[1]{Fig.~\ref{#1}}$
$\newcommand{\figs}[1]{Figs.~\ref{#1}}$
$\newcommand{\tab}[1]{Table~\ref{#1}}$
$\newcommand{\BM}[1]{{\color{blue}[{\bf }#1]}}$
$\newcommand{\red}[1]{{\color{red} #1}}$
$\usepackage[T1]{fontenc}$
$\usepackage{amsmath}$
$\DeclareRobustCommand{\VAN}[3]{#2}$
$\let\VANthebibliography\thebibliography$
$\newcommand{\thebibliography}{\DeclareRobustCommand{\VAN}[3]{##3}\VANthebibliography}$
$\usepackage{graphicx}$
$\usepackage{txfonts}$
$\begin{document}$
$   \title{An emulator-based forecasting on astrophysics and cosmology with 21 cm and density cross-correlations during EoR}$
$   \author{Barun Maity$
$          \inst{1}}$
$   \institute{Max-Planck-Institut für Astronomie, Königstuhl 17, D-69117 Heidelberg, Germany\               \email{maity@mpia.de}}$
$   \date{Received XXX; accepted XXX}$
$\abstract{The 21 cm signal arising from fluctuations in the neutral hydrogen field, and its cross-correlation with other tracers of cosmic density, are promising probes of the high-redshift Universe. In this study, we assess the potential of the 21 cm power spectrum, along with its cross power spectrum with dark matter density and associated bias, to constrain both astrophysics during the reionization era and the underlying cosmology. Our methodology involves emulating these estimators using an Artificial Neural Network (ANN), enabling efficient exploration of the parameter space. Utilizing a photon-conserving semi-numerical reionization model, we construct emulators at a fixed redshift (z = 7.0) for k-modes relevant to upcoming telescopes such as SKA-Low. We generate \sim7000 training samples by varying both cosmological and astrophysical parameters along with initial conditions, achieving high accuracy when compared to true simulation outputs. While forecasting, the model involves five free parameters: three cosmological (\Omega_m, h, \sigma_8) and two astrophysical (ionizing efficiency, \zeta, and minimum halo mass, M_{\mathrm{min}}). Using a fiducial model at the mid-reionization stage, we create a mock dataset and perform forecasting with the trained emulators. Assuming a 5\% observational uncertainty combined with emulator error, we find that the 21 cm and 21 cm-density cross power spectra can constrain the Hubble parameter (h) to better than 6\% at a confidence interval of 95\%, with tight constraints on the global neutral fraction (Q_{\mathrm{HI}}). The inclusion of bias information further improves constraints on \sigma_8 (< 10\% at 95\% confidence). Finally, robustness tests with two alternate ionization states and a variant with higher observational uncertainty show that the ionization fractions are still reliably recovered, even when cosmological constraints weaken.}$
$   \keywords{intergalactic medium -- cosmology: theory – dark ages, reionization, first stars -- large-scale structure of Universe}$
$   \titlerunning{inferring astro-cosmo with 21 cm and density cross-correlation emulator}$
$   \maketitle$
$\section{Introduction}$
$\label{sec:intro}$
$The Epoch of Reionization (EoR) signifies the last major phase transition in the cosmic history of our Universe, when it evolves from a mostly neutral to a mostly ionized state \citep[for reviews, see][]{2001PhR...349..125B,2009CSci...97..841C,2018PhR...780....1D,2022arXiv220802260G,2022GReGr..54..102C}. The fluctuation in the neutral hydrogen field during EoR can be potentially traced by the redshifted 21 cm signal, which arises due to the spin flip transition of the neutral hydrogen atoms at the ground state. The signal carries useful information on cosmological and astrophysical properties of this high redshift epoch. This can provide comprehensive answers to the questions about the ionization and thermal state of the high redshift intergalactic medium (IGM), nature of the first ionizing sources, and the timeline of reionization epoch. It can also inform us about cosmic expansion and structure evolution  ([ and Pritchard 2012]()) .$
$While the radio interferometers are gradually improving their sensitivity to detect the 21 cm signal, these still face significant challenges in terms of foreground contamination and instrument characterization, allowing only upper limits on the detection. These efforts include independent groups focussing on different telescopes such as the Low Frequency Array \citep[LOFAR;][]{2019MNRAS.488.4271G,2020MNRAS.493.1662M,2025arXiv250305576M}, the Murchison Widefield Array \citep[MWA;][]{2019ApJ...884....1B,2020MNRAS.493.4711T,2025arXiv250509097N}, the Giant Metrewave Radio Telescope \citep[GMRT;][]{2013MNRAS.433..639P}, and the Hydrogen Epoch of Reionization Array \citep[HERA phase I;][]{hera2023} at redshift range z\sim6-10. A few projects, such as Owens Valley Long Wavelength Array \citep[OVRO-LWA][]{2019AJ....158...84E}  and New Extension in Nançay Upgrading LOFAR \citep[NenuFAR][]{2021sf2a.conf..211M,2024A&A...681A..62M}, also aim for higher redshifts covering cosmic dawn. The limits have already been exploited to constrain some of the extreme reionization models  (ghara2025) . However, with upcoming facilities like SKA-Low (AA* and AA4 configuration), we expect to detect the signal with percentage-level uncertainties.$
$As 21 cm signal is supposed to be mainly driven by astrophysical processes, most of the EoR studies with 21 cm signal as a probe mainly focus on constraining uncertain astrophysical parameters, keeping the underlying cosmology fixed. Nonetheless, 21 cm signal can also play a crucial role in probing cosmology in combination with other probes like CMB  ([McQuinn, et. al 2006](), [ and Liu 2016]()) . However, the studies aiming for cosmological forecasts with 21 cm signal require efficient models to explore astrophysics and cosmology simultaneously. To this end, analytical halo model of reionization  ([Schneider, Schaeffer and Giri 2023]())  and machine learning based techniques  ([Kern, et. al 2017](), [Hassan, Andrianomena and Doughty 2020]())  have recently been exploited, highlighting the prospects of constraining cosmology and astrophysics with 21 cm. Specifically, the approach of creating emulators of observables or likelihoods has been reasonably successful in inferring astrophysical parameters from 21 cm power spectra and EoR observables  ([ and Shimabukuro 2017](), [ and Schmit 2018](), [Breitman, Mesinger and Murray 2024](), [Sikder, et. al 2024](), [Maity, Paranjape and Choudhury 2023](), [Choudhury, Ghara and Zaroubi 2024](), [Choudhury, Paranjape and Maity 2024]()) .$
$Parallelly, the windows are now getting open for synergetic studies with 21 cm and other high redshift probes. For example, the distribution of Ly-\alpha emitting high redshift galaxies can be used as a biased tracer of large scale density structure of the universe and is useful for cross-correlation with 21 cm  ([Vrbanec and Ciardi 2016](), [Mirocha, et. al 2023](), [Moriwaki, Beane and Lidz 2024]()) . There exist other tracers of density, such as intensity maps \citep[as a review, see, ][]{2017arXiv170909066K} of CO  ([Lidz, Furlanetto and Oh 2011]()) , CII  ([Gong, Cooray and Silva 2012]()) , H\alpha  ([ and Heneka 2021]()) , OIII  ([Moriwaki, Yoshida and Shimizu 2018]()) ;  which can also be utilized for cross-correlation studies. Hence, the cross-correlation between 21 cm and cosmic matter density, an estimator independent of any specific tracers, can provide us with a potential probe of the high redshift universe  ([Xu, Xu and Yue 2019]()) . Furthermore, the cross power spectra is a superior probe in terms of signal-to-noise ratio due to uncorrelated systematics and can complement the pure 21 cm auto power spectra signal. With the availability of current and upcoming telescopes like James Webb Space Telescope (JWST), Nancy Grace Roman Space Telescope (NGRST), Extremely Large Telescope (ELT) etc, the cross-correlation prospects have been shown to be promising in gleaning astrophysical signal during EoR  ([Gagnon-Hartman, Davies and Mesinger 2025]()) . In principle, the cross-correlation information can also be utilized to infer cosmology, which has already been explored in low redshift studies  ([Berti, Spinelli and Viel 2024](), [Autieri, et. al 2025]()) .$
$In this study, we aim to check the prospects of 21 cm and its cross-correlation information with matter density in constraining both astrophysics and cosmology during reionization epoch.  Unlike other semi-numerical approaches based on the excursion set algorithm, we utilize a more realistic prescription provided by \textbf{S}emi Numerical \textbf{C}ode for \textbf{R}e\textbf{I}onization with \textbf{P}ho\textbf{T}on Conservation (\texttt{SCRIPT}) to generate the neutral hydrogen fluctuation field. We consider only a single redshift (z=7.0) for creating the emulators and pursuing parameter exploration, which gives us a starting point as a proof of concept.  However, this can be extended to multiple redshifts, exploring the full power of 21 cm observables in future studies.$
$The paper is organized as follows: In section \ref{sec:theory}, we describe the reionization model and define the observables explored. Next, we discuss building the emulators of those observables, highlighting the performance of our emulators against the true values in section \ref{sec:emulator}. Once the emulator is trained, we describe the mock generation procedure in section \ref{sec:gen_mock} and parameter exploration in section \ref{sec:param_exp}. Then, we discuss our main results in section \ref{sec:results}. Finally, we summarize the paper in section \ref{sec:conc}.$
$\iffalse$
$\begin{figure*}$
$    \centering$
$    \includegraphics[width=0.9\textwidth]{21cm_cosmology/True_predicted_comp_21cm.pdf}$
$    \caption{Comparison of True 21 cm power spectrum and corresponding predicted estimates using ANN at different k bins used in this work. The black points correspond to test dataset while the red line signifies True=Prediction.}$
$    \label{fig:comp_21_pow}$
$\end{figure*}$
$\begin{figure*}$
$    \centering$
$    \includegraphics[width=0.9\textwidth]{21cm_cosmology/True_predicted_comp_21cm_matcross.pdf}$
$    \caption{Comparison of True cross power spectrum of 21cm and \delta field and corresponding predicted estimates using ANN at different k bins used in this work. Other descriptions are similar to Figure \ref{fig:comp_21_pow}.}$
$    \label{fig:comp_21_mat_cross}$
$\end{figure*}$
$\begin{figure*}$
$    \centering$
$    \includegraphics[width=0.9\textwidth]{21cm_cosmology/True_predicted_comp_21cm_bias.pdf}$
$    \caption{Comparison of True 21 cm bias and corresponding predicted estimates using ANN at different k bins used in this work.  Other descriptions are similar to Figure \ref{fig:comp_21_pow}.}$
$    \label{fig:comp_21_bias}$
$\end{figure*}$
$\begin{figure*}$
$    \centering$
$    \includegraphics[width=0.9\textwidth]{21cm_cosmology/samples_predict_true.pdf}$
$    \caption{Plots of 21 cm power spectra, 21 cm-matter cross power, and its bias for a few random models from the test set. The solid lines are the true models, while the dashed lines are the corresponding predictions.}$
$    \label{fig:comp_samples}$
$\end{figure*}$
$\begin{figure}$
$    \centering$
$    \includegraphics[width=0.9\columnwidth]{21cm_cosmology/True_predicted_comp_QHI.pdf}$
$    \caption{Comparison of True neutral fraction (Q_{\mathrm{HI}}^M) and corresponding Predicted estimates using GPR}$
$    \label{fig:comp_Q}$
$\end{figure} \fi$
$\section{Reionization model and observables/estimators}$
$\label{sec:theory}$
$The reionization models implemented in \texttt{SCRIPT} were originally introduced by [ and Choudhury (2018)]() and have since been exploited with various observables  ([ and Maity 2022](), [ and Maity 2022]()) . In this work, we adopt the simplest version of this framework—a two-parameter reionization model previously used for 21 cm forecasting  ([ and Maity 2023]()) . For completeness, we briefly summarize the methodology here.$
$\texttt{SCRIPT} simulates the ionization state of the Universe within a cosmologically representative volume, enabling the computation of large-scale ionization fluctuation power spectra that are converged with respect to the resolution of the simulation box  ([ and Choudhury 2018]()) . To initialize the model, we provide the density field and the spatial distribution of collapsed halos capable of emitting ionizing radiation. Focusing on large-scale IGM features, we use the second-order Lagrangian perturbation theory (2LPT) to generate the density field, rather than relying on computationally expensive full N-body simulations. Specifically, we used the implementation by [ and Hahn (2011)]()\footnote{\url{https://www-n.oca.eu/ohahn/MUSIC/}}. This model also allows us to vary different cosmological parameters (\Omega_m, h, \sigma_8, n_s, w_0 in this case) as well as the initial seed for generating the fluctuating fields. The parameters have the standard meanings i.e., \Omega_m: dark matter density, h: Hubble parameter, \sigma_8: quantifies the amplitude of primordial matter fluctuations, n_s: tilt of the primordial power spectra, and w_0: dark energy equation of state. We fix the baryonic density parameter (\Omega_b=0.0482) throughout the study. The distribution of halos is computed using a subgrid approach based on the conditional ellipsoidal collapse mass function  ([ and Sheth 2002]()) . Although this approach has been proven to be extremely successful for standard \LambdaCDM models, one essential assumption for our study is that the prescription remains the same for the range of cosmology models considered here.  Simulations are conducted within a comoving box of size 256~h^{-1}\mathrm{cMpc}, which has been shown to be sufficient for the observables considered here, as demonstrated in recent literature  ([Iliev, Mellema and Ahn 2014](), [Kaur, Gillet and Mesinger 2020]()) .$
$The spatial resolution is set to \Delta x = 2~h^{-1}\mathrm{cMpc}, adequate for capturing the scales accessible to SKA-Low.$
$As mentioned earlier, we use a basic reionization model containing two free parameters, which are needed to get the ionization topology. The model adopts the photon-conserving algorithm to construct the reionization topology within the simulation box. Specifically, the ionization field relies on the ionization efficiency parameter (\zeta), which estimates the available ionizing photons per hydrogen atom and minimum threshold halo mass (M_{\mathrm{min}}) required to get the fraction of mass collapsed inside a halo. We restrict ourselves to this simple two-parameter setup as we aim to pursue a prospective forecasting study with 21 cm and its cross-correlations with matter density, while simultaneously varying astrophysical and cosmological parameters. This basic setup helps us to gain the required efficiency by minimizing the parameter space. However, the study can be expanded with more physical models of reionization, including recombination and radiative feedback effects  ([ and Maity 2022]())  in a future project.$
$In general, any model of reionization produces the ionized hydrogen fraction x_{\mathrm{HII}, i} in grid cells (represented by the index i) inside a simulation volume. The differential brightness temperature (assuming spin temperature is very much larger than CMB temperature) is then given by  ([Madau, Meiksin and Rees 1997](), [ and Ciardi 2003]()) $
$\be$
$\label{eq:delta_Tb}$
$\delta T_{b, i} \approx 27~\mathrm{mK}  \left(1 - x_{\mathrm{HII}, i}\right) \Delta_i \left(\frac{1+z}{10}\frac{0.15}{\Omega_{m}h^2}\right)^{1/2} \left(\frac{\Omega_{b}h^2}{0.023}\right),$
$\ee$
$where \Delta_i \equiv \rho_{m, i} / \bar{\rho}_m is the ratio of the matter density \rho_{m,i} in the grid cell and the mean matter density \bar{\rho}_m.$
$Given these quantities, the 21 cm power spectrum can be computed as$
$\be$
$P_{21}(k) = \langle\hat\delta_{21}(k)\hat\delta_{21}^*(k)\rangle$
$\ee$
$where \hat\delta_{21}(k) is the Fourier transform of the mean-subtracted normalized fluctuation field, \delta T_{b,i}/\langle \delta T_{b, i} \rangle-1.$
$Similarly, the cross power spectra between 21cm field and the matter density field are given by$
$\be$
$P_{21\times\delta}=\langle\hat\delta_{21}(k)\hat\delta_m^*(k)\rangle$
$ \ee$
$where \hat\delta_m(k) is the Fourier transform of matter density contrast (\Delta_i-1). It is worth highlighting that 21 cm-density cross power spectrum can not be observed directly by any tracers, but these can be derived by observing galaxy distribution and estimating galaxy bias with respect to the background dark matter distribution. With the assumption of linear galaxy bias, galaxy density is essentially proportional to the matter density  ([Mirocha, et. al 2023]()) . The linear bias is expected to be a reasonable approximation for the large-scale modes, which are of interest in this study. Hence, 21 cm along with high redshift galaxy surveys, can be utilized as a direct probe of the cross-correlation. For simplification, we use the term observables even for the indirect estimators unless otherwise specified.$
$\iffalse The cross power spectra between 21 cm and collapsed fraction field (f_{\mathrm{coll}}) can be estimated as$
$\be$
$P_{21\times\mathrm{coll}}=\langle\hat\delta_{21}(k)\delta_{\mathrm{coll}}^*(k)\rangle$
$ \ee$
$where \delta_{\mathrm{coll}}^*(k) is the Fourier transform of the fluctuation in mass weighted collpased fraction field, f_{\mathrm{coll,i}}\Delta_i/\langle f_{\mathrm{coll,i}}\Delta_i\rangle -1. This cross power can potentially be estimated from synergies between 21 cm and CO intensity field.  \fi$
$Throughout the paper, we will work with dimensionless power spectra which are given by$
$\begin{equation}$
$\label{eq:Delta_21}$
$   \Delta_{X}^2(k) = \frac{k^3 P_{X}(k)}{2 \pi^2},$
$\end{equation}$
$where P_X corresponds to the different power spectra (P_{21}, P_{21\times\delta}) as defined earlier. The 21 cm field and the density field are supposed to be highly anti-correlated at large scales, providing negative cross power. This is expected due to the efficient ionization of high density regions, forming the ionizing sources, and similarly, less efficient ionization at low density regions. Hence, we use the amplitude of cross power spectra (\vert \Delta_{21\times\delta}^2\vert) as the probe in this study \citep[see also, ][]{Moriwaki2024}. We can further define the bias of the 21cm-density cross power spectrum with respect to matter power spectrum (P_{\delta\delta}) as$
$\be$
$\label{eq:bias}$
$b_{21\times\delta}^2 (k) = \left\vert\frac{P_{21\times\delta}}{P_{\delta\delta}}\right\vert$
$\ee$
$This is relatively easy to estimate due to uncorrelated systematics in the cross spectra than the bias of 21 cm auto power spectra. Hence, we choose this probe instead of the bias of the auto power spectra. We also quantify the state of the IGM by globally averaged neutral fraction, Q_{\mathrm{HI}}=\langle (1-x_{\mathrm{HII,i}})\Delta_i\rangle . As discussed earlier, we aim to check the prospects of these observables in constraining astrophysical parameters relevant for EoR as well as inferring the underlying cosmological model. This further demands efficient ways for computation, which is discussed in the next section.$
$\section{Emulating the observables/estimators}$
$\label{sec:emulator}$
$\begin{figure*}$
$    \centering$
$    \includegraphics[width=0.9\textwidth]{21cm_cosmology/True_predicted_comp_21cm.pdf}$
$    \caption{Comparison of true 21 cm power spectrum and corresponding predicted estimates using ANN at different k bins used in this work. The black points correspond to test dataset while the red line signifies True=Prediction. The corresponding R^2 value is 0.98.}$
$    \label{fig:comp_21_pow}$
$\end{figure*}$
$\begin{figure*}$
$    \centering$
$    \includegraphics[width=0.9\textwidth]{21cm_cosmology/True_predicted_comp_21cm_matcross.pdf}$
$    \caption{Comparison of true cross power amplitude between 21 cm and \delta_m field with the corresponding predicted estimates using ANN at different k bins used in this work. Other descriptions are similar to Figure \ref{fig:comp_21_pow}. This corresponds to an R^2 value of 0.99.}$
$    \label{fig:comp_21_mat_cross}$
$\end{figure*}$
$\begin{figure*}$
$    \centering$
$    \includegraphics[width=0.9\textwidth]{21cm_cosmology/True_predicted_comp_21cm_bias.pdf}$
$    \caption{Comparison of true 21 cm bias and the corresponding predicted estimates using ANN at different k bins used in this work.  Other descriptions are similar to Figure \ref{fig:comp_21_pow}. The corresponding R^2 value is 0.92.}$
$    \label{fig:comp_21_bias}$
$\end{figure*}$
$\begin{figure*}$
$    \centering$
$    \includegraphics[width=0.9\textwidth]{21cm_cosmology/samples_predict_true.pdf}$
$    \caption{Plots of 21 cm power spectra, 21 cm-density cross power, and its bias for a few random models from the test set. The solid lines are the true models, while the dashed lines are the corresponding predictions.}$
$    \label{fig:comp_samples}$
$\end{figure*}$
$\begin{figure*}$
$    \centering$
$    \includegraphics[width=0.9\textwidth]{21cm_cosmology/mock_models.pdf}$
$    \caption{Top panel: Snapshots of Density (\Delta), collapsed fraction (f_{\mathrm{coll}}), and neutral fraction field (x_{\mathrm{HI}}), gradually from left to right, for the fiducial model utilized to generate the mock dataset as described in section \ref{sec:gen_mock}. Bottom panel: The 21 cm power spectra (\langle\delta T_b\rangle^2\Delta_{21}^2), 21 cm and density cross power spectra (\vert \Delta_{21\times\delta}^2\vert), and the bias of cross spectra (b^2_{21\times\delta}) for the corresponding model, gradually from left to right.}$
$    \label{fig:mock_model}$
$\end{figure*}$
$\begin{figure}$
$    \centering$
$    \includegraphics[width=0.9\columnwidth]{21cm_cosmology/True_predicted_comp_QHI_a.pdf}$
$    \caption{Comparison of True neutral fraction (Q_{\mathrm{HI}}) and corresponding Predicted estimates using Gaussian Process Regression.}$
$    \label{fig:comp_Q}$
$\end{figure}$
$In order to pursue parameter space exploration, we construct an emulator which can predict the observables given a set of parameters. In this study, we have two astrophysical parameters (\zeta, M_{\mathrm{min}}) defining reionization model and five cosmological parameters (\Omega_m, h, \sigma_8, n_s, w_0) initially. However, we fix n_s and w_0 at standard values while pursuing parameter exploration as these are not expected to substantially affect the scales and the redshifts considered here. Hence, we go ahead with rest of the three cosmological parameters, acquiring efficiency. Now, the idea is to predict the observable values given the free parameters as inputs. To this end, we utilize supervised machine learning technique, specifically, an Artificial Neural Network (ANN) to train the emulator.$
$\subsection{Artificial Neural Network (ANN) in Brief}$
$ An ANN is composed of an input layer, one or more hidden layers, and an output layer. The input layer receives raw data, while the hidden layers perform complex computations to extract meaningful features. The output layer then provides the final prediction. Each connection between neurons has an associated weight and bias, which are adjusted during the training to minimize errors. Further, in order to allow the network to learn complex patterns, non-linearities are introduced by activation functions, such as ReLU and Sigmoid. The learning process is guided by a loss function, which measures the error, and an optimization algorithm, such as gradient descent or Adam, which updates the weights through backpropagation \citep[for details, see][]{2007JEI....16d9901B,2020MNRAS.491.4031C}. A portion of the training datasets is used for validation purposes during the development of the emulator, and this process progresses in an iterative manner. Once the network is fully trained, it is tested on a different set of data, providing a robustness check on the predictions. In general, ANNs can face two key challenges during training, i.e., underfitting and overfitting. Underfitting occurs when the model is too simple to capture the patterns in the data, leading to poor performance on both the training and validation sets. This often happens when the network has too few layers or neurons, or when the training time is insufficient. On the other side, overfitting happens when the model learns the noise and specific details of the training data instead of generalizing to new data. This results in excellent performance on the training set but poor accuracy on unseen validation or test data. Overfitting is common when the network is too complex or trained for too many epochs without proper regularization. To check the performance of the network, we use R^2 metric score, which is defined as$
$\be$
$R^2 = 1 - \frac{\sum (y_{\mathrm{true}}-y_{\mathrm{predict}})^2}{\sum (y_{\mathrm{true}}-\langle y_{\mathrm{true}}\rangle)^2}$
$\ee$
$where y_{\mathrm{true}} is the true value of the observables from simulation, \langle y_{\mathrm{true}}\rangle is the average from the test set and y_{\mathrm{predict}} is the corresponding prediction from the network. This essentially provides an assessment for goodness of fit, where metric value can vary from 0 to 1.  As the R^2 value gets closer to 1, the prediction capability of the emulator gets better.$
$\subsection{Training Procedure}$
$To generate the training dataset, we vary the different parameters within a reasonable prior ranges i.e. \Omega_m:[0.2,0.4], h:[0.6,0.8], \sigma_8:[0.7,0.9], n_s:[0.9,1.0], w_0:[-2,0], \zeta:[1,40] and \log M_{\min}:[7,12]. The baryonic density parameter, \Omega_b is fixed at 0.0482, obeying the findings from CMB spectra  ([Aghanim and Akrami 2020]()) . We store the power spectra in 10 different bins between k\simeq0.05 ~\mathrm{h/cMpc} to 1.084~\mathrm{h/cMpc}. However, we take only 6 bins in the range k\simeq0.11 ~\mathrm{h/cMpc} to 0.84~\mathrm{h/cMpc} for further analysis, which are expected to be probed by upcoming instruments like SKA-Low. The goal here is to predict the corresponding amplitude in those bins given a set of parameter values. Given that motivation, we generate a total of 6750 samples for each type of observables (21 cm power spectra, 21cm-density cross power spectra amplitude and cross bias) by randomly varying these parameters. Among these samples, 500 correspond to different realizations of the initial seed. This further takes into account the cosmic variance uncertainties during the training. We utilize publicly available Scikit-learn and TensorFlow packages in Python to implement the network.  We split the sample in training and testing sets with a ratio of 80 to 20. Our assumed network architecture is summarized in Table \ref{tab:arch}.  We use ReLU activation between the layers and Adam optimizer in this setup. An architecture with 10 hidden layers along with one input and one output layer, performs well to serve the purpose of the study.$
$\begin{table}$
$\centering$
$\caption{ANN architecture for training the 21 cm power spectra and cross bias}$
$\newcommand{\arraystretch}{1.2}$
$\setlength{\tabcolsep}{10pt}$
$\begin{threeparttable}$
$\begin{tabular}{cc}$
$\hline$
$Layers & Description \\ \hline$
$Input & free parameters\\ Dense 1 & (512 neurons, activation='relu')\\ Dense 2 & (1024 neurons, activation='relu')\\ Dense 3 & (1024 neurons, activation='relu')\\ Dense 4 & (512 neurons, activation='relu')\\ Dense 5 & (512 neurons, activation='relu')\\ Dense 6 & (256 neurons, activation='relu')\\ Dense 7 & (128 neurons, activation='relu')\\ Dense 8 & (64 neurons, activation='relu')\\ Dense 9 & (32 neurons, activation='relu')\\ Dense 10 & (16 neurons, activation='relu')\\ output & (values at k bins, activation='linear')$
$\end{tabular}$
$\end{threeparttable}$
$\label{tab:arch}$
$\end{table}$
$In Figure \ref{fig:comp_21_pow}, we show the comparison between true 21 cm power amplitude against the prediction from the trained network for the test samples. Each panel shows the six different bins considered in this study. It is visually clear that the true and predicted values are well correlated with each other, signifying a good accuracy of the prediction. The overall R^2 metric score for the test set is 0.98, which also quantifies a well trained model with high predictive power. Similarly, we show the true vs prediction plot for 21 cm-density cross power spectrum in Figure \ref{fig:comp_21_mat_cross} and for cross bias in Figure \ref{fig:comp_21_bias}. The corresponding R^2 metric values are 0.99 and 0.92, respectively, which again provides a significantly accurate prediction. For cross power spectra, we emulate the quantity Q_{\mathrm{HI}}\vert \Delta_{21\times\delta}^2\vert at first and then divide by Q_{\mathrm{HI}}, where the IGM is not fully ionized. This helps us to avoid any possible divergence due to fully ionized IGM in the training set. The scatter at lower amplitudes arises mainly due to the fact that these correspond to highly ionized states of the IGM and hence there is very little amount of leftover correlation information between 21 cm and the density distribution. In Figure \ref{fig:comp_samples}, we further show the comparison plots of the true model and emulator prediction for the different observables as functions of k modes using six random sets of parameter samples in our test suite. The true (in \textit{solid}) and the predicted (in \textit{dashed}) cases match reasonably well for the different models. This gives us confidence on the emulator's performance over a wide range of models. In Figure \ref{fig:mock_model}, we give an example of a fiducial model which has been discussed in section \ref{sec:gen_mock}.$
$We also utilize the datasets to predict the global neutral fraction (Q_{\mathrm{HI}}), providing the same set of free parameters. Instead of a complex network, a simpler technique using Gaussian Process Regression (GPR) is sufficient to give a reasonably accurate prediction in this case, corresponding to R^2 metric score of 0.98.  In Figure \ref{fig:comp_Q}, we show the comparison between the true global neutral fraction and the corresponding predictions from GPR. These are nicely correlated with each other along the equality line with an average scatter uncertainty <5\%.$
$\iffalse \begin{figure*}$
$    \centering$
$    \includegraphics[width=0.7\textwidth]{21cm_cosmology/mock_models.pdf}$
$    \caption{Top panel: Snapshots of Density, collapsed fraction, and neutral fraction field (gradually from left to right) for the fiducial model utilized to generate the mock dataset. Bottom panel: The 21 cm power spectra, 21 cm and density cross power spectra, and the bias of cross spectra for the corresponding model, gradually from left to right.}$
$    \label{fig:mock_model}$
$\end{figure*}\fi$
$\begin{figure*}$
$    \sidecaption$
$    \centering$
$    \includegraphics[width=0.9\textwidth]{21cm_cosmology/corner_comp_fiducial.pdf}$
$    \caption{Comparison of posterior distributions using different combinations of observables i.e. only 21 cm power spectra (red), 21 cm power spectra + 21cm-density cross power spectra (green), and adding bias of cross spectra (blue). The diagonal panels show the 1D posterior probability distribution, and the off diagonal panels show the joint 2D posteriors. The contours represent the 68\% and 95\% confidence intervals. The dashed line represents the input parameter values used to generate the mock dataset. The observational uncertainties are assumed to be 5\% of the observable amplitudes in this case.}$
$    \label{fig:comp_posterior_fiducial}$
$\end{figure*}$
$\begin{figure*}$
$    \sidecaption$
$    \centering$
$    \includegraphics[width=0.9\textwidth]{21cm_cosmology/corner_comp_QHI.pdf}$
$    \caption{Comparison of posterior distributions for mocks corresponding to different ionization states. Along with the fiducial case (Q_{\mathrm{HI}}=0.47, in blue), we show two more cases for lower (Q_{\mathrm{HI}}=0.29, in orange)  and higher neutral fraction (Q_{\mathrm{HI}}=0.65, in magenta). The observational uncertainties are assumed to be 5\% of the amplitudes as in Figure \ref{fig:comp_posterior_fiducial}.}$
$    \label{fig:comp_posterior_QHI}$
$\end{figure*}$
$\begin{figure*}$
$    \sidecaption$
$    \centering$
$    \includegraphics[width=0.9\textwidth]{21cm_cosmology/corner_comp_uncertain.pdf}$
$    \caption{Comparison of posterior distributions for mocks corresponding to different observational uncertainties assumed in the likelihood. The blue contours correspond to the fiducial case with 5\% uncertainties, as also shown in Figure \ref{fig:comp_posterior_fiducial} and \ref{fig:comp_posterior_QHI}; while the green contours correspond to a variant with 10\% observational uncertainties for all the observables used.}$
$    \label{fig:comp_posterior_uncertain}$
$\end{figure*}$
$\section{Generating the mock data}$
$\label{sec:gen_mock}$
$We choose a fiducial set of parameters to generate the mock observables. For exploration studies, we fix n_s=0.961 and w_0=-1, consistent with the CMB estimates  ([Aghanim and Akrami 2020]()) . The rest of the parameter values corresponding to these fiducial mocks are chosen as \Omega_m=0.308, h=0.678, \sigma_8=0.829, \zeta=15, and \log M_{\mathrm{min}}=9.0. The values are shifted by a Gaussian random noise with a standard deviation consistent with the expected level of SKA-Low (AA*) thermal noise with 1000 hrs of observations. These shifted values are treated as the mock dataset for our analysis. The total errors on the mock dataset are assumed to have two contributions i.e. training uncertainties (\sigma_\mathcal{D}^{tr}) and the overall observational uncertainties (\sigma_\mathcal{D}^{obs}). We compute the training uncertainties by quantifying the scatter in True vs Predicted observable distributions. Specifically, we estimate 84\% and 16\% quantiles for (True-Predicted) distributions and then take half the difference between those two quantiles to get \sigma_\mathcal{D}^{tr}. For observational uncertainties, we assumed a moderate value, 5\% of the observable amplitude at the corresponding k bins. This is motivated by the expected SNR (\gtrsim20) on 21 cm power spectra from SKA-Low AA* observations for 1000 hrs with an optimistic foreground scenario \footnote{\url{https://21cmsense.readthedocs.io/en/latest/tutorials/SKA_forecast.html}}. We also show a case with a more conservative uncertainty, assuming 10\% of the amplitude.  Then the total uncertainties are estimated by adding these  contributions in quadrature as$
$\be$
$\sigma_\mathcal{D}^{tot} = \sqrt{(\sigma_\mathcal{D}^{tr})^2 + (\sigma_\mathcal{D}^{obs})^2}$
$\ee$
$At the top panels in Figure \ref{fig:mock_model}, we show the different cosmological fields, including matter density (\Delta), collapsed fraction (f_{\mathrm{coll}}), and neutral fraction (x_{\mathrm{HI}}). The fluctuations in these fields are nicely correlated with each other. At the bottom panels, we show the corresponding observables i.e. 21 cm power spectra (\langle\delta T_b\rangle^2\Delta_{21}^2), 21cm-density cross power spectra (\vert\Delta_{21\times\delta}^2\vert), and bias (b^2_{21\times\delta}). The black data points with errorbars are utilized for the parameter space exploration studies, while the blue dashed line represents the underlying input model. We adopt a conservative approach by using only two k bins with the better prediction uncertainties (among all the 6) for the 21 cm bias, as this has a relatively lower prediction accuracy among the three estimators considered in this study. This helps us to minimize the bias coming from poor emulator predictions.$
$We further study utilizing two variants of the mock dataset using different input models corresponding to different ionization states. These are generated by changing the \zeta values appropriately. To ensure robustness,  we check the results with a smaller (\zeta=10) and a larger (\zeta=20) value than the fiducial ones as discussed later in section \ref{sec:results}.$
$\section{Parameter exploration with emulator}$
$\label{sec:param_exp}$
$We employed a standard Bayesian framework to explore the parameter space of our model. Our objective was to compute the posterior probability distribution, \mathcal{P}(\lambda \vert \mathcal{D}), of the model parameters \lambda, conditioned on the observational (mocks in this case) datasets \mathcal{D} introduced in the previous section. According to Bayes’ theorem, the posterior is given by$
$\begin{equation}$
$\label{eq:bayes_eq}$
$\mathcal{P}(\lambda \vert \mathcal{D}) = \frac{\mathcal{L}(\mathcal{D} \vert \lambda)\, \pi(\lambda)}{\mathcal{P}(\mathcal{D})},$
$\end{equation}$
$where \mathcal{L}(\mathcal{D} \vert \lambda) denotes the likelihood, \pi(\lambda) represents the prior distribution, and \mathcal{P}(\mathcal{D}) is the Bayesian evidence. The evidence serves as a normalization constant and does not influence our parameter inference.$
$The likelihood function was modeled as a multivariate Gaussian distribution,$
$\be$
$\label{eq:chisq_eq}$
$\mathcal{L}(\mathcal{D} \vert \lambda)$
$=\exp \left(-\frac{1}{2} \sum_{\alpha}\left[\frac{\mathcal{D}(k_{\alpha})-\mathcal{M}(k_{\alpha}; \lambda)}{\sigma_\mathcal{D}^{tot}(k_{\alpha})}\right]^2 \right)$
$\ee$
$Here \mathcal{D} corresponds to different mock observables/estimators (i.e. \langle\delta T_b\rangle^2\Delta_{21}^2, \vert \Delta_{21\times\delta}^2\vert and b^2_{21\times\delta}) and \mathcal{M} is the corresponding predictions for parameter set \lambda.$
$To sample the posterior distribution, we utilized the Markov Chain Monte Carlo (MCMC) method, employing the Metropolis–Hastings algorithm  ([Metropolis, et. al 1953]()) . The MCMC chains were executed using the publicly available \texttt{cobaya} package  ([ and Torrado 2021]()) \footnote{\url{https://cobaya.readthedocs.io/en/latest/}}.$
$We checked the convergence of the chains following the  Gelman-Rubin R - 1 statistic  ([ and Gelman 1992]()) . The chain was assumed to have converged when the R - 1 value was lower than a threshold 0.01.$
$For subsequent analysis, we discarded the initial 30\% of samples from each chain as burn-in and based our inference on the remaining samples.$
$\begin{table*}$
$\centering$
$\caption{Parameter constraints obtained from the MCMC-based analysis for different scenarios using mock dataset.}$
$\newcommand{\arraystretch}{0.5}$
$\setlength{\tabcolsep}{5.5pt}$
$\small$
$\begin{threeparttable}$
$\begin{tabular}{cccccccc}$
$\hline$
$\ Parameters  & Input & \multicolumn{4}{c}{\langle\delta T_b\rangle^2\Delta_{21}^2 + \vert \Delta_{21\times\delta}^2\vert + b^2_{21\times\delta}} & \langle\delta T_b\rangle^2\Delta_{21}^2 + \vert \Delta_{21\times\delta}^2\vert & \langle\delta T_b\rangle^2\Delta_{21}^2   \ &  &  & &&&& \ (95\% limits) &  &  (Q_{\mathrm{HI}}=0.47)  &  (Q_{\mathrm{HI}}=0.47) & (Q_{\mathrm{HI}}=0.65)& (Q_{\mathrm{HI}}=0.29)&$
$  (Q_{\mathrm{HI}}=0.47) &  (Q_{\mathrm{HI}}=0.47)  \  &  &  & &&&& \ &  & ( 5\% obs. err.) & ( 10\% obs. err.) & ( 5\% obs. err.) & ( 5\% obs. err.) & ( 5\% obs. err.) & ( 5\% obs. err.)\\\ \hline$
$&  &  & &&&& \  \hline$
$&  &  & &&&& \   \Omega_m &0.308  &[>0.26]   & [>0.25]    & [>0.26] & [>0.26]  & [>0.27] & [>0.25]\ \   h & 0.678 &0.69 [0.66, 0.73]    & 0.69 [0.63, 0.74]     & 0.70 [0.65, 0.75] & 0.68 [0.61,0.78] & 0.69 [0.65,0.73] & [<0.77]\ \     \sigma_8 & 0.829 &0.80 [0.75, 0.85]   & 0.80 [0.73, 0.86]     & 0.79 [0.73, 0.85] & [<0.83] &[<0.86] & -\ \ Q_{\mathrm{HI}}& - & 0.47 [0.44, 0.50]   & 0.47 [0.43, 0.50]     & 0.64 [0.60, 0.68] & 0.29 [0.23, 0.33] & 0.46 [0.42, 0.50] & 0.46 [0.34, 0.57] \ \ \hline \ \end{tabular}$
$\end{threeparttable}$
$\label{tab:param_cons}$
$\tablefoot{For each case, we show the 95\% confidence limits on the parameters in the brackets. We provide the mean posterior values where the bounds are available from both sides. }$
$\end{table*}$
$\section{Results}$
$\label{sec:results}$
$\iffalse \begin{figure*}$
$    \centering$
$    \includegraphics[width=0.9\textwidth]{21cm_cosmology/corner_comp_fiducial.pdf}$
$    \caption{Comparison of posterior distributions using different combinations of tracers i.e. only 21 cm power spectra (red), 21 cm and density cross spectra (green), and including bias of cross spectra (blue). The diagonal panels show the 1D posterior probability distribution, and the off diagonal panels show the joint 2D posteriors. The contours represent the 68\% and 95\% confidence intervals. The dashed line represents the input parameter values used to generate the mock dataset.}$
$    \label{fig:comp_posterior_fiducial}$
$\end{figure*}$
$\begin{figure*}$
$    \centering$
$    \includegraphics[width=0.9\textwidth]{21cm_cosmology/corner_comp_QHI.pdf}$
$    \caption{Comparison of posterior distributions for mocks corresponding to different ionization states. Along with the fiducial case (Q_{\mathrm{HI}}=0.47, in blue), we show two more cases for lower (Q_{\mathrm{HI}}=0.29, in orange)  and higher neutral fraction (Q_{\mathrm{HI}}=0.65, in magenta).}$
$    \label{fig:comp_posterior_QHI}$
$\end{figure*}\fi$
$In this section, we discuss the findings from our parameter space exploration studies using mock datasets. In Table \ref{tab:param_cons}, we provide the 95\% confidence limits along with the mean of the recovered cosmological parameters as well as the ionization state for different scenarios considered in this study.$
$In Figure \ref{fig:comp_posterior_fiducial}, we show the posterior recoveries of the free parameters from the fiducial mock dataset. The free parameters include both cosmological (\Omega_m, h, \sigma_8) as well as astrophysical ones (\zeta, \log M_{\mathrm{min}}). We also show the posterior of the globally averaged neutral fraction (Q_{\mathrm{HI}}) as a derived parameter. We find that the parameters are not well constrained for the case where we utilize only 21 cm power spectra (shown in red), although it can correctly recover the global neutral fraction with wide uncertainties. The constraints are improved significantly when we include 21cm-density cross power spectra as observables along with 21 cm power spectra (shown in lime green). Specifically, the Hubble parameter (h) is constrained within an uncertainty of <6\% at a confidence interval of 95\%. This signifies the potential of 21 cm and synergies with galaxy observables as an independent probe to constrain the expansion rate of the universe, which can further shed light onto the well known Hubble tension  ([Riess, et. al 2019](), [Verde, Treu and Riess 2019]()) . Furthermore, the global neutral fraction is now stringently constrained, discarding a significant portion of astrophysical parameter spaces. On top of that, if we include the bias of cross spectra, it further constrains \sigma_8 parameter (providing <10\% uncertainty at 95\% confidence). This happens as the combination now has the information on the amplitude of the underlying matter power spectra (see equation \ref{eq:bias}), which is controlled by \sigma_8. We also note that the reionization source parameters are also well recovered, and the uncertainties subsequently improve as we include more observables. However, \Omega_m is bounded by only one side due to strong degeneracy with astrophysical parameters.$
$To check the robustness of the findings, we further pursue parameter space exploration using two more mock datasets with different ionization states. We tune the ionizing efficiency parameter to generate the mocks with a higher (Q_{\mathrm{HI}}=0.65) and a lower (Q_{\mathrm{HI}}=0.29) neutral fraction than the fiducial one (Q_{\mathrm{HI}}=0.47), while all the other input parameters are kept the same as before. In Figure \ref{fig:comp_posterior_QHI}, we show the recovered posteriors of these cases along with the fiducial one. We find that the h and \sigma_8 parameters are well constrained even for the higher neutral fraction, recovering the underlying true values within 95\% uncertainties as before. The astrophysical parameters are also consistent with the input values. On the other hand, the constraints on the parameters for lower neutral fraction are not significantly strong, barely constraining h and providing one sided bound on \sigma_8 at 95\% uncertainty level. This is not very surprising as the ionized bubbles start to overlap when the universe is highly ionized (lower neutral fraction) which can wipe out the correlation information, resulting in a loss of constraining power.  However, the neutral fraction has still been recovered with significant precision without any strong bias. This also confirms the fact that the 21 cm observables are more sensitive to the ionization state of the universe rather than the underlying cosmological information.$
$Lastly, we check the effects of observational uncertainties on the posterior distribution in Figure \ref{fig:comp_posterior_uncertain}. The green contours show the case where we assume the uncertainties to be 10\% of the observable amplitudes, while the other one is same as the fiducial case with 5\% uncertainties. Not surprisingly, the contour widens for larger uncertainties, however, it still manages to correctly recover the Hubble parameter and amplitude of primordial fluctuations with significant confidence.$
$\section{Summary and conclusions}$
$\label{sec:conc}$
$The astrophysics during the Epoch of Reionization is gradually getting explored with the help of multi-wavelength observables. The 21 cm signal is one of the crucial probes which has the potential to detect neutral hydrogen fluctuations at EoR directly. This further contains useful information about the cosmological parameters, although it is hard to infer cosmology from this weak signal, affected by foreground contamination and poorly understood high redshift astrophysical phenomena. To this end, the cross-correlation of 21 cm signal with other tracers of cosmological density can be a complementary probe of astrophysics and cosmology. This is useful to avoid any systematics arising due to spurious correlation and, hence, enhance the signal to noise ratio of detection. In this study, we check the prospects of 21 cm-density cross power and its bias along with 21 cm power spectra in order to probe the astrophysics and cosmology from the EoR. Our approach relies on creating an efficient emulator of the observables and utilizing the emulator for further parameter space exploration. Below, we summarize this work, highlighting the main findings.$
$\begin{itemize}$
$    \item We used a realistic semi-numerical reionization model based on a photon-conserving algorithm to study the prospects of 21 cm and related observables to infer cosmology and astrophysics during the EoR. Specifically, we used 21 cm auto power spectra, magnitude of cross power between  21 cm fluctuations and matter density, and the corresponding bias magnitude. As a prospective study, we focused only at a single redshift i.e. z=7.0 in this work. While 21 cm auto power spectra can be observed directly by the radio interferometers,  21cm-density cross power spectra and the bias can not be measured directly.$
$    However, the cross power and its bias can in principle be estimated by different tracers, especially via galaxy-21cm cross-correlation.$
$    We created a total of  \sim7000 samples by varying different astrophysical and cosmological parameters to build the emulator for these observables/estimators. The samples were generated with different initial random seeds, which further takes into account for the cosmic variance uncertainties. The emulators were trained to predict the observables at 6 different k bins, given a set of input free parameters (including astrophysical and cosmological ones). The bins were chosen in the range where we can expect the detection of 21 cm signal from the upcoming telescopes like SKA-Low. We compared the emulators against true values and found that the predictions are sufficiently accurate, providing R^2 values >0.9 for all the cases (0.98 for 21 cm auto power spectra, 0.99 for cross power spectra, 0.92 for bias amplitude). This newd us with the confidence to do efficient parameter space exploration utilizing the emulators.$
$    \item Next, we generated the mock observables with a fiducial set of parameter values, consistent with [Aghanim and Akrami (2020)]() and providing an ionization state close to the middle of reionization process. We found that 21 cm power spectra alone can not constrain the cosmological parameters, while they can recover the correct ionization state. When we included cross power spectra as another observable, Hubble parameter was constrained and adding bias magnitude on top of it further constrained the amplitude of primordial matter fluctuations (\sigma_8). Similarly, the constraints on the ionization state were also improved significantly. We further pursued a similar analysis with two more mock datasets corresponding to a higher and lower neutral fraction. The recoveries were degraded for lower neutral fraction due to a possible lack of correlation information. However, the ionization states were still precisely recovered for all the cases.$
$\end{itemize}$
$We would like to caution that the exact quantification of constraints is dependent on the emulator uncertainties, which can be improved with larger datasets spanning wider parameter spaces and with more sophisticated training techniques. We also neglect any covariance between the Fourier modes as well as between observables while computing the likelihood. While the mutual covariances would be ideal to include and may probably degrade the uncertainties, the estimates of 21 cm observables are generally newd without the covariances information in the literature  (ghara2025) . Similarly, multi-observable inference studies usually neglect covariance information between the observables  ([ and Maity 2022](), [Qin and Mesinger 2025]()) . Hence, we proceed with the diagonal terms, assuming relatively conservative uncertainties and mutually independent observables. Further, one needs to be cautious while interpreting cosmology with real observational data, as the observational estimates (limits till now) are often derived assuming an underlying cosmology, which should be properly quantified and corrected before inference. All these aspects may be important and will be useful to check with a separate study in the future. To this end, the main conclusions of this study utilizing mocks are unlikely to be changed much, providing an insight into the applicability of 21 cm and corresponding synergies as EoR/cosmology probe.$
$The detection prospect of cross power spectrum signal between 21 cm and galaxies are very bright, given the ongoing and upcoming major observational facilities like HERA, SKA, ELT, NGRST etc. For example, HERA-NGRST cross-correlation can new a 14\sigma detection with an assumption 500 square-deg common survey area  ([Mirocha, et. al 2023]()) , while the detection can be improved to 55\sigma with SKA-Low AA*  ([Gagnon-Hartman, Davies and Mesinger 2025]()) . There also exists exciting potential for cross-correlation between intensity maps of metals like CII, CO and 21 cm, where a \sim7\sigma detection is possible with available instruments  ([ and Fronenberg 2024]()) .  To this end, our study news an expectation on astrophysical as well as cosmological inference during reionization from 21 cm, its cross-correlation with dark matter density and corresponding bias. We chose the direct cross-correlation between 21 cm and density to avoid any further astrophysical uncertainties associated with any specific tracers which also helps to build up efficient emulators.  Although the cross-correlation between 21 cm and dark matter density can not be measured directly, its bias can be derived and these can be useful indirect estimators utilizing the future observations.$
$ Currently, we use a simplistic two parameter reionization model in this study while the realistic universe is expected to be much complex. For example, in a more realistic model, one needs additional parameters such as the IGM clumping factor and temperature increment for photoionization heating, to quantify the effect of the inhomogeneous recombination and radiative feedback processes  ([ and Maity 2022]()) .  As a natural consequence of a more complex model, the number of training samples is expected to be larger to capture the whole parameter space, along with additional degeneracies between the parameters.  Some of the degeneracies can be alleviated by utilizing complementary reionization probes such as UV luminosity function (UVLFs), Ly-\alpha forest fluctuations,  CMB scattering optical depth etc  ([Qin, et. al 2021](), [ and Maity 2022](), [Qin and Mesinger 2025]()) .  However, a two parameter vanilla model is often sufficient to new the typical nature and amplitude of the fluctuations in the 21 cm field and the state of the IGM  ([ and Maity 2023]()) , which serves the purpose of this proof of concept study. In the future, we would like to explore with a more realistic reionization model, including above mentioned effects if those affect the cosmological inference. Similarly, the prospects for more direct tracers such as cross correlation between 21 cm and Ly-\alpha emitters density (instead of dark matter density), can be explored, avoiding any assumption of linear scale independent galaxy bias. Parallelly, we would like to extend the study with more redshifts, incorporating full information of high redshift 21 cm observations. Eventually, these can be jointly explored with other EoR and cosmic dawn probes to simultaneously constrain astrophysics and cosmology.$
$\begin{acknowledgements}$
$     The author thanks Prof. Tirthankar Roy Choudhury for the comments on the draft, which have been helpful to improve the manuscript.$
$\end{acknowledgements}$
$-------------------------------------------------------------------$
$\bibliographystyle{aa}$
$\bibliography{script_emu}$
$\end{document}$
$Example below of non-structurated natbib references$
$To use the v8.3 macros with this form of composition of bibliography,$
$the option "bibyear" should be added to the command line$
$"\documentclass[bibyear]{aa}".}\end{document}\end{equation}}$
$\newcommand{\ee}{\end{equation}}$
$\newcommand{\nline}{\notag \\}$
$\newcommand{\f}{\frac}$
$\newcommand{\de}{\mathrm{d}}$
$\newcommand{\del}{\partial}$
$\newcommand{\half}{\frac{1}{2}}$
$\newcommand{\im}{\mathrm{i}}$
$\newcommand{\e}{\mathrm{e}}$
$\newcommand{\Msun}{\mathrm{M}_{\odot}}$
$\newcommand{\eqn}[1]{equation~(\ref{#1})}$
$\newcommand{\eqns}[2]{equations~(\ref{#1}) and~(\ref{#2})}$
$\newcommand{\secn}[1]{Section~\ref{#1}}$
$\newcommand{\appndx}[1]{Appendix~\ref{#1}}$
$\newcommand{\fig}[1]{Fig.~\ref{#1}}$
$\newcommand{\figs}[1]{Figs.~\ref{#1}}$
$\newcommand{\tab}[1]{Table~\ref{#1}}$
$\newcommand{\BM}[1]{{\color{blue}[{\bf }#1]}}$
$\newcommand{\red}[1]{{\color{red} #1}}$
$\newcommand{\arraystretch}{1.2}$
$\newcommand{\arraystretch}{0.5}$
$\newcommand\bear{#1}$
$\newcommand{\thebibliography}{\DeclareRobustCommand{\VAN}[3]{##3}\VANthebibliography}$</div>



<div id="title">

# An emulator-based forecasting on astrophysics and cosmology with 21 cm and density cross-correlations during EoR

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2509.05096-b31b1b.svg)](https://arxiv.org/abs/2509.05096)<mark>Appeared on: 2025-09-08</mark> -  _Accepted for publication in A&A, 12 pages, 9 figures_

</div>
<div id="authors">

<mark>B. Maity</mark>

</div>
<div id="abstract">

**Abstract:** The 21 cm signal arising from fluctuations in the neutral hydrogen field, and its cross-correlation with other tracers of cosmic density, are promising probes of the high-redshift Universe. In this study, we assess the potential of the 21 cm power spectrum, along with its cross power spectrum with dark matter density and associated bias, to constrain both astrophysics during the reionization era and the underlying cosmology. Our methodology involves emulating these estimators using an Artificial Neural Network (ANN), enabling efficient exploration of the parameter space. Utilizing a photon-conserving semi-numerical reionization model, we construct emulators at a fixed redshift ( $z = 7.0$ ) for $k$ -modes relevant to upcoming telescopes such as SKA-Low. We generate $\sim7000$ training samples by varying both cosmological and astrophysical parameters along with initial conditions, achieving high accuracy when compared to true simulation outputs. While forecasting, the model involves five free parameters: three cosmological ( $\Omega_m$ , $h$ , $\sigma_8$ ) and two astrophysical (ionizing efficiency, $\zeta$ , and minimum halo mass, $M_{\mathrm{min}}$ ). Using a fiducial model at the mid-reionization stage, we create a mock dataset and perform forecasting with the trained emulators. Assuming a 5 \% observational uncertainty combined with emulator error, we find that the 21 cm and 21 cm-density cross power spectra can constrain the Hubble parameter ( $h$ ) to better than 6 \% at a confidence interval of 95 \% , with tight constraints on the global neutral fraction ( $Q_{\mathrm{HI}}$ ). The inclusion of bias information further improves constraints on $\sigma_8$ (< 10 \% at 95 \% confidence). Finally, robustness tests with two alternate ionization states and a variant with higher observational uncertainty show that the ionization fractions are still reliably recovered, even when cosmological constraints weaken.

</div>

<div id="div_fig1">

<img src="tmp_2509.05096/./21cm_cosmology/True_predicted_comp_21cm.png" alt="Fig3" width="100%"/>

**Figure 3. -** Comparison of True 21 cm power spectrum and corresponding predicted estimates using ANN at different $k$ bins used in this work. The black points correspond to test dataset while the red line signifies True=Prediction. (*fig:comp_21_pow*)

</div>
<div id="div_fig2">

<img src="tmp_2509.05096/./21cm_cosmology/True_predicted_comp_21cm.png" alt="Fig7" width="100%"/>

**Figure 7. -** Comparison of true 21 cm power spectrum and corresponding predicted estimates using ANN at different $k$ bins used in this work. The black points correspond to test dataset while the red line signifies True=Prediction. The corresponding $R^2$ value is 0.98. (*fig:comp_21_pow*)

</div>
<div id="div_fig3">

<img src="tmp_2509.05096/./21cm_cosmology/corner_comp_fiducial.png" alt="Fig13" width="100%"/>

**Figure 13. -** Comparison of posterior distributions using different combinations of observables i.e. only 21 cm power spectra (red), 21 cm power spectra + 21cm-density cross power spectra (green), and adding bias of cross spectra (blue). The diagonal panels show the 1D posterior probability distribution, and the off diagonal panels show the joint 2D posteriors. The contours represent the 68\% and 95\% confidence intervals. The dashed line represents the input parameter values used to generate the mock dataset. The observational uncertainties are assumed to be 5\% of the observable amplitudes in this case. (*fig:comp_posterior_fiducial*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2509.05096"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Count the markdown exports whose on-disk timestamp falls within the last `days` days.
files = glob('_build/html/*.md')
days = 7
# Take a single timezone-aware reference instant: every file is compared against
# the same "now", expressed in UTC like the file timestamps below.
now = datetime.now(timezone.utc)
res = []
for fk in files:
    # NOTE(review): st_ctime is the metadata-change time on Unix (creation time on
    # Windows); if the content modification time is intended, st_mtime may be meant.
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= timedelta(days=days):
        # total_seconds() keeps the day component of the age, unlike `.seconds`
        res.append((delta.total_seconds(), fk))
# Keep only the file names, in descending file-name order
res = sorted((fk for _, fk in res), reverse=True)
npub = len(res)
print(npub, f" publications files modified in the last {days:d} days.")

132  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    Dates are compared as strings against a `YYYY-MM-DD` cutoff, so the date
    of each entry is expected to be a string in that same format.

    :param lst: iterable of (filename, date string) pairs
    :param days: size of the window in days, counted back from today
    :returns: generator of the filenames whose date is on or after the cutoff,
              ordered by decreasing date string
    """
    # The cutoff does not depend on the entry: compute it once, not per item
    cutoff = (datetime.date.today() - datetime.timedelta(days=days)).isoformat()
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date of each document

    Each file is read line by line until the first line containing
    "Appeared on"; the date is the text found between "Appeared on:" and
    "</mark>" on that line. Files without such a line contribute no entry.

    :param lst_file: iterable of paths of the files to scan
    :returns: list of (filename, date string) tuples, in the input order
    """

    def get_date(line):
        # Keep what follows the marker, up to the closing </mark> tag
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument, not over a notebook-level global
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # Only the first occurrence matters: stop reading this file
                    break
    return dates

from glob import glob
# Select the documents whose "Appeared on" date lies within the last `days` days.
# Same '*.md' pattern as the file listing of the previous cell.
lst = glob('_build/html/*.md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

10  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML snippet of a carousel holding `npub` placeholder slides

    :param npub: number of carousel cells to generate (numbered from 1)
    :returns: HTML text, one element per line
    """
    cell_template = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    # Opening tag, split over two lines: the class and the flickity options
    opening = [
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    ]
    cells = [cell_template.format(k=index) for index in range(1, npub + 1)]
    return '\n'.join(opening + cells + ["  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides

    The markup is an opening ``<div class="grid">`` line, one ``grid-item``
    line per slide with ids ``slide1`` to ``slide<npub>``, and the closing
    ``</div>``.

    :param npub: number of slides to generate
    :returns: the HTML code as a single newline-joined string
    """
    header = ["""  <div class="grid"> """]
    items = [
        f"""    <div class="grid-item"> <div id="slide{num}" class="md_view">Content {num}</div> </div>"""
        for num in range(1, npub + 1)
    ]
    footer = ["  </div>"]
    return '\n'.join(header + items + footer)

In [13]:
# Render the 7-day archive page: one carousel slide per selected document.
carousel = create_carousel(npub)

# Quoted, comma-separated lists injected into the template: the document
# file names (path stripped) and the ids of the slides.
doc_names = [path.rsplit('/', 1)[-1] for path in res]
docs = ', '.join(f'"{name:s}"' for name in doc_names)
slides = ', '.join(f'"slide{num}"' for num in range(1, npub + 1))

# Template placeholders and their values, applied in this order.
substitutions = (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "7-day archives"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
)

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
for placeholder, value in substitutions:
    page = page.replace(placeholder, value)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page as the 7-day archive, restricted to the documents of the last day.
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)

# Quoted, comma-separated lists injected into the template: the document
# file names (path stripped) and the ids of the slides.
doc_names = [path.rsplit('/', 1)[-1] for path in res]
docs = ', '.join(f'"{name:s}"' for name in doc_names)
slides = ', '.join(f'"slide{num}"' for num in range(1, npub + 1))

# Template placeholders and their values, applied in this order.
substitutions = (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "Daily"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
)

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
for placeholder, value in substitutions:
    page = page.replace(placeholder, value)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice

npub = 6  # maximum number of papers shown in the grid

# Most recent first. reversed(sorted(...)) is kept on purpose (instead of
# sorted(..., reverse=True)) so that entries sharing the same date are
# selected in the same order as before. The count comes from `npub` rather
# than a second hard-coded 6.
latest = islice(reversed(sorted(dates, key=lambda x: x[1])), npub)
res = [entry[0] for entry in latest]

# Fewer dated pages than requested may exist: size the grid on what was
# actually selected so that every slide has a matching document.
npub = len(res)
print(f"{npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
