# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category reserved for affiliation checks."""

class AffiliationError(RuntimeError):
    """Error raised by :func:`validation` when the affiliation check fails."""

def validation(source: str) -> None:
    """Check the affiliations of a source before its TeX code is parsed.

    Meant to be handed to the LaTeX parser as a pre-parsing hook.

    :param source: source text forwarded to
                   :func:`mpia.affiliation_verifications`
    :raises AffiliationError: when the verification returns anything other
                              than ``True``; the returned diagnostic is
                              included in the error message
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # f-string formatting: a non-string diagnostic must not turn the
        # intended AffiliationError into a TypeError
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# always report AffiliationWarning instead of applying the default warning filtering
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str) -> str:
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g. ``2510.11373``)
    :returns: HTML snippet, an ``<img>`` wrapped in ``<div id="qrcode">``
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    target = f"https://arxiv.org/abs/{paper_id}"
    # Quote the whole `src` attribute value. Quoting only the arXiv link
    # leaves literal '"' characters inside an unquoted attribute, so they
    # become part of the data encoded in the QR code.
    return f'<div id="qrcode"><img src="{url}{target}"></div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Strip the LaTeX CJK environment wrappers from a string

    Only the ``\\begin{CJK}{UTF8}{gbsn} ... \\end{CJK}`` form is handled;
    the wrapped content itself is kept. More patterns may be needed.

    :param text: the text to clean
    :return: the text with the wrapper commands removed
    """
    cjk_environment = re.compile(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})")
    return cjk_environment.sub(r"\2", text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last is reduced to its initial(s); hyphenated
    tokens keep the hyphen (``Anna-Beth`` -> ``A.-B.``). The last token is
    kept in full. A parenthesised part (often a non-western spelling) is
    moved, unchanged, to the end of the result.

    An empty or whitespace-only name returns an empty string instead of
    raising, and an opening parenthesis without a closing one is left
    untouched.

    :param name: full name
    :returns: initials followed by the last token, e.g. ``A.-B. FamName``
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        groups = re.findall(r"\((.*?)\)", name)
        # a '(' without matching ')' yields no group: keep the name as is
        if groups:
            suffix = groups[0]
            name = name.replace(f"({suffix})", '')

    tokens = name.split()
    initials = []
    for token in tokens[:-1]:
        # drop empty pieces produced by leading/trailing/double hyphens
        pieces = [piece for piece in token.split('-') if piece]
        if pieces:
            initials.append('-'.join(piece[0] + '.' for piece in pieces))
    if tokens:
        initials.append(tokens[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff (Mitarbeiter) list from mpia.de to get author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects names containing one of a fixed set of substrings
    (IT, administration, technical staff).

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    excluded = (
        'Licht', 'Binroth', 'Witzel', 'Jordan',
        'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
        'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
        'Dehen', 'Beckmann', 'Jager', 'Jäger',
    )
    # substring match, so "Jäger" also rejects e.g. "K. Jäger"
    return not any(fragment in name for fragment in excluded)

def add_author_to_list(author_list: list) -> list:
    """ Append the hard-coded extra authors that are not yet in the list

    The list is modified in place and also returned.

    :param author_list: list of authors
    :returns: updated list of authors (the same object)
    """
    extra_authors = ('T. Henning',)

    for name in extra_authors:
        if name in author_list:
            continue
        author_list.append(name)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
# NOTE(review): k[1] is presumably the author name of each entry -- confirm against mpia
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
# today's new arXiv papers
new_papers = get_new_papers()
# add manual references
# identifiers listed here are fetched individually and appended to today's papers
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    """Apply `fn` to `value`, falling back to `value` itself on failure.

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of `fn`, also the fallback result
    :returns: the result of the call, or `value` unchanged if the call
              raised any :class:`Exception`
    """
    try:
        outcome = fn(value, *args, **kwargs)
    except Exception:
        # deliberate best-effort: keep the raw input when `fn` cannot handle it
        outcome = value
    return outcome

# papers with at least one author highlighted against the MPIA author list
candidates = []
for paperk in new_papers:
    # Check author list with their initials
    # (robust_call keeps the original author string when get_initials fails)
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # an author counts as matched when its highlighted form contains 'mark'
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    # the paper's author list is replaced by the highlighted one
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

J. Li  ->  J. Li  |  ['J. Li']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']
J. Li  ->  J. Li  |  ['J. Li']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']


X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J. Matharu  ->  J. Matharu  |  ['J. Matharu']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J. Villasenor  ->  J. Villasenor  |  ['J. Villasenor']


Arxiv has 131 new papers today
          7 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# documents: (paper_id, markdown text) of every paper that was processed successfully
# failed: (paper, reason) of every paper that was rejected or raised an error
documents = []
failed = []
for paper in tqdm(candidates):
    # normalise the identifier: lower-case, drop the 'arxiv:' prefix, a literal
    # backslash-n sequence (raw string, not a newline) and surrounding whitespace
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # reuse the source folder if it already exists, retrieve it otherwise
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` raises AffiliationError when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is built but never used
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        # (fall back to the arXiv author list when the author counts differ)
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # fall back to the arXiv abstract when none was extracted from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # comment line: arXiv badge, appearance date and, if any, the arXiv comments
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        # (best effort: on failure the markdown is kept without replaced citations)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # any other failure is reported as a warning and recorded, the loop goes on
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/7 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2510.10067


extracting tarball to tmp_2510.10067... done.
Retrieving document from  https://arxiv.org/e-print/2510.10922


extracting tarball to tmp_2510.10922...

 done.
Retrieving document from  https://arxiv.org/e-print/2510.11363
extracting tarball to tmp_2510.11363... done.
Retrieving document from  https://arxiv.org/e-print/2510.11373


extracting tarball to tmp_2510.11373...

 done.


J. Matharu  ->  J. Matharu  |  ['J. Matharu']


Found 67 bibliographic references in tmp_2510.11373/C3D_OIII_LF.bbl.
Retrieving document from  https://arxiv.org/e-print/2510.11385


extracting tarball to tmp_2510.11385... done.
Retrieving document from  https://arxiv.org/e-print/2510.11404


extracting tarball to tmp_2510.11404...

 done.
Retrieving document from  https://arxiv.org/e-print/2510.11528


extracting tarball to tmp_2510.11528...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


### Export the logs

Throughout, we also keep track of the logs per paper. See `log-{today's date}.md` (written to `_build/html/logs/`).

In [6]:
import datetime
# one log file per day, named after today's date
today = str(datetime.date.today())
# NOTE(review): open() below does not create `_build/html/logs/` -- assumes it already exists
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    # NOTE(review): the heading written to the log file is spelled "Sucessful"
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a markdown document was produced
    success = [k[0] for k in documents]
    for candid in candidates:
        # compare on the part of the identifier after the last ':'
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that identical reasons end up under a single heading
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, any other failure in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        # paper summary followed by an extra table row carrying the error reason
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        # start a new sub-section whenever the reason changes
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2510.11373-b31b1b.svg)](https://arxiv.org/abs/2510.11373) | **JWST COSMOS-3D: Spectroscopic Census and Luminosity Function of [O III] Emitters at 6.75<z<9.05 in COSMOS**  |
|| R. A. Meyer, et al. -- incl., <mark>J. Matharu</mark> |
|*Appeared on*| *2025-10-14*|
|*Comments*| *Submitted to A&A. 10 pages + appendices. [OIII] catalogue release after acceptance. Comments welcome!*|
|**Abstract**|            We present a spectroscopically-selected [OIII]+Hb emitters catalogue at 6.75<z<9.05 and the resulting [OIII] 5008 ÅLuminosity Function (LF) in the COSMOS field. We leverage the 0.3 deg$^{2}$ covered to date by COSMOS-3D using NIRCam/WFSS F444W (90% of the survey) to perform the largest spectroscopic search for [OIII] emitters at 6.75<z<9.05. We present our catalogue of 237 [OIII] emitters and their associated completeness function. The inferred constraints on the [OIII] LF enable us to characterise the knee of the [OIII] LF, resulting in improved [OIII] LF constraints at z~7,8. Notably, we find evidence for an accelerated decline of the [OIII] luminosity density between z~7 and z~8, which could be expected if the metallicity of [OIII] emitters, as well as the cosmic star-formation rate density, is declining at these redshifts. We find that theoretical models that reproduce the z~7,8 [OIII] LF do not reproduce well the [OIII] equivalent width distribution, pointing to potential challenges in the modelling of[OIII] and other nebular lines in the early Universe. Finally, we provide the first constraints on the cosmic variance of [OIII] emitters, estimating at 15% the relative uncertainty for the z~7,8 [OIII] LF in the 0.3 deg$^2$ field. This estimate is in good agreement with that inferred from clustering, and shows that the [OIII] LF derived from smaller extragalactic legacy fields is strongly affected by cosmic variance. Our results highlight the fundamental role that wide-area JWST slitless surveys play to map the galaxy large-scale structure down into the reionisation era, serving as a springboard for a variety of science cases.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2510.10067-b31b1b.svg)](https://arxiv.org/abs/2510.10067) | **Beamforming in Interferometer Arrays with Cross-couplings**  |
|| Y. Liu, et al. -- incl., <mark>J. Li</mark>, <mark>Y. Wang</mark> |
|*Appeared on*| *2025-10-14*|
|*Comments*| *18 pages, 12 figures, RAA accepted*|
|**Abstract**|            For an interferometric array, an image of the sky can be synthesized from interferometric visibilities, which are the cross-correlations of the received electric voltages of pairs of array elements. However, to search for transient targets such as the fast radio burst (FRB), it is more convenient to use the beam-forming technique, where the real-time voltage outputs of the array elements are used to generate data streams (beams) which are sensitive to a specific direction. This is usually achieved by a weighted sum of the array element voltages, with the complex weight adjusted so that all outputs have the same phase for that direction. Alternatively, beams can also be formed from the weighted sum of the short time averaged correlation (visibility) data. We shall call these two approaches the electric voltage beam forming (EBF) and cross-correlation beam forming (XBF), respectively. All beams formed with the EBF can also be formed by the XBF method, but the latter can also generate beams which can not be generated by the former. We discuss the properties of these two kinds of beams, and the amount of computation required in each case. For an array with large number of elements, the XBF would require much more computation resource, although this is partly compensated by the fact that it allows integration over time. We study the impact of cross-coupling between array elements on the beamforming, first using a toy model, then for the case of the Tianlai Cylinder Pathfinder Array. In both cases, we find that the impact of the cross-coupling on the beam profile is relatively small. The understanding gained in this study is helpful in designing and understanding the beam-forming FRB digital backend for compact arrays such as the Tianlai array.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2510.10922-b31b1b.svg)](https://arxiv.org/abs/2510.10922) | **Slitless Spectroscopy Source Detection Using YOLO Deep Neural Network**  |
|| X. Chen, et al. -- incl., <mark>J. Li</mark>, <mark>X. Zhang</mark> |
|*Appeared on*| *2025-10-14*|
|*Comments*| *22 pages, 11 figures, 7 tables. PASP accepted*|
|**Abstract**|            Slitless spectroscopy eliminates the need for slits, allowing light to pass directly through a prism or grism to generate a spectral dispersion image that encompasses all celestial objects within a specified area. This technique enables highly efficient spectral acquisition. However, when processing CSST slitless spectroscopy data, the unique design of its focal plane introduces a challenge: photometric and slitless spectroscopic images do not have a one-to-one correspondence. As a result, it becomes essential to first identify and count the sources in the slitless spectroscopic images before extracting spectra. To address this challenge, we employed the You Only Look Once (YOLO) object detection algorithm to develop a model for detecting targets in slitless spectroscopy images. This model was trained on 1,560 simulated CSST slitless spectroscopic images. These simulations were generated from the CSST Cycle 6 and Cycle 9 main survey data products, representing the Galactic and nearby galaxy regions and the high galactic latitude regions, respectively. On the validation set, the model achieved a precision of 88.6% and recall of 90.4% for spectral lines, and 87.0% and 80.8% for zeroth-order images. In testing, it maintained a detection rate >80% for targets brighter than 21 mag (medium-density regions) and 20 mag (low-density regions) in the Galactic and nearby galaxies regions, and >70% for targets brighter than 18 mag in high galactic latitude regions.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2510.11363-b31b1b.svg)](https://arxiv.org/abs/2510.11363) | **Updated constraints on interacting dark energy: A comprehensive analysis using multiple CMB probes, DESI DR2, and supernovae observations**  |
|| T.-N. Li, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2025-10-14*|
|*Comments*| *17 pages, 3 figures*|
|**Abstract**|            Recent DESI baryon acoustic oscillation (BAO) measurements, combined with Planck cosmic microwave background (CMB) data and DESY5 type Ia supernova (SN) data, indicate a significant deviation from $\Lambda$CDM, which seems to suggest that this deviation can be explained by an interaction between dark energy and dark matter. In this work, we perform a comprehensive analysis by utilizing the latest DESI DR2 BAO data in conjunction with CMB data from ACT, SPT, Planck, and WMAP, along with SN data from PantheonPlus and DESY5. We consider four interacting dark energy (IDE) models with different forms of the interaction term $Q$. Our analysis indicates that CMB experiments other than Planck enhance the evidence for an interaction in the IDE models with $Q \propto \rho_{\rm de}$. In particular, when using the SPT+DESI+DESY5 data, the IDE model with $Q = \beta H_0 \rho_{\rm de}$ gives $\beta = -0.4170 \pm 0.1220$, with a deviation from zero reaching $3.4\sigma$ level. When replacing DESY5 with PantheonPlus, this deviation weakens to $2.1\sigma$ level, but remains relatively significant. Furthermore, the Bayes factors of the IDE model with $Q = \beta H_0 \rho_{\rm de}$ are positive in all cases, providing a moderate-to-strong preference over $\Lambda$CDM. Overall, our comprehensive analysis clearly suggests that the IDE models with $Q \propto \rho_{\rm de}$ (especially, $Q = \beta H_0 \rho_{\rm de}$) provide strong evidence supporting the existence of interaction and are more preferred by the current cosmological data.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2510.11385-b31b1b.svg)](https://arxiv.org/abs/2510.11385) | **Unveil A Peculiar Light Curve Pattern of Magnetar Burst with GECAM observations of SGR J1935+2154**  |
|| <mark>Y. Wang</mark>, et al. |
|*Appeared on*| *2025-10-14*|
|*Comments*| *13 pages, 5 figures, accepted to publication on ApJ*|
|**Abstract**|            Magnetar X-ray Burst (MXB) is usually composed of a single pulse or multiple pulses with rapid rise and brief duration mostly observed in hard X-ray (soft gamma-ray) band. Previous work studied the temporal behavior of some magnetar bursts and employed the Fast Rise Exponential Decay (FRED) model to fit pulses of MXB. However, whether there is other kind of pulse shape has not been explored. In this study, we systematically examined light curve of MXBs from SGR J1935+2154 detected by GECAM between 2021 and 2022. We find that there are different light curve morphologies. Especially, we discover a peculiar and new pattern, Exponential Rise and Cut-Off Decay (ERCOD), which is significantly different from FRED and could be well described by a mathematical function we proposed. We find that MXBs with ERCOD shape are generally longer in duration, brighter in the peak flux, and harder in spectrum. We note that the ERCOD shape is not unique to SGR J1935+2154 but also present in other magnetars. This new light curve pattern may imply a special burst and radiation mechanism of magnetar.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2510.11404-b31b1b.svg)](https://arxiv.org/abs/2510.11404) | **Observational study of chromospheric jets in and around a sunspot observed by NVST and SDO**  |
|| G. Wu, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2025-10-14*|
|*Comments*| *18 pages, 11 figures, accepted for publication in ApJ*|
|**Abstract**|            To better understand the characteristics, driving mechanisms, and potential heating contributions of chromospheric jets, we analyze two contrasting types: one originating from within the sunspot penumbra (inside jets), and the other originating from outside the penumbra (outside jets). Statistical analysis of 100 jets (50 inside jets and 50 outside jets) reveals that inside jets have a projected velocity range of 4--14~km\,s$^{-1}$, a length range of 1--4~Mm, a width range of 0.2--0.6~Mm, and a lifetime range of 135--450~s, with mean values of 7.90~km\,s$^{-1}$, 2.61~Mm, 0.41~Mm, and 260~s, respectively. About 52\% of inside jets are associated with brightenings in H$\alpha$ blue wing images, and some show high-temperature signatures, suggesting a connection with localized energy release. In contrast, outside jets have higher velocities (8--50~km\,s$^{-1}$, average 19.04~km\,s$^{-1}$), greater lengths (average 6.26~Mm, up to 27.27~Mm), slightly larger widths (average 0.46~Mm), and longer lifetimes (135--630~s, average 327~s). They typically originate from regions of opposite magnetic polarities and are associated with magnetic flux emergence and EUV brightenings. Some outside jets correspond to coronal jets with inverted Y-shaped structures and temperatures exceeding one million Kelvin. Our results suggest that both jet types are driven by magnetic reconnection occurring in distinct magnetic field configurations and contribute to chromospheric and coronal heating.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2510.11528-b31b1b.svg)](https://arxiv.org/abs/2510.11528) | **MANGOS II: Five new giant planets orbiting low-mass stars**  |
|| G. Dransfield, et al. -- incl., <mark>J. Villasenor</mark> |
|*Appeared on*| *2025-10-14*|
|*Comments*| *Submitted to MNRAS*|
|**Abstract**|            Giant planets orbiting low-mass stars on short orbits present a conundrum, as in the most extreme cases their existence cannot be reconciled with current models of core accretion. Therefore, surveys dedicated to finding these rare planets have a key role to play by growing the sample to overcome small number statistics. In this work we present MANGOS, a programme dedicated to the search for giant objects (planets, brown dwarfs, and low-mass stars) orbiting M dwarfs. We report on the discovery of five new giant planets (TOI-3288 Ab, TOI-4666 b, TOI-5007 b, TOI-5292 Ab, TOI-5916 b) first detected by TESS, and confirmed using ground-based photometry and spectroscopy. The five planets have radii in the range 0.99-1.12 $\mathrm{R_{Jup}}$, masses between 0.49--1.69~$\mathrm{M_{Jup}}$, and orbital periods between 1.43 and 2.91 days. We reveal that TOI-3288 and TOI-5292 are wide binaries, and in the case of TOI-5292 we are able to characterise both stellar components. We demonstrate that the planets presented are suitable for further characterisation of their obliquities and atmospheres. We detect a small but significant eccentricity for TOI-5007 b, although for this to be more robust, more observations are needed to fully sample the orbit. Finally, we reveal a correlation between stellar metallicity and planet bulk density for giant planets orbiting low-mass stars.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Figures referenced in `md` (markdown links whose label starts with
    ``Fig`` and ``<img src="..."/>`` tags) are copied below `directory`,
    keeping their relative path. References containing ``http`` and files
    that do not exist are skipped.

    :param md: markdown text to export
    :param md_fname: name of the markdown file created inside `directory`
    :param directory: output directory, created (with any missing parent)
                      if it does not exist
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs rather than mkdir: nested output paths such as
        # '_build/html/' must work even when the parent is missing
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # mirror the figure's relative path below the output directory
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)

    md_path = os.path.join(directory, md_fname)
    with open(md_path, 'w') as fout:
        fout.write(md)
    print("exported in ", md_path)
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# write every successfully processed paper to `_build/html/<paper_id>.md`
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2510.11373/./figures/C3D_all_emitters.png', 'tmp_2510.11373/./figures/completeness_functions_5008.png', 'tmp_2510.11373/./figures/EW_O3.png']
copying  tmp_2510.11373/./figures/C3D_all_emitters.png to _build/html/
copying  tmp_2510.11373/./figures/completeness_functions_5008.png to _build/html/
copying  tmp_2510.11373/./figures/EW_O3.png to _build/html/
exported in  _build/html/2510.11373.md
    + _build/html/tmp_2510.11373/./figures/C3D_all_emitters.png
    + _build/html/tmp_2510.11373/./figures/completeness_functions_5008.png
    + _build/html/tmp_2510.11373/./figures/EW_O3.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
# render the markdown of every exported paper for a quick visual check
for _paper_id, _markdown_text in documents:
    display(Markdown(_markdown_text))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\oiii}{\ifmmode \text{[O~{\sc iii}]} \else[O~{\sc iii}]\fi}$
$\newcommand{\neiii}{[Ne~{\sc iii}]}$
$\newcommand{\ha}{\ifmmode \text{H}\alpha \else H\alpha\fi}$
$\newcommand{\hb}{\ifmmode \text{H}\beta \else H\beta\fi}$
$\newcommand{\oiiihb}{\text{\oiii+\hb}}$
$\newcommand{\kms}{\rm{km s}^{-1}}$
$\newcommand{\arraystretch}{1.3}$
$\newcommand{\arraystretch}{1.3}$
$\newcommand{\arraystretch}{1.3}$</div>



<div id="title">

# JWST COSMOS-3D: Spectroscopic Census and Luminosity Function of $\oiii$  Emitters at $6.75\!<\!z\!<\!9.05$ in COSMOS

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2510.11373-b31b1b.svg)](https://arxiv.org/abs/2510.11373)<mark>Appeared on: 2025-10-14</mark> -  _Submitted to A&A. 10 pages + appendices. [OIII] catalogue release after acceptance. Comments welcome!_

</div>
<div id="authors">

R. A. Meyer, et al. -- incl., <mark>J. Matharu</mark>

</div>
<div id="abstract">

**Abstract:** We present a spectroscopically-selected $\oiiihb$ emitters catalogue at $6.75\!<\!z\!<\!9.05$ and the resulting $\oiii$ $5008$ Å  Luminosity Function (LF) in the COSMOS field. We leverage the 0.3 deg $^{2}$ covered  to date by COSMOS-3D using NIRCam/WFSS F444W ( $90\%$ of the survey) to perform the largest spectroscopic search for $\oiii$ emitters at $6.75\!<\!z\!<\!9.05$ . We present our catalogue of $237$ $\oiii$ emitters and their associated completeness function. The inferred constraints on the $\oiii$ LF enable us to characterise the knee of the $\oiii$ LF, resulting in improved $\oiii$ LF constraints at $z\sim 7, 8$ . Notably, we find evidence for an accelerated decline of the $\oiii$ luminosity density between $z\sim7$ and $z\sim8$ , which could be expected if the metallicity of $\oiii$ emitters, as well as the cosmic star-formation rate density, is declining at these redshifts. We find that theoretical models that reproduce the $z\sim7,8$ $\oiii$ LF do not reproduce well the $\oiii$ equivalent width distribution, pointing to potential challenges in the modelling of $\oiii$ and other nebular lines in the early Universe. Finally, we provide the first constraints on the cosmic variance of $\oiii$ emitters, estimating at $15\%$ the relative uncertainty for the z $\sim 7,8$ $\oiii$ LF in the 0.3 deg $^2$ field. This estimate is in good agreement with that inferred from clustering, and shows that the $\oiii$ LF derived from smaller extragalactic legacy fields is strongly affected by cosmic variance. Our results highlight the fundamental role that wide-area JWST slitless surveys play to map the galaxy large-scale structure down into the reionisation era, serving as a springboard for a variety of science cases.

</div>

<div id="div_fig1">

<img src="tmp_2510.11373/./figures/C3D_all_emitters.png" alt="Fig1" width="100%"/>

**Figure 1. -** 2D SNR spectra of all $237$ individual $\oiii$ emitters reported in this work. The emitters are ordered by redshift (only a subsample of redshifts are shown on the y-axis for readability). (*fig:fig1_allspectra*)

</div>
<div id="div_fig2">

<img src="tmp_2510.11373/./figures/completeness_functions_5008.png" alt="Fig2" width="100%"/>

**Figure 2. -** Completeness functions for the $\oiii$hb emitter search as a function of the measured $\oiii$ 5008 $Å$ SNR. The black square datapoints and red curve denote the effective completeness including the initial Gaussian-matched filtering (GM) and the subsequent visual inspection (VI). The best-fit completeness function of each separate step is shown with yellow and blue lines, as well as the binned data values for the visual inspection (gray dots). Note that the visual inspection is specific to the quality threshold chosen (here $q\geq1.5$).  (*fig:completeness_function*)

</div>
<div id="div_fig3">

<img src="tmp_2510.11373/./figures/EW_O3.png" alt="Fig3" width="100%"/>

**Figure 3. -** Equivalent width ($EW_0$) distribution of the $\oiii$ emitters in this work, FRESCO  ([Meyer, Oesch and Giovinazzo 2024]())  and predictions from THESAN \citep[][]{Kannan2022a,Kannan2022}  , SPHINX \citep[][]{Katz2023_sphinx}  , JAGUAR \citep[][]{Williams2018}  , FLARES \citep[][]{Lovell2021,Wilkins2023_o3}  . The combined distribution of FRESCO and COMSOS-3D, re-weighted by survey area, is shown in dashed black lines.  (*fig:ew_o3*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2510.11373"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Count the rendered publication pages whose file was modified recently.
files = glob('_build/html/*.md')
days = 7
# Take "now" once, as an aware UTC datetime. The file timestamps below are
# converted to aware UTC datetimes as well, so the computed age is not skewed
# by the local UTC offset (a naive local "now" minus a naive UTC timestamp is).
now = datetime.now(tz=timezone.utc)
max_age = timedelta(days=days)
res = []
for fk in files:
    # st_mtime is the last content modification, which is what the message
    # printed below reports.
    modified = datetime.fromtimestamp(os.stat(fk).st_mtime, tz=timezone.utc)
    if now - modified <= max_age:
        res.append(fk)
# Reverse lexicographic order of the file paths.
res.sort(reverse=True)
npub = len(res)
print(npub, f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

124  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(fname, date)`` pairs; ``date`` is compared as a
                string against ``str(datetime.date)``, so it presumably uses
                the ISO ``YYYY-MM-DD`` format -- TODO confirm with the pages
    :param days: size of the window in days, counted back from today
    :returns: generator of ``fname``, largest date string first
    """
    # The threshold does not depend on the item: compute it once.
    cutoff = str(datetime.date.today() - datetime.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date < cutoff:
            # Items are visited in decreasing date order, so nothing that
            # follows can qualify either.
            break
        yield fname

def extract_appearance_dates(lst_file):
    """ Extract the appearance date recorded in each file

    Only the first line containing ``Appeared on`` is considered in each
    file; files without such a line are left out of the result.

    :param lst_file: iterable of paths of the files to scan
    :returns: list of ``(fname, date)`` pairs, in the order of `lst_file`
    """
    marker = "Appeared on"

    def get_date(line):
        # Keep what sits between 'Appeared on:' and the closing </mark> tag
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument (not over a module-level list), so that the
    # function works with whatever the caller passes in.
    for fname in lst_file:
        with open(fname, 'r') as f:
            # Stop reading at the first matching line
            first_hit = next((line for line in f if marker in line), None)
        if first_hit is not None:
            dates.append((fname, get_date(first_hit)))
    return dates

from glob import glob
# Select the rendered pages whose appearance date falls within the window.
days = 7
lst = glob('_build/html/*md')
dates = extract_appearance_dates(lst)
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

10  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` slides

    :param npub: number of slides; they are numbered from 1 to `npub`
    :returns: the HTML snippet, one element per line
    """
    # Opening tag, split over two lines: class first, then options and id
    opening = [
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    ]
    # One placeholder cell per slide
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join(opening + cells + ["  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides

    :param npub: number of grid items; they are numbered from 1 to `npub`
    :returns: the HTML snippet, one element per line
    """
    template = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    lines = ["""  <div class="grid"> """]
    # One placeholder item per slide, between the opening and closing tags
    lines.extend(template.format(k=number) for number in range(1, npub + 1))
    lines.append("  </div>")
    return '\n'.join(lines)

In [13]:
# Build the 7-day archive page from the daily template.
carousel = create_carousel(npub)
# Comma-separated, double-quoted lists: file names (without directory) and slide ids
docs = ', '.join(f'"{path.split("/")[-1]}"' for path in res)
slides = ', '.join('"slide{0}"'.format(idx) for idx in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill the template placeholders one after the other
for placeholder, value in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "7-day archives"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, value)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page as the 7-day archive, restricted to the last day.
days = 1
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
# Quoted file names (directory stripped) and quoted slide ids, comma-separated
doc_names = ['"' + path.split('/')[-1] + '"' for path in res]
slide_ids = ['"slide' + str(idx) + '"' for idx in range(1, npub + 1)]
docs = ', '.join(doc_names)
slides = ', '.join(slide_ids)

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Substitute the template placeholders in turn
page = page.replace("{%-- carousel:s --%}", carousel)
page = page.replace("{%-- suptitle:s --%}", "Daily")
page = page.replace("{%-- docs:s --%}", docs)
page = page.replace("{%-- slides:s --%}", slides)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

3  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# Maximum number of papers shown in the grid
npub = 6
# Largest appearance dates first, capped at `npub` entries (the cap follows
# `npub` instead of repeating the number).
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), npub)]
# Fewer documents than requested may be available: size the grid, the slide
# ids and the title on what was actually selected so that they all agree.
npub = len(res)
print(f"{npub} publications selected.")

grid = create_grid(npub)
# Comma-separated, double-quoted lists: file names (without directory) and slide ids
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()

# Fill the template placeholders
page = page.replace("{%-- grid-content:s --%}", grid)\
           .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
           .replace("{%-- docs:s --%}", docs)\
           .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
