# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoded characters from a string
    List may need to grow.
    
    :param text: the text to clean
    :return: the cleaned text
    """
    text = re.sub(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})", r"\2", text)
    return text


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName
    :param name: full name
    :returns: initials
    """
    initials = []
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        suffix = re.findall(r"\((.*?)\)", name)[0]
        name = name.replace(f"({suffix})", '')
    else:
        suffix = ''
    split = name.split()
    for token in split[:-1]:
        if '-' in token:
            current = '-'.join([k[0] + '.' for k in token.split('-')])
        else:
            current = token[0] + '.'
        initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    try:
        return fn(value, *args, **kwargs)
    except Exception:
        return value

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

K. El-Badry  ->  K. El-Badry  |  ['K. El-Badry']
M. Demianenko  ->  M. Demianenko  |  ['M. Demianenko']
K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']


T. Henning  ->  T. Henning  |  ['T. Henning']
Arxiv has 91 new papers today
          4 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/4 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2601.00945


extracting tarball to tmp_2601.00945...

 done.
Retrieving document from  https://arxiv.org/e-print/2601.00975


extracting tarball to tmp_2601.00975... done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)




Retrieving document from  https://arxiv.org/e-print/2601.02005
extracting tarball to tmp_2601.02005...

 done.


Error retrieving bib data for Masters_2015: 'masters_2015'
Error retrieving bib data for Roster_2025: 'roster_2025'
Error retrieving bib data for Masters_2015: 'masters_2015'
Retrieving document from  https://arxiv.org/e-print/2601.02156
extracting tarball to tmp_2601.02156...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2601.00975-b31b1b.svg)](https://arxiv.org/abs/2601.00975) | **Website with interactive visualization of multivariate astronomical time series**  |
|| M. Volkov, et al. -- incl., <mark>M. Demianenko</mark> |
|*Appeared on*| *2026-01-06*|
|*Comments*| *7 pages, 3 figures, accepted to Communications of BAO, Vol.72, Issue 2, 2025, pp.334-340*|
|**Abstract**|            Light curves represent astronomical time series of flux measured across one or more photometric bands. With the rapid growth of large-scale sky surveys, time-domain astronomy has become an essential area of modern astrophysical research. Interactive visualization of extensive light-curve datasets plays a key role in exploring transient phenomena and in planning large follow-up campaigns. In this work, we introduce two web-based platforms designed for interactive light-curve visualization: Fulu, for transient event studies, and VALC, for investigations of low-mass active galactic nuclei (AGNs). These tools provide a user-friendly interface for examining, comparing, and interpreting vast collections of astronomical light curves, supporting scientific discovery.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2601.02005-b31b1b.svg)](https://arxiv.org/abs/2601.02005) | **Euclid: Improving redshift distribution reconstruction using a deep-to-wide transfer function**  |
|| Y. Kang, et al. -- incl., <mark>K. Jahnke</mark> |
|*Appeared on*| *2026-01-06*|
|*Comments*| **|
|**Abstract**|            The Euclid mission seeks to understand the Universe expansion history and the nature of dark energy, which requires a very accurate estimate of redshift distribution. Achieving this accuracy relies on reference samples with spectroscopic redshifts, together with a procedure to match them to survey sources for which only photometric redshifts are available. One important source of systematic uncertainty is the mismatch in photometric properties between galaxies in the Euclid survey and the reference objects. We develop a method to degrade the photometry of objects with deep photometry to match the properties of any shallower survey in the multi-band photometric space, preserving all the correlations between the fluxes and their uncertainties. We compare our transfer method with more demanding image-based methods, such as Balrog from the Dark Energy Survey Collaboration. According to metrics, our method outperforms Balrog. We implement it in the redshift distribution reconstruction, based on the self-organising map approach of arXiv:1509.03318, and test it using a realistic sample from the Euclid Flagship Simulation. We find that the key ingredient is to ensure that the reference objects are distributed in the colour space the same way as the wide-survey objects, which can be efficiently achieved with our transfer method. In our best implementation, the mean redshift biases are consistently reduced across the tomographic bins, bringing a significant fraction of them within the Euclid accuracy requirements in all tomographic bins. Equally importantly, the tests allow us to pinpoint which step in the calibration pipeline has the strongest impact on achieving the required accuracy. Our approach also reproduces the overall redshift distributions, which are crucial for applications such as angular clustering.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2601.02156-b31b1b.svg)](https://arxiv.org/abs/2601.02156) | **JWST/MIRI coronagraphic search for planets in systems with gapped exoKuiper belts and proper motion anomalies**  |
|| R. Bendahan-West, et al. -- incl., <mark>T. Henning</mark> |
|*Appeared on*| *2026-01-06*|
|*Comments*| *23 pages, 10 figures, Accepted for publication in MNRAS*|
|**Abstract**|            Over the past decade, ALMA has uncovered a range of substructures within exoKuiper belts, pointing to a population of undetected planets. With JWST's sensitivity, we now have the opportunity to identify these planets thought to be responsible for the observed substructures in debris discs. We present Cycle 1 JWST/MIRI 11.4 {\mu}m coronagraphic observations of three exoKuiper belts that exhibit gaps in their radial structures: HD 92945, HD 107146, and HD 206893, to determine whether planets are responsible for carving these structures, as seen in our Solar System with the gas giants. We reduce the JWST/MIRI data using spaceKLIP, and introduce new routines to mitigate the Brighter-Fatter effect and persistence. We do not detect any planet candidates, and all detected objects in the field-of-view are consistent with background stars or galaxies. However, by combining JWST mass limits, archival observational constraints, and astrometric accelerations, we rule out a significant portion of planet parameter space, placing tight constraints on the planets possibly responsible for these gaps. To interpret these results, we explore multiple gap-carving scenarios in discs, either massless or with non-zero mass, including clearing by in-situ planet(s), as well as shaping by inner planets through mean-motion or secular apsidal resonances. Finally, we conclude that the planets causing the proper motion anomaly in these systems must reside within the inner 20 au.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2601.00945-b31b1b.svg)](https://arxiv.org/abs/2601.00945) | **JWST observations of three long-period AM CVn binaries: detection of the donors and hints of magnetically truncated disks**  |
|| <mark>K. El-Badry</mark>, A. C. Rodriguez, M. J. Green, K. B. Burdge |
|*Appeared on*| *2026-01-06*|
|*Comments*| *18 pages, 15 figures, submitted to OJAp. Data at this https URL*|
|**Abstract**|            We present JWST/NIRSpec high-cadence infrared spectroscopy of three long-period, eclipsing AM CVn binaries, Gaia14aae, SRGeJ0453, and ZTFJ1637. These systems have orbital periods of 50-62 minutes and cool donors that are undetectable in the optical. The data cover a wavelength range of 1.6-5.2 $\mu$m at resolution $R=1000-2000$. We obtained 150-200 spectra of each system over two orbits, split between the G235M and G395M gratings. All three systems show strong, double-peaked He I emission lines dominated by an accretion disk. These lines are nearly stationary but contain radial velocity (RV) variable sub-components that trace stream-disk interactions. In Gaia14aae and SRGeJ0453, we detect two Na I doublets in emission whose RVs track the irradiated face of the donor, marking the first direct detection of the donors of long-period AM CVns. No absorption lines from the donors are detected, implying that the IR excesses observed in many long-period AM CVns primarily trace disks, not donors. The He I emission profiles in all systems lack high-velocity wings and show no emission beyond $\approx 1500,\rm km,s^{-1}$. The morphology of the disk eclipses and Doppler tomograms are best reproduced by models in which the disk is truncated well outside the white dwarf and only material at $r \gtrsim 0.07,R_{\odot}$ contributes to the disk emission. We interpret this as possible evidence of magnetized white dwarf accretors. For plausible mass transfer rates, the truncation radii imply surface magnetic fields of $B = 30-100$ kG, consistent with recent constraints based on X-ray periodicity. The absence of cyclotron humps out to 5 $\mu$m rules out stronger MG-level fields. We make the data from the program publicly available to the community.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['', '', 'tmp_2601.00975/./screen_lc_dev_23_10_25.JPG', 'tmp_2601.00975/./screen_fulu_23_10_25.JPG']
file not found 
file not found 
copying  tmp_2601.00975/./screen_lc_dev_23_10_25.JPG to _build/html/
copying  tmp_2601.00975/./screen_fulu_23_10_25.JPG to _build/html/
exported in  _build/html/2601.00975.md
    + _build/html/
    + _build/html/
    + _build/html/tmp_2601.00975/./screen_lc_dev_23_10_25.JPG
    + _build/html/tmp_2601.00975/./screen_fulu_23_10_25.JPG
found figures ['tmp_2601.02005/./Images/nz_result_updated.png', 'tmp_2601.02005/./Images/deep_to_wide_paper.png', 'tmp_2601.02005/./Images/combined_phz_plot_scatter.png']
copying  tmp_2601.02005/./Images/nz_result_updated.png to _build/html/
copying  tmp_2601.02005/./Images/deep_to_wide_paper.png to _build/html/
copying  tmp_2601.02005/./Images/combined_phz_plot_scatter.png to _build/html/
exported in  _build/html/2601.02005.md
    + _build/html/tmp_2601.02005/./Images/nz_result_updated.png
    + _build/html/tmp_

## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\thesection}{\Alph{section}.\arabic{section}}$</div>



<div id="title">

# **Website with interactive visualization of multivariate astronomical time series**

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2601.00975-b31b1b.svg)](https://arxiv.org/abs/2601.00975)<mark>Appeared on: 2026-01-06</mark> -  _7 pages, 3 figures, accepted to Communications of BAO, Vol.72, Issue 2, 2025, pp.334-340_

</div>
<div id="authors">

M. Volkov$\star$, et al.

</div>
<div id="abstract">

**Abstract:** Light curves represent astronomical time series of flux measured across one or more photometric bands. With the rapid growth of large-scale sky surveys, time-domain astronomy has become an essential area of modern astrophysical research. Interactive visualization of extensive light-curve datasets plays a key role in exploring transient phenomena and in planning large follow-up campaigns. In this work, we introduce two web-based platforms designed for interactive light-curve visualization: ${\sc Fulu}$ , for transient event studies, and VALC, for investigations of low-mass active galactic nuclei (AGNs). These tools provide a user-friendly interface for examining, comparing, and interpreting vast collections of astronomical light curves, supporting scientific discovery.

</div>

<div id="div_fig1">

<img src="" alt="Fig1.1" width="50%"/><img src="" alt="Fig1.2" width="50%"/>

**Figure 1. -** (a) Sequence diagram illustrating the data flow between the user, frontend, API, and MongoDB when displaying and plotting object data using Plotly. (b) Component diagram illustrating simple architecture of the system. (*fig: seq*)

</div>
<div id="div_fig2">

<img src="tmp_2601.00975/./screen_lc_dev_23_10_25.JPG" alt="Fig2" width="100%"/>

**Figure 2. -** Interface example of the low-mass AGN light curves visualization at the VALC page. (*fig: screen_valc*)

</div>
<div id="div_fig3">

<img src="tmp_2601.00975/./screen_fulu_23_10_25.JPG" alt="Fig3" width="100%"/>

**Figure 3. -** Interface example of the transient event light curve, approximated by normalizing flows at the {\sc Fulu} page. (*fig: screen_fulu*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2601.00975"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\orcid}[1]$
$\newcommand{\arraystretch}{1.3}$</div>



<div id="title">

# $\Euclid$: Improving redshift distribution reconstruction using a deep-to-wide transfer function$\thanks{This paper is published on behalf of the Euclid Consortium.}$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2601.02005-b31b1b.svg)](https://arxiv.org/abs/2601.02005)<mark>Appeared on: 2026-01-06</mark> - 

</div>
<div id="authors">

Y. Kang, et al. -- incl., <mark>K. Jahnke</mark>

</div>
<div id="abstract">

**Abstract:** The $\Euclid$ mission of the European Space Agency seeks to understand the Universe’s expansion history and the nature of dark energy, through measurements of cosmic shear. This requires a very accurate estimate of the true redshift distribution of the galaxies, with the systematic error in the mean redshift satisfying $\sigma_{\langle z\rangle}<0.002(1+z)$ per tomographic bin. Achieving this accuracy relies on reference samples with spectroscopic redshifts, together with a procedure to match them to survey sources for which only photometric redshifts are available. One important source of systematic uncertainty is the mismatch in photometric properties between galaxies in the $\Euclid$ survey and the reference objects. We develop a method to degrade the photometry of objects with deep photometry to match the properties of any shallower survey in the multi-band photometric space, preserving all the correlations between the fluxes and their uncertainties. We compare our transfer method with more demanding image-based methods, such as Balrog from the Dark Energy Survey Collaboration. According to our metrics, our method outperforms Balrog. We implement our method in the redshift distribution reconstruction, based on the self-organising map approach of Masters_2015 , and test it using a realistic sample from the $\Euclid$ Flagship Mock Galaxy Simulation. We find that the key ingredient is to ensure that the reference objects are distributed in the colour space the same way as the wide-survey objects, which can be efficiently achieved with our transfer method. In our best implementation, the mean redshift biases are consistently reduced across the tomographic bins, bringing a significant fraction of them within the Euclid accuracy requirements in all tomographic bins. Equally importantly, the tests allow us to pinpoint which step in the calibration pipeline has the strongest impact on achieving the required accuracy. Our approach also reproduces the overall redshift distributions, which are crucial for applications such as angular clustering. The agreement between the reconstructed and true distributions demonstrates both the feasibility and robustness of the approach. This implementation is sufficient for Euclid Data Release 1 and provides a solid foundation for subsequent data releases.

</div>

<div id="div_fig1">

<img src="tmp_2601.02005/./Images/nz_result_updated.png" alt="Fig7" width="100%"/>

**Figure 7. -** Mean redshift bias for different configurations of the calibration pipeline, shown as a function of redshift for ten equal-$z$ tomographic bins. The bias is computed relative to the true mean redshifts of the wide-sample $n(z)$ distributions. Violin points represent the distribution of 70 realisations, where violin points with face colour indicate calibration that used MPT-Mock-sample objects for projection and $n(z)$ reconstruction and points with no face colour indicate the projection and reconstruction are done by using deep-sample objects. The grey shaded region indicates the \Euclid requirements for $n(z)$ accuracy in weak lensing cosmology. Blue data points represent Scenario A (original  (Masters_2015)  method); Orange data points show Scenario B (photo-$z$s are used to define the tomographic binning); Green ones Scenario C ($z_{\mathrm{obs}}$ projection replaced with MPT-Mock-sample objects); Red data points show Scenario D (Tomographic bin defined by MPT-Mock-sample objects); Purple data points are Scenario E (SOM constructed by MPT-Mock-sample objects); Brown data points show Scenario F where full calibration is based on MPT-Mock-sample. The pink data points correspond to Scenario G, use the per-object photo-$z$ binning introduced in Roster_2025. The data points have been slightly shifted along the $x$-axis for clarity. (*fig:nz*)

</div>
<div id="div_fig2">

<img src="tmp_2601.02005/./Images/deep_to_wide_paper.png" alt="Fig1" width="100%"/>

**Figure 1. -** Projection of sources onto the self-organising map (SOM) trained using 8-band photometric data. Each SOM cell consists of objects with similar spectral energy distributions (SEDs). The background grayscale indicates the number of objects mapped to each cell. The red dot marks the true flux of a selected object projected onto the SOM, while the blue and green markers show 50 independent realisations of the same object with Deep and Wide photometric noise, respectively.
 (*fig:deep_to_wide*)

</div>
<div id="div_fig3">

<img src="tmp_2601.02005/./Images/combined_phz_plot_scatter.png" alt="Fig2" width="100%"/>

**Figure 2. -** {Left}: Comparison between RF photo-$z$ estimates and spectroscopic redshifts for objects in the MPT-Mock sample. A magnitude cut of $\IE\!<\!25$ and an S/N$ \geq\!10$ cut on the \IE-band photometry are applied. {Right}: Comparison of RF photo-$z$ estimates between matched objects in the Wide and MPT-Mock catalogues. The NMAD of the residuals and the outlier fractions are indicated in the figures, sources with $|z_{\mathrm{ph}}-z_{\mathrm{obs}}|>0.15(1+z_{\mathrm{obs}})$ being defined as outliers.  (*fig:photozspecz*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2601.02005"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\Mjup}{M_{\mathrm{Jup}}}$
$\newcommand{\Rhill}{R_{\mathrm{Hill}}}$
$\newcommand{\rev}[1]{\textbf{{#1}}}$
$\newcommand{\arraystretch}{1.2}$
$\newcommand{\thebibliography}{\DeclareRobustCommand{\VAN}[3]{##3}\VANthebibliography}$</div>



<div id="title">

# _JWST_/MIRI coronagraphic search for planets in systems with gapped exoKuiper belts and proper motion anomalies

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2601.02156-b31b1b.svg)](https://arxiv.org/abs/2601.02156)<mark>Appeared on: 2026-01-06</mark> -  _23 pages, 10 figures, Accepted for publication in MNRAS_

</div>
<div id="authors">

R. Bendahan-West, et al. -- incl., <mark>T. Henning</mark>

</div>
<div id="abstract">

**Abstract:** Over the past decade, ALMA has uncovered a range of substructures within exoKuiper belts, pointing to a population of undetected planets. With _JWST_ 's sensitivity, we now have the opportunity to identify these planets thought to be responsible for the observed substructures in debris discs. We present Cycle 1 _JWST_ /MIRI $11.4 \mu$ m coronagraphic observations of three exoKuiper belts that exhibit gaps in their radial structures: HD 92945, HD 107146, and HD 206893, to determine whether planets are responsible for carving these structures, as seen in our Solar System with the gas giants. We reduce the _JWST_ /MIRI data using \texttt{spaceKLIP} , and introduce new routines to mitigate the Brighter-Fatter effect and persistence. We do not detect any planet candidates, and all detected objects in the field-of-view are consistent with background stars or galaxies. However, by combining _JWST_ mass limits, archival observational constraints, and astrometric accelerations, we rule out a significant portion of planet parameter space, placing tight constraints on the planets possibly responsible for these gaps. To interpret these results, we explore multiple gap-carving scenarios in discs, either massless or with non-zero mass, including clearing by in-situ planet(s), as well as shaping by inner planets through mean-motion or secular apsidal resonances. Finally, we conclude that the planets causing the proper motion anomaly in these systems must reside within the inner 20 au.

</div>

<div id="div_fig1">

<img src="tmp_2601.02156/./figures/DPM_HD92945_edited.png" alt="Fig2.1" width="33%"/><img src="tmp_2601.02156/./figures/DPM_HD107146_MIRI_c_F1140C_min_contrast_edited.jpg" alt="Fig2.2" width="33%"/><img src="tmp_2601.02156/./figures/DPM_HD206893_MIRI_c_F1140C_min_contrast_edited.jpg" alt="Fig2.3" width="33%"/>

**Figure 2. -** DPMs for HD 92945, HD 107146, and HD 206893 (top to bottom). The blue shading shows the probability of a 5$\sigma$ planet detection with MIRI at 11.4 \micron, with contours marking the 50, 95 and 99.7\% confidence levels. For HD 92945, the red dash-dotted line shows the 99.7\% detection limit from NIRCam at 4.4 \micron. The darker grey shaded regions denote constraints from archival direct imaging and RV data, and grey dotted regions from Gaia astrometry (RUWE). The orange hatching marks limits imposed by the disc morphology, where planets within 3 $\Rhill$ of the disc edges would disrupt it (\S\ref{subsec: disc stability}). The purple curve describes the planet mass and separation combinations required to explain the observed PMa signal. The green solid line highlights the orbital parameters for a planet shaping the disc inner edge (\S\ref{constraints inner edge}), and the black dot with a question mark marks the possible parameters for a planet located at the centre of the gaps in a massless disc (\S\ref{sec: single-planet-massless-disc}). For HD 206893, the light-grey region rules out planet parameters to ensure planet stability based on mutual $\Rhill$. All DPM components are detailed in \S\ref{sec: DPM}. (*fig:detectability*)

</div>
<div id="div_fig2">

<img src="tmp_2601.02156/./figures/HD92945_madys_new_red.jpg" alt="Fig1.1" width="33%"/><img src="tmp_2601.02156/./figures/HD107146_madys_new_red.jpg" alt="Fig1.2" width="33%"/><img src="tmp_2601.02156/./figures/HD206893_madys_new_red.jpg" alt="Fig1.3" width="33%"/>

**Figure 1. -** Calibrated $5\sigma$ contrast curves (left) for the MIRI 1140C observations with the respective mass sensitivities in units of $\Mjup$ (right), as a function of projected separation. Black curves correspond to MIRI F1140C contrasts, blue curves correspond to \texttt{PanCAKE} contrast predictions, and for HD 92945 only, the red curve corresponds to the NIRCam F444W contrast \citep[from GO 3989,][]{Lazzoni_2025}. The different line styles represent contrast obtained using different PSF subtraction techniques (i.e., ADI, RDI, and ADI+RDI). All contrasts were calculated with \texttt{spaceKLIP} using the maximum number of KL modes and with 1 annulus/1 subsection. (*fig:contrast curves*)

</div>
<div id="div_fig3">

<img src="tmp_2601.02156/./figures/jwst+alma_new_red_final.png" alt="Fig8" width="100%"/>

**Figure 8. -** Comparison between the $11.4 \mu$m _JWST_/MIRI data (top row) and archival ALMA observations (bottom row) for HD 92945, HD 107146, and HD 206893 obtained from \citet[][respectively]{marino_2019, marino_2018, marino_2020}. The radial extent of the ALMA disc is over-plotted on the _JWST_ data to guide the eye. The positions of the sources in the _JWST_ observations are traced back to account for the proper motion of these stars to determine whether they are co-moving or background objects. Solid yellow circles indicate sources observed in both _JWST_ and ALMA (C1, C2, and C3 in HD 92945, and C3 in HD 206893), while dashed yellow circles denote sources without ALMA counterparts (C4 in HD 92945, C1 in HD 107146, and C1 and C2 in HD 206893). Only C4 in HD 92945 is consistent with being a star, while the others are consistent with background galaxies.The scale bar in the bottom right corner represents a projected distance of 50 au. The white circle in the bottom left corner of the ALMA plots denotes the beam size used in the observations. (*fig:bkg_vetting*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2601.02156"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

116  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

4  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

3  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
