# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoded characters from a string
    List may need to grow.
    
    :param text: the text to clean
    :return: the cleaned text
    """
    text = re.sub(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})", r"\2", text)
    return text


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName
    :param name: full name
    :returns: initials
    """
    initials = []
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        suffix = re.findall(r"\((.*?)\)", name)[0]
        name = name.replace(f"({suffix})", '')
    else:
        suffix = ''
    split = name.split()
    for token in split[:-1]:
        if '-' in token:
            current = '-'.join([k[0] + '.' for k in token.split('-')])
        else:
            current = token[0] + '.'
        initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    try:
        return fn(value, *args, **kwargs)
    except Exception:
        return value

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

A. Frank  ->  A. Frank  |  ['A. Frank']
E. Bañados  ->  E. Bañados  |  ['E. Bañados']
Y. Khusanova  ->  Y. Khusanova  |  ['Y. Khusanova']
J. Liu  ->  J. Liu  |  ['J. Liu']
C. Gieser  ->  C. Gieser  |  ['C. Gieser']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']


Arxiv has 58 new papers today
          5 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/5 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2502.02614
extracting tarball to tmp_2502.02614... done.
Retrieving document from  https://arxiv.org/e-print/2502.02637


extracting tarball to tmp_2502.02637...

 done.






Found 137 bibliographic references in tmp_2502.02637/aanda.bbl.
Retrieving document from  https://arxiv.org/e-print/2502.02857


extracting tarball to tmp_2502.02857... done.
Retrieving document from  https://arxiv.org/e-print/2502.02873


extracting tarball to tmp_2502.02873...

 done.
Retrieving document from  https://arxiv.org/e-print/2502.03353
extracting tarball to tmp_2502.03353...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.02637-b31b1b.svg)](https://arxiv.org/abs/2502.02637) | **Ultra High-Redshift or Closer-by, Dust-Obscured Galaxies? Deciphering the Nature of Faint, Previously Missed F200W-Dropouts in CEERS**  |
|| G. Gandolfi, et al. -- incl., <mark>E. Bañados</mark>, <mark>Y. Khusanova</mark> |
|*Appeared on*| *2025-02-06*|
|*Comments*| *Submitted to Astronomy & Astrophysics*|
|**Abstract**|            The James Webb Space Telescope (JWST) is revolutionizing our understanding of the Universe by unveiling faint, near-infrared dropouts previously beyond our reach, ranging from exceptionally dusty sources to galaxies up to redshift $z \sim 14$. In this paper, we identify F200W-dropout objects in the Cosmic Evolution Early Release Science (CEERS) survey which are absent from existing catalogs. Our selection method can effectively identify obscured low-mass ($\log \text{M}_* \leq 9$) objects at $z \leq 6$, massive dust-rich sources up to $z \sim 12$, and ultra-high-redshift ($z > 15$) candidates. Primarily relying on NIRCam photometry from the latest CEERS data release and supplementing with Mid-Infrared/(sub-)mm data when available, our analysis pipeline combines multiple SED-fitting codes, star formation histories, and CosMix - a novel tool for astronomical stacking. Our work highlights three $2<z<3$ dusty dwarf galaxies which have larger masses compared to the typical dusty dwarfs previously identified in CEERS. Additionally, we reveal five faint sources with significant probability of lying above $z>15$, with best-fit masses compatible with $\Lambda$CDM and a standard baryons-to-star conversion efficiency. Their bi-modal redshift probability distributions suggest they could also be $z<1.5$ dwarf galaxies with extreme dust extinction. We also identify a strong line emitter galaxy at $z \sim 5$ mimicking the near-infrared emission of a $z \sim 13$ galaxy. Our sample holds promising candidates for future follow-ups. Confirming ultra high-redshift galaxies or lower-z dusty dwarfs will offer valuable insights into early galaxy formation, evolution with their central black holes and the nature of dark matter, and/or cosmic dust production mechanisms in low-mass galaxies, and will help us to understand degeneracies and contamination in high-z object searches.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.02614-b31b1b.svg)](https://arxiv.org/abs/2502.02614) | **Earth Detecting Earth: At what distance could Earth's constellation of technosignatures be detected with present-day technology?**  |
|| S. Z. Sheikh, et al. -- incl., <mark>A. Frank</mark> |
|*Appeared on*| *2025-02-06*|
|*Comments*| *18 pages, 1 figure, 2 tables, published in AJ*|
|**Abstract**|            The field of the Search for Extraterrestrial Intelligence (SETI) searches for ``technosignatures'' that could provide the first detection of life beyond Earth through the technology that an extraterrestrial intelligence (ETI) may have created. Any given SETI survey, if no technosignatures are detected, should set upper limits based on the kinds of technosignatures it should have been able to detect; the sensitivity of many SETI searches requires that their target sources (e.g., Dyson spheres or Kardashev II/III level radio transmitters) emit with power far exceeding the kinds of technology humans have developed. In this paper, we instead turn our gaze Earthward, minimizing the axis of extrapolation by only considering transmission and detection methods commensurate with an Earth-2024 level. We evaluate the maximum distance of detectability for various present-day Earth technosignatures -- radio transmissions, atmospheric technosignatures, optical and infrared signatures, and objects in space or on planetary surfaces -- using only present-day Earth instruments, providing one of the first fully cross-wavelength comparisons of the growing toolbox of SETI techniques. In this framework, we find that Earth's space-detectable signatures span 13 orders of magnitude in detectability, with intermittent, celestially-targeted radio transmission (i.e., planetary radar) beating out its nearest non-radio competitor by a factor of $10^3$ in detection distance. This work highlights the growing range of ways that exoplanet technosignatures may be expressed, the growing complexity and visibility of the human impact upon our planet, and the continued importance of the radio frequencies in SETI.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.02857-b31b1b.svg)](https://arxiv.org/abs/2502.02857) | **Propagation-induced Frequency-dependent Polarization Properties of Fast Radio Burst**  |
|| W.-Y. Wang, et al. -- incl., <mark>J. Liu</mark> |
|*Appeared on*| *2025-02-06*|
|*Comments*| *20 pagers, 11 figures, AAAS journal submitted*|
|**Abstract**|            Frequency-dependent polarization properties provide crucial insights into the radiation mechanisms and magnetic environments of fast radio bursts (FRBs). We explore an analytical solution of radiative transfer of the polarization properties of FRBs as a strong incoming wave propagates in a homogeneous magnetized plasma. The case of a thermal plasma is studied in more detail. The rotational axis of the polarization spectrum undergoes precession with frequency on the Poincaré sphere when the medium has both strong Faraday rotation and conversion. Such precession on the Poincaré sphere could occur in hot or cold plasma with a strong magnetic field component perpendicular to the line of sight. The analytical solution with the mixing Faraday case offers a more physical description of the physical properties of the magnetic environment of FRBs than the empirical ``generalized Faraday rotation'' method commonly adopted in the literature. Significant absorption can exist in a dense plasma medium, which may give rise to a highly circularly polarized outgoing wave. The frequency-dependent Stokes parameters may be associated with reversing rotation measures or the presence of a persistent radio source around an FRB.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.02873-b31b1b.svg)](https://arxiv.org/abs/2502.02873) | **Resolved Gas Temperatures and 12C/13C ratios in SVS13A from ALMA Observations of CH3CN and CH3-13-CN**  |
|| T.-H. Hsieh, et al. -- incl., <mark>C. Gieser</mark> |
|*Appeared on*| *2025-02-06*|
|*Comments*| *14 pages, 13 figures*|
|**Abstract**|            Context. Multiple systems are common in field stars, and the frequency is found to be higher in early evolutionary stages. Thus, the study of young multiple systems during the embedded stages is key to have a comprehensive understanding of star formation. In particular, how material accretes from the large-scale envelope into the inner region and how this flow interacts with the system physically and chemically has not yet been well characterized observationally. Aims. We aim to provide a snapshot of the forming protobinary system SVS13A, consisting of VLA4A and VLA4B. This includes clear pictures of its kinematic structures, physical conditions, and chemical properties. Methods. We conducted ALMA observations toward SVS13A targeting CH3CN and CH3-13CN J=12-11 K-ladder line emission with a high spatial resolution of ~30 au at a spectral resolution of ~0.08 km s-1 Results. We perform LTE radiative transfer models to fit the spectral features of the line emission. We find the two-layer LTE radiative model including dust absorption is essential to interpret the CH3CN and CH3-13-CN line emission. We identify two major and four small kinematic components, and derive their physical and chemical properties. Conclusions. We find a possible infalling signature toward the bursting secondary source VLA4A, which may be fed by an infalling streamer from the large-scale envelope. The mechanical heating in the binary system, as well as the infalling shocked gas, likely play a role in the thermal structure of the protobinary system. By accumulating mass from the streamer, the system might have experienced a gravitationally unstable phase before the accretion outburst. Finally, the derived CH3CN/CH3-13-CN ratio is lower than the canonical ratio in the ISM and is different between VLA4A and VLA4B.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.03353-b31b1b.svg)](https://arxiv.org/abs/2502.03353) | **Constraints on Ultra-light Axion Dark Matter through Galaxy Cluster Number Counts**  |
|| S. Zelmer, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2025-02-06*|
|*Comments*| *16 pages, 11 figures, submitted to A&A*|
|**Abstract**|            Ultra-light axions are hypothetical scalar particles that influence the evolution of large-scale structures of the Universe. Depending on their mass, they can potentially be part of the dark matter component of the Universe, as candidates commonly referred to as fuzzy dark matter. While strong constraints have been established for pure fuzzy dark matter models, the more general scenario where ultra-light axions constitute only a fraction of the dark matter has been limited to a few observational probes. In this work, we use the galaxy cluster number counts obtained from the first All-Sky Survey (eRASS1) of the SRG/eROSITA mission together with gravitational weak lensing data from the Dark Energy Survey, the Kilo-Degree Survey, and the Hyper Suprime-Cam, to constrain the fraction of ultra-light axions in the mass range $10^{-32}$ eV to $10^{-24}$ eV. We put upper bounds on the ultra-light axion relic density in independent logarithmic axion mass bins by performing a full cosmological parameter inference. We find an exclusion region in the intermediate ultra-light axion mass regime with the tightest bounds reported so far in the mass bins around $m_\mathrm{a}=10^{-27}$ eV with $\Omega_\mathrm{a} < 0.0036$ and $m_\mathrm{a}=10^{-26}$ eV with $\Omega_\mathrm{a} < 0.0084$, both at 95% confidence level. When combining with CMB probes, these bounds are tightened to $\Omega_\mathrm{a} < 0.0030$ in the $m_\mathrm{a}=10^{27}$ eV mass bin and $\Omega_\mathrm{a} < 0.0058$ in the $m_\mathrm{a}=10^{-26}$ eV mass bin, both at 95% confidence level. This is the first time that constraints on ultra-light axions have been obtained using the growth of structure measured by galaxy cluster number counts. These results pave the way for large surveys, which can be utilized to obtain tight constraints on the mass and relic density of ultra-light axions with better theoretical modeling of the abundance of halos.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2502.02637/./Figures/colormag3.png', '', '', '', '', '', '', '']
copying  tmp_2502.02637/./Figures/colormag3.png to _build/html/
file not found 
file not found 
file not found 
file not found 
file not found 
file not found 
file not found 
exported in  _build/html/2502.02637.md
    + _build/html/tmp_2502.02637/./Figures/colormag3.png
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/


## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$</div>



<div id="title">

# Ultra High-Redshift or Closer-by, Dust-Obscured Galaxies? Deciphering the Nature of Faint, Previously Missed F200W-Dropouts in CEERS

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2502.02637-b31b1b.svg)](https://arxiv.org/abs/2502.02637)<mark>Appeared on: 2025-02-06</mark> -  _Submitted to Astronomy & Astrophysics_

</div>
<div id="authors">

G. Gandolfi, et al. -- incl., <mark>E. Bañados</mark>, <mark>Y. Khusanova</mark>

</div>
<div id="abstract">

**Abstract:** The James Webb Space Telescope (JWST) is revolutionizing our understanding of the Universe by unveiling faint, near-infrared dropouts previously beyond our reach, ranging from exceptionally dusty sources to galaxies up to redshift $z \sim 14$ . In this paper, we identify F200W-dropout objects in the Cosmic Evolution Early Release Science (CEERS) survey which are absent from existing catalogs. Our selection method can effectively identify obscured low-mass ( $\log \text{M}_* \leq 9$ ) objects at $z \leq 6$ , massive dust-rich sources up to $z \sim 12$ , and ultra-high-redshift ( $z > 15$ ) candidates. Our goal is to uncover promising targets for further studies using deep mid-infrared imaging and/or spectroscopic follow-ups. We utilize two photometric catalogs optimized for detecting faint, red objects. Primarily relying on NIRCam photometry from the latest CEERS data release and supplementing with Mid-Infrared/(sub-)mm data when available, our analysis pipeline combines multiple SED-fitting codes, star formation histories, and the novel \texttt{CosMix} tool for astronomical stacking to maximize available photometric information. Our work highlights three $2<z<3$ dusty dwarf galaxies which have larger masses compared to the typical dusty dwarfs previously identified in CEERS. Additionally, we reveal five faint sources with significant probability of lying above $z>15$ , with best-fit masses compatible with $\Lambda$ CDM and a standard baryons-to-star conversion efficiency. Their bi-modal redshift probability distributions suggest they could also be $z<1.5$ dwarf galaxies with extreme dust extinction. We also identify a strong line emitter galaxy at $z \sim 5$ mimicking the near-infrared emission of a $z \sim 13$ galaxy. Our sample holds promising candidates for future follow-ups. Confirming ultra high-redshift galaxies or lower-z dusty dwarfs will offer valuable insights into early galaxy formation, evolution with their central black holes and the nature of dark matter, and/or cosmic dust production mechanisms in low-mass galaxies, and will help us to understand degeneracies and contamination in high-z object searches.

</div>

<div id="div_fig1">

<img src="tmp_2502.02637/./Figures/colormag3.png" alt="Fig11" width="100%"/>

**Figure 11. -** [F277W - F356W] versus [F200W - F277W] color-color diagram for our sources. The F200W-dropout representation scheme is the same adopted in Figure \ref{fig:colormagplot1} and Figure \ref{fig:colormagplot2}, as well as the x-axis average errors. The black rectangle highlights the UHR LBG selection for $15 < z < 20$ galaxies by Castellano et al., (2025; _in prep._), whereas the blue shaded area corresponds to the $15 < z < 20$ LBG selection adopted in [Kokorev, Atek and Chisholm (2024)](). (*fig:colormagplot3*)

</div>
<div id="div_fig2">

<img src="" alt="Fig5.1" width="25%"/><img src="" alt="Fig5.2" width="25%"/><img src="" alt="Fig5.3" width="25%"/><img src="" alt="Fig5.4" width="25%"/>

**Figure 5. -** Vibrational stability equation of state
               $S_{\mathrm{vib}}(\lg e, \lg \rho)$.
               $>0$ means vibrational stability.
              Vibrational stability equation of state
               $S_{\mathrm{vib}}(\lg e, \lg \rho)$.
               $>0$ means vibrational stability.
              Nonlinear Model ResultsNonlinear Model ResultsSpectral types and photometry for stars in the
  region.Spectral types and photometry for stars in the
  region.List of nearby SNe used in this work.Summary for ISOCAM sources with mid-IR excess
(YSO candidates).Summary for ISOCAM sources with mid-IR excess
(YSO candidates). Sample stars with absolute magnitudecontinued. Sample stars with absolute magnitudecontinued.Shown in greyscale is a...Plotted above...Complexes characterisation.Line data and abundances ...Continued. (*FigVibStab*)

</div>
<div id="div_fig3">

<img src="" alt="Fig23.1" width="33%"/><img src="" alt="Fig23.2" width="33%"/><img src="" alt="Fig23.3" width="33%"/>

**Figure 23. -** Vibrational stability equation of state
               $S_{\mathrm{vib}}(\lg e, \lg \rho)$.
               $>0$ means vibrational stability.
              Nonlinear Model ResultsNonlinear Model ResultsSpectral types and photometry for stars in the
  region.Spectral types and photometry for stars in the
  region.List of nearby SNe used in this work.Summary for ISOCAM sources with mid-IR excess
(YSO candidates).Summary for ISOCAM sources with mid-IR excess
(YSO candidates). Sample stars with absolute magnitudecontinued. Sample stars with absolute magnitudecontinued.Shown in greyscale is a...Plotted above...Complexes characterisation.Line data and abundances ...Continued. (*FigVibStab*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2502.02637"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

327  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

5  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

3  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
