# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoded characters from a string
    List may need to grow.
    
    :param text: the text to clean
    :return: the cleaned text
    """
    text = re.sub(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})", r"\2", text)
    return text


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName
    :param name: full name
    :returns: initials
    """
    initials = []
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        suffix = re.findall(r"\((.*?)\)", name)[0]
        name = name.replace(f"({suffix})", '')
    else:
        suffix = ''
    split = name.split()
    for token in split[:-1]:
        if '-' in token:
            current = '-'.join([k[0] + '.' for k in token.split('-')])
        else:
            current = token[0] + '.'
        initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    try:
        return fn(value, *args, **kwargs)
    except Exception:
        return value

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

Y. Cheng  ->  Y. Cheng  |  ['Y. Cheng']
E. Schinnerer  ->  E. Schinnerer  |  ['E. Schinnerer']
L. Xie  ->  Z.-L. Xie  |  ['L. Xie']
K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J. Wolf  ->  D. J. Wolf  |  ['J. Wolf']


Arxiv has 36 new papers today
          5 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/5 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2508.15889
extracting tarball to tmp_2508.15889... done.


Issues with the citations
list index out of range
Retrieving document from  https://arxiv.org/e-print/2508.15901
extracting tarball to tmp_2508.15901...

 done.


Found 71 bibliographic references in tmp_2508.15901/main.bbl.
Retrieving document from  https://arxiv.org/e-print/2508.15915
extracting tarball to tmp_2508.15915...

 done.
Retrieving document from  https://arxiv.org/e-print/2508.16234


extracting tarball to tmp_2508.16234... done.
Retrieving document from  https://arxiv.org/e-print/2508.16461


extracting tarball to tmp_2508.16461... done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2508.15889-b31b1b.svg)](https://arxiv.org/abs/2508.15889) | **Massive extended streamers feed high-mass young stars**  |
|| F. A. Olguin, et al. -- incl., <mark>Y. Cheng</mark> |
|*Appeared on*| *2025-08-25*|
|*Comments*| *Published in Science Advances and available at this https URL*|
|**Abstract**|            Stars are born in a variety of environments that determine how they gather gas to achieve their final masses. It is generally believed that disks are ubiquitous around protostars as a result of angular momentum conservation and are natural places to grow planets. As such, they are proposed to be the last link in the inflow chain from the molecular cloud to the star. However, disks are not the only form that inflows can take. Here we report on high-resolution observations performed with the Atacama Large Millimeter/submillimeter Array that reveal inflows in the form of streamers. These streamers persist well within the expected disk radius, indicating that they play a substitute role channeling material from the envelope directly to an unresolved small disk or even directly to the forming high-mass protostar. These flows are massive enough to feed the central unresolved region at a rate sufficient to quench the feedback effects of the young massive star.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2508.15901-b31b1b.svg)](https://arxiv.org/abs/2508.15901) | **Surveying the Whirlpool at Arcseconds with NOEMA (SWAN): III. $^{13}$CO/C$^{18}$O ratio variations across the M51 galaxy**  |
|| I. Galić, et al. -- incl., <mark>E. Schinnerer</mark> |
|*Appeared on*| *2025-08-25*|
|*Comments*| *Accepted for publication in A&A; 8 pages, 4 pages Appendix, 10 figures*|
|**Abstract**|            CO isotopologues are common tracers of the bulk molecular gas in extragalactic studies, providing insights into the physical and chemical conditions of the cold molecular gas, a reservoir for star formation. Since star formation occurs within molecular clouds, mapping CO isotopologues at cloud-scale is important to understanding the processes driving star formation. However, achieving this mapping at such scales is challenging and time-intensive. The Surveying the Whirlpool Galaxy at Arcseconds with NOEMA (SWAN) survey addresses this by using the Institut de radioastronomie millimétrique (IRAM) NOrthern Extended Millimeter Array (NOEMA) to map the $^{13}$CO(1-0) and C$^{18}$O(1-0) isotopologues, alongside several dense gas tracers, in the nearby star-forming galaxy M51 at high sensitivity and spatial resolution ($\approx$ 125 pc).We examine the $^{13}$CO(1-0) to C$^{18}$O(1-0) line emission ratio as a function of galactocentric radius and star formation rate surface density to infer how different chemical and physical processes affect this ratio at cloud scales across different galactic environments: nuclear bar, molecular ring, northern and southern spiral arms. In line with previous studies conducted at kiloparsec scales for nearby star-forming galaxies, we find a moderate positive correlation with galactocentric radius and a moderate negative correlation with star formation rate surface density across the field-of-view (FoV), with slight variations depending on the galactic environment. We propose that selective nucleosynthesis and changes in the opacity of the gas are the primary drivers of the observed variations in the ratio.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2508.15915-b31b1b.svg)](https://arxiv.org/abs/2508.15915) | **Euclid preparation. Establishing the quality of the 2D reconstruction of the filaments of the cosmic web with DisPerSE using Euclid photometric redshifts**  |
|| E. Collaboration, et al. -- incl., <mark>L. Xie</mark>, <mark>K. Jahnke</mark> |
|*Appeared on*| *2025-08-25*|
|*Comments*| *27 pages, 14 Figures*|
|**Abstract**|            Cosmic filaments are prominent structures of the matter distribution of the Universe. Modern detection algorithms are an efficient way to identify filaments in large-scale observational surveys of galaxies. Many of these methods were originally designed to work with simulations and/or well-sampled spectroscopic surveys. When spectroscopic redshifts are not available, the filaments of the cosmic web can be detected in projection using photometric redshifts in slices along the Line of Sight, which enable the exploration of larger cosmic volumes. However, this comes at the expense of a lower redshift precision. It is therefore crucial to assess the differences between filaments extracted from exact redshifts and from photometric redshifts for a specific survey. We apply this analysis to capture the uncertainties and biases of filament extractions introduced by using the photometric sample of the Euclid Wide Survey. The question that we address in this work is how can we compare two filament samples derived with redshifts of different precisions in the Euclid Wide Survey context. We apply the cosmic web detection algorithm DisPerSE, in the redshift range $0.1 \leq z \leq 0.5$, to the GAlaxy Evolution and Assembly (GAEA) simulated galaxy sample which reproduces several characteristics of the Euclid Wide Survey. We develop a method to compare skeletons derived from photometric redshifts to those derived from true galaxy positions. This method expands the commonly used measure of distance between filaments to include geometrical (angles between filaments) and astrophysical considerations (galaxy mass gradients and connectivity-mass relations). We assess whether this approach strengthens our ability to correctly identify filaments in very large surveys such as the Euclid Wide Survey. [abridged]         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2508.16234-b31b1b.svg)](https://arxiv.org/abs/2508.16234) | **Exploring HI Galaxy Redshift Survey Strategies for the FAST Core Array Interferometry**  |
|| Z. Li, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2025-08-25*|
|*Comments*| *19 pages, 8 figures*|
|**Abstract**|            We explore the feasibility of HI galaxy redshift surveys with the Five-hundred-meter Aperture Spherical Telescope (FAST) and its proposed Core Array interferometry. Using semi-analytical simulations, we compare the performance of the FAST single-dish and Core Array modes in drift scan (DS) and on-the-fly (OTF) observations across different redshifts. Our results show that the FAST single-dish mode enables significant HI detections at low redshifts ($z \lesssim 0.35$) but is limited at higher redshifts due to shot noise. The Core Array interferometry, with higher sensitivity and angular resolution, provides robust HI galaxy detections up to $z \sim 1$, maintaining a sufficient number density for power spectrum measurements and BAO constraints. At low redshifts ($z \sim 0.01$ -- $0.08$), both configurations perform well, though cosmic variance dominates uncertainties. At higher redshifts ($z > 0.35$), the Core Array outperforms the single-dish mode, while increasing the survey area has little impact on single-dish observations due to shot noise limitations. The DS mode efficiently covers large sky areas but is constrained by Earth's rotation, whereas the OTF mode allows more flexible deep-field surveys at the cost of operational overhead. Our findings highlight the importance of optimizing survey strategies to maximize FAST's potential for HI cosmology. The Core Array is particularly well-suited for high-redshift HI galaxy surveys, enabling precise constraints on large-scale structure and dark energy.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2508.16461-b31b1b.svg)](https://arxiv.org/abs/2508.16461) | **The Einstein Telescope Pathfinder and its Vacuum System**  |
|| T. Höhn, et al. -- incl., <mark>J. Wolf</mark> |
|*Appeared on*| *2025-08-25*|
|*Comments*| *submitted to PoS(ICRC2025)*|
|**Abstract**|            The Einstein Telescope (ET) will be the next generation gravitational wave observatory in Europe with a sensitivity reaching beyond the CMB into the dark era of the Universe. Each corner of the triangular baseline design is the center of two interferometers with 10 km long arms, one operated at room temperature, the other one with mirrors at cryogenic temperatures of 10-15 K that reduce the noise contribution at frequencies as low as 3 Hz. The ETpathfinder (ET-PF) project at Maastricht University is a R\&D facility for the challenging cryogenic interferometer technology of ET. It is a 20m x 20m interferometer with six towers that will house the seismically decoupled cryogenic Si-mirrors, laser systems, and detectors. The KIT group developed the control system of the ultra-high vacuum system for ET-PF, based on the expertise from the KATRIN neutrino mass experiment. In addition, a test facility is currently being set up at KIT to investigate adsorption and desorption processes of residual gas on the cryogenic mirror surfaces, as well as monitoring techniques and in-situ cleaning procedures. This paper presents the objectives and status of these activities and their contribution towards the next generation gravitational wave observatory.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2508.15889/./continuum_c8c9.png', 'tmp_2508.15889/./keplerian_pv_northern.png', 'tmp_2508.15889/./sketch_science_v2.png']
copying  tmp_2508.15889/./continuum_c8c9.png to _build/html/
copying  tmp_2508.15889/./keplerian_pv_northern.png to _build/html/
copying  tmp_2508.15889/./sketch_science_v2.png to _build/html/
exported in  _build/html/2508.15889.md
    + _build/html/tmp_2508.15889/./continuum_c8c9.png
    + _build/html/tmp_2508.15889/./keplerian_pv_northern.png
    + _build/html/tmp_2508.15889/./sketch_science_v2.png
found figures ['tmp_2508.15901/./new_plots_agn_edited/split_env_radius_new_pystruc0_2025_08_07.png', 'tmp_2508.15901/./new_plots_agn_edited/split_env_sfr_new_pystruc0_2025_08_07.png', 'tmp_2508.15901/./new_plots_agn_edited/paper1_maps_ratio_sfr_envs.png']
copying  tmp_2508.15901/./new_plots_agn_edited/split_env_radius_new_pystruc0_2025_08_07.png to _build/html/
copying  tmp_2508.15901/./new_plots_agn_edited/split_env_sfr_new_pystruc0_2025_08_07.png t

## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\pcc}{ cm^{-3}}$
$\newcommand{\msun}{{\rm M}_\odot}$
$\newcommand{\lsun}{{\rm L}_\odot}$
$\newcommand{\kms}{km s^{-1}}$
$\newcommand{◦}{^{\circ}}$
$\newcommand{\mas}{{\rm mas}}$
$\newcommand{\apj}{ApJ}$
$\newcommand{\apjl}{ApJL}$
$\newcommand{\aap}{A\&A}$
$\newcommand{\mnras}{MNRAS}$
$\newcommand{\araa}{ARA\&A}$
$\newcommand{\ssr}{SSRv}$
$\newcommand{\pasp}{PASP}$
$\newcommand{\aj}{AJ}$
$\newcommand$
$\newcommand{\fnum@figure}{\textbf{Figure \thefigure}}$
$\newcommand{\fnum@table}{\textbf{Table \thetable}}$
$\newcommand{\thefigure}{S\arabic{figure}}$
$\newcommand{\thetable}{S\arabic{table}}$
$\newcommand{\theequation}{S\arabic{equation}}$
$\newcommand{\thepage}{S\arabic{page}}$
$\newcommand\scititle{$
$	Massive extended streamers feed high-mass young stars$
$}$</div>



<div id="title">

# $\bfseries$ $\boldmath$ $\scititle$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2508.15889-b31b1b.svg)](https://arxiv.org/abs/2508.15889)<mark>Appeared on: 2025-08-25</mark> -  _Published in Science Advances and available at this https URL_

</div>
<div id="authors">

F. A. Olguin, et al. -- incl., <mark>Y. Cheng</mark>

</div>
<div id="abstract">

**Abstract:** $\bfseries$ $\boldmath$ Stars are born in a variety of environments that determine how they gather gas to achieve their final masses.It is generally believed that disks are ubiquitous around protostars as a result of angular momentum conservation and are natural places to grow planets.As such, they are proposed to be the last link in the inflow chain from the molecular cloud to the star.However, disks are not the only form that inflows can take.Here we report on high-resolution observations performed with the Atacama Large Millimeter/submillimeter Array that reveal inflows in the form of streamers.These streamers persist well within the expected disk radius, indicating that they play a substitute role channeling material from the envelope directly to an unresolved small disk or even directly to the forming high-mass protostar.These flows are massive enough to feed the central unresolved region at a rate sufficient to quench the feedback effects of the young massive star.

</div>

<div id="div_fig1">

<img src="tmp_2508.15889/./continuum_c8c9.png" alt="Fig1" width="100%"/>

**Figure 1. -** **ALMA 1.3 mm continuum emission and CH$\bf _3$OH $\bf J_{K_a,K_c** =18_{3,15}-17_{4,14} A, v_t=0$ first moment map.}**(A)** The green triangle corresponds to the central source, ALMA1.
The contour levels are $-3$, 5, 10, 20, 40, 80$\times\sigma_{rms}$ with $\sigma_{rms}=28$$\mu$Jy beam$^{-1}$.
The blue- and red-shifted streamer directions are shown by the curves for the corresponding color.
The purple region shows extended emission from ALMA1 likely tracing a jet component.
**(B)** The first moment velocity map with respect to the systemic velocity ($-47.2$ km s$^{-1}$\cite{Taniguchi2023}) is accompanied by colored curves describing the trajectory and velocity distribution of the streamlines of a rotating and infalling envelope.
The blue streamer is divided in two streamlines.
The outer component presents a refinement of the streamline model presented in \cite{Olguin2023}, while the inner component corresponds to a similar model but with a streamline origin at the centrifugal radius and close to the mid-plane (see Materials and Methods).
The contours are the same as in (A).
The pink ellipse correspond to a circle of radius 500 au, i.e., the centrifugal radius, projected in plane of the sky assuming an inclination angle of 65◦ with respect to the line of sight.
Synthesized beam sizes for the continuum and first moment maps are shown in the bottom left corner.
 (*fig:continuum*)

</div>
<div id="div_fig2">

<img src="tmp_2508.15889/./keplerian_pv_northern.png" alt="Fig2" width="100%"/>

**Figure 2. -** **Blue-shifted streamer position-velocity (PV) diagram in CH$\bf _3$OH.**
The green solid line corresponds to the line of sight Keplerian velocity at the same deprojected distance to the central source, ALMA1.
Similarly, the dotted green line corresponds to a Keplerian rotation and infall velocity distribution, while the dashed line corresponds to the IRE velocity distribution \cite{Oya2022}.
The inner and outer blue streamer models in Fig. \ref{fig:continuum}B are shown in blue dot-dashed lines.
The lower abscissa scale corresponds to the distance along the path used to calculate the PV diagram slice (shown in Fig. \ref{fig:continuum}) with the package  pvextractor, while the upper one corresponds to the radial distance to ALMA1 assuming that the slice points are located in the mid-plane (see Materials and Methods).
The mid-plane is assumed to be inclined 65◦ with respect to the line of sight.
The vertical light blue line marks the position of the centrifugal radius, $R_c=500$ au.
 (*fig:pvmap*)

</div>
<div id="div_fig3">

<img src="tmp_2508.15889/./sketch_science_v2.png" alt="Fig3" width="100%"/>

**Figure 3. -** **Schematic representation of the different kinematic components and flow scenarios.**(** A**) Schematic representation of the observations with the different components labeled by the tag with the respective color.
The green arrows point in the direction of the outflows, while the blue to red arrow indicated the rotation direction.
The scale at the bottom indicates the radius of ALMA1, the centrifugal barrier radius and the centrifugal radius from left to right, respectively.
(** B**) and (** C**) Are the two scenarios proposed to explain the origin of the shocks in the blue streamer.
Green arrows represent the background infall from the less dense envelope.
 (*fig:sketch*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2508.15889"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$</div>



<div id="title">

# Surveying the Whirlpool at Arcseconds with NOEMA (SWAN): III. $^{13}$CO/C$^{18}$O ratio variations across the M51 galaxy

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2508.15901-b31b1b.svg)](https://arxiv.org/abs/2508.15901)<mark>Appeared on: 2025-08-25</mark> -  _Accepted for publication in A&A; 8 pages, 4 pages Appendix, 10 figures_

</div>
<div id="authors">

I. Galić, et al. -- incl., <mark>E. Schinnerer</mark>

</div>
<div id="abstract">

**Abstract:** CO isotopologues are common tracers of the bulk molecular gas in extragalactic studies, providing insights into the physical and chemical conditions of the cold molecular gas, a reservoir for star formation. Since star formation occurs within molecular clouds, mapping CO isotopologues at cloud-scale is important to understanding the processes driving star formation. However, achieving this mapping at such scales is challenging and time-intensive. The Surveying the Whirlpool Galaxy at Arcseconds with NOEMA (SWAN) survey addresses this by using the Institut de radioastronomie millimétrique (IRAM) NOrthern Extended Millimeter Array (NOEMA) to map the $^{13}$ CO(1-0) and C $^{18}$ O(1-0) isotopologues, alongside several dense gas tracers, in the nearby star-forming galaxy M51 at high sensitivity and spatial resolution ( $\approx$ 125 pc). We examine the $^{13}$ CO(1-0) to C $^{18}$ O(1-0) line emission ratio as a function of galactocentric radius and star formation rate surface density to infer how different chemical and physical processes affect this ratio at cloud scales across different galactic environments: nuclear bar, molecular ring, northern and southern spiral arms. In line with previous studies conducted at kiloparsec scales for nearby star-forming galaxies, we find a moderate positive correlation with galactocentric radius and a moderate negative correlation with star formation rate surface density across the field-of-view (FoV), with slight variations depending on the galactic environment. We propose that selective nucleosynthesis and changes in the opacity of the gas are the primary drivers of the observed variations in the ratio.

</div>

<div id="div_fig1">

<img src="tmp_2508.15901/./new_plots_agn_edited/split_env_radius_new_pystruc0_2025_08_07.png" alt="Fig5" width="100%"/>

**Figure 5. -** The figure displays $\mathrm{{R}^{13}_{18}}$ plotted against galactocentric radius. In the top panel, coloured points represent sightlines where both emission lines have S/N > 3. Downward triangles denote lower limits, where the ratio has S/N $\leq$ 3 and C$^{18}$O(1-0) also has S/N $\leq$ 3. Upward triangles indicate upper limits, where the ratio has S/N $\leq$ 3 and $^{13}$CO(1-0) also has S/N $\leq$ 3. Diamonds represent non-detections, where both lines have either S/N $\leq$ 3 or S/N > 3, but result in the ratio having S/N $\leq$ 3. White hexagons correspond to points obtained via spectral stacking with the error bars corresponding to the propagated statistical uncertainties. The bottom panels highlight the points of each environment (see right map in  Fig. \ref{fig:maps}), with grey points matching the coloured ones in the top panel and the coloured points highlighting the respective environment. The black dashed line marks the $\widetilde{{\mathrm{R}}}^{13}_{18}$ for the FoV, while the coloured dashed line indicates the $\widetilde{{\mathrm{R}}}^{13}_{18}$ for the specific environment. The colour saturation for points in both the top and bottom panels reflects the kernel density estimate (KDE). (*fig:env_plots_radius*)

</div>
<div id="div_fig2">

<img src="tmp_2508.15901/./new_plots_agn_edited/split_env_sfr_new_pystruc0_2025_08_07.png" alt="Fig6" width="100%"/>

**Figure 6. -** The figure displays $\mathrm{{R}^{13}_{18}}$ plotted against SFR surface density. The description is analogous to Fig. \ref{fig:env_plots_radius}, with an added black dashed line indicating the FoV's median $\Sigma_{\text{SFR}}$. The black line extending in the northern spiral arm panel arises from a low-value non-detection point. (*fig:env_plots_sfr*)

</div>
<div id="div_fig3">

<img src="tmp_2508.15901/./new_plots_agn_edited/paper1_maps_ratio_sfr_envs.png" alt="Fig4" width="100%"/>

**Figure 4. -** The complete dataset is presented in this figure, while the version masked for AGN activity and used in the analysis is provided in Appendix \ref{AGN Acivity}. The left panel shows the $\mathrm{R}^{13}_{18}$ line ratio map, while the central panel presents the $\Sigma_{\text{SFR}}$ map, limited to regions where the line ratio is significantly measured. In these maps, light gray points indicate non-detections (S/N $\leq$ 3 in both lines), intermediate gray denotes lower limits (S/N $\leq$ 3 in C$^{18}$O) and dark gray indicates upper limits (S/N $\leq$ 3 in $^{13}$CO). Coloured points mark detections with S/N > 3 in both lines. The overlaid contours represent the 30 K km s$^{-1}$ level of $^{12}$CO(1–0) emission for reference. The right panel shows the PAWS environmental mask  ([Colombo, Hughes and Schinnerer 2014]())  over the SWAN FoV, where different colours denote distinct environments: nuclear bar (blue), molecular ring (pink), northern spiral arm (orange), southern spiral arm (red), and interarm (green). Points with black outlines correspond to the significant detections shown in the other panels. (*fig:maps*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2508.15901"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

127  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

5  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
