# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category dedicated to affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised when the affiliation check of a paper fails."""

def validation(source: str):
    """Check the affiliations found in a source before it gets parsed.

    The check itself is delegated to ``mpia.affiliation_verifications``
    (called in verbose mode).

    :param source: content handed over to ``mpia.affiliation_verifications``
    :raises AffiliationError: when the verification returns anything other
        than ``True``; the returned value is appended to the error message
    """
    outcome = mpia.affiliation_verifications(source, verbose=True)
    if outcome is True:
        return
    raise AffiliationError("mpia.affiliation_verifications: " + outcome)

        
# Always report AffiliationWarning, even when the same warning is issued repeatedly
# (the 'always' action turns off the default de-duplication of warnings).
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g. ``2411.06474``)
    :returns: HTML snippet (embeddable in markdown): an ``<img>`` pointing to
              the QR-code service, wrapped in a ``<div id="qrcode">``
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The quotes must enclose the whole ``src`` value. Placing them around the
    # arXiv link only leaves ``src`` unquoted and makes the quote characters
    # part of the data encoded in the QR code.
    img = f'<img src="{url}https://arxiv.org/abs/{paper_id}">'
    return '<div id="qrcode">' + img + '</div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Strip the LaTeX CJK environment markup from a string

    Only the ``\\begin{CJK}{UTF8}{gbsn} ... \\end{CJK}`` wrapper is handled for
    now (the list may need to grow); the wrapped content itself is kept.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_environment = r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})"
    # keep only the second group, i.e. what sits between \begin and \end
    return re.sub(cjk_environment, r"\2", text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last one is reduced to its initial(s); hyphenated
    tokens keep their hyphen (``Anna-Beth`` -> ``A.-B.``). The last token is
    kept in full. A parenthesised part (often a non western spelling) is
    taken out before shortening and appended again at the end.

    Degenerate inputs do not raise: an empty name gives an empty string, an
    unbalanced ``(`` is left in place, and empty hyphen parts (``Jean-``,
    ``-``) are ignored.

    :param name: full name
    :returns: initials
    """
    initials = []
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.search(r"\((.*?)\)", name)
        # no closing parenthesis: nothing to extract, keep the name as is
        if found is not None:
            suffix = found.group(1)
            name = name.replace(f"({suffix})", '')
    split = name.split()
    for token in split[:-1]:
        # drop empty pieces produced by leading/trailing/double hyphens
        parts = [k for k in token.split('-') if k]
        if not parts:
            continue
        initials.append('-'.join(k[0] + '.' for k in parts))
    if split:
        initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff ("Mitarbeiter") list webpage from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Names of IT, administration and technical staff are rejected. The test
    is a plain substring match of each listed name against `name`.

    :param name: name
    :returns: False if name contains one of the listed names, True otherwise
    """
    excluded_names = ('Licht', 'Binroth', 'Witzel', 'Jordan',
                      'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                      'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                      'Dehen', 'Beckmann', 'Jager', 'Jäger')
    return not any(fragment in name for fragment in excluded_names)

def add_author_to_list(author_list: list) -> list:
    """ Make sure the manually maintained extra authors are in the list

    The list is modified in place and also returned.

    :param author_list: list of authors
    :returns: the same list, completed with the missing extra authors
    """
    extra_authors = ('T. Henning',)
    missing = [name for name in extra_authors if name not in author_list]
    author_list.extend(missing)
    return author_list

# Build the list of MPIA author names used to match arXiv authors.
# The entries come from mpia.get_mpia_mitarbeiter_list() (which already does some filtering);
# element [1] of each entry is the value tested by filter_non_scientists and kept in the list
# (presumably the displayed author name -- TODO confirm against the mpia module).
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing authors because of inconsistencies between their MPIA name and their author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
# Fetch the new papers through get_new_papers()
new_papers = get_new_papers()
# add manual references: every identifier listed here is fetched individually
# with get_paper_from_identifier and appended to the papers to inspect
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    """Apply `fn` to `value`, falling back to `value` itself on failure.

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of `fn`, also the fallback result
    :returns: the result of the call, or `value` unchanged if the call
              raised any ``Exception``
    """
    try:
        outcome = fn(value, *args, **kwargs)
    except Exception:
        # deliberate best effort: keep the input untouched
        outcome = value
    return outcome

# Select the papers that have at least one author matching the MPIA list.
candidates = []
for paperk in new_papers:
    # Check author list with their initials
    # (robust_call keeps the raw author name whenever mpia.get_initials raises)
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # an entry counts as a match when its highlighted form contains 'mark';
    # the original name (`orig`) is paired with it but not used further in this cell
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    # the paper now carries the highlighted author list instead of the original one
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

P. Molliere  ->  P. Molliere  |  ['P. Molliere']
W. Brandner  ->  W. Brandner  |  ['W. Brandner']
G. Chauvin  ->  G. Chauvin  |  ['G. Chauvin']
T. Henning  ->  T. Henning  |  ['T. Henning']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J. Li  ->  J. Li  |  ['J. Li']
F. Walter  ->  F. Walter  |  ['F. Walter']
S. Kraus  ->  S. Kraus  |  ['S. Kraus']
D. Mortimer  ->  D. Mortimer  |  ['D. Mortimer']


X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J. Li  ->  J. Li  |  ['J. Li']
Arxiv has 75 new papers today
          7 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Process every candidate paper: fetch its source, check affiliations, parse
# the TeX and build the markdown summary.
#   documents: (paper_id, markdown text) for each paper that went through
#   failed:    (paper, reason) for each paper that was rejected or crashed
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalize the identifier: lowercase, drop 'arxiv:' and surrounding whitespace.
    # NOTE(review): r'\n' is a raw string, so this removes a literal backslash
    # followed by 'n', not a newline character -- confirm this is intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # only retrieve the source when the local folder does not exist yet
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` is passed to LatexDocument; an AffiliationError
            # raised by it is recorded below as a (non critical) failure
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is built but never used in this cell
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well:
        # when the number of parsed authors differs from the arXiv record,
        # fall back to the (already highlighted) arXiv author list
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # fall back to the arXiv abstract when the parsed one is missing or empty
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # comment line: arXiv badge, appearance date and, if any, the arXiv comments
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: a failure here is only printed and
        # the document is kept with its citations left as they are)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # any other failure: warn and record the paper with the error message
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/7 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2411.05917


extracting tarball to tmp_2411.05917...

 done.


P. Mollière  ->  P. Mollière  |  ['P. Mollière']
W. Brandner  ->  W. Brandner  |  ['W. Brandner']
G. Chauvin  ->  G. Chauvin  |  ['G. Chauvin']
T. Henning  ->  T. Henning  |  ['T. Henning']


list index out of range


Retrieving document from  https://arxiv.org/e-print/2411.06356


extracting tarball to tmp_2411.06356... done.
Retrieving document from  https://arxiv.org/e-print/2411.06372


extracting tarball to tmp_2411.06372...

 done.
Retrieving document from  https://arxiv.org/e-print/2411.06474


extracting tarball to tmp_2411.06474... done.


F. Walter  ->  F. Walter  |  ['F. Walter']


Found 87 bibliographic references in tmp_2411.06474/mrybak_as2vla_sizes.bbl.
Retrieving document from  https://arxiv.org/e-print/2411.06647


extracting tarball to tmp_2411.06647...

 done.
Retrieving document from  https://arxiv.org/e-print/2411.06996


extracting tarball to tmp_2411.06996... done.
Retrieving document from  https://arxiv.org/e-print/2411.07162



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2411.07162...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# The log folder may not exist yet (e.g. on a fresh build tree).
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# Explicit UTF-8: author names and abstracts regularly contain non-ASCII
# characters, which must not depend on the locale's default encoding.
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a document was produced
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sorting by reason groups identical reasons under a single heading
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, any other failure in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # every failure is displayed here as well as written to the log;
        # restricting the display to the 'red' ones is currently disabled
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.06474-b31b1b.svg)](https://arxiv.org/abs/2411.06474) | **CO(1--0) imaging reveals 10-kiloparsec molecular gas reservoirs around star-forming galaxies at high redshift**  |
|| M. Rybak, et al. -- incl., <mark>F. Walter</mark> |
|*Appeared on*| *2024-11-12*|
|*Comments*| *Submitted to A&A. 9 pages, 5 figures*|
|**Abstract**|            Massive, intensely star-forming galaxies at high redshift require a supply of molecular gas from their gas reservoirs, replenished by infall from the surrounding circumgalactic medium, to sustain their immense star-formation rates. However, our knowledge of the extent and morphology of their cold-gas reservoirs is still in its infancy. We present the results of stacking 80 hours of JVLA observations of CO(1--0) emission -- which traces the cold molecular gas -- in 19 $z=2.0-4.5$ dusty, star-forming galaxies from the AS2VLA survey. The visibility-plane stack reveals extended emission with a half-light radius of $3.8\pm0.5$~kpc, 2--3$\times$ more extended than the dust-obscured star formation and $1.4\pm0.2\times$ more extended than the stellar emission. Similarly, stacking the [CI](1--0) observations for a subsample of our galaxies yields sizes consistent with CO(1--0). The CO(1--0) size is comparable to the [CII] halos detected around high-redshift star-forming this http URL bulk (up to 80\%) of molecular gas resides outside the star-forming region; only a small part of their molecular gas reservoir directly contributes to their current star formation. Photon-dissociation region modelling indicates that the extended CO(1--0) emission arises from clumpy, dense clouds rather than smooth, diffuse gas.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.06356-b31b1b.svg)](https://arxiv.org/abs/2411.06356) | **Measuring cosmic curvature with non-CMB observations**  |
|| P.-J. Wu, <mark>X. Zhang</mark> |
|*Appeared on*| *2024-11-12*|
|*Comments*| *9 pages, 3 figures*|
|**Abstract**|            The cosmic curvature $\Omega_{K}$ is an important parameter related to the inflationary cosmology and the ultimate fate of the universe. In this work, we adopt the non-CMB observations to constrain $\Omega_{K}$ in the $\Lambda$CDM model and its extensions. The DESI baryon acoustic oscillation, DES type Ia supernova, cosmic chronometer, and strong gravitational lensing time delay data are considered. We find that the data combination favors an open universe in the $\Lambda$CDM model, specifically $\Omega_{K}=0.108\pm0.056$ at the $1\sigma$ confidence level, which is in $2.6\sigma$ tension with the Planck CMB result supporting our universe being slightly closed. In the $\Lambda$CDM extensions, the data combination is consistent with a spatially flat universe. However, the central value of $\Omega_{K}$ is positive and has a significant deviation from zero. We adopt the Akaike information criterion to compare different cosmological models. The result shows that non-flat models fit the observational data better that the flat $\Lambda$CDM model, which adds evidence to the argument that flat $\Lambda$CDM is not the ultimate model of cosmology.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.06372-b31b1b.svg)](https://arxiv.org/abs/2411.06372) | **NEXUS Early Data Release: NIRCam Imaging and WFSS Spectroscopy from the First (Partial) Wide Epoch**  |
|| M.-Y. Zhuang, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2024-11-12*|
|*Comments*| *14 pages, 9 figures, 2 tables. Data products are publicly accessible at this https URL. Online interactive map for quick visualization of released images and WFSS spectra can be found at https://ariel. this http URL*|
|**Abstract**|            We present the Early Data Release of the Multi-Cycle JWST-NEXUS Treasury program (2024-2028), which includes NIRCam imaging and WFSS observations from the first (partial) NEXUS-Wide epoch covering the central 100 ${\rm arcmin^2}$ of the NEXUS field, located near the North Ecliptic Pole and within the Euclid Ultra-Deep Field. We release reduced NIRCam mosaics (F090W, F115W, F150W, F200W, F356W, F444W), photometric source catalogs, as well as preliminary WFSS spectra (in F322W2 and F444W) for the subset of bright sources (F356W$<$21 mag or F444W$<$21 mag). These observations fully cover the NEXUS-Deep area, and anchor the long-term baseline of the program. These data will be used for initial target selection for the NIRSpec/MSA spectroscopy starting from June 2025. The NIRCam imaging reaches depths of 27.4--28.2 (AB) mags in F090W--F444W. Upcoming NEXUS-Wide epochs will expand the area to the full $\sim 400\,{\rm arcmin^2}$, and improve the NIRCam exposure depths in the Wide tier by a factor of three. In addition, this central region will be repeatedly covered by the NEXUS-Deep observations (NIRCam imaging and NIRSpec/MSA PRISM spectroscopy) over 18 epochs with a $\sim 2$-month cadence. We demonstrate the data quality of the first NEXUS observations, and showcase some example science cases enabled by these data.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.06647-b31b1b.svg)](https://arxiv.org/abs/2411.06647) | **Multiplicity of Galactic Cepheids from long-baseline interferometry V. High-accuracy orbital parallax and mass of SU Cygni**  |
|| A. Gallenne, et al. -- incl., <mark>S. Kraus</mark>, <mark>D. Mortimer</mark> |
|*Appeared on*| *2024-11-12*|
|*Comments*| *Accepted for publication in A&A*|
|**Abstract**|            Cepheid masses are particularly necessary to help solving the mass discrepancy, while independent distance determinations provide crucial test of the period-luminosity relation and Gaia parallaxes. We used CHARA/MIRC to measure the astrometric positions of the high-contrast companion orbiting the Cepheid SU Cygni. We also present new radial velocity measurements from the HST. The combination of interferometric astrometry with optical and ultraviolet spectroscopy provides the full orbital elements of the system, in addition to component masses and the distance to the Cepheid system. We measured the mass of the Cepheid, $M_A = 4.859\pm0.058M_\odot$, and its two companions, $M_{Ba} = 3.595 \pm 0.033 M_\odot$ and $M_{Bb} = 1.546 \pm 0.009 M_\odot$. This is the most accurate existing measurement of the mass of a Galactic Cepheid (1.2%). Comparing with stellar evolution models, we show that the mass predicted is higher than the measured mass of the Cepheid, similar to conclusions of our previous work. We also measured the distance to the system to be $926.3 \pm 5.0$pc, i.e. an unprecedented parallax precision of $6\mu$as (0.5%), being the most precise and accurate distance for a Cepheid. Such precision is similar to what is expected by Gaia for the last data release (DR5 in $\sim$ 2030) for single stars fainter than G = 13, but is not guaranteed for stars as bright as SU Cyg. We demonstrated that evolutionary models remain inadequate in accurately reproducing the measured mass, often predicting higher masses for the expected metallicity, even when factors such as rotation or convective core overshooting are taken into account. Our precise distance measurement allowed us to compare prediction period-luminosity relations. We found a disagreement of 0.2-0.5 mag with relations calibrated from photometry, while relations calibrated from direct distance measurement are in better agreement.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.06996-b31b1b.svg)](https://arxiv.org/abs/2411.06996) | **Flaring gamma-ray emission coincident with a hyperactive fast radio burst source**  |
|| Y. Xing, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2024-11-12*|
|*Comments*| *Comments are welcome*|
|**Abstract**|            Fast radio bursts (FRBs) are bright milliseconds-duration radio bursts from cosmological distances. Despite intense observational and theoretical studies, their physical origin is still mysterious. One major obstacle is the lack of identification of multi-wavelength counterparts for FRBs at cosmological distances. So far, all the searches other than in the radio wavelength, including those in the gamma-ray energies, have only left upper limits. Here we report a gigaelectronvolt (GeV) gamma-ray flare lasting 15.6 seconds as well as additional evidence of variable gamma-ray emission in temporal and spatial association with the hyper-active, newly discovered repeating FRB 20240114A, which has been localized to a dwarf galaxy at a redshift of 0.13. The energetic, short GeV gamma-ray flare reached a prompt isotropic luminosity of the order of ${10}^{48}~{\rm ergs~{s}^{-1}}$. The additional less-significant gamma-ray flares, if true, also have similar luminosities; such flares could contribute to a 5-day average luminosity of the order of ${10}^{45}~{\rm ergs~{s}^{-1}}$. These high-luminosity flares challenge the traditional FRB engine scenario involving a seconds-period magnetar. Rather, it suggests a powerful, long-lived, but newborn energy source at the location of this active repeater, either directly powering the bursts or indirectly triggering bursts in the vicinity of the FRB engine.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.07162-b31b1b.svg)](https://arxiv.org/abs/2411.07162) | **Search for Extended GeV Sources in the Inner Galactic Plane**  |
|| S. Abdollahi, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2024-11-12*|
|*Comments*| *43 pages, 17 figures, 11 tables*|
|**Abstract**|            The recent detection of extended $\gamma$-ray emission around middle-aged pulsars is interpreted as inverse-Compton scattering of ambient photons by electron-positron pairs escaping the pulsar wind nebula, which are confined near the system by unclear mechanisms. This emerging population of $\gamma$-ray sources was first discovered at TeV energies and remains underexplored in the GeV range. To address this, we conducted a systematic search for extended sources along the Galactic plane using 14 years of Fermi-LAT data above 10 GeV, aiming to identify a number of pulsar halo candidates and extend our view to lower energies. The search covered the inner Galactic plane ($\lvert l\rvert\leq$ 100$^{\circ}$, $\lvert b\rvert\leq$ 1$^{\circ}$) and the positions of known TeV sources and bright pulsars, yielding broader astrophysical interest. We found 40 such sources, forming the Second Fermi Galactic Extended Sources Catalog (2FGES), most with 68% containment radii smaller than 1.0$^{\circ}$ and relatively hard spectra with photon indices below 2.5. We assessed detection robustness using field-specific alternative interstellar emission models and by inspecting significance maps. Noting 13 sources previously known as extended in the 4FGL-DR3 catalog and five dubious sources from complex regions, we report 22 newly detected extended sources above 10 GeV. Of these, 13 coincide with H.E.S.S., HAWC, or LHAASO sources; six coincide with bright pulsars (including four also coincident with TeV sources); six are associated with 4FGL point sources only; and one has no association in the scanned catalogs. Notably, six to eight sources may be related to pulsars as classical pulsar wind nebulae or pulsar halos.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.05917-b31b1b.svg)](https://arxiv.org/abs/2411.05917) | **VLTI/GRAVITY Observations of AF Lep b: Preference for Circular Orbits, Cloudy Atmospheres, and a Moderately Enhanced Metallicity**  |
|| W. O. Balmer, et al. -- incl., <mark>P. Molliere</mark>, <mark>W. Brandner</mark>, <mark>G. Chauvin</mark>, <mark>T. Henning</mark> |
|*Appeared on*| *2024-11-12*|
|*Comments*| *Accepted to the Astronomical Journal. 12 figures, 4 tables*|
|**Abstract**|            Direct imaging observations are biased towards wide-separation, massive companions that have degenerate formation histories. Although the majority of exoplanets are expected to form via core accretion, most directly imaged exoplanets have not been convincingly demonstrated to follow this formation pathway. We obtained new interferometric observations of the directly imaged giant planet AF Lep b with the VLTI/GRAVITY instrument. We present three epochs of 50$\mu$as relative astrometry and the K-band spectrum of the planet for the first time at a resolution of R=500. Using only these measurements, spanning less than two months, and the Hipparcos-Gaia Catalogue of Accelerations, we are able to significantly constrain the planet's orbit; this bodes well for interferometric observations of planets discovered by Gaia DR4. Including all available measurements of the planet, we infer an effectively circular orbit ($e<0.02, 0.07, 0.13$ at $1, 2, 3 \sigma$) in spin-orbit alignment with the host, and a measure a dynamical mass of $M_\mathrm{p}=3.75\pm0.5\,M_\mathrm{Jup}$. Models of the spectrum of the planet show that it is metal rich ([M/H]$=0.75\pm0.25$), with a C/O ratio encompassing the solar value. This ensemble of results show that the planet is consistent with core accretion formation.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error list index out of range</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Local figures referenced in `md` (markdown links whose label starts with
    ``Fig`` and ``<img src="..." .../>`` tags) are copied below `directory`,
    keeping their relative path; the markdown text itself is written to
    ``directory/md_fname``. References containing ``http`` are not copied,
    and missing local files are reported and skipped.

    :param md: markdown text of the document
    :param md_fname: file name of the exported markdown document
    :param directory: output directory (created, with its parents, if missing)
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if os.path.exists(directory) and not os.path.isdir(directory):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if not os.path.exists(directory):
        print(f"creating directory {directory:s}")
        # makedirs (not mkdir): the output directory may be nested, e.g. '_build/html/'
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) +
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # replicate the figure's relative folder structure below `directory`
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)

    md_path = os.path.join(directory, md_fname)
    with open(md_path, 'w') as fout:
        fout.write(md)
    print("exported in ", md_path)
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write one '<paper_id>.md' file per generated document (and copy its figures)
# into the HTML build folder.
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2411.06474/./sfr_radius.png', 'tmp_2411.06474/./Mmol_radius.png', 'tmp_2411.06474/./stack_all.png', 'tmp_2411.06474/./stack_all_exponential.png', 'tmp_2411.06474/./stack_FIR_50mhz_detections_noweighting.png']
copying  tmp_2411.06474/./sfr_radius.png to _build/html/
copying  tmp_2411.06474/./Mmol_radius.png to _build/html/
copying  tmp_2411.06474/./stack_all.png to _build/html/
copying  tmp_2411.06474/./stack_all_exponential.png to _build/html/
copying  tmp_2411.06474/./stack_FIR_50mhz_detections_noweighting.png to _build/html/
exported in  _build/html/2411.06474.md
    + _build/html/tmp_2411.06474/./sfr_radius.png
    + _build/html/tmp_2411.06474/./Mmol_radius.png
    + _build/html/tmp_2411.06474/./stack_all.png
    + _build/html/tmp_2411.06474/./stack_all_exponential.png
    + _build/html/tmp_2411.06474/./stack_FIR_50mhz_detections_noweighting.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render every generated markdown document inline for a quick visual check.
# A plain loop is used: the calls are made for their side effect only, so no
# throwaway list of results is built (and nothing needs to be silenced with ';').
for k in documents:
    display(Markdown(k[1]))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\comDS}[1]{\textcolor{blue}{#1} ~}$
$\newcommand{\avgg}[1]{\left< #1 \right>}$</div>



<div id="title">

# CO(1--0) imaging reveals 10-kiloparsec molecular gas reservoirs around star-forming galaxies at high redshift

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2411.06474-b31b1b.svg)](https://arxiv.org/abs/2411.06474)<mark>Appeared on: 2024-11-12</mark> -  _Submitted to A&A. 9 pages, 5 figures_

</div>
<div id="authors">

M. Rybak, et al. -- incl., <mark>F. Walter</mark>

</div>
<div id="abstract">

**Abstract:** Massive, intensely star-forming galaxies at high redshift require a supply of molecular gas from their gas reservoirs, replenished by infall from the surrounding circumgalactic medium, to sustain their immense star-formation rates. However, our knowledge of the extent and morphology of their cold-gas reservoirs is still in its infancy.  We present the results of stacking 80 hours of JVLA observations of CO(1--0) emission -- which traces the cold molecular gas -- in 19 $z=2.0-4.5$ dusty, star-forming galaxies from the AS2VLA survey. The visibility-plane stack reveals extended emission with a half-light radius of $3.8\pm0.5$ kpc, 2--3 $\times$ more extended than the dust-obscured star formation and $1.4\pm0.2\times$ more extended than the stellar emission. Similarly, stacking the [ $\ion{C}{i}$ ] (1--0) observations for a subsample of our galaxies yields sizes consistent with CO(1--0). The CO(1--0) size is comparable to the [ $\ion{C}{ii}$ ] halos detected around high-redshift star-forming galaxies.  The bulk (up to 80 \% ) of molecular gas resides outside the star-forming region; only a small part of their molecular gas reservoir directly contributes to their current star formation. Photon-dissociation region modelling indicates that the extended CO(1--0) emission arises from clumpy, dense clouds rather than smooth, diffuse gas.

</div>

<div id="div_fig1">

<img src="tmp_2411.06474/./sfr_radius.png" alt="Fig5.1" width="50%"/><img src="tmp_2411.06474/./Mmol_radius.png" alt="Fig5.2" width="50%"/>

**Figure 5. -** Half-light radii of CO(1--0) and [$\ion${C}{ii}] emission versus star-formation rate (_left_) and total molecular gas mass (_right_) for galaxies from our sample and literature (see Section \ref{subsec:sizes} references). The protoclusters from [Emonts, Lehnert and Villar-Martín (2016)]() and [Dannerbauer, Lehnert and Emonts (2017)]() are highlighted by circles. We also show predictions for $z=3.1$ galaxies from the SIMBA simulation  ([Davé, Anglés-Alcázar and Narayanan 2019]())  as open grey symbols; {grey lines indicate the running average}. The inferred size of the CO(1--0) emission in our sample is consistent to the extended CO(1--0) and [$\ion${C}{ii}] reservoirs around other high-redshift DSFGs with comparable SFR. (*fig:r_mmol_sigma*)

</div>
<div id="div_fig2">

<img src="tmp_2411.06474/./stack_all.png" alt="Fig4.1" width="50%"/><img src="tmp_2411.06474/./stack_all_exponential.png" alt="Fig4.2" width="50%"/>

**Figure 4. -** Stacked CO(1--0) image-plane and $uv$-plane data for 19 sources with robust CO(1--0) detections.
    For the image-plane stack, the contours are drawn at $\pm(2,4,6...)\sigma$; the white ellipses indicate the mean FWHM beam size. The $uv$-plane data are integrated over $\pm$1 FWHM velocity range, radially binned with a step of 5 k$\lambda$. For CO(1--0), the exponential model with $R_\mathrm{1/2}$=$0.49\pm0.07"$(3.8$\pm$0.5 kpc at our median $z=3.1$) is strongly preferred by the evidence. (*fig:stack_image*)

</div>
<div id="div_fig3">

<img src="tmp_2411.06474/./stack_FIR_50mhz_detections_noweighting.png" alt="Fig1" width="100%"/>

**Figure 1. -** Rest-frame stacked spectrum, extracted from Cleaned cubes and normalised to the median flux-weighted redshift $z=3.1$. The rest-frame frequency resolution is 50 MHz ($\approx$125 km s$^{-1}$). The stacked spectrum is consistent with a Gaussian profile with a FWHM of $640\pm70$ km s$^{-1}$; we do not find any evidence for outflow signatures. The slight positive /negative excess at higher/lower frequencies is likely a weak residual continuum signal and is not associated with the CO(1--0) line. (*fig:spectral_stack*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2411.06474"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Select the generated markdown pages whose inode change time (st_ctime)
# lies within the last `days` days.
files = glob('_build/html/*.md')
days = 7
# Both instants are timezone-aware UTC: subtracting a naive local "now" from
# a naive UTC timestamp would skew every age by the machine's UTC offset.
now = datetime.now(tz=timezone.utc)
max_age = timedelta(days=days)
res = []
for fk in files:
    modified = datetime.fromtimestamp(os.stat(fk).st_ctime, tz=timezone.utc)
    if now - modified <= max_age:
        res.append(fk)
# Reverse lexicographic order of the paths (glob returns distinct names).
res.sort(reverse=True)
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

244  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(fname, date)`` pairs, ``date`` being a string
    :param days: size of the look-back window, in days
    :returns: generator over the ``fname`` of every pair whose date is not
              older than the cutoff, most recent date first
    """
    # Dates are compared as plain strings against ``str(date)`` (ISO
    # ``YYYY-MM-DD``); this assumes the stored dates use the same layout.
    cutoff = str(datetime.date.today() - datetime.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date < cutoff:
            # Sorted newest first: every remaining entry is older as well.
            break
        yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date from each document

    Only the first line containing "Appeared on" is used in each file;
    files without such a line are left out of the result.

    :param lst_file: iterable of paths to the files to scan
    :returns: list of ``(fname, date)`` tuples, ``date`` being the stripped
              text found between ``Appeared on:`` and ``</mark>``
    """
    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument (not a module-level variable) so that the
    # function works on whatever list the caller provides.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    break
    return dates

from glob import glob
# Scan every generated page for its appearance date and keep the ones that
# fall inside the `days`-day window.
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

6  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` slides

    :param npub: number of slides; they are numbered from 1 to `npub`
    :returns: HTML text, one element per line
    """
    opening = (
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    )
    # One cell per slide, each wrapping a `slide<N>` placeholder div.
    cells = (
        f"""    <div class="carousel-cell"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    )
    return '\n'.join([*opening, *cells, "  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides

    :param npub: number of slides; they are numbered from 1 to `npub`
    :returns: HTML text, one element per line
    """
    # One grid item per slide, each wrapping a `slide<N>` placeholder div.
    items = [
        f"""    <div class="grid-item"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join(["""  <div class="grid"> """, *items, "  </div>"])

In [13]:
# Render the 7-day archive page: fill the placeholders of the daily template
# with the carousel markup, the document names and the slide ids.
carousel = create_carousel(npub)
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
slides = ', '.join(f'"slide{idx}"' for idx in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Substitutions are applied in this fixed order.
for placeholder, value in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "7-day archives"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, value)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page as the 7-day archive, restricted to the last day.
days = 1
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
slides = ', '.join(f'"slide{idx}"' for idx in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Substitutions are applied in this fixed order.
for placeholder, value in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "Daily"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, value)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# Maximum number of papers shown on the grid page.
npub = 6
# Most recent appearance dates first; take at most `npub` of them (the limit
# is driven by `npub` instead of a second hard-coded constant).
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), npub))]
# Fewer documents than requested may exist: keep the slide count and the
# page title in sync with the documents actually selected.
npub = len(res)
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    # Fill the template placeholders with the generated fragments.
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
