# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation checks (always shown, see the filter set below)."""

class AffiliationError(RuntimeError):
    """Error raised by `validation` when the affiliation verification fails."""

def validation(source: str):
    """Check the affiliations of a paper before its TeX code is parsed.

    Delegates to ``mpia.affiliation_verifications`` and treats any result
    other than ``True`` as a failed check.

    :param source: source text to verify (presumably the TeX code of the paper)
    :raises AffiliationError: if the verification does not return ``True``;
        the message carries what the verification returned
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # str(): a non-string result (e.g. False) must still end up as an
        # AffiliationError, not as a TypeError from the concatenation.
        raise AffiliationError("mpia.affiliation_verifications: " + str(check))

        
# Report every AffiliationWarning, including repeated ones (by default a
# warning is only shown once per location).
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a QR code linking to the arXiv abstract page, using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g. ``2502.21119``)
    :returns: HTML snippet (an ``<img>`` inside ``<div id="qrcode">``) that
              can be appended to the markdown text
    """
    from urllib.parse import quote

    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The link is passed as a query parameter, hence percent-encoded.
    target = quote(f"https://arxiv.org/abs/{paper_id}", safe='')
    # Quote the whole `src` value: quoting only the inner link would leave
    # the quote characters inside the URL (and in the encoded QR data).
    return f'<div id="qrcode"><img src="{url}{target}"></div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Strip the LaTeX commands wrapping non-western encoded characters

    Only the CJK environment with the UTF8/gbsn options is handled so far
    (the list may need to grow); the wrapped characters themselves are kept.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_environment = r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})"
    # group 2 is the content between the begin/end commands
    return re.sub(cjk_environment, r"\2", text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last one is reduced to its initial(s); hyphenated
    tokens keep their hyphen (``Anna-Bella`` -> ``A.-B.``). The first
    parenthesised part (often the name in non-western script) is moved
    verbatim to the end.

    :param name: full name
    :returns: initials followed by the last token (and the parenthesised
              part, if any); an empty string if `name` contains no token
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.findall(r"\((.*?)\)", name)
        # an unbalanced "(" gives no match: leave the name untouched then
        if found:
            suffix = found[0]
            name = name.replace(f"({suffix})", '')
    split = name.split()
    if not split:
        # nothing left to abbreviate (empty name, or only a parenthesised part)
        return f"({suffix})" if suffix else ''
    initials = []
    for token in split[:-1]:
        # dropping empty parts keeps stray hyphens ("-", "Anna-") from raising
        parts = [k for k in token.split('-') if k]
        if parts:
            initials.append('-'.join(k[0] + '.' for k in parts))
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff (Mitarbeiter) list webpage from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against this list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Tell whether a staff member is expected to author papers

    Loose filter: the names of IT, administration, and technical staff are
    listed explicitly and looked up as substrings of `name`.

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    non_scientists = ('Licht', 'Binroth', 'Witzel', 'Jordan',
                      'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                      'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                      'Dehen', 'Beckmann', 'Jager', 'Jäger')
    return not any(excluded in name for excluded in non_scientists)

def add_author_to_list(author_list: list) -> list:
    """ Make sure a fixed set of extra authors is part of the list

    The extra names are appended to `author_list` itself (in place), unless
    they are already present.

    :param author_list: list of authors
    :returns: updated list of authors (the same list object)
    """
    always_included = ('T. Henning',)

    for extra in always_included:
        if extra in author_list:
            continue
        author_list.append(extra)
    return author_list

# Build the list of MPIA author names:
#  - take the names from the MPIA website list and drop the non-scientists
#    (mpia.get_mpia_mitarbeiter_list() does some filtering as well);
#  - add the authors missing because their MPIA name and their author name
#    on papers are inconsistent.
mpia_authors = add_author_to_list(
    [staff[1]
     for staff in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(staff[1])]
)

In [4]:
# manual references: identifiers of papers to process on top of today's listing
add_paper_refs = []
new_papers = get_new_papers()
new_papers.extend([get_paper_from_identifier(ref) for ref in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    """Apply `fn` to `value`, falling back to `value` itself on failure.

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of `fn`, also the fallback result
    :returns: the result of `fn`, or `value` unchanged if `fn` raised an
              ``Exception``
    """
    try:
        result = fn(value, *args, **kwargs)
    except Exception:
        # best effort: keep the input as it is
        result = value
    return result

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    # (a name that cannot be reduced to initials is kept as it is)
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # Matched authors show up wrapped in a <mark> tag in the rendered lists.
    # Test for the tag itself: the bare substring 'mark' also occurs in
    # ordinary names (e.g. "Hallmark") and would select unrelated papers.
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if '<mark>' in hl]
    # the paper keeps the highlighted names for the later rendering
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

J. Davies  ->  J. Davies  |  ['J. Davies']
H. Linz  ->  H. Linz  |  ['H. Linz']
J. Bouwman  ->  J. Bouwman  |  ['J. Bouwman']
T. Henning  ->  T. Henning  |  ['T. Henning']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
K. Lee  ->  K. Lee  |  ['K. Lee']
J. Li  ->  J. Li  |  ['J. Li']
A. d. Graaff  ->  A. D. Graaff  |  ['A. D. Graaff']
Arxiv has 58 new papers today
          6 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# documents: (paper_id, markdown) of the papers that were parsed successfully
# failed: (paper, reason) of the papers that were rejected or raised an error
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalise the identifier to the bare arXiv id.
    # NOTE(review): r'\n' is a raw string, so this removes a literal
    # backslash followed by "n", not a newline character (surrounding
    # newlines are removed by strip() anyway) -- confirm this is intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']

    folder = f'tmp_{paper_id}'

    try:
        # retrieve the source only if its folder is not there already
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", folder)

        try:
            doc = latex.LatexDocument(folder, validation=validation)
        except AffiliationError as affilerror:
            # failed affiliation check: record the paper and move on
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue

        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors],
                mpia_authors, verbose=True)
        # fall back to the arXiv abstract when none was parsed from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']

        # The "Appeared on: ..." marker is parsed again later in this notebook
        # to get the date of the exported documents: keep the format in sync.
        doc.comment = (get_markdown_badge(paper_id) +
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"

        full_md = doc.generate_markdown_text()

        full_md += get_markdown_qrcode(paper_id)

        # replace citations
        # (best effort: the document is exported even if this step fails)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)

        documents.append((paper_id, full_md))
    except Exception as e:
        # any other failure: warn, record the paper, carry on with the next one
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/6 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2502.20447
extracting tarball to tmp_2502.20447...

 done.
Retrieving document from  https://arxiv.org/e-print/2502.20472
extracting tarball to tmp_2502.20472... done.
Retrieving document from  https://arxiv.org/e-print/2502.20722


not a gzip file


Retrieving document from  https://arxiv.org/e-print/2502.20820
extracting tarball to tmp_2502.20820...

 done.
Retrieving document from  https://arxiv.org/e-print/2502.20841


extracting tarball to tmp_2502.20841... done.
Retrieving document from  https://arxiv.org/e-print/2502.21119


extracting tarball to tmp_2502.21119... done.


Found 150 bibliographic references in tmp_2502.21119/paper.bbl.
Error retrieving bib data for  cameron_jades_2023: 'author'
Error retrieving bib data for  Yang_blueberries: 'author'


### Export the logs

Throughout, we also keep track of the logs per paper. See `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"
# open() does not create missing directories
os.makedirs(os.path.dirname(logfile), exist_ok=True)


# utf-8: titles and abstracts contain non-ASCII characters
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a document was generated
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that each reason gets a single heading in the log
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, actual errors in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.21119-b31b1b.svg)](https://arxiv.org/abs/2502.21119) | **Detection of the 2175Å UV Bump at z>7: Evidence for Rapid Dust Evolution in a Merging Reionisation-Era Galaxy**  |
|| K. Ormerod, et al. -- incl., <mark>A. d. Graaff</mark> |
|*Appeared on*| *2025-03-04*|
|*Comments*| *Submitted to MNRAS*|
|**Abstract**|            Dust is a fundamental component of the interstellar medium (ISM) within galaxies, as dust grains are highly efficient absorbers of UV and optical photons. Accurately quantifying this obscuration is crucial for interpreting galaxy spectral energy distributions (SEDs). The extinction curves in the Milky Way (MW) and Large Magellanic Cloud (LMC) exhibit a strong feature known as the 2175A UV bump, most often attributed to small carbonaceous dust grains. This feature was recently detected in faint galaxies out to z~7 suggesting rapid formation channels. Here we report the detection of a strong UV bump in a luminous Lyman-break galaxy at z = 7.11235, GNWY-7379420231, through observations taken as part of the NIRSpec Wide GTO survey. We fit a dust attenuation curve that is consistent with the MW extinction curve within 1{\sigma}, in a galaxy just ~700 Myr after the Big Bang. From the integrated spectrum, we infer a young mass-weighted age (t* ~ 22-59 Myr) for this galaxy, however spatially resolved SED fitting unveils the presence of an older stellar population (t* ~ 252 Myr). Furthermore, morphological analysis provides evidence for a potential merger. The underlying older stellar population suggests the merging system could be pre-enriched, with the dust illuminated by a merger-induced starburst. Moreover, turbulence driven by stellar feedback in this bursty region may be driving PAH formation through top-down shattering. The presence of a UV bump in GNWY-7379420231 solidifies growing evidence for the rapid evolution of dust properties within the first billion years of cosmic time.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.20472-b31b1b.svg)](https://arxiv.org/abs/2502.20472) | **Simple molecules and complex chemistry in a protoplanetary disk: A JWST investigation of the highly inclined disk d216-0939**  |
|| A. Potapov, et al. -- incl., <mark>H. Linz</mark>, <mark>J. Bouwman</mark>, <mark>T. Henning</mark> |
|*Appeared on*| *2025-03-04*|
|*Comments*| *10 pages, 7 figures, 6 tables, accepted by A&A on February 20, 2025*|
|**Abstract**|            While the number of detected molecules, particularly complex organic molecules, in the solid-state in astrophysical environments is still rather limited, laboratory experiments and astrochemical models predict many potential candidates. Detection of molecules in protoplanetary disks provides a bridge between the chemical evolution of the interstellar medium and the chemistry of planets and their atmospheres. The excellent spectral sensitivity, broad wavelength coverage and high spatial resolution of the James Webb Space Telescope (JWST) allows for making progress in exploring chemical compositions of various astrophysical environments including planet-forming disks. They are a prerequisite for probing the disk content by means of sensitive absorption studies. In this paper, we present initial results of the JWST Cycle 1 GO program 1741 on d216-0939, a highly inclined TTauri disk located in the outskirts of the Orion Nebula Cluster. We utilise the NIRSpec and MIRI integral field unit spectrographs to cover its spectrum from 1.7 to 28~$\mu$m. In the d216-0939 disk, we give assignments of the composition of silicate grains. We unambiguously detect solid-state features of H$_2$O, CO$_2$, $^{13}$CO$_2$, CO, OCN$^-$, and tentatively OCS; species that had been detected recently also in other circumstellar disks. For the first time in disks, we provide unique detections of ices carrying NH$_4^+$ and the complex organic molecule ammonium carbamate (NH$_4^+$NH$_2$COO$^-$). The latter detections speak for a very efficient NH$_3$ chemistry in the disk. We also show the very important role of scattering in the analysis of observational spectra of highly inclined disks.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: '69117' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.20447-b31b1b.svg)](https://arxiv.org/abs/2502.20447) | **Detecting galaxy-21-cm cross-correlation during reionization**  |
|| S. Gagnon-Hartman, <mark>J. Davies</mark>, A. Mesinger |
|*Appeared on*| *2025-03-04*|
|*Comments*| *14 pages, 12 figures. submitted to A&A*|
|**Abstract**|            The cosmic 21-cm signal promises to revolutionize studies of the Epoch of Reionization (EoR). Radio interferometers are aiming for a preliminary, low signal-to-noise (S/N) detection of the 21-cm power spectrum. Cross-correlating 21-cm with galaxies will be especially useful in these efforts, providing both a sanity check for initial 21-cm detection claims and potentially increasing the S/N due to uncorrelated residual systematics. Here we self-consistently simulate large-scale (1 Gpc) galaxy and 21-cm fields, computing their cross-power spectra for various choices of instruments as well as survey properties. We use 1080h observations with SKA-low AA* and HERA-350 as our benchmark 21-cm observations. We create mock Lyman alpha narrow-band, slitless and slit spectroscopic surveys, using benchmarks from instruments such as Subaru HyperSupremeCam, Roman grism, VLT MOONS, ELT MOSAIC, and JWST NIRCam. We forecast the resulting S/N of the galaxy-21-cm cross power spectrum, varying the galaxy survey area, depth and level of 21-cm foreground contamination for each pair of instruments. We find that the highest S/N is achievable through slitless, wide-area spectroscopic surveys, with the proposed Roman HLS survey resulting in a 55$\sigma$ detection of the cross power with 21-cm as observed with SKA-low AA* for our fiducial model. Narrow-band dropout surveys are unlikely to result in a detectable cross-power, due to their poor redshift localization. Slit spectroscopy can provide a high S/N detection of the cross power for SKA-low AA* observations. Specifically, the planned MOONRISE survey with MOONS on the VLT can result in a 3$\sigma$ detection, while a survey of comparable observing time using MOSAIC on the ELT can result in a 4$\sigma$ detection. Our results can be used to guide survey strategies, facilitating the detection of the galaxy-21-cm cross power spectrum.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.20820-b31b1b.svg)](https://arxiv.org/abs/2502.20820) | **The Chinese pulsar timing array data release I. Polarimetry for 56 millisecond pulsars**  |
|| J. Xu, et al. -- incl., <mark>K. Lee</mark> |
|*Appeared on*| *2025-03-04*|
|*Comments*| *17 pages, 10 figures, 2 tables, accepted by A&A*|
|**Abstract**|            We present polarization pulse profiles for 56 millisecond pulsars (MSPs) monitored by the Chinese Pulsar Timing Array (CPTA) collaboration using the Five-hundred-meter Aperture Spherical radio Telescope (FAST). The observations centered at 1.25 GHz with a raw bandwidth of 500 MHz. Due to the high sensitivity ($\sim$16 K/Jy) of the FAST telescope and our long integration time, the high signal-to-noise ratio polarization profiles show features hardly detected before. Among 56 pulsars, the polarization profiles of PSRs J0406$+$3039, J1327$+$3423, and J2022$+$2534 were not previously reported. 80\% of MSPs in the sample show weak components below 3\% of peak flux, 25\% of pulsars show interpulse-like structures, and most pulsars show linear polarization position angle jumps. Six pulsars seem to be emitting for full rotation phase, with another thirteen pulsars being good candidates for such a 360$^\circ$ radiator. We find that the distribution of the polarization percentage in our sample is compatible with the normal pulsar distribution. Our detailed evaluation of the MSP polarization properties suggests that the wave propagation effects in the pulsar magnetosphere are important in shaping the MSP polarization pulse profiles.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.20841-b31b1b.svg)](https://arxiv.org/abs/2502.20841) | **Calibrating the Color-Magnitude Relation of M Giants by Using Open Clusters**  |
|| X. Tang, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2025-03-04*|
|*Comments*| **|
|**Abstract**|            M giants, with their distinctive properties such as high luminosity, serve as excellent indicators for mapping the structure of the Milky Way. The distance to distant M giants can be determined by using the color-magnitude relation (CMR), which is derived from color-magnitude diagrams of specific systems in previous studies. In this work, we aimed to achieve more accurate distance determination for M giants by focusing on open clusters (OCs) with a large number of member stars and thus improve the CMR. For the first time, we compiled a census of OCs harboring M giants using Gaia Data Release 3 (DR3) and Large Sky Area Multi-Object Fiber Spectroscopic Telescope Data Release 9. We identified 58 M giants associated with 43 OCs and obtained their astrometric and photometric parameters from Gaia DR3. Using the distances of these OCs, we derived the CMR for M giants as a linear correlation, expressed as $M_{Ks}=3.85-8.26(J-K_s$). This linear relation proved superior to the empirical distance relation in characterizing the CMR of M giants. The photometric distances of M giants derived from the CMR are consistent with the parallax distances from Gaia and known spectroscopic distances, with median deviations of 1.5% and 2.3%, respectively. Using the distances of M giants derived from the CMR, we computed their radial velocity ($V_R$), azimuthal velocity ($V{\phi}$), and vertical velocity ($V_Z$), respectively. The distributions of these velocities revealed key features of the Galactic disk, including oscillation, north-south rotational asymmetry, and warp. These findings are consistent with previous studies and further validate the reliability of the derived CMR.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.20722-b31b1b.svg)](https://arxiv.org/abs/2502.20722) | **Internal Heat and Energy Imbalance of Uranus**  |
|| X. Wang, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2025-03-04*|
|*Comments*| **|
|**Abstract**|            With its extreme axial tilt, radiant energy budget and internal heat of Uranus remain among the most intriguing mysteries of our Solar System. Here, we present the global radiant energy budget spanning a complete orbital period, revealing significant seasonal variations driven primarily by the highly variable solar flux. Despite these fluctuations, emitted thermal power consistently exceeds absorbed solar power, indicating a net energy loss and ongoing global cooling. Based on the seasonal variations of radiant energy budget, we determine a statistically significant internal heat flux. This finding resolves a long-standing debate over whether Uranus possesses internal heat. We also examine the energy budget of the weather layer by combining the internal heat with the radiant energies, revealing significant energy imbalances at both global and hemispheric scales. These global and hemispheric imbalances should be considered in theoretical and numerical models. The Uranus flagship mission, as recommended by the recent survey, will provide crucial observations to address more unresolved questions and advance our understanding of this enigmatic ice giant.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error not a gzip file</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Writes `md` to ``directory/md_fname`` and copies every local figure
    referenced by the markdown into `directory`, under the same relative
    path as in the document.

    :param md: markdown text of the document
    :param md_fname: name of the markdown file to create inside `directory`
    :param directory: output directory (created, parents included, if missing)
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs: `directory` can be nested (e.g. '_build/html/')
        os.makedirs(directory, exist_ok=True)

    # Figures are referenced either as markdown links, [Fig...](path), or as
    # HTML <img src="path" .../> tags. The link pattern is non-greedy so that
    # several links on the same line are found one by one.
    fig_fnames = (re.compile(r'\[Fig.*?\]\((.*?)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # keep the relative path of the figure below `directory`
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    md_path = os.path.join(directory, md_fname)
    # utf-8: the documents contain non-ASCII characters
    with open(md_path, 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", md_path)
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write one markdown file (plus its local figures) per generated document
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2502.21119/./Figures/001576_uv_bump_windows.png', 'tmp_2502.21119/./Figures/O32_R23.png', 'tmp_2502.21119/./Figures/bagpipes_models.png']
copying  tmp_2502.21119/./Figures/001576_uv_bump_windows.png to _build/html/
copying  tmp_2502.21119/./Figures/O32_R23.png to _build/html/
copying  tmp_2502.21119/./Figures/bagpipes_models.png to _build/html/
exported in  _build/html/2502.21119.md
    + _build/html/tmp_2502.21119/./Figures/001576_uv_bump_windows.png
    + _build/html/tmp_2502.21119/./Figures/O32_R23.png
    + _build/html/tmp_2502.21119/./Figures/bagpipes_models.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render the markdown text (second item) of every generated document;
# the trailing ';' keeps the resulting list from being echoed as cell output.
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\um}{\mu\mathrm{m}}$
$\newcommand{\id}{GNWY-7379420231}$
$\newcommand{\orcid}[2]{\href{http://orcid.org/#2}{#1}}$
$\newcommand{\orcidsymb}[2]{#1\href{http://orcid.org/#2}{\adjustbox{trim={-.15\width} {0\height} {-.15\width} {0\height},clip}{\includegraphics[height=10pt]{Figures/orcid.pdf}}}}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\thebibliography}{\DeclareRobustCommand{\VAN}[3]{##3}\VANthebibliography}$</div>



<div id="title">

# Detection of the $2175$${Å}$ UV Bump at $z>7$: Evidence for Rapid Dust Evolution in a Merging Reionisation-Era Galaxy

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2502.21119-b31b1b.svg)](https://arxiv.org/abs/2502.21119)<mark>Appeared on: 2025-03-04</mark> -  _Submitted to MNRAS_

</div>
<div id="authors">

K. Ormerod, et al. -- incl., <mark>A. d. Graaff</mark>

</div>
<div id="abstract">

**Abstract:** Dust is a fundamental component of the interstellar medium (ISM) within galaxies, as dust grains are highly efficient absorbers of UV and optical photons. Accurately quantifying this obscuration is crucial for interpreting galaxy spectral energy distributions (SEDs). The extinction curves in the Milky Way (MW) and Large Magellanic Cloud (LMC) exhibit a strong feature known as the $2175$ Å UV bump, most often attributed to small carbonaceous dust grains. This feature was recently detected in faint galaxies out to $z\sim7$ suggesting rapid formation channels.Here we report the detection of a strong UV bump in a luminous Lyman-break galaxy at $z=7.11235 $ , GNWY-7379420231,   through observations taken as part of the NIRSpec Wide GTO survey. We fit a dust attenuation curve that is consistent with the MW extinction curve within $1\sigma$ , in a galaxy just $\sim 700$ Myr after the Big Bang.From the integrated spectrum, we infer a young mass-weighted age ( $t_\star \sim 22-59$ Myr) for this galaxy, however spatially resolved SED fitting unveils the presence of an older stellar population ( $t_\star \sim 252$ Myr). Furthermore, morphological analysis provides evidence for a potential merger. The underlying older stellar population suggests the merging system could be pre-enriched, with the dust illuminated by a merger-induced starburst. Moreover, turbulence driven by stellar feedback in this bursty region may be driving PAH formation through top-down shattering.The presence of a UV bump in GNWY-7379420231 solidifies growing evidence for the rapid evolution of dust properties within the first billion years of cosmic time.

</div>

<div id="div_fig1">

<img src="tmp_2502.21119/./Figures/001576_uv_bump_windows.png" alt="Fig12" width="100%"/>

**Figure 12. -** Spectrum of GNWY-7379420231 (grey solid line) with a power-law fit to the UV continuum (red solid line). The dark red shading represents the UV slope fitting windows. The zoom in panel of the region around 2175Å shows the running median, indicated by a solid black line. This represents the attenuated stellar continuum, and shows a localised absorption feature. The Drude profile fit is shown by the solid blue line, within the fitting window indicated by the vertical dashed lines. The hatched region shows the location of the Ciii doublet. The bottom right panel shows the residuals of the power-law fit (PL) and the combined power-law and Drude profile fit (PL+Drude). The power-law fit alone has a $7.0\sigma$ negative flux excess, with the PL+Drude model showing a significantly better fit, as supported by the BIC values.  (*fig:UV fit*)

</div>
<div id="div_fig2">

<img src="tmp_2502.21119/./Figures/O32_R23.png" alt="Fig2" width="100%"/>

**Figure 2. -** Dust corrected O$_{32}$-R$_{23}$ plot, showing GNWY-7379420231 compared to NIRSpec data  ([ and Heintz 2024](https://ui.adsabs.harvard.edu/abs/2024arXiv240402211H),  cameron_jades_2023, [ and Witten 2025](https://ui.adsabs.harvard.edu/abs/2025MNRAS.537..112W)) , local analogues  ([ and Yang 2017](https://ui.adsabs.harvard.edu/abs/2017ApJ...844..171Y),  Yang_blueberries) , and the Sloan Digital Sky Survey \citep[SDSS;][]{SDSS} Data Release 7  ([ and Abazajian 2009](https://ui.adsabs.harvard.edu/abs/2009ApJS..182..543A)) , shown in black. The red shading shows the region $\log_{10}\mathrm{O}{32} < 0.7$ and $\log_{10}\mathrm{R}_{23} >0.9$ which may contain galaxies hosting an older stellar population  ([ and Witten 2025](https://ui.adsabs.harvard.edu/abs/2025MNRAS.537..112W)) . (*fig:O32_R23*)

</div>
<div id="div_fig3">

<img src="tmp_2502.21119/./Figures/bagpipes_models.png" alt="Fig6" width="100%"/>

**Figure 6. -** Top: Posterior spectra obtained from bagpipes fitting. The observed spectrum and associated errors are shown in grey, with the observed NIRCam photometry shown in black. The x error bars represent the filter width at $50\%$ of the maximum transmission. The posterior photometric points obtained from bagpipes are shown by open squares. Bottom: The residuals from the bagpipes fitting. The dashed vertical lines show the UV bump fitting region, and the blue shaded regions show the spectral regions masked in the bagpipes fitting. (*fig:bagpipes_models*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2502.21119"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    # st_mtime is the time of the last content modification (st_ctime is the
    # metadata change time on Unix and the creation time on Windows).
    # fromtimestamp() without tz gives a naive local time, like `now`, so
    # the difference is not shifted by the UTC offset.
    modified = datetime.fromtimestamp(os.stat(fk).st_mtime)
    delta = now - modified
    if delta <= timedelta(days=days):
        res.append(fk)
# file names in reverse alphabetical order
res = sorted(res, reverse=True)
npub = len(res)
print(npub, f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

344  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(fname, date)`` pairs. ``date`` is compared as a
                string against ``str(date.today() - timedelta(days))``, i.e. an
                ISO ``YYYY-MM-DD`` cutoff (assumes the dates use that same
                format -- TODO confirm against the generated pages)
    :param days: size of the window, in days, counted back from today
    :returns: generator over the ``fname`` of the entries inside the window,
              most recent date first
    """
    # Local import: the notebook binds the global name `datetime` both to the
    # class (`from datetime import datetime`) and to the module
    # (`import datetime`) in different cells, so the global cannot be trusted.
    from datetime import date as _date, timedelta as _timedelta

    # The cutoff does not depend on the entry: compute it once.
    cutoff = str(_date.today() - _timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date of each file

    Each file is read line by line until the first line containing
    ``Appeared on``; files without such a line contribute nothing.

    :param lst_file: iterable of paths of the files to scan
    :returns: list of ``(fname, date)`` tuples, in the order of `lst_file`
    """

    def get_date(line):
        # keep the text between 'Appeared on:' and the closing </mark> tag
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument, not over a notebook-level global.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # only the first match of a file is used
                    break
    return dates

from glob import glob
# Every generated page under _build/html (the pattern has no dot: it matches
# any name ending in "md").
lst = glob('_build/html/*md')
# Size of the archive window, in days.
days = 7
# (fname, date) pairs; `dates` is reused by the later cells that build the
# daily page and the fixed-size grid.
dates = extract_appearance_dates(lst)
# `res` and `npub` feed the cell that renders the 7-day index page.
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

4  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` slides

    :param npub: number of cells to emit; their ids run from ``slide1`` to
                 ``slide<npub>``
    :returns: the HTML snippet, one element per line
    """
    cell_template = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    # opening tag of the container, split over two lines
    opening = [
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    ]
    cells = [cell_template.format(k=index) for index in range(1, npub + 1)]
    return '\n'.join(opening + cells + ["  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides

    :param npub: number of items to emit; their ids run from ``slide1`` to
                 ``slide<npub>``
    :returns: the HTML snippet, one element per line
    """
    item_template = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    items = [item_template.format(k=index) for index in range(1, npub + 1)]
    # container opening tag, then the items, then the closing tag
    return '\n'.join(["""  <div class="grid"> """] + items + ["  </div>"])

In [13]:
# 7-day archive page: one carousel slide per document listed in `res`.
carousel = create_carousel(npub)
basenames = [path.split('/')[-1] for path in res]
docs = ', '.join(f'"{name:s}"' for name in basenames)
slides = ', '.join('"slide{0:d}"'.format(index) for index in range(1, npub + 1))

with open("daily_template.html", "r") as template:
    page = template.read()

# Fill in the template placeholders, one after the other.
substitutions = (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "7-day archives"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
)
for placeholder, value in substitutions:
    page = page.replace(placeholder, value)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page as the 7-day archive, restricted to the last day.
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
basenames = [path.split('/')[-1] for path in res]
docs = ', '.join(f'"{name:s}"' for name in basenames)
slides = ', '.join('"slide{0:d}"'.format(index) for index in range(1, npub + 1))

with open("daily_template.html", "r") as template:
    page = template.read()

# Fill in the template placeholders, one after the other.
substitutions = (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "Daily"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
)
for placeholder, value in substitutions:
    page = page.replace(placeholder, value)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

1  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# Upper bound on the number of papers shown in the grid.
max_pub = 6
# Latest appearance dates first. reversed(sorted(...)) is kept as is so that
# entries sharing a date come out in the same order as before.
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), max_pub)]
# One slide per selected paper: when fewer than `max_pub` dated files exist,
# a fixed slide count would leave slides with no document behind them.
npub = len(res)
print(len(res), f" {max_pub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    # Fill in the template placeholders, one after the other.
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
