# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoded characters from a string
    List may need to grow.
    
    :param text: the text to clean
    :return: the cleaned text
    """
    text = re.sub(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})", r"\2", text)
    return text


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName
    :param name: full name
    :returns: initials
    """
    initials = []
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        suffix = re.findall(r"\((.*?)\)", name)[0]
        name = name.replace(f"({suffix})", '')
    else:
        suffix = ''
    split = name.split()
    for token in split[:-1]:
        if '-' in token:
            current = '-'.join([k[0] + '.' for k in token.split('-')])
        else:
            current = token[0] + '.'
        initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    try:
        return fn(value, *args, **kwargs)
    except Exception:
        return value

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

I. J. M. Crossfield  ->  I. J. M. Crossfield  |  ['I. J. M. Crossfield']
N. Storm  ->  N. Storm  |  ['N. Storm']
P. Eitner  ->  P. Eitner  |  ['P. Eitner']
M. Bergemann  ->  M. Bergemann  |  ['M. Bergemann']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J.A.  ->  S. Jiao  |  ['Jiao']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']


M. Fouesneau  ->  M. Fouesneau  |  ['M. Fouesneau']
Arxiv has 89 new papers today
          7 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/7 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2506.17550


extracting tarball to tmp_2506.17550...

 done.
Retrieving document from  https://arxiv.org/e-print/2506.17711


extracting tarball to tmp_2506.17711...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


Found 107 bibliographic references in tmp_2506.17711/mnras_template.bbl.
Issues with the citations
syntax error in line 144: '=' expected
Retrieving document from  https://arxiv.org/e-print/2506.17926


extracting tarball to tmp_2506.17926... done.
Retrieving document from  https://arxiv.org/e-print/2506.17933


extracting tarball to tmp_2506.17933... done.
Retrieving document from  https://arxiv.org/e-print/2506.18277


extracting tarball to tmp_2506.18277...

 done.


Found 29 bibliographic references in tmp_2506.18277/HAWC_Performance_Enhanced_by_Machine_Learning_in_Gamma-Hadron_Separation.bbl.
Retrieving document from  https://arxiv.org/e-print/2506.18477
extracting tarball to tmp_2506.18477... done.
Retrieving document from  https://arxiv.org/e-print/2506.18708


extracting tarball to tmp_2506.18708...

 done.


M. Fouesneau  ->  M. Fouesneau  |  ['M. Fouesneau']


Found 112 bibliographic references in tmp_2506.18708/aa52614-24.bbl.
Issues with the citations
syntax error in line 522: '=' expected


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.17711-b31b1b.svg)](https://arxiv.org/abs/2506.17711) | **Current Galactic Chemical Evolution models fail to explain rising Na-abundances of young thick disc stars**  |
|| E. K. Owusu, et al. -- incl., <mark>N. Storm</mark>, <mark>P. Eitner</mark>, <mark>M. Bergemann</mark> |
|*Appeared on*| *2025-06-24*|
|*Comments*| *12 pages, 5 figures, 3 tables*|
|**Abstract**|            We recently identified an upturn in [Na/Fe] for the population of Solar-type stars in the Galactic thick disc ($-0.3 < [\mathrm{Fe/H}] < +0.3$ dex) at super-Solar metallicity in GALactic Archaeology with HERMES (GALAH) data. Here, we investigate the cause of this unexplained Na enrichment between ([Fe/H] $\sim 0 - 0.6$ dex) using the OMEGA$+$ galactic chemical evolution code. We investigate the increase of [Na/Fe] with four combinations of nucleosynthetic yields from the literature, with source contributions from core-collapse supernovae, asymptotic giant branch stars, and Type Ia supernovae. We focus on two possible causes for the Na-enhancement: the "metallicity effect" resulting from core-collapse supernovae at super-Solar metallicity and the contribution of metal-rich AGB stars. We adopt two sets of Type Ia supernova yields with one model assuming only Chandrasekhar-mass explosions, and another assuming only sub-Chandrasekhar-mass explosions. We find that the assumed Type Ia explosion has little effect on the [Na/Fe] Galactic chemical evolution modelling, and all Galactic chemical evolution models tested fail to reproduce the observed [\mathrm{Na/Fe}] enrichment in the young thick disc population at super-Solar metallicities. Our study indicates a possible "under-pollution effect" by SNe Ia, which are the dominant producers of iron, in the Galactic disc's Solar-type star population. These findings provide a step forward toward understanding the origin of the unexplained sodium enrichment at super-Solar metallicities in the Galactic disc.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.18277-b31b1b.svg)](https://arxiv.org/abs/2506.18277) | **HAWC Performance Enhanced by Machine Learning in Gamma-Hadron Separation**  |
|| R. Alfaro, et al. -- incl., <mark>J.A.</mark> |
|*Appeared on*| *2025-06-24*|
|*Comments*| **|
|**Abstract**|            Improving gamma-hadron separation is one of the most effective ways to enhance the performance of ground-based gamma-ray observatories. With over a decade of continuous operation, the High-Altitude Water Cherenkov (HAWC) Observatory has contributed significantly to high-energy astrophysics. To further leverage its rich dataset, we introduce a machine learning approach for gamma-hadron separation. A Multilayer Perceptron shows the best performance, surpassing traditional and other Machine Learning based methods. This approach shows a notable improvement in the detector's sensitivity, supported by results from both simulated and real HAWC data. In particular, it achieves a 19\% increase in significance for the Crab Nebula, commonly used as a benchmark. These improvements highlight the potential of machine learning to significantly enhance the performance of HAWC and provide a valuable reference for ground-based observatories, such as Large High Altitude Air Shower Observatory (LHAASO) and the upcoming Southern Wide-field Gamma-ray Observatory (SWGO).         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.18708-b31b1b.svg)](https://arxiv.org/abs/2506.18708) | **The completeness of the open cluster census towards the Galactic anticentre**  |
|| E. L. Hunt, et al. -- incl., <mark>M. Fouesneau</mark> |
|*Appeared on*| *2025-06-24*|
|*Comments*| *18 pages, 13 figures. Accepted in A&A*|
|**Abstract**|            Open clusters have long been used as tracers of Galactic structure. However, without a selection function to describe the completeness of the cluster census, it is difficult to quantitatively interpret their distribution. We create a method to empirically determine the selection function of a Galactic cluster catalogue. We test it by investigating the completeness of the cluster census in the outer Milky Way, where old and young clusters exhibit different spatial distributions. We develop a method to generate realistic mock clusters as a function of their parameters, in addition to accounting for Gaia's selection function and astrometric errors. We then inject mock clusters into Gaia DR3 data, and attempt to recover them in a blind search using HDBSCAN. We find that the main parameters influencing cluster detectability are mass, extinction, and distance. Age also plays an important role, making older clusters harder to detect due to their fainter luminosity function. High proper motions also improve detectability. After correcting for these selection effects, we find that old clusters are $2.97\pm0.11$ times more common at a Galactocentric radius of 13~kpc than in the solar neighbourhood -- despite positive detection biases in their favour, such as hotter orbits or a higher scale height. The larger fraction of older clusters in the outer Galaxy cannot be explained by an observational bias, and must be a physical property of the Milky Way: young outer-disc clusters are not forming in the outer Galaxy, or at least not with sufficient masses to be identified as clusters in Gaia DR3. We predict that in this region, more old clusters than young ones remain to be discovered. The current presence of old, massive outer-disc clusters could be explained by radial heating and migration, or alternatively by a lower cluster destruction rate in the anticentre.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.17550-b31b1b.svg)](https://arxiv.org/abs/2506.17550) | **A High Geometric Albedo for LTT9779b Points Towards a Metal-rich Atmosphere and Silicate Clouds**  |
|| S. Saha, et al. -- incl., <mark>I. J. M. Crossfield</mark> |
|*Appeared on*| *2025-06-24*|
|*Comments*| *18 pages, 11 figures, 3 tables, Accepted for publication in A&A*|
|**Abstract**|            Aims: In this work, we aim to confirm the high albedo of the benchmark ultrahot Neptune LTT9779b using 20 secondary eclipse measurements of the planet observed with CHEOPS. In addition, we perform a search for variability in the reflected light intensity of the planet as a function of time. Methods: First, we used the TESS follow-up data of LTT9779b from three sectors (2, 29, and 69) to remodel the transit signature and estimate an updated set of transit and ephemeris parameters, which were directly used in the modeling of the secondary eclipse lightcurves. This involved a critical noise-treatment algorithm, including sophisticated techniques such as wavelet denoising and Gaussian Process (GP) regression, to constrain noise levels from various sources. In addition to using the officially released reduced aperture photometry data from CHEOPS DRP, we also reduced the raw data using an independent PSF photometry pipeline, known as PIPE, to verify the robustness of our analysis. The extracted secondary eclipse lightcurves were modeled using the PYCHEOPS package, where we have detrended the background noise correlated with the spacecraft roll angle, originating from the inhomogeneous and asymmetric shape of the CHEOPS point spread function, using an N-order glint function. Results: Our independent lightcurve analyses have resulted in consistent estimations of the eclipse depths, with values of 89.9$\pm$13.7 ppm for the DRP analysis and 85.2$\pm$13.1 ppm from PIPE, indicating a high degree of statistical agreement. Adopting the DRP value yields a highly constrained geometric albedo of 0.73$\pm$0.11. No significant eclipse depth variability is detected down to a level of $\sim$37 ppm. Conclusions: Our results confirm that LTT9779b exhibits a strikingly high optical albedo, which substantially reduces the internal energy budget of the planet compared to more opaque...         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.17926-b31b1b.svg)](https://arxiv.org/abs/2506.17926) | **Cosmic Distance Duality Relation with DESI DR2 and Transparency**  |
|| <mark>X. Zhang</mark>, et al. |
|*Appeared on*| *2025-06-24*|
|*Comments*| **|
|**Abstract**|            The Cosmic Distance Duality Relation (CDDR), a fundamental assumption in modern cosmology, posits a direct link between angular diameter distance and luminosity distance. This study presents a comprehensive, model-independent, and data-driven test of the CDDR using a combination of cosmological observations, including Supernovae (SN), Baryon Acoustic Oscillations (BAO), and Hubble parameter ($H(z)$) measurements. We employ both Gaussian Process Regression (GPR) and a novel Compressed Point (CPI) method for reconstructing the CDDR, alongside four distinct parameterizations for potential deviations. Nuisance parameters, such as the supernova absolute magnitude and BAO scale, are rigorously handled via both joint numerical fitting (Method I) and analytic marginalization (Method II). Our findings reveal that while direct reconstruction of the CDDR exhibits no significant deviation (less than 1-$\sigma$) under specific prior assumptions, a notable departure emerges when the SH0ES prior is incorporated, suggesting a systematic influence from the Hubble constant tension. Independently, our parameterized analysis corroborates the consistency of CDDR and confirms the equivalence of the two constraint methodologies. We also find no significant evidence for cosmic opacity. A comparative assessment of reconstruction techniques indicates that GPR generally yields higher precision. These results emphasize the critical role of prior choices and statistical methods in CDDR analyses, providing valuable insights into fundamental cosmological principles and the ongoing Hubble tension.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.17933-b31b1b.svg)](https://arxiv.org/abs/2506.17933) | **Holographic dark energy models in $f(Q,T)$ gravity and cosmic constraint**  |
|| <mark>X. Zhang</mark>, et al. |
|*Appeared on*| *2025-06-24*|
|*Comments*| **|
|**Abstract**|            In this work, we propose a new model that combines holographic dark energy with modified gravity $f(Q,T)$ to explore a possible explanation for the accelerated expansion of the universe. We incorporate the holographic principle into non-metric gravity with non-minimal matter coupling and introduce the Barrow holographic dark energy model to account for a tighter corrections, allowing for a more generalized discussion. Furthermore, we perform parameter estimation using the latest observational data, including Type Ia supernova, BAO and Hubble parameter direct measurements. Our results show that the model provides a theoretical framework to describe late-time cosmic evolution and the universe's accelerated expansion. Despite the additional complexity introduced, the model offers a viable approach for investigating dark energy within modified gravity theories.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.18477-b31b1b.svg)](https://arxiv.org/abs/2506.18477) | **Cosmic Sign-Reversal: Non-Parametric Reconstruction of Interacting Dark Energy with DESI DR2**  |
|| Y.-H. Li, <mark>X. Zhang</mark> |
|*Appeared on*| *2025-06-24*|
|*Comments*| *8 pages, 4 figures*|
|**Abstract**|            A direct interaction between dark energy and dark matter provides a natural and important extension to the standard $\Lambda$CDM cosmology. We perform a non-parametric reconstruction of the vacuum energy ($w=-1$) interacting with cold dark matter using the cosmological data from DESI DR2, Planck CMB, and three SNIa samples (PP, DESY5, and Union3). By discretizing the coupling function $\beta(z)$ into 20 redshift bins and assuming a Gaussian smoothness prior, we reconstruct $\beta(z)$ without assuming any specific parameterization. The mean reconstructed $\beta(z)$ changes sign during cosmic evolution, indicating an energy transfer from cold dark matter to dark energy at early times and a reverse flow at late times. At high redshifts, $\beta(z)$ shows a $\sim 2\sigma$ deviation from $\Lambda$CDM. At low redshifts, the results depend on the SNIa sample: CMB+DESI and CMB+DESI+PP yield $\beta(z)$ consistent with zero within $2\sigma$, while CMB+DESI+DESY5 and CMB+DESI+Union3 prefer negative $\beta$ at $\sim2\sigma$. Both $\chi^2$ tests and Bayesian analyses favor the $\beta(z)$ model, with CMB+DESI DR2+DESY5 showing the most significant support through the largest improvement in goodness of fit ($\Delta\chi^2_{\rm MAP}=-17.76$) and strongest Bayesian evidence ($\ln\mathcal{B} = 5.98 \pm 0.69$). Principal component analysis reveals that the data effectively constrain three additional degrees of freedom in the $\beta(z)$ model, accounting for most of the improvement in goodness of fit. Our results demonstrate that the dynamical dark energy preference in current data can be equally well explained by such a sign-reversal interacting dark energy, highlighting the need for future observations to break this degeneracy.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2506.17711/./figures/figure5.png', 'tmp_2506.17711/./figures/figure2.png', 'tmp_2506.17711/./figures/figure1.png']
copying  tmp_2506.17711/./figures/figure5.png to _build/html/
copying  tmp_2506.17711/./figures/figure2.png to _build/html/
copying  tmp_2506.17711/./figures/figure1.png to _build/html/
exported in  _build/html/2506.17711.md
    + _build/html/tmp_2506.17711/./figures/figure5.png
    + _build/html/tmp_2506.17711/./figures/figure2.png
    + _build/html/tmp_2506.17711/./figures/figure1.png
found figures ['tmp_2506.18277/./efficiency_C0_effi.png', 'tmp_2506.18277/./efficiency_C1_effi.png', 'tmp_2506.18277/./efficiency_C0_qfactor.png', 'tmp_2506.18277/./efficiency_C1_qfactor.png', 'tmp_2506.18277/./roc_curve_c0_log_blind2.png', 'tmp_2506.18277/./roc_curve_c1_log_blind2.png']
copying  tmp_2506.18277/./efficiency_C0_effi.png to _build/html/
copying  tmp_2506.18277/./efficiency_C1_effi.png to _build/html/
copying  tmp_2506.18277/./efficiency_C0_qfactor.png to _

## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\thebibliography}{\DeclareRobustCommand{\VAN}[3]{##3}\VANthebibliography}$</div>



<div id="title">

# Current Galactic Chemical Evolution models fail to explain rising Na-abundances of young thick disc stars

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2506.17711-b31b1b.svg)](https://arxiv.org/abs/2506.17711)<mark>Appeared on: 2025-06-24</mark> -  _12 pages, 5 figures, 3 tables_

</div>
<div id="authors">

E. K. Owusu, et al. -- incl., <mark>N. Storm</mark>, <mark>P. Eitner</mark>, <mark>M. Bergemann</mark>

</div>
<div id="abstract">

**Abstract:** We recently identified an upturn in [ Na/Fe ] for the population of Solar-type stars in the Galactic thick disc ( $-0.3 < \mathrm{[Fe/H]} < +0.3$ dex) at super-Solar metallicity in GALactic Archaeology with HERMES (GALAH) data. Here, we investigate the cause of this unexplained Na enrichment between ( [ Fe/H ] $\approx 0$ -- $0.6$ dex) using the OMEGA $+$ galactic chemical evolution code. We investigate the increase of [ Na/Fe ] with four combinations of nucleosynthetic yields from the literature, with source contributions from core-collapse supernovae, asymptotic giant branch stars, and Type Ia supernovae. We focus on two possible causes for the Na-enhancement: the `metallicity effect’ resulting from core-collapse supernovae at super-Solar metallicity and the contribution of metal-rich AGB stars. We adopt two sets of Type Ia supernova yields with one model assuming only Chandrasekhar-mass explosions, and another assuming only sub-Chandrasekhar-mass explosions. We find that the assumed Type Ia explosion has little effect on the [ Na/Fe ] Galactic Chemical Evolution modelling, and all Galactic chemical evolution models tested fail to reproduce the observed [ Na/Fe ] enrichment in the young thick disc population at super-Solar metallicities. Our study indicates a possible `under-pollution effect' by SNe Ia, which are the dominant producers of iron, in the Galactic disc's Solar-type star population. These findings provide a step forward toward understanding the origin of the unexplained sodium enrichment at super-Solar metallicities in the Galactic disc.

</div>

<div id="div_fig1">

<img src="tmp_2506.17711/./figures/figure5.png" alt="Fig4" width="100%"/>

**Figure 4. -** Panels (a)-(f) are the [Fe/H]-age, age-[Fe/H], [Fe/H]-[Na/Fe], age-[Na/Fe], [Fe/H]-[Na/H] and age-[Na/H] planes, produced by our adjusted GCE code parameters for this study. `Age' here refers to stellar ages. The grey bins are Solar-type thick disc stars selected from the GALAH DR3 catalogue using Equation \ref{eq:Z_range}, with the yellower area having a higher concentration of stars. Solid lines represent GCE models in which all SN Ia explosions are assumed to be from Chandrasekhar mass WDs, while for the dashed lines, sub-Chandrasekhar mass explosions from WD mergers are assumed. We defined the model label in Table \ref{tab:model}. SN Ia contribution is based on the two-exploding white dwarf from \citep{Pakmor2022} and delayed detonation for Chandrasekhar-mass white dwarfs, as described in \citep{Seitenzahl2013}. (*fig:figure5*)

</div>
<div id="div_fig2">

<img src="tmp_2506.17711/./figures/figure2.png" alt="Fig1" width="100%"/>

**Figure 1. -** Normalised Na ($^{23}$Na) production ratios as a function of stellar metallicity (Z) for massive stars with initial masses 15 and 20 M$_{\odot}$. The models shown are massive star yields from \citet[WW95, blue triangles]{Woosley1995}, \citet[Nomoto13, red circles]{Nomoto2013} and \citet[LC18, orange squares]{Limongi2018}. The normalised production ratio (y-axis: Normalised $^{23}$Na Production Ratio) is calculated for each model by dividing the raw $^{23}$Na yield at each metallicity by the minimum $^{23}$Na obtained across all metallicities for that specific model. This normalisation highlights the relative change in Na production as a function of metallicity for each set of stellar models. (*fig:figure2*)

</div>
<div id="div_fig3">

<img src="tmp_2506.17711/./figures/figure1.png" alt="Fig2" width="100%"/>

**Figure 2. -** [Na/Fe] as a function of metallicity [Fe/H] for the sample of Solar-type stars from GALAH DR3 used in this work (see Section \ref{sec:data}). The left panel (a) shows the distribution coloured by stellar age, illustrating the median stellar age in (Gyr) at each ([Fe/H], [Na/Fe]) bin. The right panel displays the same distribution, highlighting the standard deviation in stellar age within the corresponding bins. The Bayesian Stellar Parameters Estimator (BSTEP) was used to compute the stellar ages. The Solar abundance position is marked by the symbol $(\odot)$ at [Fe/H], [Na/Fe]$= (0,0)$. (*fig:figure1*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2506.17711"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$</div>



<div id="title">

# HAWC Performance Enhanced by Machine Learning in Gamma-Hadron Separation

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2506.18277-b31b1b.svg)](https://arxiv.org/abs/2506.18277)<mark>Appeared on: 2025-06-24</mark> - 

</div>
<div id="authors">

R. Alfaro, et al. -- incl., <mark>J.A.</mark>

</div>
<div id="abstract">

**Abstract:** Improving gamma-hadron separation is one of the most effective ways to enhance the performance of ground-based gamma-ray observatories. With over a decade of continuous operation, the High-Altitude Water Cherenkov (HAWC) Observatory has contributed significantly to high-energy astrophysics. To further leverage its rich dataset, we introduce a machine learning approach for gamma-hadron separation. A Multilayer Perceptron shows the best performance, surpassing traditional and other Machine Learning based methods. This approach shows a notable improvement in the detector's sensitivity, supported by results from both simulated and real HAWC data. In particular, it achieves a 19 \% increase in significance for the Crab Nebula, commonly used as a benchmark. These improvements highlight the potential of machine learning to significantly enhance the performance of HAWC and provide a valuable reference for ground-based observatories, such as Large High Altitude Air Shower Observatory (LHAASO) and the upcoming Southern Wide-field Gamma-ray Observatory (SWGO).

</div>

<div id="div_fig1">

<img src="tmp_2506.18277/./efficiency_C0_effi.png" alt="Fig1.1" width="50%"/><img src="tmp_2506.18277/./efficiency_C1_effi.png" alt="Fig1.2" width="50%"/>

**Figure 1. -** Gamma-ray efficiencies (dashed lines) and hadron efficiencies (solid lines) as a function of fHit bins for different classification methods. (a): Results for on-array events. (b): Results for off-array events. Classification methods compared include SC, MLP, CNN, and BDT. (*fig:eff*)

</div>
<div id="div_fig2">

<img src="tmp_2506.18277/./efficiency_C0_qfactor.png" alt="Fig2.1" width="50%"/><img src="tmp_2506.18277/./efficiency_C1_qfactor.png" alt="Fig2.2" width="50%"/>

**Figure 2. -** Q-factor as a function of fHit bins for different classification methods. (a): Results for on-array events. (b): Results for off-array events. Classification methods compared include SC, MLP, CNN, and BDT. Q-factors improve with increasing fHit bins, with notable differences across methods, especially in high-fHit bins. (*fig:qfactor*)

</div>
<div id="div_fig3">

<img src="tmp_2506.18277/./roc_curve_c0_log_blind2.png" alt="Fig3.1" width="50%"/><img src="tmp_2506.18277/./roc_curve_c1_log_blind2.png" alt="Fig3.2" width="50%"/>

**Figure 3. -** Receiver Operating Characteristic (ROC) curves and Area Under the Curve (AUC) values for MLP model performance across different fHit bins. Left panel (a): Results for on-array events. Right panel (b): Results for off-array events. Each curve corresponds to a specific fHit bin, labeled from B0 to B10. The MLP model exhibits progressively better classification performance with increasing fHit, as indicated by higher AUC values. (*fig:ROC_curve*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2506.18277"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$</div>



<div id="title">

# The completeness of the open cluster census towards the Galactic anticentre

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2506.18708-b31b1b.svg)](https://arxiv.org/abs/2506.18708)<mark>Appeared on: 2025-06-24</mark> -  _18 pages, 13 figures. Accepted in A&A_

</div>
<div id="authors">

E. L. Hunt, et al. -- incl., <mark>M. Fouesneau</mark>

</div>
<div id="abstract">

**Abstract:** Open clusters have long been used as tracers of Galactic structure. However, without a selection function to describe the completeness of the cluster census, it is difficult to quantitatively interpret their distribution. We create a method to empirically determine the selection function of a Galactic cluster catalogue. We test it by investigating the completeness of the cluster census in the outer Milky Way, where old and young clusters exhibit different spatial distributions. We develop a method to generate realistic mock clusters as a function of their parameters, in addition to accounting for $*Gaia*$ 's selection function and astrometric errors. We then inject mock clusters into _Gaia_ DR3 data, and attempt to recover them in a blind search using HDBSCAN. We find that the main parameters influencing cluster detectability are mass, extinction, and distance. Age also plays an important role, making older clusters harder to detect due to their fainter luminosity function. High proper motions also improve detectability. After correcting for these selection effects, we find that old clusters are $2.97\pm0.11$ times more common at a Galactocentric radius of 13 kpc than in the solar neighbourhood -- despite positive detection biases in their favour, such as hotter orbits or a higher scale height. The larger fraction of older clusters in the outer Galaxy cannot be explained by an observational bias, and must be a physical property of the Milky Way: young outer-disc clusters are not forming in the outer Galaxy, or at least not with sufficient masses to be identified as clusters in $*Gaia*$ DR3. We predict that in this region, more old clusters than young ones remain to be discovered. The current presence of old, massive outer-disc clusters could be explained by radial heating and migration, or alternatively by a lower cluster destruction rate in the anticentre.

</div>

<div id="div_fig1">

<img src="tmp_2506.18708/./eh_r_z_with_ocs.png" alt="Fig12" width="100%"/>

**Figure 12. -** Fraction of simulated clusters recovered as a function of $R_\text{GC}$ and $Z$ divided into multiple different mass and ages ranges, and compared against the distribution of OCs in HR24 within those ranges. Each row shows clusters in a different mass range, indicated by the label on each subplot. The subplots in the left column show young clusters with $\log t < 8.5$, while subplots in the right column show old clusters with $\log t > 8.5$. Although proper motions only have a small impact on cluster detectability, we nevertheless only show the detection results of simulated clusters with proper motions $|\mu_{\alpha^*}| < 2.5$ and $|\mu_\delta| < 2.5$, providing a slightly more conservative estimate on cluster detectability at these locations. (*fig:r_z_overall_detections*)

</div>
<div id="div_fig2">

<img src="tmp_2506.18708/./shap_beeswarm.png" alt="Fig7.1" width="50%"/><img src="tmp_2506.18708/./shap_pmdec.png" alt="Fig7.2" width="50%"/>

**Figure 7. -**  SHAP feature importance values for the CST predictor. *Top:* Beeswarm plot where each cluster in the validation dataset is shown as a dot. Each row corresponds to the impact of a different input parameter. The colour coding corresponds to whether it was a high or low value of the parameter. The $x$ axis shows the final impact on the output of the model, which is how much the CST is changed for that given cluster and that given parameter value. For example: for cluster mass, low mass values (blue) correspond to a much lower SHAP/CST, whereas high mass values (red) correspond to a much higher SHAP/CST. On the other hand, most age values have minimal impact on CST, although high ages significantly reduce it. *Bottom:* SHAP value at a given \texttt{pmdec} as a function of \texttt{pmdec} and shown for all clusters in the validation dataset. Colour coding shows the Galactic longitude, $l$. (*fig:cst_shap*)

</div>
<div id="div_fig3">

<img src="tmp_2506.18708/./XY_RgcZ_threepanels_2025.png" alt="Fig8" width="100%"/>

**Figure 8. -**  Spatial distribution of high-certainty clusters (\texttt{CST}$>$4) from \citet{2023A&A...673A.114H}. Ages are taken from \citet{2024AJ....167...12C} as they are more accurate for old clusters (see Sect. \ref{sec:catalogues} for discussion).
    *Top left*: Histogram of cluster galactocentric radii divided into young ($\log t < 8.5$, blue) and old ($\log t > 8.5$, red) clusters and with a 200 pc bin width.
    *Bottom left*: Distribution of the same young and old clusters but in terms of altitude $Z$ and Galactocentric radius $R_{\mathrm{GC}}$, assuming $R_{\mathrm{GC},\odot}$=8.2 kpc.
    *Bottom right*: Projection of these clusters in heliocentric Galactic co-ordinates, with the Sun located at ($X$,$Y$)=(0,0). The dotted lines indicate Galactocentric radii from 10 to 18 kpc. The shaded area is the region investigated in this study ($140^{\circ} \leq \ell \leq 240^{\circ}$ starting 2 kpc from the Sun) . In both lower panels, the cross indicates the cluster Saurer 1, visible in _Gaia_ data but not recovered in the blind search of \citet{2023A&A...673A.114H}. (*fig:XY_RgcZ*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2506.18708"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

538  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

14  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

6  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
