# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Figures shipped with papers can be very large: allow up to 1e9 pixels
Image.MAX_IMAGE_PIXELS = 1_000_000_000

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised when the affiliation check of a paper fails."""

def validation(source: str) -> None:
    """Check the affiliations of a source before its TeX code is parsed

    Delegates to :func:`mpia.affiliation_verifications` and turns any
    result other than ``True`` into an exception.

    :param source: source handed over for verification
    :raises AffiliationError: if the verification does not return ``True``;
        the returned value is included in the error message
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # format rather than concatenate: a non-string result must not turn
        # the intended AffiliationError into a TypeError
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# Report every AffiliationWarning, including repeated ones
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str) -> str:
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g., ``2504.12079``)
    :returns: HTML snippet, an ``<img>`` wrapped in ``<div id="qrcode">``
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    target = f"https://arxiv.org/abs/{paper_id}"
    # The quotes must enclose the whole attribute value: quoting only the
    # arXiv link leaves `src` unquoted and puts literal '"' characters into
    # the data encoded by the QR code.
    img = f'<img src="{url}{target}">'
    return f'<div id="qrcode">{img}</div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Drop the LaTeX CJK environment markers, keeping the enclosed text

    Only the CJK environment with the (UTF8, gbsn) arguments is handled;
    more patterns may have to be added.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_environment = re.compile(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})")
    # group 2 is the content wrapped by the begin/end commands
    return cjk_environment.sub(r"\2", text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last one is reduced to its initial(s); hyphenated
    tokens keep their hyphen (``Anna-Beth`` -> ``A.-B.``). A part given in
    parentheses (often the name in a non western script) is moved to the
    end of the result.

    Degenerate inputs (empty or blank name, opening parenthesis without a
    closing one, stray hyphens) are tolerated instead of raising IndexError.

    :param name: full name
    :returns: initials followed by the last token, '' for an empty name
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.search(r"\((.*?)\)", name)
        # an unbalanced '(' is left in place rather than failing
        if found is not None:
            suffix = found.group(1)
            name = name.replace(f"({suffix})", '')
    tokens = name.split()
    initials = []
    for token in tokens[:-1]:
        # empty parts ("Anna-", "-") carry no initial and are skipped
        current = '-'.join(part[0] + '.' for part in token.split('-') if part)
        if current:
            initials.append(current)
    if tokens:
        initials.append(tokens[-1])
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff ("Mitarbeiter") list from the mpia.de webpage to get the author names.
We then get all new papers from arXiv and match their authors against this list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects names containing one of the listed family names (IT,
    administration, technical staff). The match is a plain substring test.

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    non_scientists = (
        'Licht', 'Binroth', 'Witzel', 'Jordan',
        'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
        'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
        'Dehen', 'Beckmann', 'Jager', 'Jäger',
    )
    return not any(family_name in name for family_name in non_scientists)

def add_author_to_list(author_list: list) -> list:
    """ Append the hard-coded extra authors that are not listed yet

    The list is modified in place and also returned.

    :param author_list: list of authors
    :returns: updated list of authors (same object as `author_list`)
    """
    extra_authors = ['T. Henning']
    missing = [name for name in extra_authors if name not in author_list]
    author_list.extend(missing)
    return author_list

# Author names come from the MPIA staff list (mpia.get_mpia_mitarbeiter_list()
# already does some filtering); the name is the second field of each entry.
# Non-scientists are dropped, then the authors whose MPIA name differs from
# the name they use on papers are added.
mpia_authors = add_author_to_list(
    [entry[1]
     for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# Papers to check, as returned by get_new_papers()
new_papers = get_new_papers()
# add manual references: identifiers listed here are fetched one by one and
# appended to the papers above (empty by default)
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    """ Apply `fn` to `value`, falling back to `value` itself on failure

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of `fn`, also the fallback result
    :returns: the result of `fn`, or `value` unchanged if `fn` raised
    """
    try:
        result = fn(value, *args, **kwargs)
    except Exception:
        # deliberate best effort: keep the input untouched
        return value
    else:
        return result

# Keep the papers for which at least one author matches the MPIA list
candidates = []
for paperk in new_papers:
    # Compare on initials; a name that cannot be converted is used as is
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # matched authors are the ones carrying a 'mark' in their highlighted form
    matches = []
    for hl, orig in zip(hl_authors, paperk['authors']):
        if 'mark' in hl:
            matches.append((hl, orig))
    # every paper gets the highlighted author list, selected or not
    paperk['authors'] = hl_authors
    if not matches:
        continue
    # only select paper if an author matched our list
    candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

K. El-Badry  ->  K. El-Badry  |  ['K. El-Badry']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J. Li  ->  J. Li  |  ['J. Li']
J. Li  ->  J. Li  |  ['J. Li']
S. Kraus  ->  S. Kraus  |  ['S. Kraus']
N. Hoyer  ->  N. Hoyer  |  ['N. Hoyer']
N. Neumayer  ->  N. Neumayer  |  ['N. Neumayer']
R. Andrassy  ->  R. Andrassy  |  ['R. Andrassy']
Arxiv has 58 new papers today
          7 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# documents: (paper_id, markdown text) of every paper processed successfully
# failed: (paper, reason) of every paper that was rejected or raised an error
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalize the identifier: lower case, no 'arxiv:' prefix, no surrounding
    # whitespace.
    # NOTE(review): r'\n' is a raw string, so it removes the two characters
    # backslash + 'n', not a newline -- confirm this is the intent.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']

    folder = f'tmp_{paper_id}'

    try:
        # Reuse an existing folder; only retrieve the source when it is missing
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')

        try:
            doc = latex.LatexDocument(folder, validation=validation)
        except AffiliationError as affilerror:
            # Affiliation check failed: record the reason and skip the paper.
            # NOTE(review): `msg` is not used in the visible code.
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue

        # Hack because sometimes author parsing does not work well:
        # when the number of parsed authors differs from the arXiv record,
        # use the (already highlighted) arXiv author list instead
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # Fall back to the arXiv abstract when none was parsed from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']

        # Comment line: arXiv badge, appearance date, then the arXiv comments
        # (if any) in italics
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"

        full_md = doc.generate_markdown_text()

        full_md += get_markdown_qrcode(paper_id)

        # replace citations
        # (best effort: on failure the problem is printed and the text is
        # kept without replaced citations)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)

        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure: warn, record the paper as failed and move on to
        # the next candidate
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/7 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2504.11528
extracting tarball to tmp_2504.11528... done.
Retrieving document from  https://arxiv.org/e-print/2504.11546


extracting tarball to tmp_2504.11546...

 done.
Retrieving document from  https://arxiv.org/e-print/2504.11945



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2504.11945...

 done.
Retrieving document from  https://arxiv.org/e-print/2504.11954



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2504.11954...

 done.
  0: tmp_2504.11954/sample631.tex, 504 lines
  1: tmp_2504.11954/SB2_orbits-_ApJS_arxiv_submit_20250415/sample631.tex, 504 lines
Retrieving document from  https://arxiv.org/e-print/2504.12046



  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2504.12046...

 done.
Retrieving document from  https://arxiv.org/e-print/2504.12079


extracting tarball to tmp_2504.12079...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


Found 367 bibliographic references in tmp_2504.12079/main.bbl.
Issues with the citations
syntax error in line 1747: '=' expected
Retrieving document from  https://arxiv.org/e-print/2504.12213


extracting tarball to tmp_2504.12213...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# Make sure the log directory exists before writing into it
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# Titles, author names and abstracts contain non-ASCII characters: write the
# log as UTF-8 instead of relying on the locale's default encoding
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    # same heading as the one displayed in the notebook
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a document was generated
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that identical reasons end up under a single heading
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, all other errors in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')

        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2504.12079-b31b1b.svg)](https://arxiv.org/abs/2504.12079) | **Massive Star Clusters in the Semi-Analytical Galaxy Formation Model L-Galaxies 2020**  |
|| <mark>N. Hoyer</mark>, et al. -- incl., <mark>N. Neumayer</mark> |
|*Appeared on*| *2025-04-17*|
|*Comments*| *Resubmitted to A&A. 24 pages, eight figures, two tables. Abstract abriged to meet arXiv requirements*|
|**Abstract**|            It is established that there exists a direct link between the formation history of star cluster populations and their host galaxies, however, our lacking understanding of star cluster assembly prohibits us to make full use of their ability to trace galaxy evolution. In this work we introduce a new variation of the 2020 version of the semi-analytical galaxy formation model "L-Galaxies" that includes the formation of star clusters above 10^4 M_Sun and probes different physical assumptions that affect their evolution over cosmic time. We use properties of different galaxy components and localised star formation to determine the bound fraction of star formation in disks. After randomly sampling masses from an environmentally-dependent star cluster initial mass function, we assign to each object a half-mass radius, metallicity, and distance from the galaxy centre. We consider up to 2000 individual star clusters per galaxy and evolve their properties over time taking into account stellar evolution, two-body relaxation, tidal shocks, dynamical friction, and a re-positioning during galaxy mergers. Our simulation successfully reproduces several observational quantities, such as the empirical relationship between the absolute V -band magnitude of the brightest young star clusters and the host galaxy star formation rate, the mass function of young star clusters, or mean metallicities of the star cluster distributions versus galaxy masses. The simulation reveals great complexity in the z = 0 star cluster population resulting from differential destruction channels and origins, including in-situ populations in the disk, a major merger-induced heated component in the halo, and accreted star clusters. Model variations point out the importance of e.g. the shape of the star cluster initial mass function or the relationship between the sound speed of cold gas and the star formation rate.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2504.11546-b31b1b.svg)](https://arxiv.org/abs/2504.11546) | **Constraining Lens Masses in Moderately to Highly Magnified Microlensing Events from Gaia**  |
|| U. Pylypenko, et al. |
|*Appeared on*| *2025-04-17*|
|*Comments*| *Submitted to A&A; 18 pages, 12 figures*|
|**Abstract**|            Microlensing events provide a unique way to detect and measure the masses of isolated, non-luminous objects, particularly dark stellar remnants. Under certain conditions, it is possible to measure the mass of these objects using photometry alone, specifically when a microlensing light curve displays a finite-source (FS) effect. This effect generally occurs in highly magnified light curves, i.e. when the source and the lens are very well aligned. In this study, we analyse Gaia Alerts and Gaia Data Release 3 datasets, identifying four moderate-to-high-magnification microlensing events without a discernible FS effect. The absence of this effect suggests a large Einstein radius, implying substantial lens masses. In each event, we constrain the FS effect and therefore establish lower limits for angular Einstein radius and lens mass. Additionally, we use the DarkLensCode software to obtain mass, distance, and brightness distribution for the lens based on the Galactic model. Our analysis established lower mass limits of $\sim 0.2$ $M_{\odot}$ for one lens and $\sim 0.3-0.5$ $M_{\odot}$ for two others. DarkLensCode analysis supports these findings, estimating lens masses in the range of$\sim 0.42-1.66$ $M_{\odot}$ and dark lens probabilities exceeding 60\%. These results strongly indicate that the lenses are stellar remnants, such as white dwarfs or neutron stars. While further investigations are required to confirm the nature of these lenses, we demonstrate a straightforward yet effective approach to identifying stellar remnant candidates.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: '69117' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2504.12046-b31b1b.svg)](https://arxiv.org/abs/2504.12046) | **The Dynamic Inner Disk of a Planet Forming Star**  |
|| B. R. Setterholm, et al. -- incl., <mark>S. Kraus</mark> |
|*Appeared on*| *2025-04-17*|
|*Comments*| *25 pages, 16 figures*|
|**Abstract**|            Planets are a natural byproduct of the stellar formation process, resulting from local aggregations of material within the disks surrounding young stars. Whereas signatures of gas-giant planets at large orbital separations have been observed and successfully modeled within protoplanetary disks, the formation pathways of planets within their host star's future habitable zones remain poorly understood. Analyzing multiple nights of observations conducted over a short, two-month span with the MIRC-X and PIONIER instruments at the CHARA Array and VLTI, respectively, we uncover a highly active environment at the inner-edge of the planet formation region in the disk of HD 163296. In particular, we localize and track the motion of a disk feature near the dust-sublimation radius with a pattern speed of less than half the local Keplerian velocity, providing a potential glimpse at the planet formation process in action within the inner astronomical unit. We emphasize that this result is at the edge of what is currently possible with available optical interferometric techniques and behooves confirmation with a temporally dense followup observing campaign.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: '69117' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2504.12213-b31b1b.svg)](https://arxiv.org/abs/2504.12213) | **Magnetically driven outflows in 3D common-envelope evolution of massive stars**  |
|| M. Vetter, et al. -- incl., <mark>R. Andrassy</mark> |
|*Appeared on*| *2025-04-17*|
|*Comments*| *19 Pages, 13 Figures*|
|**Abstract**|            Recent three-dimensional magnetohydrodynamical simulations of the common-envelope interaction revealed the self-consistent formation of bipolar magnetically driven outflows launched from a toroidal structure resembling a circumbinary disk. So far, the dynamical impact of bipolar outflows on the common-envelope phase remains uncertain and we aim to quantify its importance. We illustrate the impact on common-envelope evolution by comparing two simulations -- one with magnetic fields and one without -- using the three-dimensional moving-mesh hydrodynamics code AREPO. We focus on the specific case of a $10 M_\odot$ red supergiant star with a $5 M_\odot$ black hole companion. By the end of the magnetohydrodynamic simulations (after $\sim 1220$ orbits of the core binary system), about $6.4 \%$ of the envelope mass is ejected via the bipolar outflow, contributing to angular momentum extraction from the disk structure and core binary. The resulting enhanced torques reduce the final orbital separation by about $24 \%$ compared to the hydrodynamical scenario, while the overall envelope ejection remains dominated by recombination-driven equatorial winds. We analyze field amplification and outflow launching mechanisms, confirming consistency with earlier studies: magnetic fields are amplified by shear flows, and outflows are launched by a magneto-centrifugal process, supported by local shocks and magnetic pressure gradients. These outflows originate from $\sim 1.1$ times the orbital separation. We conclude that the magnetically driven outflows and their role in the dynamical interaction are a universal aspect, and we further propose an adaptation of the $\alpha_\mathrm{CE}$-formalism by adjusting the final orbital energy with a factor of $1+ M_\mathrm{out}/\mu$, where $M_\mathrm{out}$ is the mass ejected through the outflows and $\mu$ the reduced mass of the core binary. (abridged)         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: '69117' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2504.11528-b31b1b.svg)](https://arxiv.org/abs/2504.11528) | **How to use Gaia parallaxes for stars with poor astrometric fits**  |
|| <mark>K. El-Badry</mark> |
|*Appeared on*| *2025-04-17*|
|*Comments*| *7 pages, 5 figures. Submitted to OJAp. Example usage at this https URL*|
|**Abstract**|            Gaia parallax measurements for stars with poor astrometric fits -- as evidenced by high renormalized unit weight error (RUWE) -- are often assumed to be unreliable, but the extent and nature of their biases remain poorly quantified. High RUWE is usually a consequence of binarity or higher-order multiplicity, so the parallaxes of sources with high RUWE are often of greatest astrophysical interest. Using realistic simulations of Gaia epoch astrometry, we show that the parallax uncertainties of sources with elevated RUWE are underestimated by a factor that ranges from 1 to 4 and can be robustly predicted from observables. We derive an empirical prescription to inflate reported uncertainties based on a simple analytic function of RUWE, apparent magnitude, and parallax. We validate the correction using (a) single-star solutions for Gaia sources with known orbital solutions and (b) wide binaries containing one component with elevated RUWE. The same uncertainty corrections are expected to perform well in DR4 and DR5. Our results demonstrate that Gaia parallaxes for high-RUWE sources can still yield robust distance estimates if uncertainties are appropriately inflated, enabling distance constraints for triples, binaries with periods too long or too short to be fit astrometrically, and sources blended with neighboring sources.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2504.11945-b31b1b.svg)](https://arxiv.org/abs/2504.11945) | **Jets from a stellar-mass black hole are as relativistic as those from supermassive black holes**  |
|| <mark>X. Zhang</mark>, et al. |
|*Appeared on*| *2025-04-17*|
|*Comments*| *40 pages, 16 figures, submitted version in December 2024*|
|**Abstract**|            Relativistic jets from supermassive black holes in active galactic nuclei are amongst the most powerful phenomena in the universe, acting to regulate the growth of massive galaxies. Similar jets from stellar-mass black holes offer a chance to study the same phenomena on accessible observation time scales. However, such comparative studies across black hole masses and time scales remain hampered by the long-standing perception that stellar-mass black hole jets are in a less relativistic regime. We used radio interferometry observations to monitor the Galactic black hole X-ray binary 4U 1543-47 and discovered two distinct, relativistic ejections launched during a single outburst. Our measurements reveal a likely Lorentz factor of $\sim$ 8 and a minimum of 4.6 at launch with 95% confidence, demonstrating that stellar-mass black holes in X-ray binaries can launch jets as relativistic as those seen in active galactic nuclei.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2504.11954-b31b1b.svg)](https://arxiv.org/abs/2504.11954) | **Orbital Parameters of 665 Double-lined Spectroscopic Binaries in the LAMOST Medium-Resolution Survey**  |
|| S. Guo, et al. -- incl., <mark>J. Li</mark>, <mark>J. Li</mark> |
|*Appeared on*| *2025-04-17*|
|*Comments*| *Accepted for publication in ApJSS, 18 pages, 13 figures*|
|**Abstract**|            The period, mass ratio, eccentricity, and other orbital parameters are fundamental for investigating binary star evolution. However, the number of binaries with known orbital parameters remains limited. Utilizing the LAMOST-MRS survey, we derived orbital solutions for 665 SB2 binaries by fitting the radial velocities of 1119 SB2 systems with at least six observations, employing a modified version of Thejoker optimized for SB2 binaries. To ensure the reliability of the results, four selection criteria were applied: reduced chi-square, normalized mean absolute error, maximum phase gap, and RV distribution metric. After applying these criteria, 665 reliable orbits were retained. Comparison with Kepler, TESS, and ZTF light curve data shows excellent agreement, with discrepancies in some cases attributed to shorter pulsation periods observed in light curves. Additionally, good consistency is found between our periods and those of SB1 systems in Gaia data. These orbital solutions contribute to understanding binary star evolution and the statistical properties of binary populations.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str) -> None:
    """Export MD document and associated relevant images

    Figures referenced in `md` (markdown links whose label starts with
    ``Fig`` and ``<img src="..."/>`` tags) are copied below `directory`,
    keeping their relative path. Online figures and missing files are
    skipped. The markdown text itself is written to `directory`/`md_fname`.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to write
    :param directory: destination directory, created if it does not exist
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if os.path.exists(directory) and not os.path.isdir(directory):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if not os.path.exists(directory):
        print(f"creating directory {directory:s}")
        # makedirs: a nested destination must work when its parents are missing
        os.makedirs(directory)

    fig_fnames = (re.findall(r'\[Fig.*\]\((.*)\)', md) +
                  re.findall(r'\<img src="([^>\s]*)"[^>]*/>', md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # keep the figure's relative path below the destination directory
        destdir = os.path.join(directory, os.path.dirname(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, os.path.join(destdir, os.path.basename(fname)))

    md_path = os.path.join(directory, md_fname)
    # the text contains non-ASCII characters: do not depend on the locale
    with open(md_path, 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", md_path)
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# One markdown file per successfully processed paper, named after its identifier
for paper_id, md in documents:
    export_markdown_summary(md, md_fname=f"{paper_id:s}.md", directory='_build/html/')

found figures ['tmp_2504.12079/./rings_bound_fraction_contours_single.png', 'tmp_2504.12079/./rings_frequencies_surface_density_toomre_morphology.png', 'tmp_2504.12079/./cluster_mv_sfr_total.png']
copying  tmp_2504.12079/./rings_bound_fraction_contours_single.png to _build/html/
copying  tmp_2504.12079/./rings_frequencies_surface_density_toomre_morphology.png to _build/html/
copying  tmp_2504.12079/./cluster_mv_sfr_total.png to _build/html/
exported in  _build/html/2504.12079.md
    + _build/html/tmp_2504.12079/./rings_bound_fraction_contours_single.png
    + _build/html/tmp_2504.12079/./rings_frequencies_surface_density_toomre_morphology.png
    + _build/html/tmp_2504.12079/./cluster_mv_sfr_total.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render every generated document; a plain loop avoids building a throwaway
# list of None values only for the side effect of displaying
for paper_id, md in documents:
    display(Markdown(md))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\lgalaxies}{\textit{L-Galaxies}}$
$\newcommand{\emosaics}{\textit{E-MOSAICS}}$
$\newcommand{\cfe}{\texttt{cfe}}$
$\newcommand{\planck}{\textit{Planck}}$
$\newcommand{\mwa}{MWA}$
$\newcommand{\feoh}{[\mathrm{Fe} / \mathrm{H}]}$
$\newcommand{\typezero}{type \texttt{0}}$
$\newcommand{\typeone}{type \texttt{1}}$
$\newcommand{\typetwo}{type \texttt{2}}$
$\newcommand{\rob}[1]{\color{darkgreen}#1 \color{black}}$
$\newcommand{\robc}[1]{\color{darkgreen}\textbf{[#1] }\color{black}}$
$\newcommand{\robs}[1]{\color{darkgreen}\sout{#1} \color{black}}$</div>



<div id="title">

# Massive Star Clusters in the Semi-Analytical Galaxy Formation Model ${\lgalaxies}$${ }$2020

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2504.12079-b31b1b.svg)](https://arxiv.org/abs/2504.12079)<mark>Appeared on: 2025-04-17</mark> -  _Resubmitted to A&A. 24 pages, eight figures, two tables. Abstract abriged to meet arXiv requirements_

</div>
<div id="authors">

<mark>N. Hoyer</mark>, et al. -- incl., <mark>N. Neumayer</mark>

</div>
<div id="abstract">

**Abstract:** It is established that there exists a direct link between the formation history of star cluster populations and their host galaxies, however, our lacking understanding of star cluster assembly prohibits us to make full use of their ability to trace galaxy evolution. In this work we introduce a new variation of the $\num{2020}$ version of the semi-analytical galaxy formation model " ${\lgalaxies}$ " that includes the formation of star clusters above $10^{4}   \textrm{M}_{\odot}$ and probes different physical assumptions that affect their evolution over cosmic time. We use properties of different galaxy components and localised star formation to determine the bound fraction of star formation in disks. After randomly sampling masses from an environmentally-dependent star cluster initial mass function, we assign to each object a half-mass radius, metallicity, and distance from the galaxy centre. We consider up to $2000$ individual star clusters per galaxy and evolve their properties over time taking into account stellar evolution, two-body relaxation, tidal shocks, dynamical friction, and a re-positioning during galaxy mergers. Our simulation successfully reproduces several observational quantities, such as the empirical relationship between the absolute $V$ -band magnitude of the brightest young star clusters and the host galaxy star formation rate, the mass function of young star clusters, or mean metallicities of the star cluster distributions versus galaxy masses. The simulation reveals great complexity in the $z=0$ star cluster population resulting from differential destruction channels and origins, including _in-situ_ populations in the disk, a major merger-induced heated component in the halo, and accreted star clusters. Model variations point out the importance of e.g. the shape of the star cluster initial mass function or the relationship between the sound speed of cold gas and the star formation rate. 
Our new model provides new avenues to trace individual star clusters and test cluster-related physics within a cosmological set-up in a computationally efficient manner.

</div>

<div id="div_fig1">

<img src="tmp_2504.12079/./rings_bound_fraction_contours_single.png" alt="Fig1" width="100%"/>

**Figure 1. -** 
        Bound fraction of star formation, evaluated for $Q_{\mathrm{eff}} = 0.5$, as a function of epicyclic frequency and cold gas surface density.
        Blue solid and dashed black contours give the smoothed distribution (with standard deviation of $1   \textrm{dex}$) of all annuli of all galaxies with $Q_{\mathrm{eff}} < 2$ after running {$\lgalaxies$} tree-files 0-9 and 40-79 on the Millennium and Millennium-II simulations, respectively.
        Contour lines are smoothed with a Gaussian kernel with standard deviation of $0.5   \textrm{dex}$.
        The location of the solar neighbourhood (see \Cref{fig:annuli_frequencies_surface_density_toomre} for details) is marked with a white cross.
     (*fig:cluster_bound_fraction*)

</div>
<div id="div_fig2">

<img src="tmp_2504.12079/./rings_frequencies_surface_density_toomre_morphology.png" alt="Fig4" width="100%"/>

**Figure 4. -** 
        Epicyclic frequency, cold gas surface mass density, and the Toomre stability parameter as a function of galactocentric distance for disk- (blue) and bulge-dominated (red) galaxies, defined as having a bulge-to-total stellar mass ratio of $B/T < 0.2$ and $B/T \geq 0.9$, respectively.
        We add for comparison the value of the solar neighbourhood: we calculate $\kappa_{\mathrm{D} ,  \odot} \approx 0.046   \textrm{Myr}^{-1}$, as derived from the Oort constants $A = 15.6   \textrm{km}   \textrm{s}^{-1}   \textrm{kpc}^{-1}$ and $B = -15.8   \textrm{km}   \textrm{s}^{-1}   \textrm{kpc}^{-1}$ taken from \citet{guo2023a}; $\Sigma_{\mathrm{g} ,  \odot} \approx 13   \textrm{M}_{\odot}   \textrm{pc}^{-2}$ from \citet{flynn2006b}; and $Q_{\mathrm{eff} ,  \odot} \approx 1.7$\citep[][with $Q_{\mathrm{s} ,  \odot} \approx 2.7$ and $Q_{\mathrm{g} ,  \odot} \approx 1.5$]{binney2008a}, a typical value for disks \citep[e.g.][]{rafikov2001b,leroy2008a,feng2014a,westfall2014a}.
     (*fig:annuli_frequencies_surface_density_toomre*)

</div>
<div id="div_fig3">

<img src="tmp_2504.12079/./cluster_mv_sfr_total.png" alt="Fig5" width="100%"/>

**Figure 5. -** 
        Absolute $V$-band magnitude of the youngest and most massive star cluster versus the galaxy-averaged star formation rate.
        The galaxy sample is limited to disk-dominated galaxies that have a bulge-to-total stellar mass ratio of $B/T < 0.2$.
        We compare our results to various observations of nearby disk-dominated galaxies (see main text for details).
        For both the simulated data and the observations, we set an age cut of $\tau_{\mathrm{c}} \leq 0.3   \textrm{Gyr}$ on the star clusters.
        _Panel A:_ Full observational and simulated data samples.
        For the simulated data, we show the 1-, 2-, and 3-$\sigma$ intervals.
        _Panel B:_ Same as in the first panel but colour-coding all data point by the host galaxy's stellar mass.
        If no stellar mass estimate is available for observational data points, we show them with gray symbols.
        _Panel C:_ Same as the central panel but colour-coding the data points by the cluster formation efficiency, which is a combination of the bound fraction of star formation and the "cruel cradle effect" \citep{kruijssen2012b,kruijssen2012d} that takes the interaction of a proto-star cluster with its natal environment and nearby giant molecular clouds into account.
        Note that the two outliers, NGC{ }1705 and NGC{ }5238, are starburst galaxies and that their massive star clusters were previously classified as nuclear star clusters \citep{pechetti2020a,hoyer2021a}.
        Nuclear star clusters often exhibit complex formation histories \citep[e.g.][]{spengler2017a,kacharov2018a,fahrion2021a} and cannot easily be compared to our simulated star clusters.
     (*fig:cluster_mv_sfr*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2504.12079"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# List the generated markdown pages that were modified within the last `days` days.
files = glob('_build/html/*.md')
days = 7
# Timezone-aware reference time, taken once so every file is measured against
# the same instant and never mixed with a naive local timestamp.
now = datetime.now(timezone.utc)
res = []
for fk in files:
    # st_mtime is the last content modification; st_ctime is the metadata
    # change time on Unix and the creation time on Windows.
    stat_result = os.stat(fk).st_mtime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= timedelta(days=days):
        # total_seconds() keeps the days part; `.seconds` alone wraps at 24h
        res.append((delta.total_seconds(), fk))
# Keep only the file names, ordered by name in descending order.
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

444  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    Dates are compared as strings against ``str(datetime.date)``, so they
    are expected to be formatted as ``YYYY-MM-DD``.

    :param lst: iterable of ``(filename, date)`` pairs
    :param days: size of the window, counted back from today (the cutoff
                 day itself is included)
    :returns: generator of filenames, most recent date first
    """
    # The cutoff is the same for every item: build it once, outside the loop.
    cutoff = str(datetime.date.today() - datetime.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date < cutoff:
            # sorted in descending order: everything that follows is older
            break
        yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date of each document

    Only the first line containing ``Appeared on`` is used. Files without
    such a line are left out of the result.

    :param lst_file: iterable of paths to the files to scan
    :returns: list of ``(filename, date)`` tuples, in input order
    """

    def get_date(line):
        # keep the text between 'Appeared on:' and a closing '</mark>' (if any)
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Scan the files given as argument, not a notebook-level variable.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # first match wins: no need to read the rest of the file
                    break
    return dates

from glob import glob
# Select the generated pages whose "Appeared on" date lies in the last `days` days.
days = 7
lst = glob('_build/html/*md')
dates = extract_appearance_dates(lst)
res = [*get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

12  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` slides

    :param npub: number of slides; cells get the ids ``slide1`` to ``slide<npub>``
    :returns: HTML text, one element per line
    """
    opening = (
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    )
    # one placeholder cell per publication, numbered from 1
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join([*opening, *cells, "  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides

    :param npub: number of items; cells get the ids ``slide1`` to ``slide<npub>``
    :returns: HTML text, one element per line
    """
    # one placeholder item per publication, numbered from 1
    items = [
        f"""    <div class="grid-item"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join(["""  <div class="grid"> """, *items, "  </div>"])

In [13]:
# 7-day archive page: one carousel slide per document selected in `res`.
carousel = create_carousel(npub)
# quoted, comma-separated file names (directory part removed) and slide ids
docs = ', '.join('"' + k.rsplit('/', 1)[-1] + '"' for k in res)
slides = ', '.join('"slide{}"'.format(k) for k in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill the placeholders of the template with the generated fragments.
page = (page.replace("{%-- carousel:s --%}", carousel)
            .replace("{%-- suptitle:s --%}", "7-day archives")
            .replace("{%-- docs:s --%}", docs)
            .replace("{%-- slides:s --%}", slides))

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Daily page: same procedure as the 7-day archive, restricted to a 1-day window.
days = 1
res = [*get_last_n_days(dates, days)]
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
# quoted, comma-separated file names (directory part removed) and slide ids
docs = ', '.join('"' + k.rsplit('/', 1)[-1] + '"' for k in res)
slides = ', '.join('"slide{}"'.format(k) for k in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill the placeholders of the template with the generated fragments.
page = (page.replace("{%-- carousel:s --%}", carousel)
            .replace("{%-- suptitle:s --%}", "Daily")
            .replace("{%-- docs:s --%}", docs)
            .replace("{%-- slides:s --%}", slides))

# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

5  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6  # maximum number of papers shown in the grid
# Most recent appearance dates first; the cut uses `npub` so that changing the
# number above is enough to resize the selection.
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), npub)]
# When fewer dated documents exist than requested, shrink the grid so the
# number of slides equals the number of documents (as in the carousel pages).
npub = len(res)
print(f"{npub} publications selected.")

grid = create_grid(npub)
# quoted, comma-separated file names (directory part removed) and slide ids
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    # Fill the placeholders of the template with the generated fragments.
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
