# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big: raise Pillow's pixel-count limit so that
# very large figures can still be opened.
# NOTE(review): this assumes the downloaded sources are trusted -- confirm.
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category reserved for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised when the affiliation check of a paper does not pass."""

def validation(source: str):
    """Check the affiliations of a paper before its TeX code is parsed

    The source is passed unchanged to :func:`mpia.affiliation_verifications`;
    anything other than ``True`` coming back is treated as a failed check.

    :param source: source handed over by the caller of this check
    :raises AffiliationError: when the verification does not return ``True``;
                              the returned value is appended to the message
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # `check` is only known to be "not True": format it instead of
        # concatenating, so that a non-string value (e.g. False or None)
        # cannot turn the affiliation failure into a TypeError.
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# Always report AffiliationWarning, even when the same warning was already issued
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str) -> str:
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g., ``2505.07491``)
    :returns: HTML snippet usable in markdown: a ``<div id="qrcode">``
              wrapping the QR-code image
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    target = f"https://arxiv.org/abs/{paper_id}"
    # Quote the whole `src` value. Quoting only the target (as in
    # src=...&data="https://...") leaves the attribute unquoted and makes the
    # quote characters part of the data encoded in the QR code.
    txt = f'<img src="{url}{target}">'
    return '<div id="qrcode">' + txt + '</div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Strip the LaTeX CJK environment wrappers from a string

    Only the ``CJK`` environment with the ``{UTF8}{gbsn}`` arguments is
    recognised so far; the text it encloses is kept. More patterns may
    have to be added.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_environment = r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})"
    # group 2 is the content between the begin/end commands
    return re.sub(cjk_environment, r"\2", text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last is reduced to its initial(s); hyphenated
    tokens keep their hyphen (``Anna-Bella`` -> ``A.-B.``). The last token is
    kept in full. A parenthesised part of the name (often a non western
    spelling) is moved to the end of the result.

    Malformed names do not raise: an empty name gives an empty string, an
    unbalanced ``(`` is left as a regular token and empty hyphen parts
    (e.g., ``Jean-``) are ignored.

    :param name: full name
    :returns: initials
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.findall(r"\((.*?)\)", name)
        # an opening parenthesis without its closing one gives no match
        if found:
            suffix = found[0]
            name = name.replace(f"({suffix})", '')
    split = name.split()
    if not split:
        # nothing left but (possibly) the parenthesised part
        return f"({suffix})" if suffix else ''
    initials = []
    for token in split[:-1]:
        # drop empty parts produced by leading/trailing/double hyphens
        parts = [k for k in token.split('-') if k]
        if not parts:
            continue
        initials.append('-'.join(k[0] + '.' for k in parts))
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff (Mitarbeiter) list from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Names of IT, administration and technical staff are listed explicitly;
    a name is rejected as soon as it contains one of the listed entries
    (case-sensitive substring match).

    :param name: name
    :returns: False if name is not a scientist
    """
    excluded = ('Licht', 'Binroth', 'Witzel', 'Jordan',
                'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                'Dehen', 'Beckmann', 'Jager', 'Jäger')
    return not any(entry in name for entry in excluded)

def add_author_to_list(author_list: list) -> list:
    """ Make sure a fixed set of extra authors is part of the list

    The given list is updated in place and is also the returned object.

    :param author_list: list of authors
    :returns: updated list of authors
    """
    extra_authors = ('T. Henning',)

    for extra in extra_authors:
        if extra in author_list:
            # already known, nothing to add
            continue
        author_list.append(extra)
    return author_list

# Author names used for the matching:
#  * taken from the MPIA website (mpia.get_mpia_mitarbeiter_list() does some filtering),
#  * without the non-scientists,
#  * plus some authors missing because their MPIA name and their name on papers differ.
mpia_authors = add_author_to_list(
    [entry[1]
     for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# new papers announced by arXiv
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend(list(map(get_paper_from_identifier, add_paper_refs)))

def robust_call(fn, value, *args, **kwargs):
    """ Apply `fn` to `value`, falling back on `value` itself on failure

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of the call, also the fallback result
    :returns: the result of the call, or `value` unchanged if the call
              raised an ``Exception``
    """
    try:
        result = fn(value, *args, **kwargs)
    except Exception:
        # best effort: keep the input as is when it cannot be processed
        return value
    else:
        return result

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # Highlighted names are wrapped in a <mark> tag: look for the tag itself,
    # a bare 'mark' substring would also select names containing these letters.
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if '<mark' in hl]
    # the paper keeps the normalized (and highlighted) author names from here on
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

J. Li  ->  J. Li  |  ['J. Li']
J. Li  ->  J. Li  |  ['J. Li']
A. d. Graaff  ->  A. D. Graaff  |  ['A. D. Graaff']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']


N. Hoyer  ->  N. Hoyer  |  ['N. Hoyer']
N. Neumayer  ->  N. Neumayer  |  ['N. Neumayer']
S. Jiao  ->  S. Jiao  |  ['S. Jiao']
S. Jiao  ->  S. Jiao  |  ['S. Jiao']
Arxiv has 74 new papers today
          7 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Process every candidate paper.
#   documents: (paper_id, markdown text) of the papers that went through
#   failed:    (paper, reason) of the papers that did not
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalize the identifier: lower case, no 'arxiv:' prefix, no literal
    # backslash-n sequence (raw string, not the newline character) and no
    # surrounding whitespace. The paper itself is updated.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    # the sources of a paper are kept in a folder of their own
    folder = f'tmp_{paper_id}'

    try:
        # only retrieve the sources when the folder is not already there
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` is handed over to the document; it raises
            # AffiliationError when the affiliation check does not pass
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is not used anywhere in this cell
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well:
        # when the number of parsed authors differs from the arXiv record,
        # the (already highlighted) arXiv author list is used instead
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # fall back on the arXiv abstract when none came out of the parsing
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # comment line: arXiv badge, appearance date, then the arXiv comments (if any)
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: on failure the text is kept as it is)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # any other failure: warn, record the paper as failed and go on with the next one
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/7 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2505.06340


extracting tarball to tmp_2505.06340...

 done.
Retrieving document from  https://arxiv.org/e-print/2505.06348


extracting tarball to tmp_2505.06348... done.
Retrieving document from  https://arxiv.org/e-print/2505.06349


extracting tarball to tmp_2505.06349...

 done.
Retrieving document from  https://arxiv.org/e-print/2505.07195


extracting tarball to tmp_2505.07195...

 done.
Retrieving document from  https://arxiv.org/e-print/2505.07491


extracting tarball to tmp_2505.07491...

 done.


N. Hoyer  ->  N. Hoyer  |  ['N. Hoyer']
N. Neumayer  ->  N. Neumayer  |  ['N. Neumayer']


Found 279 bibliographic references in tmp_2505.07491/aanda.bbl.
Retrieving document from  https://arxiv.org/e-print/2505.07763


extracting tarball to tmp_2505.07763...

 done.
Retrieving document from  https://arxiv.org/e-print/2505.07764


extracting tarball to tmp_2505.07764...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


### Export the logs

Throughout, we also keep track of the logs per paper. See `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# the log directory may not exist yet (e.g., on a fresh build tree)
os.makedirs(os.path.dirname(logfile), exist_ok=True)

with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    # same heading in the log file and in the notebook display
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a document was generated
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sorted by reason, so that identical reasons share one log section
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation mismatches are expected (green), anything else is a real error (red)
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        # a new section starts each time the reason changes
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2505.07491-b31b1b.svg)](https://arxiv.org/abs/2505.07491) | **Rapid formation of a very massive star >50000 $M_\odot$ and subsequently an IMBH from runaway collisions. Direct N-body and Monte Carlo simulations of dense star clusters**  |
|| M. C. Vergara, et al. -- incl., <mark>N. Hoyer</mark>, <mark>N. Neumayer</mark> |
|*Appeared on*| *2025-05-13*|
|*Comments*| *18 pages, 11 figures*|
|**Abstract**|            Context. We present simulations of a massive young star cluster using \textsc{Nbody6++GPU} and \textsc{MOCCA}. The cluster is initially more compact than previously published models, with one million stars, a total mass of $5.86 \times 10^5~\mathrm{M}_{\odot}$, and a half-mass radius of $0.1~\mathrm{pc}$. Aims. We analyse the formation and growth of a very massive star (VMS) through successive stellar collisions and investigate the subsequent formation of an intermediate-mass black hole (IMBH) in the core of a dense star cluster. Methods. We use both direct \textit{N}-body and Monte Carlo simulations, incorporating updated stellar evolution prescriptions (SSE/BSE) tailored to massive stars and VMSs. These include revised treatments of stellar radii, rejuvenation, and mass loss during collisions. While the prescriptions represent reasonable extrapolations into the VMS regime, the internal structure and thermal state of VMSs formed through stellar collisions remain uncertain, and future work may require further refinement. Results. We find that runaway stellar collisions in the cluster core produce a VMS exceeding $5 \times 10^4~\mathrm{M}_{\odot}$ within 5 Myr, which subsequently collapses into an IMBH. Conclusions. Our model suggests that dense stellar environments may enable the formation of very massive stars and massive black hole seeds through runaway stellar collisions. These results provide a potential pathway for early black hole growth in star clusters and offer theoretical context for interpreting recent JWST observations of young, compact clusters at high redshift.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2505.06349-b31b1b.svg)](https://arxiv.org/abs/2505.06349) | **When relics were made: vigorous stellar rotation and low dark matter content in the massive ultra-compact galaxy GS-9209 at z=4.66**  |
|| R. G. Pascalau, et al. -- incl., <mark>A. d. Graaff</mark> |
|*Appeared on*| *2025-05-13*|
|*Comments*| *45 pages, 44 figures, Submitted to MNRAS, Comments are welcome*|
|**Abstract**|            JWST observations uncovered a large number of massive quiescent galaxies (MQGs) at z>3, which theoretical models struggle to reproduce. Explaining the number density of such objects requires extremely high conversion efficiency of baryons into stars in early dark matter halos. Using stellar kinematics, we can investigate the processes shaping the mass assembly histories of MQGs. We present high-resolution JWST/NIRSpec integral field spectroscopy of GS-9209, a massive, compact quiescent galaxy at z=4.66 ($\log \left (M_{\ast}/M_{\odot} \right) = 10.52 \pm 0.06 $, $R_{eff} = 220 \pm 20$ pc). Full spectral fitting of the spatially resolved stellar continuum reveals a clear rotational pattern, yielding a spin parameter of $\lambda_{R_{eff}} = 0.65 \pm 0.12$. With its high degree of rotational support, this galaxy challenges the scenario of MQGs growing mainly by dry major mergers. This study suggests that at least a fraction of the earliest quiescent galaxies were fast rotators and that quenching was dynamically gentle process, preserving the stellar disc even in highly compact objects. Using Jeans anisotropic modelling (JAM) and a NFW profile, we measure a dark matter fraction of $f_{\rm DM} \left (< R_{eff} \right ) = 6.3^{+2.8}_{-1.7}%$, which is plausible given that this galaxy is extremely compact. Our findings use kinematics to independently confirm the massive nature of early quiescent galaxies, previously inferred from stellar population modelling. We suggest that GS-9209 has a similar structure to low-redshift 'relic' galaxies. However, unlike relic galaxies which have bottom-heavy initial mass functions (IMF), the dynamically inferred stellar mass-to-light ratio of GS-9209 is consistent with a Milky-Way like IMF. The kinematical properties of GS-9209 are different from those of z<1 early-type galaxies and more similar to those of recently quenched post-starburst galaxies at z>2.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: '69117' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2505.06340-b31b1b.svg)](https://arxiv.org/abs/2505.06340) | **The ALMA-CRISTAL survey: Gas, dust, and stars in star-forming galaxies when the Universe was ~1 Gyr old I. Survey overview and case studies**  |
|| R. Herrera-Camus, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2025-05-13*|
|*Comments*| *Accepted for publication in Astronomy & Astrophysics - 35 pages, 19 figures, 5 tables - Survey website: this http URL*|
|**Abstract**|            We present the ALMA-CRISTAL survey, an ALMA Cycle 8 Large Program designed to investigate the physical properties of star-forming galaxies at $4 \lesssim z \lesssim 6$ through spatially resolved, multi-wavelength observations. This survey targets 19 star-forming main-sequence galaxies selected from the ALPINE survey, using ALMA Band 7 observations to study [CII] 158 $\mu$m line emission and dust continuum, complemented by JWST/NIRCam and HST imaging to map stellar and UV emission. The CRISTAL sample expanded to 39 after including newly detected galaxies in the CRISTAL fields, archival data, and pilot study targets. The resulting dataset provides a detailed view of gas, dust, and stellar structures on kiloparsec scales at the end of the era of reionization. The survey reveals diverse morphologies and kinematics, including rotating disks, merging systems, [CII] emission tails from potential interactions, and clumpy star formation. Notably, the [CII] emission in many cases extends beyond the stellar light seen in HST and JWST imaging. Scientific highlights include CRISTAL-10, exhibiting an extreme [CII] deficit similar to Arp 220; and CRISTAL-13, where feedback from young star-forming clumps likely causes an offset between the stellar clumps and the peaks of [CII] emission. CRISTAL galaxies exhibit global [CII]/FIR ratios that decrease with increasing FIR luminosity, similar to trends seen in local galaxies but shifted to higher luminosities, likely due to their higher molecular gas content. CRISTAL galaxies also span a previously unexplored range of global FIR surface brightness at high-redshift, showing that high-redshift galaxies can have elevated [CII]/FIR ratios. These elevated ratios are likely influenced by factors such as lower metallicity gas, the presence of significant extraplanar gas, and contributions from shock-excited gas.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2505.06348-b31b1b.svg)](https://arxiv.org/abs/2505.06348) | **Surprising Variation of Gamma Rays from the Sun over the Solar Cycle Revealed with Fermi-LAT**  |
|| A. Acharyya, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2025-05-13*|
|*Comments*| *9 pages. Contact authors: Nicola Giglietto, Elena Orlando, Silvia Raino'*|
|**Abstract**|            The steady-state gamma-ray emission from the Sun is thought to consist of two emission components due to interactions with Galactic cosmic rays: (1) a hadronic component covering the solar disk, and (2) a leptonic component peaking at the solar edge and extending into the heliosphere. The flux of these components is expected to vary with the 11-year solar cycle, being highest during solar minimum and lowest during solar maximum, because it is correlated with the cosmic-ray flux. No study has yet analyzed the flux variation of the two components separately over solar cycles. In this work, we measure the temporal variations of the flux of each component over 15 years of Fermi Large Area Telescope observations and compare them with the sunspot number and Galactic cosmic-ray flux from AMS-02 near the Earth. We find that the flux variation of the disk anticorrelates with solar activity and correlates with cosmic-ray protons, confirming its emission mechanism. The flux variation of the extended component anticorrelates with solar activity only until mid 2012. After that, we no longer observe any correlation or anticorrelation, even with the CR electron flux. This most likely suggests that cosmic-ray transport and modulation in the inner heliosphere are unexpectedly complex and different for electrons and protons or, alternatively, the presence of an additional, unknown component of gamma rays or cosmic rays. These findings impact space weather research and emphasize the need for close monitoring of Cycle 25 and the ongoing polarity reversal.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2505.07195-b31b1b.svg)](https://arxiv.org/abs/2505.07195) | **Discovery of Main-sequence Radio Pulse emitters from widefield sky surveys**  |
|| B. Das, et al. -- incl., <mark>Y. Wang</mark> |
|*Appeared on*| *2025-05-13*|
|*Comments*| *Accepted for publication in PASA*|
|**Abstract**|            Magnetic AB stars are known to produce periodic radio pulses by the electron cyclotron maser emission (ECME) mechanism. Only 19 such stars, known as 'Main-sequence Radio Pulse emitters' (MRPs) are currently known. The majority of MRPs have been discovered through targeted observation campaigns that involve carefully selecting a sample of stars that are likely to produce ECME, and which can be detected by a given telescope within reasonable amount of time. These selection criteria inadvertently introduce bias in the resulting sample of MRPs, which affects subsequent investigation of the relation between ECME properties and stellar magnetospheric parameters. The alternative is to use all-sky surveys. Until now, MRP candidates obtained from surveys were identified based on their high circular polarisation ($\gtrsim 30\%$). In this paper, we introduce a complementary strategy, which does not require polarisation information. Using multi-epoch data from the Australian SKA Pathfinder (ASKAP) telescope, we identify four MRP candidates based on the variability in the total intensity light curves. Follow-up observations with the Australia Telescope Compact Array (ATCA) confirm three of them to be MRPs, thereby demonstrating the effectiveness of our strategy. With the expanded sample, we find that ECME is affected by temperature and the magnetic field strength, consistent with past results. There is, however, a degeneracy regarding how the two parameters govern the ECME luminosity for magnetic A and late-B stars (effective temperature $\lesssim 16$ kK). The current sample is also inadequate to investigate the role of stellar rotation, which has been shown to play a key role in driving incoherent radio emission.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2505.07763-b31b1b.svg)](https://arxiv.org/abs/2505.07763) | **Gravitationally Bound Gas Determines Star Formation in the Galaxy**  |
|| <mark>S. Jiao</mark>, et al. |
|*Appeared on*| *2025-05-13*|
|*Comments*| *23 pages, 17 figures. Submitted to A&A*|
|**Abstract**|            Stars form from molecular gas under complex conditions influenced by multiple competing physical mechanisms, such as gravity, turbulence, and magnetic fields. However, accurately identifying the fraction of gas actively involved in star formation remains challenging. Using dust continuum observations from the Herschel Space Observatory, we derived column density maps and their associated probability distribution functions (N-PDFs). Assuming the power-law component in the N-PDFs corresponds to gravitationally bound (and thus star-forming) gas, we analyzed a diverse sample of molecular clouds spanning a wide range of mass and turbulence conditions. This sample included 21 molecular clouds from the solar neighborhood ($d<$500 pc) and 16 high-mass star-forming molecular clouds. For these two groups, we employed the counts of young stellar objects (YSOs) and mid-/far-infrared luminosities as proxies for star formation rates (SFR), respectively. Both groups revealed a tight linear correlation between the mass of gravitationally bound gas and the SFR, suggesting a universally constant star formation efficiency in the gravitationally bound gas phase. The star-forming gas mass derived from threshold column densities ($N_{\mbox {threshold}}$) varies from cloud to cloud and is widely distributed over the range of $\sim$1--17$\times$10$^{21}$ cm$^{-2}$ based on N-PDF analysis. But in solar neighborhood clouds, it is in rough consistency with the traditional approach using $A_{\rm V}$ $\ge$ 8 mag. In contrast, in high turbulent regions (e.g., the Central Molecular Zone) where the classical approach fails, the gravitationally bound gas mass and SFR still follow the same correlation as other high-mass star-forming regions in the Milky Way. Our findings also strongly support the interpretation that gas in the power-law component of the N-PDF is undergoing self-gravitational collapse to form stars.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2505.07764-b31b1b.svg)](https://arxiv.org/abs/2505.07764) | **Why is the Star Formation Rate Proportional to Dense Gas Mass?**  |
|| <mark>S. Jiao</mark>, et al. |
|*Appeared on*| *2025-05-13*|
|*Comments*| *19 pages, 10 figures. Accepted for publication in ApJ*|
|**Abstract**|            One of the most profound empirical laws of star formation is the Gao-Solomon relation, a linear correlation between the star formation rate (SFR) and the dense molecular gas mass. It is puzzling how the complicated physics in star-formation results in this surprisingly simple proportionality. Using archival Herschel and Atacama Large Millimeter/submillimeter Array Observations, we derived the masses of the most massive cores ($M^{\rm max}_{\rm core}$) and masses of the gravitationally bound gas ($ M_{\rm gas}^{\rm bound}$) in the parent molecular clouds for a sample of low-mass and high-mass star-forming regions. We discovered a significant correlation $\log(M^{\rm max}_{\rm core}/M_{\odot}) = 0.506 \log(M_{\rm gas}^{\rm bound}/M_{\odot})-0.32$. Our discovered $M^{\rm max}_{\rm core}$-$M_{\rm gas}^{\rm bound}$ correlation can be approximately converted to the Gao-Solomon relation if there is (1) a constant 30% efficiency of converting $M^{\rm max}_{\rm core}$ to the mass of the most massive star ($m^{\rm max}_{\rm star}$), and (2) if SFR and $m^{\rm max}_{\rm star}$ are tightly related through $\log({\rm SFR}/(M_{\odot} {\rm yr}^{-1})) = 2.04 \log(m^{\rm max}_{\rm star}/M_{\odot})-5.80$. Intriguingly, both requirements have been suggested by previous theoretical studies (c.f. Yan et al. 2017). Based on this result, we hypothesize that the Gao-Solomon relation is a consequence of combining the following three non-trivial relations (i) SFR vs. $m^{\rm max}_{\rm star}$, (ii) $m^{\rm max}_{\rm star}$ vs. $M^{\rm max}_{\rm core}$, and (iii) $M^{\rm max}_{\rm core}$ vs. $M_{\rm gas}^{\rm bound}$. This finding may open a new possibility to understand the Gao-Solomon relation in an analytic sense.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    The figures referenced in the markdown text (``[Fig...](path)`` links and
    ``<img src="path" .../>`` tags) are copied below `directory`, keeping
    their relative path. Online figures (path containing ``http``) and files
    that do not exist are skipped. The markdown text is then written to
    `md_fname` inside `directory`.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to write
    :param directory: destination directory; created (parents included)
                      when it does not exist
    :raises RuntimeError: if `directory` exists and is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs (not mkdir): the parent directories may be missing too
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # keep the relative location of the figure below the destination
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    # every figure found in the text is listed, copied or not
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write one `<paper_id>.md` file per generated document into the build
# directory; the local figures referenced by the document are copied along.
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2505.07491/./plot_vms_stellar_type_nbody_mocca.png', 'tmp_2505.07491/./function_family_age3.jpg', 'tmp_2505.07491/./radius_tau_both.png']
copying  tmp_2505.07491/./plot_vms_stellar_type_nbody_mocca.png to _build/html/
copying  tmp_2505.07491/./function_family_age3.jpg to _build/html/
copying  tmp_2505.07491/./radius_tau_both.png to _build/html/
exported in  _build/html/2505.07491.md
    + _build/html/tmp_2505.07491/./plot_vms_stellar_type_nbody_mocca.png
    + _build/html/tmp_2505.07491/./function_family_age3.jpg
    + _build/html/tmp_2505.07491/./radius_tau_both.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
# render the markdown text of every generated document
for rendered in documents:
    display(Markdown(rendered[1]))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\nsc}{NSC}$
$\newcommand{\albrecht}[1]{\textcolor{black}{#1}}$
$\newcommand{\rainer}[1]{\textcolor{black}{#1}}$
$\newcommand{\mirek}[1]{\textcolor{black}{#1}}$
$\newcommand{\manuel}[1]{\textcolor{black}{#1}}$
$\newcommand{\nadine}[1]{\textcolor{black}{#1}}$
$\newcommand{\xiaoying}[1]{\textcolor{black}{#1}}$
$\newcommand{\ataru}[1]{\textcolor{black}{#1}}$
$\newcommand{\jarrod}[1]{\textcolor{black}{#1}}$
$\newcommand{\abbas}[1]{\textcolor{black}{#1}}$
$\newcommand{\peter}[1]{\textcolor{black}{#1}}$
$\newcommand{\marcelo}[1]{\textcolor{black}{#1}}$
$\newcommand{\dominik}[1]{\textcolor{black}{#1}}$
$\newcommand{\renyue}[1]{\textcolor{black}{#1}}$
$\newcommand{\nils}[1]{\textcolor{black}{#1}}$
$\newcommand{\arek}[1]{\textcolor{black}{#1}}$
$\newcommand{\thorsten}[1]{\textcolor{black}{#1}}$
$\newcommand{\francesco}[1]{\textcolor{black}{#1}}$
$\newcommand{\andres}[1]{\textcolor{black}{#1}}$
$\newcommand{\msol}{\mathrm{M}_{\odot}}$
$\newcommand{\rsol}{\mathrm{R}_{\odot}}$</div>



<div id="title">

# Rapid formation of a very massive star  $>\!\!50000 \mathrm{M}_{\odot}$ and subsequently an IMBH from runaway collisions: Direct $N$-body and Monte Carlo simulations of dense star clusters

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2505.07491-b31b1b.svg)](https://arxiv.org/abs/2505.07491)<mark>Appeared on: 2025-05-13</mark> -  _18 pages, 11 figures_

</div>
<div id="authors">

M. C. Vergara, et al. -- incl., <mark>N. Hoyer</mark>, <mark>N. Neumayer</mark>

</div>
<div id="abstract">

**Abstract:** We present simulations of a massive young star cluster using Nbody6++GPU and MOCCA . The cluster is initially more compact than previously published models, with one million stars, a total mass of $5.86 \times 10^5 \mathrm{M}_{\odot}$ , and a half-mass radius of $0.1 \mathrm{pc}$ . We analyse the formation and growth of a very massive star (VMS) through successive stellar collisions and investigate the subsequent formation of an intermediate-mass black hole (IMBH) in the core of a dense star cluster. We use both direct _N_ -body and Monte Carlo simulations, incorporating updated stellar evolution prescriptions (SSE/BSE) tailored to massive stars and VMSs. These include revised treatments of stellar radii, rejuvenation, and mass loss during collisions. While the prescriptions represent reasonable extrapolations into the VMS regime, the internal structure and thermal state of VMSs formed through stellar collisions remain uncertain, and future work may require further refinement. We find that runaway stellar collisions in the cluster core produce a VMS exceeding $5 \times 10^4 \mathrm{M}_{\odot}$ within 5 Myr, which subsequently collapses into an IMBH. Our model suggests that dense stellar environments may enable the formation of very massive stars and massive black hole seeds through runaway stellar collisions. These results provide a potential pathway for early black hole growth in star clusters and offer theoretical context for interpreting recent JWST observations of young, compact clusters at high redshift.

</div>

<div id="div_fig1">

<img src="tmp_2505.07491/./plot_vms_stellar_type_nbody_mocca.png" alt="Fig9" width="100%"/>

**Figure 9. -** 
Evolution of the VMS (and IMBH thereafter) over time.
Top panel: Mass of the VMS/IMBH, $M_{\mathrm{VMS IMBH}} [\mathrm{M}_{\odot}]$.
Middle panel: Stellar radius of the VMS/IMBH, $R_{\mathrm{VMS IMBH}} [\mathrm{R}_{\odot}]$(logarithmic scale).
Bottom panel: Effective age of the VMS during its main-sequence phase, $A_{\mathrm{VMS IMBH}} [\mathrm{Myr}]$.
 (*fig:VMS_M_Rsun_ageeffective.jpg*)

</div>
<div id="div_fig2">

<img src="tmp_2505.07491/./function_family_age3.jpg" alt="Fig6" width="100%"/>

**Figure 6. -** 
    Figure showing the family of functions for the original treatment $f_{\mathrm{original}}(q,y)$  and the family of functions with our updated treatment $f_{\mathrm{updated}}(q,y)$ for three distinct values of $y \in (-1,-2,-3)$ against the mass ratio $q$ of the MS star collision parnters.
   (*fig:function_family_age3.jpg*)

</div>
<div id="div_fig3">

<img src="tmp_2505.07491/./radius_tau_both.png" alt="Fig10" width="100%"/>

**Figure 10. -** Radii of massive stars (five masses as given in key) as a function of age in units of main sequence lifetime; right panel: results by standard SSE; left panel: results from our upgraded SSE. (*fig:radius*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2505.07491"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Count the markdown exports whose status-change time (st_ctime) lies within
# the last `days` days.
files = glob('_build/html/*.md')
days = 7
# Keep both sides of the subtraction timezone-aware and in UTC: mixing a
# naive local "now" with a naive UTC file time skews every age by the local
# UTC offset.
now = datetime.now(timezone.utc)
max_age = timedelta(days=days)
res = []
for fk in files:
    # NOTE(review): st_ctime is the metadata-change time on Unix (creation
    # time on Windows), not the content-modification time -- confirm that
    # this is the timestamp intended by "modified".
    modified = datetime.fromtimestamp(os.stat(fk).st_ctime, tz=timezone.utc)
    delta = now - modified
    if delta <= max_age:
        # total_seconds() is the full age; `.seconds` would only be the
        # sub-day remainder of the timedelta.
        res.append((delta.total_seconds(), fk))
# Keep only the paths, in reverse lexicographic order of the file name.
res = [k[1] for k in reversed(sorted(res, key=lambda x: x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

485  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(fname, date)`` pairs; ``date`` is compared as
                a string against ``str(date.today() - timedelta(days))``,
                so it is presumably an ISO ``YYYY-MM-DD`` string -- verify
                against the producer of the list.
    :param days: size of the look-back window in days
    :returns: generator over the ``fname`` of every entry whose date string
              is not older than the threshold, most recent date first
    """
    # Import locally so the function does not depend on whether the global
    # name `datetime` currently refers to the module or to the class
    # (both bindings are created by different cells of this notebook).
    from datetime import date, timedelta

    # The threshold does not change while iterating: compute it once.
    threshold = str(date.today() - timedelta(days=days))
    for fname, appeared in sorted(lst, key=lambda x: x[1], reverse=True):
        if appeared >= threshold:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date from each of the given files

    Each file is read line by line until the first line containing
    ``Appeared on``; the text between ``Appeared on:`` and ``</mark>`` on
    that line (stripped of surrounding whitespace) is recorded. Files
    without such a line contribute nothing to the result.

    :param lst_file: iterable of file paths to scan
    :returns: list of ``(fname, date_text)`` tuples in input order
    """
    def get_date(line):
        """ Return the text following 'Appeared on:' up to '</mark>' """
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument, not over a module-level list of the
    # notebook, so the function works for any list of files.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # only the first occurrence counts; skip the rest
                    break
    return dates

from glob import glob
# Select the publications whose "Appeared on" date lies in the last `days` days.
# Match on the `.md` extension, consistent with the file scan of the previous
# cell ('*md' would also accept any file name merely ending in "md").
lst = glob('_build/html/*.md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

10  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML snippet of a Flickity carousel holding `npub` cells

    Every cell wraps a ``div`` with id ``slide<k>`` (k = 1..npub) and class
    ``md_view`` containing the placeholder text ``Content <k>``.

    :param npub: number of carousel cells to generate
    :returns: the HTML code as a single newline-joined string
    """
    opening = (
        '  <div class="carousel" ',
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    )
    cell_template = '    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>'
    cells = [cell_template.format(k=index) for index in range(1, npub + 1)]
    return '\n'.join([*opening, *cells, "  </div>"])

def create_grid(npub=4):
    """ Build the HTML snippet of a flat grid holding `npub` items

    Every item wraps a ``div`` with id ``slide<k>`` (k = 1..npub) and class
    ``md_view`` containing the placeholder text ``Content <k>``.

    :param npub: number of grid items to generate
    :returns: the HTML code as a single newline-joined string
    """
    cell_template = '    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>'
    rows = ['  <div class="grid"> ']
    rows.extend(cell_template.format(k=number) for number in range(1, npub + 1))
    rows.append("  </div>")
    return '\n'.join(rows)

In [13]:
# Render the 7-day archive page: fill the placeholders of the daily template
# with the carousel markup, the quoted document names and the slide ids.
carousel = create_carousel(npub)
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
slides = ', '.join('"slide{}"'.format(index) for index in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Substitutions are applied one after the other, in this order.
for placeholder, value in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "7-day archives"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, value)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page generation, restricted to the publications of the last day.
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
slides = ', '.join('"slide{}"'.format(index) for index in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Substitutions are applied one after the other, in this order.
for placeholder, value in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "Daily"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, value)

# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

3  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
# Take the `npub` entries with the largest date strings, newest first.
# The limit comes from `npub` rather than a second hard-coded literal.
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), npub)]
print(len(res), f" {npub} publications selected.")

# Fewer than `npub` documents may be available: size the grid, the slide ids
# and the title on what was actually selected so that every slide has a
# matching document.
nshown = len(res)
grid = create_grid(nshown)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, nshown + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {nshown:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
