# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoded characters from a string
    List may need to grow.
    
    :param text: the text to clean
    :return: the cleaned text
    """
    text = re.sub(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})", r"\2", text)
    return text


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName
    :param name: full name
    :returns: initials
    """
    initials = []
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        suffix = re.findall(r"\((.*?)\)", name)[0]
        name = name.replace(f"({suffix})", '')
    else:
        suffix = ''
    split = name.split()
    for token in split[:-1]:
        if '-' in token:
            current = '-'.join([k[0] + '.' for k in token.split('-')])
        else:
            current = token[0] + '.'
        initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    try:
        return fn(value, *args, **kwargs)
    except Exception:
        return value

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J. Liu  ->  J. Liu  |  ['J. Liu']
S. Jiao  ->  S. Jiao  |  ['S. Jiao']
J. Liu  ->  J. Liu  |  ['J. Liu']
H. Beuther  ->  H. Beuther  |  ['H. Beuther']
G. Guiglion  ->  G. Guiglion  |  ['G. Guiglion']
Arxiv has 70 new papers today
          7 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/7 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2501.16555


extracting tarball to tmp_2501.16555... done.


Found 39 bibliographic references in tmp_2501.16555/euclid2pcf.bbl.
Retrieving document from  https://arxiv.org/e-print/2501.16585


extracting tarball to tmp_2501.16585... done.
Retrieving document from  https://arxiv.org/e-print/2501.16648


extracting tarball to tmp_2501.16648... done.
Retrieving document from  https://arxiv.org/e-print/2501.16682
Retrieving document from  https://arxiv.org/e-print/2501.16694


not a gzip file


extracting tarball to tmp_2501.16694... done.
Retrieving document from  https://arxiv.org/e-print/2501.16866


extracting tarball to tmp_2501.16866...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


Found 482 bibliographic references in tmp_2501.16866/beuther_kuiper_tafalla2025.bbl.
Issues with the citations
syntax error in line 88: '=' expected
Retrieving document from  https://arxiv.org/e-print/2501.17031


extracting tarball to tmp_2501.17031... done.


Issues with the citations
list index out of range


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.16555-b31b1b.svg)](https://arxiv.org/abs/2501.16555) | **Euclid preparation. 3-dimensional galaxy clustering in configuration space. Part I. 2-point correlation function estimation**  |
|| E. Collaboration, et al. -- incl., <mark>K. Jahnke</mark> |
|*Appeared on*| *2025-01-29*|
|*Comments*| *17 pages, 13 figures, submitted to A&A*|
|**Abstract**|            The 2-point correlation function of the galaxy spatial distribution is a major cosmological observable that enables constraints on the dynamics and geometry of the Universe. The Euclid mission aims at performing an extensive spectroscopic survey of approximately 20--30 million H$\alpha$-emitting galaxies up to about redshift two. This ambitious project seeks to elucidate the nature of dark energy by mapping the 3-dimensional clustering of galaxies over a significant portion of the sky. This paper presents the methodology and software developed for estimating the 3-dimensional 2-point correlation function within the Euclid Science Ground Segment. The software is designed to overcome the significant challenges posed by the large and complex Euclid data set, which involves millions of galaxies. Key challenges include efficient pair counting, managing computational resources, and ensuring the accuracy of the correlation function estimation. The software leverages advanced algorithms, including kd-tree, octree, and linked-list data partitioning strategies, to optimise the pair-counting process. The implementation also includes parallel processing capabilities using shared-memory open multi-processing to further enhance performance and reduce computation times. Extensive validation and performance testing of the software are presented. The results indicate that the software is robust and can reliably estimate the 2-point correlation function, which is essential for deriving cosmological parameters with high precision. Furthermore, the paper discusses the expected performance of the software during different stages of the Euclid Wide Survey observations and forecasts how the precision of the correlation function measurements will improve over the mission's timeline, highlighting the software's capability to handle large data sets efficiently.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.16866-b31b1b.svg)](https://arxiv.org/abs/2501.16866) | **Star formation from low to high mass: A comparative view**  |
|| <mark>H. Beuther</mark>, R. Kuiper, M. Tafalla |
|*Appeared on*| *2025-01-29*|
|*Comments*| *45 pages, 6 figures, accepted for Annual Reviews of Astronomy and Astrophysics*|
|**Abstract**|            Star formation has often been studied by separating the low- and high-mass regimes with an approximate boundary at 8M_sun. While some of the outcomes of the star-formation process are different between the two regimes, it is less clear whether the physical processes leading to these outcomes are that different at all. Here, we systematically compare low- and high-mass star formation by reviewing the most important processes and quantities from an observational and theoretical point of view. We identify three regimes where processes are either similar, quantitatively or qualitatively different between low- and high-mass star formation. Similar characteristics can be identified for the turbulent gas properties and density structures of the star-forming regions. Many of the observational characteristics also do not depend that strongly on the environment. Quantitative differences can be found for outflow, infall and accretion rates as well as mean column and volume densities. Also the multiplicity significantly rises from low- to high-mass stars. The importance of the magnetic field for the formation processes appears still less well constrained. Qualitative differences between low- and high-mass star formation relate mainly to the radiative and ionizing feedback that occurs almost exclusively in regions forming high-mass stars. Nevertheless, accretion apparently can continue via disk structures in ionized accretion flows. Finally, we discuss to what extent a unified picture of star formation over all masses is possible and which issues need to be addressed in the future.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.17031-b31b1b.svg)](https://arxiv.org/abs/2501.17031) | **The interplay between super-metallicity, lithium depletion, and radial migration in nearby stars**  |
|| M. L. L. Dantas, et al. -- incl., <mark>G. Guiglion</mark> |
|*Appeared on*| *2025-01-29*|
|*Comments*| *To appear in the proceedings of IAU Symposium 395 (3 pages)*|
|**Abstract**|            We report the discovery of a peculiar set of old super-metal-rich dwarf stars with orbits of low eccentricity that reach a maximum height from the Galactic plane between $\sim$ 0.5-1.5 kpc observed by the \emph{Gaia}-ESO Survey. These stars show lithium (Li) depletion, which is anti-correlated with their [Fe/H]. To investigate these stars' chemo-dynamical properties, we used data from the \emph{Gaia}-ESO Survey. We applied hierarchical clustering to group the stars based on their abundances (excluding Li). Orbits were integrated using \emph{Gaia} astrometry and radial velocities from \emph{Gaia}-ESO. Our analysis suggests that the high metallicity of these stars is incompatible with their formation in the solar neighbourhood. We also found that their Li envelope abundance is below the benchmark meteoritic value, in agreement with previous works. This result supports the idea that the Li abundance in old, super-metal-rich dwarf stars should not be considered a proxy for the local interstellar medium Li.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.16585-b31b1b.svg)](https://arxiv.org/abs/2501.16585) | **A central TDE candidate detected through spectroscopic continuum emission properties in a SDSS blue quasar**  |
|| <mark>X. Zhang</mark> |
|*Appeared on*| *2025-01-29*|
|*Comments*| *12 pages, 7 figures, Accepted to be published in ApJ*|
|**Abstract**|            In this manuscript, properties of spectroscopic continuum emissions are considered to detect potential tidal disruption event (TDE) candidates among SDSS quasars. After considering the simple blackbody photosphere model applied to describe quasar continuum emissions with parameters of blackbody temperature $T_{BB}$ and blackbody radius $R_{BB}$, SDSS quasars and reported optical TDEs occupy distinct regions in the space of $T_{BB}$ and $R_{BB}$. Then, through the dependence of $R_{BB}$ on $T_{BB}$ for SDSS quasars, 402 outliers in SDSS Stripe82 region can be collected. Among the 402 outliers, the SDSS J2308 at $z=1.16$ is mainly considered, due to its SDSS spectrum observed around the peak brightness of the light curves. With the 7.2-year-long light curves described by theoretical TDE model, the determined $T_{BB}$ and $R_{BB}$ through its spectroscopic continuum emissions are consistent with the TDE model determined values, to support the central TDE. Moreover, considering simulated results on continuum emissions of SDSS quasars around $z\sim1.16$, confidence level higher than 4$\sigma$ can be confirmed that the continuum emissions of SDSS J2308 are not related to normal quasars. Furthermore, accepted CAR process to simulate intrinsic AGN variability, the confidence level higher than $3\sigma$ can be confirmed that the long-term light curves of SDSS J2308 are related to a central TDE. Jointed the probabilities through both spectroscopic and photometric simulations, the confidence level higher than $5\sigma$ can be confirmed to support the central TDE in SDSS J2308.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.16648-b31b1b.svg)](https://arxiv.org/abs/2501.16648) | **Three Brown Dwarfs Masquerading as High-Redshift Galaxies in JWST Observations**  |
|| Z. Tu, S. Wang, X. Chen, <mark>J. Liu</mark> |
|*Appeared on*| *2025-01-29*|
|*Comments*| *17 pages, 8 figures, accepted for publication in ApJ*|
|**Abstract**|            We report the spectroscopic identification of three brown dwarf candidates -- o005_s41280, o006_s00089, and o006_s35616 -- discovered in the RUBIES using James Webb Space Telescope (JWST) Near-Infrared Spectrograph (NIRSpec) PRISM/CLEAR spectroscopy. We fit these sources with multiple substellar atmosphere models and present the atmospheric parameters, including effective temperature ($T_\mathrm{eff}$), surface gravity, and other derived properties. The results suggest that o005_s41280 and o006_s35616, with $T_\mathrm{eff}$ in the ranges of 2100--2300 K and 1800--2000 K, are likely L dwarfs, while o006_s00089, with $T_\mathrm{eff} < 1000$ K, is consistent with a late T dwarf classification. The best-fit model spectra provide a reasonable match to the observed spectra. However, distinct residuals exist in the $Y$, $J$, and $H$ bands for the two L dwarf candidates, particularly for o006_s35616. Incorporating the extinction parameter into the fitting process can significantly reduce these residuals. The distance estimates indicate that these candidates are about 2 kpc away. The analysis of the color-color diagram using multiple JWST NIRcam photometry suggests that cooler T dwarfs, such as o006_s00089, overlap with little red dots (LRDs), while hotter L dwarfs, like o005_s41280 and o006_s35616, tend to contaminate the high-redshift galaxy cluster. These findings suggest a brown dwarf contamination rate of approximately 0.1% in extragalactic deep field surveys, with L dwarfs being more frequently detected than cooler T and Y dwarfs.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.16694-b31b1b.svg)](https://arxiv.org/abs/2501.16694) | **Flaring Activities of Fast Rotating Stars have Solar-like Latitudinal Distribution**  |
|| H. Yang, et al. -- incl., <mark>J. Liu</mark> |
|*Appeared on*| *2025-01-29*|
|*Comments*| *13 pages,10 figures, accepted by A&A*|
|**Abstract**|            The dynamo theory has always been one of the biggest mysteries in stellar physics. One key reason for its uncertainty is poor knowledge of the dynamo process on stars except the Sun. The most important observation feature of solar dynamo is that active regions only appear at low latitudes, which provides a crucial constraint to the dynamo theory, while Doppler imaging, the current technique to spatially resolve stellar hemisphere, is difficult to distinguish the equatorial region . Hence, the latitudinal distribution of active regions (LDAR) of stars is ambiguous and controversial, mainly due to the limit of the current technique for spatially resolving the stellar surface. Fast rotating stars, which are young and active, are thought to operate with a different dynamo process than the Sun. We study their LDAR and compare them with the Sun to reveal the underlying dynamo process. Flares are drastic and observational activity events, which occur in active regions. Here, we propose a new method to study how the apparent flaring activity varies with respect to the inclination to determine the LDAR of fast rotating this http URL find that the LDAR of fast rotating stars is consistent with that of the Sun, contrary to expectations. Our results provide a crucial constraint to stellar dynamo, indicating that the solar-like dynamo also applies to fast rotating stars, even spanning different stages of their evolution.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.16682-b31b1b.svg)](https://arxiv.org/abs/2501.16682) | **ATOMS: ALMA Three-millimeter Observations of massive Star-forming regions -XX. Probability distribution function of integrated intensity for dense molecular gas tracers**  |
|| C.Zhang, et al. -- incl., <mark>S. Jiao</mark> |
|*Appeared on*| *2025-01-29*|
|*Comments*| **|
|**Abstract**|            We report the observations of J=1-0 of HCN, HCO+, H13CO+, and H13CN, HC3N (J=11-10) emission towards 135 massive star-forming clumps, as part of the ATOMS (ALMA Three-millimeter Observations of Massive Star-forming regions) Survey. We present the integrated intensity probability distribution function for these molecular tracers, modeled as a combination of a log-normal distribution and a power-law tail. The molecular line luminosities for the power-law tail segment, Lmol(p), have been calculated. We have investigated the correlation between the bolometric luminosity, Lbol, and the power-law part of the molecular line luminosity, Lmol(p). Our findings suggest that the scaling relationships between Lbol and Lmol(p) for HCN and HCO+ are sublinear, indicating that these molecules might not be the most effective tracers for the dense gas. In contrast, H13CN and HC3N exhibit a nearly linear relationship between Lbol and Lmol(p), indicating that they can well trace gravitationally bound dense gas. The ratios of Lbol-to-Lmol(p), serving as indicators of star formation efficiency within massive star-forming clumps, exhibit a weak anti-correlation with the power-law index in the I-PDF. In addition, the star formation efficiency is also weakly anti-correlated with the exponent U of the corresponding equivalent density distribution. Our results implie that clumps with substantial gas accumulation may still display low star formation efficiencies.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error not a gzip file</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2501.16555/./runtimes.png', 'tmp_2501.16555/./xi0.png', 'tmp_2501.16555/./xi_ic.png']
copying  tmp_2501.16555/./runtimes.png to _build/html/
copying  tmp_2501.16555/./xi0.png to _build/html/
copying  tmp_2501.16555/./xi_ic.png to _build/html/
exported in  _build/html/2501.16555.md
    + _build/html/tmp_2501.16555/./runtimes.png
    + _build/html/tmp_2501.16555/./xi0.png
    + _build/html/tmp_2501.16555/./xi_ic.png
found figures ['tmp_2501.16866/./images/fig4.png', 'tmp_2501.16866/./images/fig6.png', 'tmp_2501.16866/./images/fig2.png']
copying  tmp_2501.16866/./images/fig4.png to _build/html/
copying  tmp_2501.16866/./images/fig6.png to _build/html/
copying  tmp_2501.16866/./images/fig2.png to _build/html/
exported in  _build/html/2501.16866.md
    + _build/html/tmp_2501.16866/./images/fig4.png
    + _build/html/tmp_2501.16866/./images/fig6.png
    + _build/html/tmp_2501.16866/./images/fig2.png
found figures ['tmp_2501.17031/./figs/MMR_solar_zmax_eccentricity_kde_sun

## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\nc}{\newcommand}$
$\newcommand{\orcid}[1]$
$\newcommand$</div>



<div id="title">

# $\Euclid$ preparation. 3-dimensional galaxy clustering in configuration space Part \textrm{I}: 2-point correlation function estimation

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2501.16555-b31b1b.svg)](https://arxiv.org/abs/2501.16555)<mark>Appeared on: 2025-01-29</mark> -  _17 pages, 13 figures, submitted to A&A_

</div>
<div id="authors">

E. Collaboration, et al. -- incl., <mark>K. Jahnke</mark>

</div>
<div id="abstract">

**Abstract:** The 2-point correlation function of the galaxy spatial distribution is a major cosmological observable that enables constraints on the dynamics and geometry of the Universe. The $\Euclid$ mission aims at performing an extensive spectroscopic survey of approximately 20--30 million H $\alpha$ -emitting galaxies up to about redshift two. This ambitious project seeks to elucidate the nature of dark energy by mapping the 3-dimensional clustering of galaxies over a significant portion of the sky. This paper presents the methodology and software developed for estimating the 3-dimensional 2-point correlation function within the Euclid Science Ground Segment. The software is designed to overcome the significant challenges posed by the large and complex $\Euclid$ data set, which involves millions of galaxies. Key challenges include efficient pair counting, managing computational resources, and ensuring the accuracy of the correlation function estimation. The software leverages advanced algorithms, including kd-tree, octree, and linked-list data partitioning strategies, to optimise the pair-counting process. These methods are crucial for handling the massive volume of data efficiently. The implementation also includes parallel processing capabilities using shared-memory open multi-processing to further enhance performance and reduce computation times. Extensive validation and performance testing of the software are presented. Those have been performed by using various mock galaxy catalogues to ensure that it meets the stringent accuracy requirement of the $\Euclid$ mission. The results indicate that the software is robust and can reliably estimate the 2-point correlation function, which is essential for deriving cosmological parameters with high precision. Furthermore, the paper discusses the expected performance of the software during different stages of the Euclid Wide Survey observations and forecasts how the precision of the correlation function measurements will improve over the mission's timeline, highlighting the software's capability to handle large data sets efficiently.

</div>

<div id="div_fig1">

<img src="tmp_2501.16555/./runtimes.png" alt="Fig2" width="100%"/>

**Figure 2. -** Runtimes for the calculation of the multipole correlation of galaxies obtained from the ELM mock. The times are expressed in CPU-hour and as a function of the data or random catalogue size. The various symbols represent the time spent on the data structure construction, DD calculation, DR calculation, RR calculation, and the overall 2PCF runtime with and without random split option. The DR calculation times are provided as a function of the number of objects in the data catalogue and assuming a fifty times larger random catalogue. The different curves represent the runtimes obtained with the linked-list (solid), kd-tree (dashed), and octree (dotted) algorithms. The blue vertical band shows the range of expected number densities in the spectroscopic sample at redshifts within $0.9<z<1.8$. The abscissa refers to the number of object in the data catalogue except for RR calculation where it refers to the that in the random catalogue. (*fig:runtimes*)

</div>
<div id="div_fig2">

<img src="tmp_2501.16555/./xi0.png" alt="Fig8" width="100%"/>

**Figure 8. -** Monopole correlation function estimated from the FGM mock for galaxies with $\mathrm{H}\alpha$ flux above $2\times10^{-16} \mathrm{erg}  \mathrm{s}^{-1} \mathrm{cm}^{-2}$ at different epochs of observations. The different colours show the monopole in the redshift intervals: $0.9<z<1.1$, $1.1<z<1.3$, $1.3<z<1.5$,$1.5<z<1.8$. (*fig:xi0*)

</div>
<div id="div_fig3">

<img src="tmp_2501.16555/./xi_ic.png" alt="Fig12" width="100%"/>

**Figure 12. -** Impact of the integral constraint on the monopole correlation function in the Euclid Wide Survey. The curves with the different different symbols show the absolute value of the ratio between the monopole correlation functions affected by integral constraint and true underlying one, after 1, 3, and 6 years of observation respectively. The solid curves show the same quantity for year 6 observations, but when the integral constraint effect is derived from a model correlation function. The different colours show this quantity for the redshift intervals: $0.9<z<1.1$, $1.1<z<1.3$,$1.3<z<1.5$,$1.5<z<1.8$. The dotted curves delineate the expected $1\sigma$ statistical uncertainty on the monopole correlation function in the completed Euclid Wide Survey. (*fig:ic*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2501.16555"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\MTnote}[1]{ {\color{red} [MT: ~#1]}}$
$\newcommand{\aj}{AJ}$
$\newcommand{\araa}{ARA\&A}$
$\newcommand{\apj}{ApJ}$
$\newcommand{\apjl}{ApJ}$
$\newcommand{\apjs}{ApJS}$
$\newcommand{\ao}{Appl.~Opt.}$
$\newcommand{\apss}{Ap\&SS}$
$\newcommand{\aap}{A\&A}$
$\newcommand{\aapr}{A\&A~Rev.}$
$\newcommand{\aaps}{A\&AS}$
$\newcommand{\azh}{AZh}$
$\newcommand{\baas}{BAAS}$
$\newcommand{\jrasc}{JRASC}$
$\newcommand{\memras}{MmRAS}$
$\newcommand{\mnras}{MNRAS}$
$\newcommand{\pra}{Phys.~Rev.~A}$
$\newcommand{\prb}{Phys.~Rev.~B}$
$\newcommand{\prc}{Phys.~Rev.~C}$
$\newcommand{\prd}{Phys.~Rev.~D}$
$\newcommand{\pre}{Phys.~Rev.~E}$
$\newcommand{\prl}{Phys.~Rev.~Lett.}$
$\newcommand{\pasp}{PASP}$
$\newcommand{\pasj}{PASJ}$
$\newcommand{\qjras}{QJRAS}$
$\newcommand{\skytel}{S\&T}$
$\newcommand{\solphys}{Sol.~Phys.}$
$\newcommand{\sovast}{Soviet~Ast.}$
$\newcommand{\ssr}{Space~Sci.~Rev.}$
$\newcommand{\zap}{ZAp}$
$\newcommand{\nat}{Nature}$
$\newcommand{\iaucirc}{IAU~Circ.}$
$\newcommand{\aplett}{Astrophys.~Lett.}$
$\newcommand{\apspr}{Astrophys.~Space~Phys.~Res.}$
$\newcommand{\bain}{Bull.~Astron.~Inst.~Netherlands}$
$\newcommand{\fcp}{Fund.~Cosmic~Phys.}$
$\newcommand{\gca}{Geochim.~Cosmochim.~Acta}$
$\newcommand{\grl}{Geophys.~Res.~Lett.}$
$\newcommand{\jcp}{J.~Chem.~Phys.}$
$\newcommand{\jgr}{J.~Geophys.~Res.}$
$\newcommand{\jqsrt}{J.~Quant.~Spec.~Radiat.~Transf.}$
$\newcommand{\memsai}{Mem.~Soc.~Astron.~Italiana}$
$\newcommand{\nphysa}{Nucl.~Phys.~A}$
$\newcommand{\physrep}{Phys.~Rep.}$
$\newcommand{\physscr}{Phys.~Scr}$
$\newcommand{\planss}{Planet.~Space~Sci.}$
$\newcommand{\procspie}{Proc.~SPIE}$</div>



<div id="title">

# Star formation from low to high mass:\ A comparative view

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2501.16866-b31b1b.svg)](https://arxiv.org/abs/2501.16866)<mark>Appeared on: 2025-01-29</mark> -  _45 pages, 6 figures, accepted for Annual Reviews of Astronomy and Astrophysics_

</div>
<div id="authors">

<mark>H. Beuther</mark>, R. Kuiper, M. Tafalla

</div>
<div id="abstract">

**Abstract:** * Qualitative differences between low- and high-mass star formation relate mainly to the radiative and ionizing feedback that occurs almost exclusively in regions forming high-mass stars. Nevertheless, accretion apparently can continue via disk structures in ionized accretion flows.\end{itemize}Finally, we discuss to what extent a unified picture of star formation over all masses is possible and which issues need to be addressed in the future.\end{minipage}$

</div>

<div id="div_fig1">

<img src="tmp_2501.16866/./images/fig4.png" alt="Fig3" width="100%"/>

**Figure 3. -** Comparison of core separations and average densities for example low-, intermediate and high-mass star-forming regions on the same physical scales. The left, middle and right panels show data in color and contours for B213 ($N_2$H$^+$(1--0), \citealt{tafalla2015}), IRDC 19175 ($N_2$H$^+$(1--0), \citealt{beuther2009}) and IRDC 18310-4 (1.1 mm continuum, \citealt{beuther2015}). Linear scale-bars and spatial resolution elements are shown to the left and top-right in each panel, respectively. (*jeans-length*)

</div>
<div id="div_fig2">

<img src="tmp_2501.16866/./images/fig6.png" alt="Fig4" width="100%"/>

**Figure 4. -** Sketch of a star formation complex encompassing low- and high-mass star formation. While the top-part presents a large, magnetized filamentary cloud with low- and high-mass star formation occurring in different density regimes, the insets in the bottom part outline sub-aspects. While cores, disks and jets can be found in low- and high-mass regions, the feedback processes exclusively stem from high-mass stars. The figure is inspired by observational and numerical data from \citet{grudic2022,soler2019,traficante2023,oliva2023b}, and a Hubble image of Orion (credit: NASA, C.R. O'Dell and S.K. Wong). The entire figure is created by André Oliva. (*sketch_summary*)

</div>
<div id="div_fig3">

<img src="tmp_2501.16866/./images/fig2.png" alt="Fig2" width="100%"/>

**Figure 2. -** Normalized histograms of the $\delta V$ parameter derived from
infall searches towards low-mass
starless cores (left panel, \citealt{lee2001}), low-mass
protostars (middle panel, \citealt{mardones1997}), and high-mass clumps
(right panel, \citealt{jackson2019}). All $\delta V$ estimates use
$N_2$H$^+$ as the thin tracer and the thick tracer indicated in the
abscissa label. Despite the large differences between their targets, the three
histograms present similar $\delta V$ distributions in terms of shape, width,
and slight excess of negative values (indicative of contraction motions). (*fig_deltav*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2501.16866"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\feh}{[Fe/H]}$
$\newcommand{\zmax}{\langle Z_{\rm{max}} \rangle}$
$\newcommand{\eccentricity}{\langle e \rangle}$
$\newcommand{\aj}{AJ}$
$\newcommand{\araa}{ARA\&A}$
$\newcommand{\apj}{ApJ}$
$\newcommand{\apjl}{ApJ}$
$\newcommand{\apjs}{ApJS}$
$\newcommand{\apss}{Ap\&SS}$
$\newcommand{\aap}{A\&A}$
$\newcommand{\aapr}{A\&A~Rev.}$
$\newcommand{\aaps}{A\&AS}$
$\newcommand{\mnras}{MNRAS}$
$\newcommand{\pasp}{PASP}$
$\newcommand{\pasj}{PASJ}$
$\newcommand{\qjras}{QJRAS}$
$\newcommand{\nat}{Nature}$
$\newcommand{\aplett}{Astrophys.~Lett.}$
$\newcommand{\aas}{AAS}$
$\newcommand{\eprint}{e--print}$</div>



<div id="title">

# The interplay between super-metallicity, lithium depletion, and radial migration in nearby stars

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2501.17031-b31b1b.svg)](https://arxiv.org/abs/2501.17031)<mark>Appeared on: 2025-01-29</mark> -  _To appear in the proceedings of IAU Symposium 395 (3 pages)_

</div>
<div id="authors">

M. L. L. Dantas, et al. -- incl., <mark>G. Guiglion</mark>

</div>
<div id="abstract">

**Abstract:** We report the discovery of a peculiar set of old super-metal-rich dwarf stars with orbits of low eccentricity that reach a maximum height from the Galactic plane between $\sim$ 0.5-1.5 kpc observed by the $*Gaia*$ -ESO Survey. These stars show lithium (Li) depletion, which is anti-correlated with their [ Fe/H ] . To investigate these stars' chemo-dynamical properties, we used data from the $*Gaia*$ -ESO Survey. We applied hierarchical clustering to group the stars based on their abundances (excluding Li). Orbits were integrated using $*Gaia*$ astrometry and radial velocities from $*Gaia*$ -ESO. Our analysis suggests that the high metallicity of these stars is incompatible with their formation in the solar neighbourhood. We also found that their Li envelope abundance is below the benchmark meteoritic value, in agreement with previous works. This result supports the idea that the Li abundance in old, super-metal-rich dwarf stars should not be considered a proxy for the local interstellar medium Li.

</div>

<div id="div_fig1">

<img src="tmp_2501.17031/./figs/MMR_solar_zmax_eccentricity_kde_sunset_PROCEEDINGS.png" alt="Fig2.1" width="50%"/><img src="tmp_2501.17031/./figs/MMR_solar_feh_rguiding_kde_zoom_sunset_PROCEEDINGS.png" alt="Fig2.2" width="50%"/>

**Figure 2. -** *Top panel*: $\zmax$*vs.*$\eccentricity$ for the five subgroups of the most metal-rich group. The Solar-metallicity group parameters are shown in grey for comparison. The thin and thick disc scale-heights are shown as black dashed and dot-dashed lines respectively. *Bottom panel*: similar to the top panel, but showing [Fe/H]*vs.*$\langle R_{\rm gui} \rangle$(guiding radius). The dotted, dashed, and dot-dashed black curves respectively depict the 3.3, 8, and 11 Gyr models described in \citet{Magrini2009}. The analysis encompassing the full sample with all metallicities is discussed in Dantas et al. (in review). (*fig:zmax_ecc*)

</div>
<div id="div_fig2">

<img src="tmp_2501.17031/./figs/lithium_top6li_all11subgs_lifeh_g19_pastel_PROCEEDINGS.png" alt="Fig1.1" width="50%"/><img src="tmp_2501.17031/./figs/lithium_top6li_all11subgs_liteff_pastel_PROCEEDINGS.png" alt="Fig1.2" width="50%"/>

**Figure 1. -** *Left panel:*$\langle{\rm A(Li)} \rangle$ vs $\feh$ for the super-solar groups of the sample, split into those with a direct detection of Li (LiD) and those with an upper limit estimate (LiUL). In this representation, only the median of the top six stars with the highest A(Li) is shown in each marker. The black star-shaped markers display the data from \citet{Guiglion2016}. *Right panel:*$\langle{\rm A(Li)} \rangle$ vs $\langle T_{\rm eff} \rangle$. It is possible to see that A(Li) seems to decrease with decreasing $T_{\rm eff}$. Warmer temperatures seem to have a protective effect on A(Li) due to their thinner convective layers of the stars. (*fig:li*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2501.17031"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

322  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

10  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

4  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
