# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big: raise PIL's MAX_IMAGE_PIXELS limit
# (presumably so that very large figures are not rejected when opened -- confirm)
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised when a paper does not pass the affiliation check."""

def validation(source: str):
    """Check the affiliation of a paper before its TeX code is parsed.

    Forwards ``source`` to :func:`mpia.affiliation_verifications` and turns
    any result other than ``True`` into an exception, so that a caller can
    abort before doing the expensive parsing work.

    :param source: source content to check (passed unchanged to
                   :func:`mpia.affiliation_verifications`)
    :raises AffiliationError: if the verification does not return ``True``;
                              the returned value is included in the message
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # f-string instead of `+`: a non-string result (e.g. False) must still
        # produce an AffiliationError rather than a TypeError.
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# Always show AffiliationWarning, even when the same warning is issued repeatedly
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    The returned snippet is an ``<img>`` pointing at the qrserver.com API,
    wrapped in a ``<div id="qrcode">``.

    :param paper_id: arXiv identifier of the paper (e.g. ``2407.06845``)
    :returns: HTML text that can be embedded in a markdown document
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The whole URL must sit inside the quoted `src` value: the abstract-page
    # address is the `data` payload, so no quote may appear in the middle.
    txt = f"""<img src="{url}https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA staff list (Mitarbeiter) webpage from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects any name that contains one of the listed fragments (IT,
    administration, technical staff). The test is a plain substring match.

    :param name: name to check
    :returns: False if one of the fragments occurs in name, True otherwise
    """
    excluded_fragments = (
        'Wolf', 'Licht', 'Binroth', 'Witzel', 'Jordan',
        'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
        'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
        'Dehen', 'Beckmann', 'Jager', 'Jäger',
    )
    return not any(fragment in name for fragment in excluded_fragments)

def add_author_to_list(author_list: list, add_list: tuple = ('T. Henning',)) -> list:
    """ Append extra author names that are missing from a list

    Each name of ``add_list`` that is not already present is appended to
    ``author_list``. The list is modified in place and also returned.

    :param author_list: list of author names (updated in place)
    :param add_list: names to append when absent; any iterable of names,
                     defaulting to the single entry ``'T. Henning'``
    :returns: the same ``author_list`` object, updated
    """
    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# Build the reference author list in one go:
#  * take the name field (index 1) of every entry of the MPIA staff list
#    (mpia.get_mpia_mitarbeiter_list() already does some filtering),
#  * keep only names accepted by filter_non_scientists,
#  * append authors whose MPIA name and paper name are inconsistent.
mpia_authors = add_author_to_list(
    [entry[1]
     for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# Today's arXiv listing, plus any paper added by hand through its identifier
new_papers = get_new_papers()
add_paper_refs = []
new_papers.extend(map(get_paper_from_identifier, add_paper_refs))

candidates = []
for paperk in new_papers:
    # Compare author names through their initials form
    normed_author_list = list(map(mpia.get_initials, paperk['authors']))
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # a highlighted author carries a 'mark' tag in its text
    matches = [pair for pair in zip(hl_authors, paperk['authors']) if 'mark' in pair[0]]
    paperk['authors'] = hl_authors
    if not matches:
        # no author matched our list: not a candidate
        continue
    candidates.append(paperk)

print("Arxiv has {0:,d} new papers today".format(len(new_papers)))
print("          {0:,d} with possible author matches".format(len(candidates)))

K. El-Badry  ->  K. El-Badry  |  ['K. El-Badry']
M. Benisty  ->  M. Benisty  |  ['M. Benisty']
R. Zhang  ->  R. Zhang  |  ['R. Zhang']
S. Li  ->  S. Li  |  ['S. Li']


G. Guiglion  ->  G. Guiglion  |  ['G. Guiglion']
H.-W. Rix  ->  H.-W. Rix  |  ['H.-W. Rix']
Arxiv has 66 new papers today
          7 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Process every candidate paper: get its source, check the affiliation,
# parse the TeX and build the markdown summary.
#   documents: (paper_id, markdown text) for each paper that went through
#   failed:    (paper, reason text) for each paper that did not
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalise the identifier: lower case, no 'arxiv:' prefix, no surrounding whitespace.
    # NOTE(review): r'\n' is a raw string, so this replace removes a literal
    # backslash followed by 'n', not a newline character -- confirm this is intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # Reuse the source folder if it already exists, otherwise retrieve it
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` is handed to LatexDocument; an AffiliationError it
            # raises is caught just below.
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is built but never used afterwards
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            # Record the paper with an "affiliation error" reason and move on
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well:
        # when the parsed author count differs from the arXiv one, use the
        # (already highlighted) arXiv author list instead
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # Fall back on the arXiv abstract when none was extracted from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # Comment line: arXiv badge + appearance date (+ arXiv comments if any)
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: on failure the error is printed and
        # the markdown is kept as it is)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure: warn, record the paper with a "latex error" reason
        # and carry on with the next candidate
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/7 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2407.06257


extracting tarball to tmp_2407.06257... done.
Retrieving document from  https://arxiv.org/e-print/2407.06272


extracting tarball to tmp_2407.06272...

 done.
Retrieving document from  https://arxiv.org/e-print/2407.06444
extracting tarball to tmp_2407.06444...

 done.
Retrieving document from  https://arxiv.org/e-print/2407.06845


extracting tarball to tmp_2407.06845...

 done.
  3: tmp_2407.06845/rnaas.tex, 136 lines
  5: tmp_2407.06845/main.tex, 949 lines



  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


S. Li  ->  S. Li  |  ['S. Li']


Found 100 bibliographic references in tmp_2407.06845/main.bbl.
Retrieving document from  https://arxiv.org/e-print/2407.06874


extracting tarball to tmp_2407.06874...

 done.
  2: tmp_2407.06874/main.tex, 369 lines
  5: tmp_2407.06874/sample631.tex, 753 lines
Retrieving document from  https://arxiv.org/e-print/2407.06963



  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2407.06963...

 done.


Found 173 bibliographic references in tmp_2407.06963/shboost_v1.bbl.
syntax error in line 844: '=' expected
Retrieving document from  https://arxiv.org/e-print/2407.07048
extracting tarball to tmp_2407.07048...

 done.


H.-W. Rix  ->  H.-W. Rix  |  ['H.-W. Rix']


Found 46 bibliographic references in tmp_2407.07048/main.bbl.
Error retrieving bib data for Eggleton_1983_AproximationsRadii: 'author'


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# The log folder may not exist yet (e.g. fresh build tree): create it first,
# otherwise open() below fails.
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# Explicit UTF-8: the summaries contain non-ASCII characters and the platform
# default encoding is not guaranteed to handle them.
with open(logfile, 'w', encoding='utf-8') as logs:
    # Header and counts
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))

    # Success: candidates whose identifier is among the exported documents
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # Sort by reason text so that identical reasons end up under one heading
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, every other failure in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        # write a new sub-heading each time the reason changes
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2407.06845-b31b1b.svg)](https://arxiv.org/abs/2407.06845) | **Digging into the Interior of Hot Cores with ALMA (DIHCA). IV. Fragmentation in High-mass Star-Forming Clumps**  |
|| K. Ishihara, et al. -- incl., <mark>S. Li</mark> |
|*Appeared on*| *2024-07-10*|
|*Comments*| *30 pages, 18 figures, Accepted in ApJ*|
|**Abstract**|            Fragmentation contributes to the formation and evolution of stars. Observationally, high-mass stars are known to form multiple-star systems, preferentially in cluster environments. Theoretically, Jeans instability has been suggested to determine characteristic fragmentation scales, and thermal or turbulent motion in the parental gas clump mainly contributes to the instability. To search for such a characteristic fragmentation scale, we have analyzed ALMA 1.33 mm continuum observations toward 30 high-mass star-forming clumps taken by the Digging into the Interior of Hot Cores with ALMA (DIHCA) survey. We have identified 573 cores using the dendrogram algorithm and measured the separation of cores by using the Minimum Spanning Tree (MST) technique. The core separation corrected by projection effects has a distribution peaked around 5800 au. In order to remove biases produced by different distances and sensitivities, we further smooth the images to a common physical scale and perform completeness tests. Our careful analysis finds a characteristic fragmentation scale of $\sim$7000 au, comparable to the thermal Jeans length of the clumps. We conclude that thermal Jeans fragmentation plays a dominant role in determining the clump fragmentation in high-mass star-forming regions, without the need of invoking turbulent Jeans fragmentation.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2407.06963-b31b1b.svg)](https://arxiv.org/abs/2407.06963) | **Transferring spectroscopic stellar labels to 217 million Gaia DR3 XP stars with SHBoost**  |
|| A. Khalatyan, et al. -- incl., <mark>G. Guiglion</mark> |
|*Appeared on*| *2024-07-10*|
|*Comments*| *Submitted to A&A. 13 pages, 13 figures + references & appendices. Data available at this https URL*|
|**Abstract**|            We explore the feasibility of using machine-learning regression as a method of extracting basic stellar parameters and line-of-sight extinctions, given spectro-photometric data. To this end, we build a stable gradient-boosted random-forest regressor (xgboost), trained on spectroscopic data, capable of producing output parameters with reliable uncertainties from Gaia DR3 data (most notably the low-resolution XP spectra) without ground-based spectroscopic observations. Using Shapley additive explanations, we are able to interpret how the predictions for each star are influenced by each data feature. For the training and testing of the network, we use high-quality parameters obtained from the StarHorse code for a sample of around eight million stars observed by major spectroscopic surveys (APOGEE, GALAH, LAMOST, RAVE, SEGUE, and GES), complemented by curated samples of hot stars, very metal-poor stars, white dwarfs, and hot sub-dwarfs. The training data cover the whole sky, all Galactic components, and almost the full magnitude range of the Gaia DR3 XP sample of more than 217 million objects that also have parallaxes. We achieve median uncertainties (at $G\approx16$) of 0.20 mag in V-band extinction, 0.01 dex in logarithmic effective temperature, 0.20 dex in surface gravity, 0.18 dex in metallicity, and $12\%$ in mass (over the full Gaia DR3 XP sample, with considerable variations in precision as a function of magnitude and stellar type). We succeed in predicting competitive results based on Gaia DR3 XP spectra compared to classical isochrone fitting methods we employed in earlier work, especially for the parameters $A_V$, $T_{\rm eff}$, and metallicity. Finally, we showcase some applications of this new catalogue (e.g. extinction maps, metallicity trends in the Milky Way, extended maps of young massive stars, metal-poor stars, and metal-rich stars). [abridged]         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2407.07048-b31b1b.svg)](https://arxiv.org/abs/2407.07048) | **Ba Enrichment in Gaia MS+WD Binaries: Tracing $s$-Process Element Production**  |
|| P. Rekhi, et al. -- incl., <mark>H.-W. Rix</mark> |
|*Appeared on*| *2024-07-10*|
|*Comments*| *12 pages, 10 figures*|
|**Abstract**|            A large population of intermediate-separation binaries, consisting of a main-sequence (MS) star and a white dwarf (WD), has recently emerged from Gaia's third data release (DR3), posing challenges to current models of binary evolution. Here we examine the $s$-process element abundances in these systems using data from GALAH DR3. Following refined sample analysis with parameter estimates based on GALAH spectra, we find a distinct locus where enhanced $s$-process elements depend on both the WD mass and metallicity, consistent with loci identified in previous asymptotic giant branch (AGB) nucleosynthesis studies with higher $s$-process yields. Notably, these enhanced abundances show no correlation with the systems' orbital parameters, supporting a history of accretion in intermediate-separation MS+WD systems. Consequently, our results form a direct observational evidence of a connection between AGB masses and $s$-process yields. We conclude by showing that the GALAH DR3 survey includes numerous Ba dwarf stars, within and beyond the mass range covered in our current sample, which can further elucidate $s$-process element distributions in MS+WD binaries.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2407.06257-b31b1b.svg)](https://arxiv.org/abs/2407.06257) | **Once a Triple, Not Always a Triple: The Evolution of Hierarchical Triples that Yield Merged Inner Binaries**  |
|| C. Shariat, et al. -- incl., <mark>K. El-Badry</mark> |
|*Appeared on*| *2024-07-10*|
|*Comments*| *Submitted to ApJ. Comments are welcome*|
|**Abstract**|            More than half of all main-sequence (MS) stars have one or more companions, and many of those with initial masses <8 M$_\odot$ are born in hierarchical triples. These systems feature two stars in a close orbit (the inner binary) while a tertiary star orbits them on a wider orbit (the outer binary). In hierarchical triples, three-body dynamics combined with stellar evolution drives interactions and, in many cases, merges the inner binary entirely to create a renovated `Post-Merger Binary' (PMB). By leveraging dynamical simulations and tracking binary interactions, we explore the outcomes of merged triples and investigate whether PMBs preserve signatures of their three-body history. Our findings indicate that in 26-54% of wide double WD binaries (s>100 au), the more massive white dwarf (WD) is a merger product, implying that these DWD binaries were previously triples. Overall, we estimate that $44\pm14\%$ of observed wide DWDs originated in triple star systems and thereby have rich dynamical histories. Additionally, our results suggest that the separations of inner and outer binaries are uncorrelated at birth, providing insights into stellar formation processes. We also examine MS+MS and MS+Red Giant mergers manifesting as Blue Straggler stars (BSSs). These PMBs have orbital configurations and ages similar to most observed BSS binaries. While the triple+merger formation channel can explain the observed chemical abundances, moderate eccentricities, and companion masses in BSS binaries, it likely only accounts for $\sim$20-25% of BSSs. Meanwhile, we predict that the majority of observed single BSSs formed as collisions in triples and harbor long-period (>10 yr) companions. Furthermore, both BSS binaries and DWDs exhibit signatures of WD birth kicks.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2407.06272-b31b1b.svg)](https://arxiv.org/abs/2407.06272) | **ALMA high-resolution observations unveil planet formation shaping molecular emission in the PDS 70 disk**  |
|| L. Rampinelli, et al. -- incl., <mark>M. Benisty</mark> |
|*Appeared on*| *2024-07-10*|
|*Comments*| *25 pages, 10 figures, 3 tables, 8 figures and one table in appendix. Accepted for publication in A&A*|
|**Abstract**|            With two directly detected protoplanets, the PDS 70 system is a unique source in which to study the complex interplay between forming planets and their natal environment. The large dust cavity carved by the two giant planets can affect the disk chemistry, and therefore the molecular emission morphology. On the other hand, chemical properties of the gas component of the disk are expected to leave an imprint on the planetary atmospheres. In this work, we reconstruct the emission morphology of a rich inventory of molecular tracers in the PDS 70 disk, and we look for possible chemical signatures of the two actively accreting protoplanets, PDS b and c. We leverage Atacama Large Millimeter/submillimeter Array (ALMA) band 6 high-angular-resolution and deep-sensitivity line emission observations, together with image and $uv$-plane techniques, to boost the detection of faint lines. We robustly detect ring-shaped emission from $^{12}$CO, $^{13}$CO, C$^{18}$O, H$^{13}$CN, HC$^{15}$N, DCN, H$_2$CO, CS, C$_2$H, and H$^{13}$CO$^{+}$ lines in unprecedented detail. Most of the molecular tracers show a peak of the emission inside the millimeter dust peak. We interpret this as the direct impact of the effective irradiation of the cavity wall, as a result of the planet formation process. Moreover, we have found evidence of an O-poor gas reservoir in the outer disk, which is supported by the observations of bright C-rich molecules, the non-detection of SO, and a lower limit on the $\mathrm{CS/SO}$ ratio of $\sim1$. Eventually, we provide the first detection of the c-C$_3$H$_2$ transitions at 218.73 GHz, and the marginal detection of an azimuthal asymmetry in the higher-energy H$_2$CO (3$_{2,1}$-2$_{2,0}$) line, which could be due to accretion heating near PDS 70b.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2407.06444-b31b1b.svg)](https://arxiv.org/abs/2407.06444) | **First laboratory and on-sky results of an adaptive secondary mirror with TNO-style actuators on the NASA Infrared Telescope Facility**  |
|| E. Lee, et al. -- incl., <mark>R. Zhang</mark> |
|*Appeared on*| *2024-07-10*|
|*Comments*| *15 pages, 12 figures, SPIE Astronomical Telescopes + Instrumentation: Adaptive Optics Systems IX*|
|**Abstract**|            We are developing an adaptive secondary mirror (ASM) that uses a new actuator technology created by the Netherlands Organization for Applied Scientific Research (TNO). The TNO hybrid variable reluctance actuators have more than an order of magnitude better efficiency over the traditional voice coil actuators that have been used on existing ASMs and show potential for improving the long-term robustness and reliability of ASMs. To demonstrate the performance, operations, and serviceability of TNO's actuators in an observatory, we have developed a 36-actuator prototype ASM for the NASA Infrared Telescope Facility (IRTF) called IRTF-ASM-1. IRTF-ASM-1 provides the first on-sky demonstration of this approach and will help us evaluate the long-term performance and use of this technology in an astronomical facility environment. We present calibration and performance results with the ASM in a Meniscus Hindle Sphere lens setup as well as preliminary on-sky results on IRTF. IRTF-ASM-1 achieved stable closed-loop performance on-sky with H-band Strehl ratios of 35-40% in long-exposure images under a variety of seeing conditions.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2407.06874-b31b1b.svg)](https://arxiv.org/abs/2407.06874) | **The flux of ultra-high-energy cosmic rays along the supergalactic plane measured at the Pierre Auger Observatory**  |
|| P. A. Collaboration, et al. |
|*Appeared on*| *2024-07-10*|
|*Comments*| *submitted to ApJ*|
|**Abstract**|            Ultra-high-energy cosmic rays are known to be mainly of extragalactic origin, and their propagation is limited by energy losses, so their arrival directions are expected to correlate with the large-scale structure of the local Universe. In this work, we investigate the possible presence of intermediate-scale excesses in the flux of the most energetic cosmic rays from the direction of the supergalactic plane region using events with energies above 20 EeV recorded with the surface detector array of the Pierre Auger Observatory up to 31 December 2022, with a total exposure of 135,000 km^2 sr yr. The strongest indication for an excess that we find, with a post-trial significance of 3.1{\sigma}, is in the Centaurus region, as in our previous reports, and it extends down to lower energies than previously studied. We do not find any strong hints of excesses from any other region of the supergalactic plane at the same angular scale. In particular, our results do not confirm the reports by the Telescope Array collaboration of excesses from two regions in the Northern Hemisphere at the edge of the field of view of the Pierre Auger Observatory. With a comparable exposure, our results in those regions are in good agreement with the expectations from an isotropic distribution.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Figure paths are collected from the markdown text (``[Fig...](path)``
    links and ``<img src="path" .../>`` tags). Each local figure that exists
    is copied below ``directory`` under the same relative path, then the
    markdown itself is written to ``directory/md_fname``.

    :param md: markdown text to export
    :param md_fname: file name of the exported markdown document
    :param directory: destination directory, created (with its parents) if
                      it does not exist
    :raises RuntimeError: if ``directory`` exists but is not a directory
    """
    import os
    import shutil
    import re

    if os.path.exists(directory) and not os.path.isdir(directory):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if not os.path.exists(directory):
        print(f"creating directory {directory:s}")
        # makedirs rather than mkdir: a nested destination such as
        # '_build/html/' must also work when its parent is missing
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # keep the figure's relative folder structure below the destination
        destdir = os.path.join(directory, os.path.dirname(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, os.path.join(destdir, os.path.basename(fname)))

    md_path = os.path.join(directory, md_fname)
    # explicit UTF-8: the text may contain non-ASCII characters
    with open(md_path, 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", md_path)
    # list every figure reference found (including skipped ones)
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write each processed paper (markdown + figures) into the HTML build folder
for paper_id, md in documents:
    export_markdown_summary(md, paper_id + ".md", '_build/html/')

found figures ['tmp_2407.06845/./f_each_corr_fix_area.png', 'tmp_2407.06845/./f_Sep_original_3type_wVLs_v2.png', 'tmp_2407.06845/./f_Sep_3d_8type_ws.png']
copying  tmp_2407.06845/./f_each_corr_fix_area.png to _build/html/
copying  tmp_2407.06845/./f_Sep_original_3type_wVLs_v2.png to _build/html/
copying  tmp_2407.06845/./f_Sep_3d_8type_ws.png to _build/html/
exported in  _build/html/2407.06845.md
    + _build/html/tmp_2407.06845/./f_each_corr_fix_area.png
    + _build/html/tmp_2407.06845/./f_Sep_original_3type_wVLs_v2.png
    + _build/html/tmp_2407.06845/./f_Sep_3d_8type_ws.png
found figures ['tmp_2407.06963/./im/shngboost_q23_xp_test_kiel_magbins.png', 'tmp_2407.06963/./im/ngboost_performance_testdataset_all.png', 'tmp_2407.06963/./im/metallicity_calibration.png']
copying  tmp_2407.06963/./im/shngboost_q23_xp_test_kiel_magbins.png to _build/html/
copying  tmp_2407.06963/./im/ngboost_performance_testdataset_all.png to _build/html/
copying  tmp_2407.06963/./im/metallicity_calibration.pn

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render the markdown of every exported document for a quick visual check
for paper_id, md in documents:
    display(Markdown(md))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$
$\newcommand{\JL}{\lambda_{\mathrm{J, clump}}^{\mathrm{th}}}$
$\newcommand{\tJL}{\lambda_{\mathrm{J, clump}}^{\mathrm{tur}}}$
$\newcommand{\JM}{\mathrm{M}_{\mathrm{J, clump}}^{\mathrm{th}}}$
$\newcommand{\tJM}{\mathrm{M}_{\mathrm{J, clump}}^{\mathrm{tur}}}$
$\newcommand{\Add}[1]{\textbf{#1}}$</div>



<div id="title">

# Digging into the Interior of Hot Cores with ALMA (DIHCA). IV. Fragmentation in High-mass Star-Forming Clumps

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2407.06845-b31b1b.svg)](https://arxiv.org/abs/2407.06845)<mark>Appeared on: 2024-07-10</mark> -  _30 pages, 18 figures, Accepted in ApJ_

</div>
<div id="authors">

K. Ishihara, et al. -- incl., <mark>S. Li</mark>

</div>
<div id="abstract">

**Abstract:** Fragmentation contributes to the formation and evolution of stars.Observationally, high-mass stars are known to form multiple-star systems, preferentially in cluster environments.Theoretically, Jeans instability has been suggested to determine characteristic fragmentation scales, and thermal or turbulent motion in the parental gas clump mainly contributes to the instability.To search for such a characteristic fragmentation scale, we have analyzed ALMA 1.33 mm continuum observations toward 30 high-mass star-forming clumps taken by the Digging into the Interior of Hot Cores with ALMA (DIHCA) survey.We have identified 573 cores using the dendrogram algorithm and measured the separation of cores by using the Minimum Spanning Tree (MST) technique.The core separation corrected by projection effects has a distribution peaked  around 5800 au.In order to remove biases produced by different distances and sensitivities, we further smooth the images to a common physical scale and perform completeness tests.Our careful analysis finds a characteristic fragmentation scale of $\sim$ 7000 au, comparable to the thermal Jeans length of the clumps. We conclude that thermal Jeans fragmentation plays a dominant role in determining the clump fragmentation in high-mass star-forming regions, without the need of invoking turbulent Jeans fragmentation.

</div>

<div id="div_fig1">

<img src="tmp_2407.06845/./f_each_corr_fix_area.png" alt="Fig11" width="100%"/>

**Figure 11. -** Top panels: target distance, rms noise level and $5\sigma$ mass sensitivity against the number of identified cores.
Bottom panels: target distances, rms noise level and $5\sigma$ mass sensitivity against the number of identified cores within the same physical area whose diameter is the minimum of the FWHM ($\sim$16 pc) of the FoV in the regions.
The header of each panel shows the Spearman's rank coefficient $\rho_s$ and the p-value.
The color coding shows the target distances.
 (*fig:correlations*)

</div>
<div id="div_fig2">

<img src="tmp_2407.06845/./f_Sep_original_3type_wVLs_v2.png" alt="Fig3" width="100%"/>

**Figure 3. -** _ top_: Distribution of the angular separation obtained from MST for the entire sample. The solid line shows the log-pdf produced by Gaussian Kernel Density Estimation. Vertical lines represent minimum, mean, and maximum angular resolutions, respectively.
    _ middle_: Cumulative distribution of core separations divided by the angular resolution of the observations for clumps in each distance.
    _ bottom_: Separation distribution in au scale by converting the angular separation into a physical separation using the source distance. Vertical lines represent minimum, mean, and maximum linear resolutions, respectively.
     (*fig:sep_obs_ori*)

</div>
<div id="div_fig3">

<img src="tmp_2407.06845/./f_Sep_3d_8type_ws.png" alt="Fig13" width="100%"/>

**Figure 13. -** 
    Separation distribution in various styles. Left panels display them in physical scale. Right panels display them in normalized to the clump thermal Jeans length. Each column represents as follows. Core separation measured from
    (a) original images,
    (b) smoothed images,
    (c) for cores whose mass is higher than $1 M_\odot$, and
    (d) the cores whose mass is higher than  $3 M_\odot$.
     (*fig:sep_3d*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2407.06845"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\fa}[1]{\textcolor{blue}{#1}}$
$\newcommand{\sn}[1]{\textcolor{olive}{#1}}$</div>



<div id="title">

# Transferring spectroscopic stellar labels to 217 million _Gaia_ DR3 XP stars with ${\tt SHBoost}$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2407.06963-b31b1b.svg)](https://arxiv.org/abs/2407.06963)<mark>Appeared on: 2024-07-10</mark> -  _Submitted to A&A. 13 pages, 13 figures + references & appendices. Data available at this https URL_

</div>
<div id="authors">

A. Khalatyan, et al. -- incl., <mark>G. Guiglion</mark>

</div>
<div id="abstract">

**Abstract:** With _Gaia_ Data Release 3 (DR3), new and improved astrometric, photometric, and spectroscopic measurements for 1.8 billion stars are available. Alongside this wealth of new data, however, come challenges in finding increasingly efficient and accurate computational methods to use for analysis. In this paper we explore the feasibility of using machine-learning regression as a method of extracting basic stellar parameters and line-of-sight extinctions, given spectro-photometric data. To this end, we build a stable gradient-boosted random-forest regressor (${\tt xgboost}$), trained on spectroscopic data, capable of producing output parameters with reliable uncertainties from _Gaia_ DR3 data (most notably the low-resolution XP spectra) without ground-based spectroscopic observations. Using Shapley additive explanations, we are also able to interpret how the predictions for each star are influenced by each data feature. For the training and testing of the network, we use high-quality parameters obtained from the ${\tt StarHorse}$ code for a sample of around eight million stars observed by major spectroscopic surveys (APOGEE, GALAH, LAMOST, RAVE, SEGUE, and GES), complemented by curated samples of hot stars, very metal-poor stars, white dwarfs, and hot sub-dwarfs. The training data cover the whole sky, all Galactic components, and almost the full magnitude range of the _Gaia_ DR3 XP sample of more than 217 million objects that also have parallaxes. We achieve median uncertainties (at $G\approx16$) of 0.20 mag in V-band extinction, 0.01 dex in logarithmic effective temperature, 0.20 dex in surface gravity, 0.18 dex in metallicity, and $12\%$ in mass (over the full _Gaia_ DR3 XP sample, with considerable variations in precision as a function of magnitude and stellar type).
We succeed in predicting competitive results based on _Gaia_ DR3 XP spectra compared to classical isochrone or spectral-energy distribution fitting methods we employed in earlier work, especially for the parameters $A_V$, $T_{\rm eff}$, and metallicity. Finally, we showcase some potential applications of this new catalogue (e.g. extinction maps, metallicity trends in the Milky Way, extended maps of young massive stars, metal-poor stars, and metal-rich stars).

</div>

<div id="div_fig1">

<img src="tmp_2407.06963/./im/shngboost_q23_xp_test_kiel_magbins.png" alt="Fig1" width="100%"/>

**Figure 1. -** Unfiltered _Kiel_ diagrams of the full _Gaia_ DR3 XP sample, in four broad bins of observed $G$ magnitude. (*fig:kiel*)

</div>
<div id="div_fig2">

<img src="tmp_2407.06963/./im/ngboost_performance_testdataset_all.png" alt="Fig11" width="100%"/>

**Figure 11. -** Performance of the ${\tt xgboost}$ models for the test datasets for each of the training labels. In the top row, we show the ${\tt SHBoost}$ (mean) parameters predicted from _Gaia_ DR3, 2MASS, and AllWISE against the spectroscopic values (test labels). The middle row shows the residuals (predicted $-$ "true"). The bottom row shows the formal uncertainties (derived with $\texttt{xgboost-distributions}$). Each panel contains logarithmic density plots of the full sample of 217 million stars. The lines and shaded regions in the middle and bottom rows show the running median and $1\sigma$ quantiles, respectively.
     (*fig:validation-onetoone*)

</div>
<div id="div_fig3">

<img src="tmp_2407.06963/./im/metallicity_calibration.png" alt="Fig8" width="100%"/>

**Figure 8. -** [Fe/H] calibration based on members of open and globular clusters. Top left panel: One-to-one comparison of the ${\tt xgb\_met}$ values for open and globular cluster members with spectroscopic [Fe/H] measurements from the literature (using [Joshi2024] for open clusters and [Harris2010] for globular clusters). Second row: residuals between ${\tt xgb\_met}$ and literature [Fe/H] (left panel) and ${\tt feh\_calibrated}$ and literature [Fe/H], showing the improvement achieved by our proposed calibration. Top right panel: comparison of the three metallicity distributions (literature, ${\tt xgb\_met}$, and ${\tt feh\_calibrated}$) for the cluster sample. (*fig:feh_calib*)

</div><div id="qrcode"><img src="https://api.qrserver.com/v1/create-qr-code/?size=100x100&data=https://arxiv.org/abs/2407.06963"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\noun}[1]{\textsc{\MakeLowercase{#1}}}$
$\newcommand{\citehere}[1]{\textcolor{RubineRed}{(CITE:~#1)}}$
$\newcommand{\bluenote}[1]{\textcolor{NavyBlue}{#1}}$
$\newcommand{\rednote}[1]{\textcolor{Red}{\textbf{#1}}}$
$\newcommand{\code}{\texttt}$
$\newcommand{\m}[1]{M_#1}$
$\newcommand{\msun}{M_\odot}$
$\newcommand{\bah}{[Ba/H]}$
$\newcommand{\bafe}{[Ba/Fe]}$
$\newcommand{\yh}{[Y/H]}$
$\newcommand{\yfe}{[Y/Fe]}$
$\newcommand{\sH}{[s/H]}$
$\newcommand{\sfe}{[s/Fe]}$
$\newcommand{\rl}{r\textsubscript{L}}$
$\newcommand{\rorb}{R\textsubscript{orb}}$</div>



<div id="title">

# Ba Enrichment in Gaia MS+WD Binaries: Tracing _s_-Process Element Production

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2407.07048-b31b1b.svg)](https://arxiv.org/abs/2407.07048)<mark>Appeared on: 2024-07-10</mark> -  _12 pages, 10 figures_

</div>
<div id="authors">

P. Rekhi, et al. -- incl., <mark>H.-W. Rix</mark>

</div>
<div id="abstract">

**Abstract:** A large population of intermediate-separation binaries, consisting of a main-sequence (MS) star and a white dwarf (WD), has recently emerged from Gaia's third data release (DR3), posing challenges to current models of binary evolution. Here we examine the _s_-process element abundances in these systems using data from GALAH DR3. Following refined sample analysis with parameter estimates based on GALAH spectra, we find a distinct locus where enhanced _s_-process elements depend on both the WD mass and metallicity, consistent with loci identified in previous asymptotic giant branch (AGB) nucleosynthesis studies with higher _s_-process yields. Notably, these enhanced abundances show no correlation with the systems' orbital parameters, supporting a history of accretion in intermediate-separation MS+WD systems. Consequently, our results form a direct observational evidence of a connection between AGB masses and _s_-process yields. We conclude by showing that the GALAH DR3 survey includes numerous Ba dwarf stars, within and beyond the mass range covered in our current sample, which can further elucidate _s_-process element distributions in MS+WD binaries.

</div>

<div id="div_fig1">

<img src="tmp_2407.07048/./figures/BaFe_m1_NCE_cond.png" alt="Fig8.1" width="25%"/><img src="tmp_2407.07048/./figures/BaFe_ecc_NCE_cond.png" alt="Fig8.2" width="25%"/><img src="tmp_2407.07048/./figures/BaFe_period_NCE_cond.png" alt="Fig8.3" width="25%"/><img src="tmp_2407.07048/./figures/BaFe_rL_NCE_cond.png" alt="Fig8.4" width="25%"/>

**Figure 8. -** Barium abundance as a function of (a) primary mass, (b) eccentricity, (c) period and (d) Roche-lobe radius of the WD progenitor normalized by the orbital separation. Median error bars are shown in grey. The stars plotted as blue dots belong to the region $M_2 < 0.75$ and [Fe/H] $< -0.08$, and those plotted as orange crosses are outside the given region (see Figure 5 and text for explanation). The restricted domain of the _s_-process enriched stars in the above plots is a direct consequence of the restriction on $M_2$.
    Roche-lobe radii are computed using the formula given by Eggleton (1983), with the initial-final mass relation used to obtain progenitor masses taken from [...] and Ramirez-Ruiz (2018). (*fig: Ba_fe orbital params*)

</div>
<div id="div_fig2">

<img src="tmp_2407.07048/./figures/feh_m2_BaFe_NCE.png" alt="Fig5.1" width="50%"/><img src="tmp_2407.07048/./figures/feh_m2_YFe_NCE.png" alt="Fig5.2" width="50%"/>

**Figure 5. -** (a) Barium and (b) Yttrium abundance as a function of iron abundance of the primary and mass of the secondary (which we assume is a WD). Median error bars are shown in grey. We note that enhanced Ba abundances are found almost exclusively in the region of $M_2 < 0.75$ and [Fe/H] $< -0.08$ (bounded in green), with Y enhanced stars extending to higher metallicities.  (*fig: feh_m2*)

</div>
<div id="div_fig3">

<img src="tmp_2407.07048/figures/s_proc_vs_BaY_fe.png" alt="Fig10" width="100%"/>

**Figure 10. -** Correlations between abundance measurements of _s_-process elements in the NCE sample. Although GALAH flags high abundances of _s_-process elements other than Ba and Y as possibly unreliable, we demonstrate here that their abundances are positively correlated with Ba and Y, justifying their use as representative of the _s_-process in this work.  (*fig: s-proc corner*)

</div><div id="qrcode"><img src="https://api.qrserver.com/v1/create-qr-code/?size=100x100&data=https://arxiv.org/abs/2407.07048"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Count the markdown exports whose file timestamp falls within the last `days` days.
files = glob('_build/html/*.md')
days = 7
# Timezone-aware reference time, taken once. The previous version compared a
# naive *UTC* file timestamp with a naive *local* `datetime.today()`, which
# shifted the age of every file by the local UTC offset.
now = datetime.now(timezone.utc)
max_age = timedelta(days=days)
recent = []
for fk in files:
    # st_ctime: metadata-change time on Unix, creation time on Windows.
    changed = datetime.fromtimestamp(os.stat(fk).st_ctime, tz=timezone.utc)
    if now - changed <= max_age:
        recent.append(fk)
# Reverse lexicographic order on the file path (same ordering as before; the
# age that used to be stored next to each name never took part in the sort).
res = sorted(recent, reverse=True)
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

122  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(filename, date)`` pairs; ``date`` is compared
                as text against ``str(date.today() - timedelta(days))``, so
                it is expected to be a ``YYYY-MM-DD`` string
    :param days: how many days back from today to include
    :returns: generator over the filenames, most recent date first
    """
    # Local import: other cells of this notebook bind the global name
    # `datetime` both to the module and to the class, so relying on the
    # global would make this function depend on cell execution order.
    from datetime import date, timedelta

    # Computed once instead of once per document.
    cutoff = str(date.today() - timedelta(days=days))
    for fname, day in sorted(lst, key=lambda x: x[1], reverse=True):
        if day < cutoff:
            # Sorted newest first: every remaining entry is older as well.
            break
        yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date of each document

    Each file is read line by line until the first line containing
    "Appeared on"; files without such a line are left out of the result.

    :param lst_file: iterable of paths to the files to scan
    :returns: list of ``(filename, date_text)`` pairs
    """
    def get_date(line):
        # Text between 'Appeared on:' and the closing '</mark>' tag.
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument; the previous version ignored it and looped
    # over the global `lst` instead.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    break  # only the first match per file is recorded
    return dates

from glob import glob
# Scan every exported document and keep those that appeared within `days` days.
days = 7
lst = glob('_build/html/*md')
dates = extract_appearance_dates(lst)
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

16  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` slides

    The wrapper ``<div>`` carries the carousel options in its
    ``data-flickity`` attribute; the slides are numbered from 1 and get the
    ids ``slide1`` ... ``slide<npub>``.

    :param npub: number of slides to generate
    :returns: HTML text, one element per line
    """
    cell_tpl = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    lines = [
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
        *(cell_tpl.format(k=num) for num in range(1, npub + 1)),
        "  </div>",
    ]
    return '\n'.join(lines)

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides

    The slides are numbered from 1 and get the ids ``slide1`` ...
    ``slide<npub>``.

    :param npub: number of grid items to generate
    :returns: HTML text, one element per line
    """
    item_tpl = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    lines = [
        """  <div class="grid"> """,
        *(item_tpl.format(k=num) for num in range(1, npub + 1)),
        "  </div>",
    ]
    return '\n'.join(lines)

In [13]:
# Fill the daily template with the current selection and write the 7-day archive page.
carousel = create_carousel(npub)
# Quoted, comma-separated file names (directory part stripped) and slide ids.
docs = ', '.join(f'"{path.split("/")[-1]:s}"' for path in res)
slides = ', '.join(map('"slide{}"'.format, range(1, npub + 1)))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

page = (
    page.replace("{%-- carousel:s --%}", carousel)
        .replace("{%-- suptitle:s --%}", "7-day archives")
        .replace("{%-- docs:s --%}", docs)
        .replace("{%-- slides:s --%}", slides)
)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page as the 7-day archive, restricted to the documents of the last day.
days = 1
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
# Quoted, comma-separated file names (directory part stripped) and slide ids.
docs = ', '.join(f'"{path.split("/")[-1]:s}"' for path in res)
slides = ', '.join(map('"slide{}"'.format, range(1, npub + 1)))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

page = (
    page.replace("{%-- carousel:s --%}", carousel)
        .replace("{%-- suptitle:s --%}", "Daily")
        .replace("{%-- docs:s --%}", docs)
        .replace("{%-- slides:s --%}", slides)
)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

5  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# Number of papers requested for the grid.
npub = 6
# Most recent first. `reversed(sorted(...))` is kept (rather than
# `sorted(..., reverse=True)`) so the order among documents that share a date
# stays the same. The slice size now follows `npub` instead of a second
# hard-coded 6.
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), npub)]
# Fewer than `npub` documents may be available: size the grid, the slide ids
# and the title on what was actually selected so that every slide has a
# document to render.
nsel = len(res)
print(nsel, f" of the {npub} requested publications selected.")

grid = create_grid(nsel)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, nsel + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {nsel:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
