# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big: raise PIL's pixel-count limit so that
# very large figures can still be handled.
# NOTE(review): presumably this avoids Pillow's decompression-bomb check — confirm.
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error category for a failed affiliation check."""

def validation(source: str):
    """Check the affiliations of a source before its TeX code is parsed.

    Delegates to :func:`mpia.affiliation_verifications` and turns any
    result other than ``True`` into an exception.

    :param source: source handed over by the parser; it is forwarded
                   unchanged to the verification
    :raises AffiliationError: if the verification does not return ``True``;
                              the message carries the returned value
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # f-string rather than ``+`` so that a non-string result (e.g. False)
        # still produces an AffiliationError instead of a TypeError.
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# Always show AffiliationWarning, even if the same warning was already emitted.
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: Arxiv paper identifier (e.g. ``2507.05830``)
    :returns: HTML snippet (usable in markdown) showing the QR code image
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    target = f"https://arxiv.org/abs/{paper_id}"
    # The whole ``src`` value is quoted as one attribute. Quoting only the
    # target would leave the attribute unquoted and put literal quote
    # characters into the data encoded by the QR code.
    txt = f'<img src="{url}{target}">'
    return '<div id="qrcode">' + txt + '</div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Strip the CJK LaTeX environment markers from a string

    Only the ``\\begin{CJK}{UTF8}{gbsn}`` / ``\\end{CJK}`` markers are
    removed; the text they enclose is kept.
    List may need to grow.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_block = re.compile(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})")
    return cjk_block.sub(r"\2", text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last one is reduced to its initial(s); hyphenated
    tokens keep their hyphen (``Anna-Barbara`` -> ``A.-B.``). A first
    parenthesised part (often a non western spelling of the name) is kept
    as a suffix after the family name.

    Malformed input does not raise: an empty or blank name gives an empty
    string, an unbalanced ``(`` is left untouched, and empty fragments
    around hyphens are ignored.

    :param name: full name
    :returns: initials
    """
    initials = []
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.findall(r"\((.*?)\)", name)
        # no closing parenthesis: nothing to extract, keep the name as is
        if found:
            suffix = found[0]
            name = name.replace(f"({suffix})", '')
    split = name.split()
    if not split:
        # nothing but (possibly) a parenthesised part
        return f"({suffix})" if suffix else ''
    for token in split[:-1]:
        if '-' in token:
            # skip empty fragments produced by leading/trailing/double hyphens
            current = '-'.join(k[0] + '.' for k in token.split('-') if k)
        else:
            current = token[0] + '.'
        if current:
            initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff (Mitarbeiter) list from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against this list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    A name is rejected as soon as it contains one of the listed family
    names (IT, administration, technical staff).

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    excluded = ('Licht', 'Binroth', 'Witzel', 'Jordan',
                'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                'Dehen', 'Beckmann', 'Jager', 'Jäger',
                )
    return not any(family_name in name for family_name in excluded)

def add_author_to_list(author_list: list) -> list:
    """ Make sure the manually maintained extra authors are in the list

    The given list is modified in place and also returned.

    :param author_list: list of authors
    :returns: updated list of authors
    """
    extra_authors = ('T. Henning',)
    missing = [name for name in extra_authors if name not in author_list]
    author_list.extend(missing)
    return author_list

# Author names come from the MPIA website staff list
# (mpia.get_mpia_mitarbeiter_list() already does some filtering); entries that
# do not pass filter_non_scientists are dropped, then the authors whose MPIA
# name and name on papers are inconsistent are added by hand.
mpia_authors = add_author_to_list(
    [entry[1]
     for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# new papers from arXiv, optionally completed by manually selected ones
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend(list(map(get_paper_from_identifier, add_paper_refs)))

def robust_call(fn, value, *args, **kwargs):
    """Apply ``fn`` to ``value``, falling back to ``value`` itself on error.

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of the call, also the fallback result
    :returns: the result of the call, or ``value`` unchanged if the call
              raised an ``Exception``
    """
    try:
        outcome = fn(value, *args, **kwargs)
    except Exception:
        # best effort: keep the input rather than failing
        outcome = value
    return outcome

# Select the new papers with at least one author matching the MPIA list.
candidates = []
for paperk in new_papers:
    # Check author list with their initials
    # (robust_call keeps the original name when mpia.get_initials raises)
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # an entry counts as a match when its highlighted text contains 'mark'
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    # from here on the paper carries the highlighted author list
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

J. Li  ->  J. Li  |  ['J. Li']
K. Kreckel  ->  K. Kreckel  |  ['K. Kreckel']


T. Henning  ->  T. Henning  |  ['T. Henning']
C. Gieser  ->  C. Gieser  |  ['C. Gieser']
D. Semenov  ->  D. Semenov  |  ['D. Semenov']
G. Guiglion  ->  G. Guiglion  |  ['G. Guiglion']
M. Samland  ->  M. Samland  |  ['M. Samland']
M. Benisty  ->  M. Benisty  |  ['M. Benisty']
W. Brandner  ->  W. Brandner  |  ['W. Brandner']
G. Chauvin  ->  G. Chauvin  |  ['G. Chauvin']
P. Garcia  ->  A. P. Garcia  |  ['P. Garcia']
T. Henning  ->  T. Henning  |  ['T. Henning']
L. Kreidberg  ->  L. Kreidberg  |  ['L. Kreidberg']
P. Mollière  ->  P. Mollière  |  ['P. Mollière']
Arxiv has 74 new papers today
          5 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Process every candidate: fetch the source, check affiliations, parse the
# TeX and build the markdown summary.
# documents collects (paper_id, markdown) of the papers that went through,
# failed collects (paper, reason) of the others.
documents = []
failed = []
for paper in tqdm(candidates):
    # normalise the identifier: lower case, no 'arxiv:' prefix, no surrounding blanks
    # NOTE(review): r'\n' is a raw string, so this removes a literal backslash
    # followed by 'n', not a newline character — confirm this is intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # reuse the folder if it is already there, otherwise retrieve the source
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # failed affiliation check: record the paper and move on to the next one
            # NOTE(review): msg is built but never used in this cell.
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well:
        # when the number of parsed authors differs from the arXiv record,
        # use the (already highlighted) arXiv author list instead
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # fall back on the arXiv abstract when none was parsed from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # comment line: arXiv badge, appearance date and the arXiv comments if any
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: on failure the text is kept as is)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # any other problem: warn, record the paper as failed, continue with the next
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/5 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2507.05323


not a gzip file


Retrieving document from  https://arxiv.org/e-print/2507.05414


extracting tarball to tmp_2507.05414...

 done.
Retrieving document from  https://arxiv.org/e-print/2507.05830


extracting tarball to tmp_2507.05830... done.


T. Henning  ->  T. Henning  |  ['T. Henning']
C. Gieser  ->  C. Gieser  |  ['C. Gieser']
D. Semenov  ->  D. Semenov  |  ['D. Semenov']


Found 151 bibliographic references in tmp_2507.05830/aa54018-25.bbl.
Retrieving document from  https://arxiv.org/e-print/2507.05901


extracting tarball to tmp_2507.05901...

 done.


G. Guiglion  ->  G. Guiglion  |  ['G. Guiglion']


Found 67 bibliographic references in tmp_2507.05901/aa55695-25.bbl.
Retrieving document from  https://arxiv.org/e-print/2507.06206


extracting tarball to tmp_2507.06206...

 done.


Found 78 bibliographic references in tmp_2507.06206/aa55064-25.bbl.


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
import os

# One log file per day, named after today's date.
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# The log folder may not exist yet: create it so that opening the file
# below cannot fail on a missing directory.
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# utf-8 is explicit because author names and abstracts contain non-ASCII
# characters and the platform default encoding may not support them.
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a document was generated
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that identical reasons end up under a single heading
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, any other failure in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.05830-b31b1b.svg)](https://arxiv.org/abs/2507.05830) | **PRODIGE VI -- Envelope to Disk with NOEMA: VI. The Missing Sulfur Problem**  |
|| J. J. Miranzo-Pastor, et al. -- incl., <mark>T. Henning</mark>, <mark>C. Gieser</mark>, <mark>D. Semenov</mark> |
|*Appeared on*| *2025-07-09*|
|*Comments*| *Main text: 20 pages, 11 figures. Appendixes: 10 pages, 5 figures. Article accepted for publication on Astronomy & Astrophysics*|
|**Abstract**|            Determining the amount of sulfur in volatiles and refractories in the ISM remains one of the main problems in astrochemistry. The detection of H$_2$S ices, which are thought to be one of the main sulfur reservoirs, has not been achieved yet, and the only S-bearing species detected in the ices to date is OCS. PRODIGE large survey observations with NOEMA of several Class 0/I protostars in the Perseus Molecular Cloud provide a perfect opportunity to study the H$_2$S and OCS composition of the ices through the volatiles sublimated in the warm inner core (T$>$100K, n $\sim10^6$cm$^{-3}$) of these protostars. Our aim is to determine the H$_2$S/OCS ratio in the warm inner core of 24 protostars in order to study how it is affected by different factors during its evolution. We used the NOEMA millimeter observations from the PRODIGE program of H$_2$S, H$_2^{33}$S, OCS, OC$^{33}$S and OC$^{34}$S to estimate the H$_2$S and OCS column densities in the warm inner cores. We used SO and SO$_2$ data from the ALMA archive to give a rough estimate of the total sulfur abundance. We explore the chemistry of H$_2$S and OCS in the warm cores using chemical and dynamical simulations of the collapse of a dense core to form a protostar. The estimated H$_2$S/OCS ratio reveals a segregation of the sources into ``OCS-poor'' and ``OCS-rich'' protostars, where the OCS-poor protostars present higher H$_2$S/OCS ratios than the OCS-rich ones. Total sulfur abundance is always dominated by either H$_2$S or OCS, grows with evolution during the Class 0 phase up to $D_S<8$, and decreases again in the Class I. Simulations show that temperature changes in the pre-stellar phase and during the collapse can produce substantial differences in the H$_2$S and OCS abundances and in the H$_2$S/OCS ratio. Our analysis shows that the H$_2$S/OCS ratio is strongly influenced by the environment and the initial conditions of the cloud.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.05901-b31b1b.svg)](https://arxiv.org/abs/2507.05901) | **Survey of Surveys. II. Stellar parameters for 23 millions of stars**  |
|| A. Turchi, et al. -- incl., <mark>G. Guiglion</mark> |
|*Appeared on*| *2025-07-09*|
|*Comments*| *19 pages, 17 figures*|
|**Abstract**|            In the current panorama of large surveys, the vast amount of data obtained with different methods, data types, formats, and stellar samples, is making an efficient use of the available information difficult. The Survey of Surveys is a project to critically compile survey results in a single catalogue, facilitating the scientific use of the available information. In this second release, we present two new catalogs of stellar parameters (Teff, logg, and [Fe/H]). To build the first catalog, SoS-Spectro, we calibrated internally and externally stellar parameters from five spectroscopic surveys (APOGEE, GALAH, Gaia-ESO, RAVE, and LAMOST) and externally on the PASTEL database. The second catalog, SoS-ML catalog, is obtained by using SoS-Spectro as a reference to train a multi-layer perceptron, which predicts stellar parameters based on two photometric surveys, SDSS and SkyMapper. As a novel approach, we build on previous parameters sets, from Gaia DR3 and Andrae et al. (2023), aiming to improve their precision and accuracy. We obtain a catalog of stellar parameters for around 23 millions of stars, which we make publicly available. We validate our results with several comparisons with other machine learning catalogs, stellar clusters, and astroseismic samples. We find substantial improvements in the parameters estimates compared to other Machine Learning methods in terms of precision and accuracy, especially in the metal-poor range, as shown in particular when validating our results with globular clusters. We believe that there are two reasons behind our improved results at the low-metallicity end: first, our use of a reference catalog, the SoS-Spectro, which is calibrated using high-resolution spectroscopic data; and second, our choice to build on pre-existing parameter estimates from em Gaia and Andrae et al., rather than attempting to obtain our predictions from survey data alone.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.06206-b31b1b.svg)](https://arxiv.org/abs/2507.06206) | **Direct imaging discovery of a young giant planet orbiting on Solar System scales**  |
|| T. Stolker, et al. -- incl., <mark>M. Samland</mark>, <mark>M. Benisty</mark>, <mark>W. Brandner</mark>, <mark>G. Chauvin</mark>, <mark>P. Garcia</mark>, <mark>T. Henning</mark>, <mark>L. Kreidberg</mark>, <mark>P. Mollière</mark> |
|*Appeared on*| *2025-07-09*|
|*Comments*| *13 pages, 11 figures, accepted for publication in A&A*|
|**Abstract**|            HD 135344 AB is a young visual binary system that is best known for the protoplanetary disk around the secondary star. The circumstellar environment of the A0-type primary star, on the other hand, is already depleted. HD 135344 A is therefore an ideal target for the exploration of recently formed giant planets because it is not obscured by dust. We searched for and characterized substellar companions to HD 135344 A down to separations of about 10 au. We observed HD 135344 A with VLT/SPHERE in the $H23$ and $K12$ bands and obtained $YJ$ and $YJH$ spectroscopy. In addition, we carried out VLTI/GRAVITY observations for the further astrometric and spectroscopic confirmation of a detected companion. We discovered a close-in young giant planet, HD 135344 Ab, with a mass of about 10 $M_\mathrm{J}$. The multi-epoch astrometry confirms the bound nature based on common parallax and common proper motion. This firmly rules out the scenario of a non-stationary background star. The semi-major axis of the planetary orbit is approximately 15-20 au, and the photometry is consistent with that of a mid L-type object. The inferred atmospheric and bulk parameters further confirm the young and planetary nature of the companion. HD 135344 Ab is one of the youngest directly imaged planets that has fully formed and orbits on Solar System scales. It is a valuable target for studying the early evolution and atmosphere of a giant planet that could have formed in the vicinity of the snowline.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.05414-b31b1b.svg)](https://arxiv.org/abs/2507.05414) | **Physical properties of HII regions at sub-kpc scales using integral field spectroscopy on IC 342**  |
|| J. K. Barrera-Ballesteros, et al. -- incl., <mark>K. Kreckel</mark> |
|*Appeared on*| *2025-07-09*|
|*Comments*| *Accepted for publication at RMxAA. 27 pages, 18 Figures*|
|**Abstract**|            In this study we use Integral Field Spectroscopic (IFS) observations for one of the closest galaxy to us, the grand design spiral IC 342, to derive physical properties of HII regions at sub-kpc scales. This IFS data represents, to our knowledge, the most comprehensive observational effort in the optical for this galaxy. The final IFS datacube consists of 349 individual pointings using the IFS instrumentation from the SDSS-IV MaNGA survey. Using a prototype of the data analysis pipeline that will be devoted to the SDSS-V Local Volume Mapper (LVM) survey, we measure different observables from the emission line in the optical. In particular, using the flux map of the H$\alpha$ emission line, we derive the location and sizes of H ii region candidates for IC 342. Using the integrated flux for different emission lines within each region, we derived the radial distribution of different physical properties from the ionized gas (e.g., optical extinction, H$\alpha$ luminosity, oxygen abundance, etc). Comparing with larger samples of galaxies with IFS data, our results suggest that physical properties of the ionized gas of IC 342 are similar to galaxies with similar stellar mass in the nearby universe.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: '69117' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.05323-b31b1b.svg)](https://arxiv.org/abs/2507.05323) | **SKYSURF-10: A Novel Method for Measuring Integrated Galaxy Light**  |
|| D. D. Carter, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2025-07-09*|
|*Comments*| *62 pages, 18 figures, 10688 words*|
|**Abstract**|            We describe the drizzling pipeline and contents of the drizzled database for Hubble Space Telescope Cycle 27-29 Archival Legacy project "SKYSURF," the largest archival project ever approved for Hubble. SKYSURF aims to investigate the extragalactic background light (EBL) using all 143,914 ACSWFC, WFC3UVIS, and WFC3IR images that have been taken by Hubble since its launch in 2002. SKYSURF has produced 38,027 single-visit mosaics and 7,893 multi-visit mosaics across 28 ACSWFC, WFC3UVIS, and WFC3IR filters using non-standard drizzling methods, which include preserving the lowest sky-level of each visit/group in the drizzled products, applying wider apertures for cosmic ray rejection, correcting effects caused by charge transfer efficiency (CTE) degradation, and removing potential light gradients from input images via sky-map subtraction. We generate source catalogs for all drizzled products with Source Extractor and provide updated star-galaxy separation parameters and integrated galaxy light (IGL) estimates for 25 of the 28 SKYSURF filters (wavelength range 0.2-1.7 microns) using a novel IGL fitting method made possible by the vast SKYSURF dataset. We discuss the data processing and data analysis challenges encountered, detail our solutions, and offer suggestions that may facilitate future large-scale IGL investigations with Webb, SPHEREx, and Roman.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error not a gzip file</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    The figures referenced in the markdown text (``[Fig...](path)`` links
    and ``<img src="path" .../>`` tags) are copied below ``directory``,
    keeping their relative path; online figures and missing files are
    skipped. The markdown text itself is written to ``directory/md_fname``.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to write
    :param directory: output directory, created (with its parents) if needed
    :raises RuntimeError: if ``directory`` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs: the output directory may be nested in folders that do
        # not exist yet, which os.mkdir cannot handle
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # keep the relative path of the figure below the output directory
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    # explicit utf-8: the text contains non-ASCII characters (author names)
    with open(os.path.join(directory, md_fname), 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# write the markdown file of every generated document (and copy its figures)
# below _build/html/
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2507.05830/./figures/histoCompPreMHD.png', 'tmp_2507.05830/./figures/comparison_15K.png', 'tmp_2507.05830/./figures/comparison_25K.png', 'tmp_2507.05830/./figures/maps/B1-bS_mean-cropped.png', 'tmp_2507.05830/./figures/maps/HH211_mean-cropped.png', 'tmp_2507.05830/./figures/maps/IRAS2A_mean-cropped.png', 'tmp_2507.05830/./figures/maps/IRAS4B_mean-cropped.png']
copying  tmp_2507.05830/./figures/histoCompPreMHD.png to _build/html/
copying  tmp_2507.05830/./figures/comparison_15K.png to _build/html/
copying  tmp_2507.05830/./figures/comparison_25K.png to _build/html/
copying  tmp_2507.05830/./figures/maps/B1-bS_mean-cropped.png to _build/html/
copying  tmp_2507.05830/./figures/maps/HH211_mean-cropped.png to _build/html/
copying  tmp_2507.05830/./figures/maps/IRAS2A_mean-cropped.png to _build/html/
copying  tmp_2507.05830/./figures/maps/IRAS4B_mean-cropped.png to _build/html/
exported in  _build/html/2507.05830.md
    + _build/html/tmp_2507.05830/./figures/histoCompPreM

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# render the markdown text of every generated document for a quick check
for entry in documents:
    display(Markdown(entry[1]))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\citet}[1]{\textcolor[rgb]{0,0,1}{\oldcitet{#1}}}$
$\newcommand{\citep}[1]{\textcolor[rgb]{0,0,1}{\oldcitep{#1}}}$
$\newcommand{\citealp}[1]{\textcolor[rgb]{0,0,1}{\oldcitealp{#1}}}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\arraystretch}{1}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\arraystretch}{1.5}$</div>



<div id="title">

# PRODIGE - Envelope to Disk with NOEMA$\thanks{Based on observations carried out under project number L19MB with the IRAM NOEMA Interferometer. IRAM is supported by INSU/CNRS (France), MPG (Germany) and IGN (Spain)}$: VI: The missing Sulfur Problem

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2507.05830-b31b1b.svg)](https://arxiv.org/abs/2507.05830)<mark>Appeared on: 2025-07-09</mark> -  _Main text: 20 pages, 11 figures. Appendixes: 10 pages, 5 figures. Article accepted for publication on Astronomy & Astrophysics_

</div>
<div id="authors">

J. J. Miranzo-Pastor, et al. -- incl., <mark>T. Henning</mark>, <mark>C. Gieser</mark>, <mark>D. Semenov</mark>

</div>
<div id="abstract">

**Abstract:** Determining the amount of sulfur in volatiles and refractories in the interstellar medium remains one of the main problems in astrochemistry. The detection of $H_2$ S ices, which are thought to be one of the main sulfur reservoirs, is still a great challenge and has not been achieved yet, and the only sulfur-bearing species detected in the ices to date is OCS. PRODIGE (PROtostars and DIsks: Global Evolution) large survey observations with the NOrthern Extended Millimeter Array (NOEMA) of several Class 0/I protostars in the Perseus Molecular Cloud provide a perfect opportunity to study the $H_2$ S and OCS composition of the ices through the volatiles sublimated in the warm inner core (T>100K, $n\sim$ 10 $^6$ cm $^{-3}$ ) of these protostars. Our aim is to determine the $H_2$ S/OCS ratio in the warm inner core of the protostars of our sample in order to study how it is affected by different factors during its evolution. We used the NOEMA millimeter observations from the PRODIGE program of $H_2$ S, $H_2^{33}$ S, OCS, OC $^{33}$ S and OC $^{34}$ S to estimate the $H_2$ S and OCS column densities in the warm inner core of 24 protostars of Perseus. In addition, we used SO and $SO_2$ data from the Atacama Large Millimeter/submillimeter Array (ALMA) archive to complete the sulfur budget and give a rough estimate of the total sulfur abundance in each of the sources. We explore the chemistry of $H_2$ S and OCS in the warm cores using chemical and dynamical simulations of the collapse of a dense core to form a protostar. The compound $H_2$ S is detected in 21 protostars and OCS in 17 protostars of our sample. The estimated $H_2$ S/OCS ratio reveals a segregation of the sources into "OCS-poor" and "OCS-rich" protostars, where the OCS-poor protostars present higher $H_2$ S/OCS ratios than the OCS-rich ones. 
Total sulfur abundance, which is always dominated by either $H_2$ S or OCS, grows with evolution during the Class 0 phase, reaching a minimum depletion of a factor $<$ 8 in the Class 0/I objects, and decreasing again in the Class I. Simulations show that temperature changes in the pre-stellar phase and during the collapse can produce substantial differences in the $H_2$ S and OCS (ice and gas-phase) abundances and in the $H_2$ S/OCS ratio. Our analysis shows that the $H_2$ S/OCS ratio is strongly influenced by the environment and the initial conditions of the cloud.

</div>

<div id="div_fig1">

<img src="tmp_2507.05830/./figures/histoCompPreMHD.png" alt="Fig10" width="100%"/>

**Figure 10. -** Evolution of the models with different warming curves. _Left:_ Initial ice and gas-phase components of the $H_2$S, OCS, SO and $SO_2$ species in the inner core before the collapse. Comparison with the final abundance of these species in the gas-phase after the Fiducial MHD collapse. _Center:_ Comparison of the Fiducial MHD warming curve with the rest of warming curves explored in the section. _Right:_ Comparison of the Fiducial MHD density curve with the rest of warming curves explored in the section. The different density curves are obtained using the barotropic equation of state from $\citet${Machida2006}.
             (*fig:histoCompPreMHD*)

</div>
<div id="div_fig2">

<img src="tmp_2507.05830/./figures/comparison_15K.png" alt="Fig13.1" width="50%"/><img src="tmp_2507.05830/./figures/comparison_25K.png" alt="Fig13.2" width="50%"/>

**Figure 13. -** Comparison between the 15 K and 25 K models. Initial ice and gas-phase components of the $H_2$S, OCS, SO, $SO_2$ and $H_2$$S_3$ species in the inner core before the collapse. Comparison with the final abundance of these species in the gas-phase after the collapse. There is a significant fall of $H_2$S, OCS, SO and $SO_2$ in the ices after the 25 K pre-phase, compared with the 15 K pre-stellar phase. On the contrary, $H_2$$S_3$ is formed in the ices during the warmer pre-phase, something that did not happen in the cooler one. After the collapse of the 25 K model, the final $H_2$S, SO and $SO_2$ drops in $\sim$1 order of magnitude with respect to the 15 K simulation. This is not true for OCS, whose abundance falls more than 2 orders of magnitude. $H_2$$S_3$ final gas-phase abundance is greater in the warmer model, reaching an abundance of $\sim$5$\times$10$^{-6}$.Initial Temperature before the pre-phase: 15 KInitial Temperature before de pre-phase: 25 K (*fig:model_comparison*)

</div>
<div id="div_fig3">

<img src="tmp_2507.05830/./figures/maps/B1-bS_mean-cropped.png" alt="Fig6.1" width="25%"/><img src="tmp_2507.05830/./figures/maps/HH211_mean-cropped.png" alt="Fig6.2" width="25%"/><img src="tmp_2507.05830/./figures/maps/IRAS2A_mean-cropped.png" alt="Fig6.3" width="25%"/><img src="tmp_2507.05830/./figures/maps/IRAS4B_mean-cropped.png" alt="Fig6.4" width="25%"/>

**Figure 6. -** Emission of the main species ($H_2$S, and both OCS lines) in the inner core of protostars B1bS, HH211MMS, IRAS2A, IRAS4B. The colormap represents the >3$\sigma$ emission integrated images in a 4$\arcsec\times$4$\arcsec$ square region. The color scale, shown at the right of each map, is the brightness temperature in K. The white contours represent 10\%, 30\%, 50\%, 70\% and 90\% of the peak temperature. The pink star shows the position of the protostar, determined by the position of the maximum emission in the continuum $\citep${Tobin2016}. The green cross marks the point with maximum emission of the line.B1bSHH211MMSIRAS2AIRAS4B (*fig:moment-0-maps*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2507.05830"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$</div>



<div id="title">

# Survey of Surveys. II. Stellar parameters for 23 millions of stars

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2507.05901-b31b1b.svg)](https://arxiv.org/abs/2507.05901)<mark>Appeared on: 2025-07-09</mark> -  _19 pages, 17 figures_

</div>
<div id="authors">

A. Turchi, et al. -- incl., <mark>G. Guiglion</mark>

</div>
<div id="abstract">

**Abstract:** In the current panorama of large surveys, the vast amount of data obtained with different methods, data types, formats, and stellar samples, is making an efficient use of the available information difficult. The Survey of Surveys is a project to critically compile survey results in a single catalogue, facilitating the scientific use of the available information. In this second release, we present two new catalogs of stellar parameters (T$_{\rm{eff}}$, log $g$, and [Fe/H]). To build the first catalog, SoS-Spectro, we calibrated internally and externally stellar parameters from five spectroscopic surveys (APOGEE, GALAH, Gaia-ESO, RAVE, and LAMOST). Our external calibration on the PASTEL database of high-resolution spectroscopy ensures better performances for metal-poor red giants. The second catalog, SoS-ML catalog, is obtained by using SoS-Spectro as a reference to train a multi-layer perceptron, which predicts stellar parameters based on two photometric surveys, SDSS and SkyMapper. As a novel approach, we build on previous parameters sets, from *Gaia* DR3 and *Andrae et al. (2023)*, aiming to improve their precision and accuracy. We obtain a catalog of stellar parameters for around 23 millions of stars, which we make publicly available. We validate our results with several comparisons with other machine learning catalogs, stellar clusters, and astroseismic samples. We find substantial improvements in the parameters estimates compared to other Machine Learning methods in terms of precision and accuracy, especially in the metal-poor range, as shown in particular when validating our results with globular clusters.
We believe that there are two reasons behind our improved results at the low-metallicity end: first, our use of a reference catalog—the SoS-Spectro—which is calibrated using high-resolution spectroscopic data; and second, our choice to build on pre-existing parameter estimates from *Gaia* and Andrae et al., rather than attempting to obtain our predictions from survey data alone.

</div>

<div id="div_fig1">

<img src="tmp_2507.05901/./KIEL.png" alt="Fig13" width="100%"/>

**Figure 13. -** Kiel Diagram for the SM (above) and SDSS (below) full sample, colored with the estimated errors on the three parameters. From left to right: T$_{\rm{eff}}$, log $g$, and [Fe/H]. Hexagonal bins are colored based on the average of the errors inside the bin. The black line approximately encloses the region covered by the respective "train\_area" flags described in Sect.\ref{sec:cat}. (*KielA*)

</div>
<div id="div_fig2">

<img src="tmp_2507.05901/./metallicity_summary_v2.png" alt="Fig15.1" width="50%"/><img src="tmp_2507.05901/./metallicity_summary_saga_v2.png" alt="Fig15.2" width="50%"/>

**Figure 15. -** Comparison of the [Fe/H] predicted by this work and by three literature ML catalogs with the PASTEL (top row) and SAGA (bottom row) databases. Our results are shown in the leftmost plots, where the grey points show the results before, and the purple ones after, augmenting the reference SoS-Spectro catalog with very metal-poor stars. The median [Fe/H] differences for the before and after augmentation samples (computed only for [Fe/H] < –2) are indicated in the top-left corner of the first plot in each row. A similar comparison is presented for the following catalogs: [Andrae, Rix and Chandra (2023)]() (blue, center-left panels), [Gu, Fan and Zhao (2025)]() (red, center-right panels), and [Zhang, Green and Rix (2023)]() (green, rightmost panels). (*fig:metal_sum*)

</div>
<div id="div_fig3">

<img src="tmp_2507.05901/./zhang_comp_ondens2.png" alt="Fig10.1" width="33%"/><img src="tmp_2507.05901/./gu_comp_ondens.png" alt="Fig10.2" width="33%"/><img src="tmp_2507.05901/./andrae_comp_ondens2.png" alt="Fig10.3" width="33%"/>

**Figure 10. -** Comparison of atmospheric parameters predicted by our ML approach (abscissae) and literature ML catalogs (ordinates). The left plots show T$_{\rm{eff}}$ comparisons, the middle ones log $g$, and the right ones [Fe/H]. The top row shows the comparison with [Zhang, Green and Rix (2023)](), the middle row with [Gu, Fan and Zhao (2025)](), and the bottom row with [Andrae, Rix and Chandra (2023)](). The dashed line represents the 1:1 relationship. (*fig:litml*)

</div><div id="qrcode"><img src="https://api.qrserver.com/v1/create-qr-code/?size=100x100&data=https://arxiv.org/abs/2507.05901"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\todo}[1]{\textcolor{red}{#1}}$
$\newcommand{\ffdeg}{\mbox{\ensuremath{.\!\!◦}}}$
$\newcommand{\ffarcs}{\mbox{\ensuremath{.\!\!^{\prime\prime}}}}$
$\newcommand{\arraystretch}{1.25}$
$\newcommand{\arraystretch}{1.25}$
$\newcommand{\arraystretch}{1.25}$
$\newcommand{\arraystretch}{1.25}$
$\newcommand{\arraystretch}{1.25}$
$\newcommand{\arraystretch}{1.25}$</div>



<div id="title">

# Direct imaging discovery of a young giant planet orbiting on Solar System scales

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2507.06206-b31b1b.svg)](https://arxiv.org/abs/2507.06206)<mark>Appeared on: 2025-07-09</mark> -  _13 pages, 11 figures, accepted for publication in A&A_

</div>
<div id="authors">

T. Stolker, et al. -- incl., <mark>M. Samland</mark>, <mark>M. Benisty</mark>, <mark>W. Brandner</mark>, <mark>G. Chauvin</mark>, <mark>P. Garcia</mark>, <mark>T. Henning</mark>, <mark>L. Kreidberg</mark>, <mark>P. Mollière</mark>

</div>
<div id="abstract">

**Abstract:** HD 135344 AB is a young visual binary system that is best known for the protoplanetary disk around the secondary star. The circumstellar environment of the A0-type primary star, on the other hand, is already depleted. HD 135344 A is therefore an ideal target for the exploration of recently formed giant planets because it is not obscured by dust. We searched for and characterized substellar companions to HD 135344 A down to separations of about 10 au. We observed HD 135344 A with VLT/SPHERE in the $H23$ and $K12$ bands and obtained $YJ$ and $YJH$ spectroscopy. In addition, we carried out VLTI/GRAVITY observations for the further astrometric and spectroscopic confirmation of a detected companion. We discovered a close-in young giant planet, HD 135344 Ab, with a mass of about 10 $M_\mathrm{J}$ . The multi-epoch astrometry confirms the bound nature based on common parallax and common proper motion. This firmly rules out the scenario of a non-stationary background star. The semi-major axis of the planetary orbit is approximately 15--20 au, and the photometry is consistent with that of a mid L-type object. The inferred atmospheric and bulk parameters further confirm the young and planetary nature of the companion. HD 135344 Ab is one of the youngest directly imaged planets that has fully formed and orbits on Solar System scales. It is a valuable target for studying the early evolution and atmosphere of a giant planet that could have formed in the vicinity of the snowline.

</div>

<div id="div_fig1">

<img src="tmp_2507.06206/./fig_relative_astrometry.png" alt="Fig2" width="100%"/>

**Figure 2. -** Astrometric measurements relative to the first epoch. The crosses show the positions of HD 135344 Ab, which moves eastward. The colored circles show the positions of the suspected background sources in the IRDIS field of view, which are connected with dotted lines between epochs. The dashed line shows the track for a stationary background source, and the gray circles and squares indicate the three SPHERE and three GRAVITY epochs, respectively, after the initial detection. The small dots represent a sample of Gaia sources within 0.1 deg from HD 135344 A (see main text for details). The colors indicate a specific epoch, for example, all pink markers correspond to 2021 Jul 16. (*fig:relative_astrometry*)

</div>
<div id="div_fig2">

<img src="tmp_2507.06206/./fig_age_separation.png" alt="Fig7" width="100%"/>

**Figure 7. -** Age vs. semi-major axis of directly imaged planets. We selected young companions with planetary masses (except for HD 206893 B), $M \lesssim 13$ $M_\mathrm{J}$, planet-to-star mass ratios of $q \lesssim \frac{1}{25}$, and orbits smaller than $\approx$100 au. The locations of the giant planets in the Solar System are indicated with vertically dashed lines, and the gray areas from left to right are separated by the approximate locations of the $H_2$O and $CO_2$ ice lines of an A0-type star ([Öberg, Murray-Clay and Bergin 2011]()). The semi-major axes were retrieved from `whereistheplanet` when available ([Wang, Kulikauskas and Blunt 2021]()), and from [Wang, Vigan and Lacour (2021)](), [Hinkley, Lacour and Marleau (2023)](), [Currie, Brandt and Brandt (2023)](), and [Nielsen and Wahhaj (2023)]() otherwise. The ages were adopted from [Pecaut, Mamajek and Bubar (2012)](), [Bell, Mamajek and Naylor (2015)](), [Macintosh, Graham and Barman (2015)](), [Chauvin, Desidera and Lagrange (2017)](), [Müller, Keppler and Henning (2018)](), [Garufi, Benisty and Pinilla (2018)](), [Zuckerman (2019)](), [Miret-Roig, Galli and Brandner (2020)](), [Brandt et al. (2021)](), and [Hinkley, Lacour and Marleau (2023)](). Systematic uncertainties on the ages (e.g., due to uncertain cluster membership) are not reflected by the error bars. (*fig:age_separation*)

</div>
<div id="div_fig3">

<img src="tmp_2507.06206/./fig_sed_planet.png" alt="Fig11" width="100%"/>

**Figure 11. -** Near-infrared spectral energy distribution of HD 135344 Ab. The black line shows the best-fit model spectrum from Sonora Diamondback, and the gray lines show 30 random samples from the posterior distribution, both shown at $R = 500$. The SPHERE/IFS and GRAVITY spectra are shown with circular markers, and the latter are downsampled for clarity. The SPHERE/IRDIS photometry is shown by square markers, with horizontal error bars indicating the FWHM of the corresponding filter profiles. The bottom panel shows the residuals of the best-fit model, calculated at the resolution and wavelength sampling of the data. (*fig:sed_planet*)

</div><div id="qrcode"><img src="https://api.qrserver.com/v1/create-qr-code/?size=100x100&data=https://arxiv.org/abs/2507.06206"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Select the markdown files under _build/html whose filesystem timestamp
# falls within the last `days` days.
files = glob('_build/html/*.md')
days = 7
# Naive UTC "now", read once, so it is directly comparable with the naive UTC
# `modified` values built below. `datetime.today()` returns *local* time and
# would skew the window by the local UTC offset.
now = datetime.now(timezone.utc).replace(tzinfo=None)
max_age = timedelta(days=days)
recent = []
for fk in files:
    # NOTE: st_ctime is the metadata-change time on Unix (creation time on
    # Windows), not the content-modification time (st_mtime).
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now - modified
    if delta <= max_age:
        recent.append(fk)
# Paths in reverse alphabetical order (the ordering the original sort on the
# file name produced).
res = sorted(recent, reverse=True)
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

137  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Yield the names of the documents dated within the last `days` days

    :param lst: iterable of (file name, date string) pairs; the date strings
                are compared lexicographically with ``str(date)`` values
    :param days: size of the window, counted back from today
    :returns: generator of file names, most recent date first
    """
    newest_first = list(lst)
    newest_first.sort(key=lambda pair: pair[1], reverse=True)
    for fname, date in newest_first:
        threshold = str(datetime.date.today() - datetime.timedelta(days=days))
        if date < threshold:
            continue
        yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date recorded in each file

    Each file is scanned line by line; the first line containing
    ``"Appeared on"`` provides the date, taken as the text between
    ``Appeared on:`` and the following ``</mark>`` tag. Files without such a
    line contribute nothing to the result.

    :param lst_file: iterable of paths of the files to scan
    :returns: list of (file name, date string) tuples, in input order
    """
    def get_date(line):
        # Keep what follows the label, up to the closing </mark> tag.
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument (the previous version read the global `lst`
    # and ignored `lst_file`).
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # Only the first occurrence matters; stop reading.
                    break
    return dates

from glob import glob
# Scan every generated markdown page for its appearance date and keep the
# pages that appeared within the last `days` days.
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

17  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML snippet of a carousel holding `npub` slides

    :param npub: number of slides; they get the ids ``slide1`` .. ``slide<npub>``
    :returns: the HTML code as a single newline-joined string
    """
    cell_template = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    opening = [
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    ]
    cells = [cell_template.format(k=idx) for idx in range(1, npub + 1)]
    return '\n'.join(opening + cells + ["  </div>"])

def create_grid(npub=4):
    """ Build the HTML snippet of a flat grid holding `npub` slides

    :param npub: number of grid items; they get the ids ``slide1`` .. ``slide<npub>``
    :returns: the HTML code as a single newline-joined string
    """
    item_template = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    rows = ["""  <div class="grid"> """]
    rows.extend(item_template.format(k=idx) for idx in range(1, npub + 1))
    rows.append("  </div>")
    return '\n'.join(rows)

In [13]:
# Build the 7-day archive page: one carousel slide per selected document.
carousel = create_carousel(npub)
docs = ', '.join('"{0:s}"'.format(k.split('/')[-1]) for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Substitute the template placeholders one after the other, in this order.
for placeholder, value in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "7-day archives"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, value)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page generation as for the 7-day archive, restricted to the last day.
days = 1
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join('"{0:s}"'.format(k.split('/')[-1]) for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Substitute the template placeholders one after the other, in this order.
for placeholder, value in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "Daily"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, value)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

9  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# Number of papers requested for the grid.
npub = 6
# Take the `npub` entries with the latest appearance dates, newest first.
# The limit now comes from `npub` instead of a second hard-coded 6.
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), npub)]
# Fewer documents than requested may be available: shrink the grid so that
# the slide list and the document list stay the same length.
npub = len(res)
print(f"{npub} publications selected.")

grid = create_grid(npub)
# Only the file name (no directory) of each document goes into the page.
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

# Fill in the placeholders of the grid template.
with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
