# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Wolf', 'Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

K. Schwarz  ->  K. Schwarz  |  ['K. Schwarz']
H. Korhonen  ->  H. Korhonen  |  ['H. Korhonen']
R. Burn  ->  R. Burn  |  ['R. Burn']
K. Lee  ->  K. Lee  |  ['K. Lee']
K. Lee  ->  K. Lee  |  ['K. Lee']
K. Lee  ->  K. Lee  |  ['K. Lee']
K. Lee  ->  K. Lee  |  ['K. Lee']
Arxiv has 63 new papers today
          5 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/5 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2406.18657


extracting tarball to tmp_2406.18657...

 done.
  0: tmp_2406.18657/dmtau.tex, 1,032 lines
  1: tmp_2406.18657/aassymbols.tex, 579 lines
  2: tmp_2406.18657/natbib.tex, 96 lines
  3: tmp_2406.18657/natnotes.tex, 332 lines
Retrieving document from  https://arxiv.org/e-print/2406.18733



  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2406.18733...

 done.
Retrieving document from  https://arxiv.org/e-print/2406.18991


extracting tarball to tmp_2406.18991...

 done.


Found 85 bibliographic references in tmp_2406.18991/MR_TTV_RV.bbl.
Error retrieving bib data for NeVo2016: 'author'
Error retrieving bib data for Pringle1981: 'author'
Error retrieving bib data for kubyshkina2021: 'author'
Error retrieving bib data for Johnstone2020: 'author'
Error retrieving bib data for Johnstone2020: 'author'
Retrieving document from  https://arxiv.org/e-print/2406.19286


extracting tarball to tmp_2406.19286...

 done.
Retrieving document from  https://arxiv.org/e-print/2406.19287



  exec(code_obj, self.user_global_ns, self.user_ns)
'PosixPath' object is not subscriptable


extracting tarball to tmp_2406.19287... done.



  exec(code_obj, self.user_global_ns, self.user_ns)
'PosixPath' object is not subscriptable


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.18991-b31b1b.svg)](https://arxiv.org/abs/2406.18991) | **Resonant sub-Neptunes are puffier**  |
|| A. Leleu, et al. -- incl., <mark>R. Burn</mark> |
|*Appeared on*| *2024-06-28*|
|*Comments*| **|
|**Abstract**|            A systematic, population-level discrepancy exists between the densities of exoplanets whose masses have been measured with transit timing variations (TTVs) versus those measured with radial velocities (RVs). Since the TTV planets are predominantly nearly resonant, it is still unclear whether the discrepancy is attributed to detection biases or to astrophysical differences between the nearly resonant and non resonant planet populations. We defined a controlled, unbiased sample of 36 sub-Neptunes characterised by Kepler, TESS, HARPS, and ESPRESSO. We found that their density depends mostly on the resonant state of the system, with a low probability (of $0.002_{-0.001}^{+0.010}$) that the mass of (nearly) resonant planets is drawn from the same underlying population as the bulk of sub-Neptunes. Increasing the sample to 133 sub-Neptunes reveals finer details: the densities of resonant planets are similar and lower than non-resonant planets, and both the mean and spread in density increase for planets that are away from resonance. This trend is also present in RV-characterised planets alone. In addition, TTVs and RVs have consistent density distributions for a given distance to resonance. We also show that systems closer to resonances tend to be more co-planar than their spread-out counterparts. These observational trends are also found in synthetic populations, where planets that survived in their original resonant configuration retain a lower density; whereas less compact systems have undergone post-disc giant collisions that increased the planet's density, while expanding their orbits. Our findings reinforce the claim that resonant systems are archetypes of planetary systems at their birth.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.18657-b31b1b.svg)](https://arxiv.org/abs/2406.18657) | **Exploring the Complex Ionization Environment of the Turbulent DM Tau Disk**  |
|| D. E. Long, et al. -- incl., <mark>K. Schwarz</mark> |
|*Appeared on*| *2024-06-28*|
|*Comments*| *18 pages, 12 figures, accepted to be published in The Astrophysical Journal (June 25, 2024)*|
|**Abstract**|            Ionization drives important chemical and dynamical processes within protoplanetary disks, including the formation of organics and water in the cold midplane and the transportation of material via accretion and magneto-hydrodynamic (MHD) flows. Understanding these ionization-driven processes is crucial for understanding disk evolution and planet formation. We use new and archival ALMA observations of HCO+, H13CO+, and N2H+ to produce the first forward-modeled 2D ionization constraints for the DM Tau protoplanetary disk. We include ionization from multiple sources and explore the disk chemistry under a range of ionizing conditions. Abundances from our 2D chemical models are post-processed using non-LTE radiative transfer, visibility sampling, and imaging, and are compared directly to the observed radial emission profiles. The observations are best fit by a modestly reduced CR ionization rate ($\zeta_{CR}$ ~ 10$^{-18}$ s$^{-1}$) and a hard X-ray spectrum (hardness ratio [HR] = 0.3), which we associate with stellar flaring conditions. Our best-fit model under-produces emission in the inner disk, suggesting that there may be an additional mechanism enhancing ionization in DM Tau's inner disk. Overall, our findings highlight the complexity of ionization in protoplanetary disks and the need for high resolution multi-line studies.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: '69117' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.18733-b31b1b.svg)](https://arxiv.org/abs/2406.18733) | **Digging deeper into the dense Galactic globular cluster Terzan 5 with Electron-Multiplying CCDs. Variable star detection and new discoveries**  |
|| R. F. Jaimes, et al. -- incl., <mark>H. Korhonen</mark> |
|*Appeared on*| *2024-06-28*|
|*Comments*| *22 pages, 18 figures, 7 tables. Accepted for publication in A&A*|
|**Abstract**|            Context. High frame-rate imaging was employed to mitigate the effects of atmospheric turbulence (seeing) in observations of globular cluster Terzan 5. Aims. High-precision time-series photometry has been obtained with the highest angular resolution so far taken in the crowded central region of Terzan 5, with ground-based telescopes, and ways to avoid saturation of the brightest stars in the field observed. Methods. The Electron-Multiplying Charge Coupled Device (EMCCD) camera installed at the Danish 1.54-m telescope at the ESO La Silla Observatory was employed to produce thousands of short-exposure time images (ten images per second) that were stacked to produce the normal-exposure-time images (minutes). We employed difference image analysis in the stacked images to produce high-precision photometry using the DanDIA pipeline. Results. Light curves of 1670 stars with 242 epochs were analyzed in the crowded central region of Terzan 5 to statistically detect variable stars in the field observed. We present a possible visual counterpart outburst at the position of the pulsar J1748-2446N, and the visual counterpart light curve of the low-mass X-ray binary CX 3. Additionally, we present the discovery of 4 semiregular variables. We also present updated ephemerides and properties of the only RR Lyrae star previously known in the field covered by our observations in Terzan 5. Finally, we report a significant displacement of two sources by ~0.62 and 0.59 arcseconds with respect to their positions in previous images available in the literature.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Planck' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.19286-b31b1b.svg)](https://arxiv.org/abs/2406.19286) | **Mass composition of ultra-high energy cosmic rays from distribution of their arrival directions with the Telescope Array**  |
|| T. A. Collaboration, et al. -- incl., <mark>K. Lee</mark>, <mark>K. Lee</mark> |
|*Appeared on*| *2024-06-28*|
|*Comments*| *18 pages, 11 figures, accepted for publication in PRD*|
|**Abstract**|            We use a new method to estimate the injected mass composition of ultrahigh cosmic rays (UHECRs) at energies higher than 10 EeV. The method is based on comparison of the energy-dependent distribution of cosmic ray arrival directions as measured by the Telescope Array experiment (TA) with that calculated in a given putative model of UHECR under the assumption that sources trace the large-scale structure (LSS) of the Universe. As we report in the companion letter, the TA data show large deflections with respect to the LSS which can be explained, assuming small extra-galactic magnetic fields (EGMF), by an intermediate composition changing to a heavy one (iron) in the highest energy bin. Here we show that these results are robust to uncertainties in UHECR injection spectra, the energy scale of the experiment and galactic magnetic fields (GMF). The assumption of weak EGMF, however, strongly affects this interpretation at all but the highest energies E > 100 EeV, where the remarkable isotropy of the data implies a heavy injected composition even in the case of strong EGMF. This result also holds if UHECR sources are as rare as $2 \times 10^{-5}$ Mpc$^{-3}$, that is the conservative lower limit for the source number density.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error 'PosixPath' object is not subscriptable</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.19287-b31b1b.svg)](https://arxiv.org/abs/2406.19287) | **Isotropy of cosmic rays beyond $10^{20}$ eV favors their heavy mass composition**  |
|| T. A. Collaboration, et al. -- incl., <mark>K. Lee</mark>, <mark>K. Lee</mark> |
|*Appeared on*| *2024-06-28*|
|*Comments*| *8 pages, 3 figures, accepted for publication in PRL*|
|**Abstract**|            We report an estimation of the injected mass composition of ultra-high energy cosmic rays (UHECRs) at energies higher than 10 EeV. The composition is inferred from an energy-dependent sky distribution of UHECR events observed by the Telescope Array surface detector by comparing it to the Large Scale Structure of the local Universe. In the case of negligible extra-galactic magnetic fields the results are consistent with a relatively heavy injected composition at E ~ 10 EeV that becomes lighter up to E ~ 100 EeV, while the composition at E > 100 EeV is very heavy. The latter is true even in the presence of highest experimentally allowed extra-galactic magnetic fields, while the composition at lower energies can be light if a strong EGMF is present. The effect of the uncertainty in the galactic magnetic field on these results is subdominant.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error 'PosixPath' object is not subscriptable</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2406.18991/./MR_full.png', 'tmp_2406.18991/./cumu_full.png', 'tmp_2406.18991/./controlled_P5to60_R2p7to4.png', 'tmp_2406.18991/./models.png']
copying  tmp_2406.18991/./MR_full.png to _build/html/
copying  tmp_2406.18991/./cumu_full.png to _build/html/
copying  tmp_2406.18991/./controlled_P5to60_R2p7to4.png to _build/html/
copying  tmp_2406.18991/./models.png to _build/html/
exported in  _build/html/2406.18991.md
    + _build/html/tmp_2406.18991/./MR_full.png
    + _build/html/tmp_2406.18991/./cumu_full.png
    + _build/html/tmp_2406.18991/./controlled_P5to60_R2p7to4.png
    + _build/html/tmp_2406.18991/./models.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\me}{  {\rm M}_{\oplus}}$
$\newcommand{\au}{  {\rm au}}$
$\newcommand{\figpath}{./}$
$\newcommand{\bibpath}{./}$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand{\RR}{{\mathbb R}}$
$\newcommand{Ç}{{\mathbb C}}$
$\newcommand{\TT}{{\mathbb T}}$
$\newcommand{\ZZ}{{\mathbb Z}}$
$\newcommand{\NN}{{\mathbb N}}$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand{\qtext}[1]{\quad \text{#1}\quad}$
$\newcommand{\norm}[1]{\vert\vert#1 \vert\vert}$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand{\vect}[3]{\left(\hspace{-0pt} \begin{array}{c}  #1 \  #2 \  #3 \end{array}\hspace{-0pt} \right)}$
$\newcommand{\be}{\begin{equation}}$
$\newcommand{\ee}{\end{equation}}$
$\newcommand\Deta{{\Delta \eta} }$
$\newcommand{\troytitle}{{\fontfamily{pzc}\fontsize{20pt}{20pt}\fontseries{b}\selectfont TROY}}$
$\newcommand{\bysareh}[1]{\textcolor{red}{#1}}$
$\newcommand{\der}[2]{\frac{d#1}{d#2}}$
$\newcommand{\dron}[2]{\frac{\partial#1}{\partial#2}}$
$\newcommand*\circled[1]{\tikz[baseline=(char.base)]{$
$            \node[shape=circle,draw,inner sep=1pt] (char) {#1};}}$
$\newcommand{\tpr}[1]{{#1}}$
$\newcommand{\tap}[1]{\textcolor{blue}{#1}}$
$\newcommand{\tme}[1]{\textcolor{green}{#1}}$
$\newcommand{\red}[1]{\textcolor{red}{#1}}$
$\newcommand\cL{{\cal L} }$
$\newcommand*\samethanks[1][\value{footnote}]{\footnotemark[#1]}$
$\newcommand\Fb{{\overline F} }$
$\newcommand{\t}{^\mathrm{T}}$
$\newcommand{\O}{\mathcal{O}}$
$\newcommand\seps{{\sqrt{\epsilon}} }$
$\usepackage[varg]{txfonts}$
$\usepackage{natbib}$
$\newcommand{\citeay}[1]{\citeauthor{#1}, \citeyear{#1}}$
$\input{fullstrs.txt}$
$\input{parfullstrs.txt}$
$\input{controlledstrs.txt}$
$\newcommand\DMMRthre{0.05}$
$\begin{document}$
$\title{Resonant sub-Neptunes are puffier$
$}$
$\titlerunning{Resonant sub-Neptunes are puffier}$
$\author{$
$Adrien Leleu^1,$
$Jean-Baptiste Delisle^1,$
$Remo Burn^2,$
$André Izidoro^{3},$
$Stéphane Udry^1,$
$Xavier Dumusque^1,$
$Christophe Lovis^1,$
$Sarah Millholland^{4,5},$
$Léna Parc^1,$
$François Bouchy^1,$
$Vincent Bourrier^1,$
$Yann Alibert^6,$
$João Faria^1,$
$Christoph Mordasini^6,$
$and Damien Ségransan^1$
$}$
$\authorrunning{A. Leleu et al}$
$\institute{$
$Observatoire de Genève, Université de Genève, Chemin Pegasi, 51, 1290 Versoix, Switzerland. \and$
$Max Planck Institute for Astronomy, Königstuhl 17, 69117 Heidelberg, Germany. \and$
$Department of Earth, Environmental and Planetary Sciences, 6100 MS 126, Rice University, Houston, TX 77005, USA. \and$
$Department of Physics, Massachusetts Institute of Technology, Cambridge, MA 02139, USA. \and$
$MIT Kavli Institute for Astrophysics and Space Research, Massachusetts Institute of Technology, Cambridge, MA 02139, USA. \and$
$Department of Space Research \& Planetary Sciences, University of Bern, Gesellschaftsstrasse 6, CH-3012 Bern, Switzerland.$
$}$
$\abstract{$
$A systematic, population-level discrepancy exists between the densities of exoplanets whose masses have been measured with transit timing variations (TTVs) versus those measured with radial velocities (RVs). Since the TTV planets are predominantly nearly resonant, it is still unclear whether the discrepancy is attributed to detection biases or to astrophysical differences between the nearly resonant and non resonant planet populations. We newcommandined a controlled, unbiased sample of \nbpcontrolled   sub-Neptunes characterised by \textit{Kepler}, TESS, HARPS, and ESPRESSO. We found that their density depends mostly on the resonant state of the system, with a low probability (of \pcontrolledm  ) that the mass of (nearly) resonant planets is drawn from the same underlying population as the bulk of sub-Neptunes. Increasing the sample to 133 sub-Neptunes reveals finer details: the densities of resonant planets are similar and lower than non-resonant planets, and both the mean and spread in density increase for planets that are away from resonance. This trend is also present in RV-characterised planets alone. In addition, TTVs and RVs have consistent density distributions for a given distance to resonance. We also show that systems closer to resonances tend to be more co-planar than their spread-out counterparts. These observational trends are also found in synthetic populations, where planets that survived in their original resonant configuration retain a lower density; whereas less compact systems have undergone post-disc giant collisions that increased the planet's density, while expanding their orbits. Our findings reinforce the claim that resonant systems are archetypes of planetary systems at their birth.$
$}$
$\keywords $
$\maketitle$
$\section{Introduction}$
$\begin{figure*}[!ht]$
$\begin{center}$
$\includegraphics[width=0.49\linewidth]{MR_full.pdf}  \includegraphics[width=0.49\linewidth]{cumu_full.pdf}$
$\caption{\label{fig:full} Full sample of \nbpfull   sub-Neptunes used in this study. The top-left panel shows the mass-radius relation. The (nearly)-resonant population is newcommandined as  \Delta_{MMR}<\DMMRthre   (see Eq. \ref{eq:DMMR}), while the non-resonant population is newcommandined as  \Delta_{MMR}>\DMMRthre . The black line is the sub-Neptune mass-radius relation from [Parc, et. al (2024)](). Other panels show the cumulative distributions for parameters of the planets or their host star. The p_{value} given for each parameter is the probability that the distribution of that parameter is drawn form the same underlying distribution for the (nearly) resonant and non-resonant populations. The potential 2D correlation between parameters is explored in Fig. \ref{fig:fullcomp}.$
$}$
$\end{center}$
$\end{figure*}$
$ Planets with radius in the 1-4R_{Earth} range are estimated to exist within a hundred days of orbital period around 30-50\% of all Sun-like stars  ([Lovis, Mayor and Bouchy 2009](), [Howard, Marcy and Bryson 2012](), [Fressin, Torres and Charbonneau 2013]()) .$
$In order to understand the nature of these objects, it is crucial to constrain both their masses and radii, and, thus, their densities. The bulk of exoplanet discoveries is done by transit surveys such as \textit{Kepler}/\textit{K2} and TESS, which measure the planets' radii. Then, the mass is typically estimated using the radial velocity method (RVs). For compact multi-planetary systems, the mass$
$can also be estimated using transit timing variations (TTVs).$
$In particular, when the period ratio of two planets is close to commensurability, namely, P_{out}/P_{in} \approx (k+q)/k, the planets can exhibit TTVs due to their proximity to the mean motion resonance (MMR). Despite their relative rarity  ([Fabrycky, Lissauer and Ragozzine 2014]()) , (nearly) resonant systems are over-represented in the population of planets with both mass and radius measurements because the four-year baseline of the \textit{Kepler} mission allowed estimations of their masses through TTVs at no additional cost.$
$Over the last decade, numerous studies \citep[e.g.][]{WuLi2013,WeissMarcy2014,Steffen2016,MillsMazeh2017,HaLi2017,Cubillos2017,Millholland2019,Leleu2023,Adibekyan2024} have noted (and discussed), the apparent discrepancy in density between the planets characterised by TTVs and radial velocities RVs. However, the origin of this discrepancy remains unclear: it could be due to sensitivity biases inherent to each method, with photometry biased towards larger planets and radial velocity biased towards more massive planets. Recent results also showed that part of the TTV-characterised population had underestimated densities due to the difficulty of extracting transit timings for low-signal-to-noise ratio (low-S/N) transits  ([Leleu, Delisle and Udry 2023]()) . [ and Hadden (2017)]() put forward a selection bias as possible explanation, since TTVs tend to allow the characterisation of small planets on larger orbital periods (hence, cooler orbits) than the bulk of the RV characterisation. It has also been proposed that the systems characterised by RVs and TTVs formed in different environments, such as with different disk metallicity  ([Adibekyan, Sousa and Barros 2024]()) . {However, the differences in physical properties could be due to the orbital configuration in which the planets are embedded \cite[e.g.][]{WeissMarcy2014,MillsMazeh2017,Goyal2023}, since TTVs mainly characterise  sub-Neptunes that are near mean motion resonances (MMRs), while the RV-characterised planets are more representative of the bulk of known exoplanets. } In this paper, we explore the possibility that there is an intrinsic connection between the densities of sub-Neptunes and their resonant orbital configurations.$
$\section{Controlled sample}$
$\label{sec:controlled}$
$The population shown in Fig. \ref{fig:full} is taken from the NASA Exoplanet Archive\footnote{\url{https://exoplanetarchive.ipac.caltech.edu/}}. As of 5 March 2024, the catalogue had 695 planets for which the mass and radius, as well as the host mass, radius, effective temperature, and metallicity are given, along with their uncertainties. The host properties are required in order to check for possible correlations between these parameters and the densities of the planets. Restricting this population to close-in systems with periods in the 5-60 days range and radii between 2 and 4 R_{Earth} has reduced this number to 133. The lower limit of the period range is chosen to avoid {the lower part of the Neptunian desert, which is partly shaped by photoevaporation  ([ and Owen 2018]()) .}$
$We newcommandine the (nearly) resonant population (in orange) as planets whose period ratio with an inner or outer planet satisfies \Delta_{MMR}<\DMMRthre , where$
$\be$
$\Delta_{MMR} = \left| \frac{P_{out}}{P_{in}}  - \frac{k+q}{k}\right|$
$\label{eq:DMMR}$
$,\ee$
$for q=1 and k\in [1,2,3,4,5] or q=2 and k\in [3,5]. While the non-resonant population (in blue) is newcommandined \Delta_{MMR}>\DMMRthre . This limit is set by the edge of the clump of nearly-resonant system found in \textit{Kepler}  ([Fabrycky, Lissauer and Ragozzine 2014]()) .$
$The (nearly) resonant population, in orange, appears to be composed of lower-density planets than the non-resonant population. However, these populations could be affected by numerous biases. Notably, the {(nearly-)resonant} population is mainly characterised by TTVs, while the {non-resonant} population is mainly characterised by RVs.$
$\begin{figure*}[!ht]$
$\begin{center}$
$\includegraphics[width=0.49\linewidth]{MR_controlled.pdf}$
$\includegraphics[width=0.49\linewidth]{cumu_controlled.pdf}$
$\caption{\label{fig:controlled} Same as Fig. \ref{fig:full}, for the controlled sample.$
$ The potential 2D correlation between parameters are explored in Fig. \ref{fig:controlled_fullcomp}.$
$}$
$\end{center}$
$\end{figure*}$
${$
$Regarding the RV-characterised masses, a possible bias could come through the follow-up observation process;$
$some planets could be dropped out$
$after few RV points were taken,$
$if the RV signature of the planets did not seem large enough.$
$This selection process would lead to a bias in the literature towards higher masses and higher densities for RV-characterised planets.$
$Using all sub-Neptunes in the 5-60 days range that were followed-up by HARPS or ESPRESSO, we show in Appendix \ref{ap:controlled} that such bias is absent for planets whose radius is above 2.7R_{Earth}, since 93\% (26 out of 28) of the followed-up planets in that radius range have published masses. These 20 RV-characterised planets are the first part of our controlled sample.$
$For TTV-characterised planets, the mass-radius relation can strongly be affected by mass-eccentricity degeneracies [Lithwick, Xie and Wu (2012)]() and the manner in which the TTVs are extracted from the light curves  ([Leleu, Delisle and Udry 2023]()) . To address the first point, we only use planets whose mass estimations have been shown to be robust against mass and eccentricity degeneracy \citep[e.g.][]{HaLi2017,Leleu2023}.$
$Regarding the second point, [Leleu, Delisle and Udry (2023)]() showed that TTVs are typically correctly estimated by usual methods if the signal to noise ratio of individual transits (S/N_i) is high enough.$
$As shown in Fig. \ref{fig:SNRi}, we checked that the planets with radius above 2.7 had an S/N_i > 3.5, which ensures that their individual transit timing can robustly be recovered  ([Leleu, Delisle and Udry 2023]()) .$
$Here also, it translates to the selection of stars that are bright enough and not overly active. If individual transits of planets can reliably be observed, there is no reason why we could not characterise denser planets, as their TTV signals would either be larger  ([Lithwick, Xie and Wu 2012]())  or faster  (NeVo2016) , depending on whether the pair is near or inside a MMR.$
$We therefore newcommandine our controlled sample as the planets in the 2.7 to 4 R_{Earth} range in the mass-radius diagram (shown in Fig. \ref{fig:controlled}). This sample, detailed in Table \ref{tab:controlled} is made of \nbpcontrolledres  (nearly) resonant planets and \nbpcontrollednres  non-resonant planets. Using the Kolmogorov-Smirnov test, we estimated that the radius distributions of these two populations have a p_{value}=\pcontrolledr   probability\footnote{Median and uncertainties on the p_{value} are estimated by drawing 1000 samples assuming a Gaussian distribution for the radius of each planet, then computing the 0.16, 0.5, and 0.84th quantiles of the resulting p_{value} distribution.} to be drawn from the same underlying population. However, the probability that their masses have been drawn from the same underlying population is p_{value}=\pcontrolledm. We then checked whether this discrepancy could be attributed to different stellar metallicities, equilibrium temperatures \citep[e.g.][]{HaLi2017}, or stellar effective temperatures. With respect to all of these quantities, the two populations are similar, with p_{value} of \pcontrolledteq, \pcontrolledmet  , and \pcontrolledteff  , respectively. Exploring possible 2D relations between these parameters, we performed 2D Kolmogorov-Smirnov tests  ([ and Peacock 1983]()) . These results are shown in Appendix \ref{ap:controlled}. Across all our tests, the p_{value} involving the mass are lower than the rest by two orders of magnitudes. We therefore conclude that the proximity to MMR is the main factor in the discrepancy between the mass, hence, the density, of the two sub-populations.$
$\section{Full sample}$
$\label{sec:full}$
$We go on to consider the full sample shown in Fig. \ref{fig:full}, taking the robust masses from [ and Hadden (2017)]() and [Leleu, Delisle and Udry (2023)]() when available. For this sample, the probability that the mass of the (nearly) resonant and non-resonant population is drawn form the same underlying population drops to p_{value}=\pfullm  , while the rest of the explored parameters are consistent between the two sub-population. The full 2D comparison is in Appendix \ref{fig:fullcomp}.$
$The 2D comparison was also performed for the full sample restricted to the 2.7-4R_{Earth} (see Fig. \ref{fig:parfullcomp}).$
$\begin{figure*}[!ht]$
$\begin{center}$
$\includegraphics[width=0.61\linewidth]{distres_rho.pdf}\includegraphics[width=0.388\linewidth]{rho_gauss.pdf}$
$\caption{\label{fig:DMMRrho} Relative density of planets as a function of the distance to the closest MMR (left). Resonant chains and single planets are arbitrarily set at \Delta_{MMR}=0.001 and 100, respectively.$
$The colors indicate the method used to obtain the mass. The dark grey area indicates the 1\sigma confidence interval for the local mean value of \rho_{rel}, while the dotted lines show a local estimation of its scatter.$
$\textit  Distribution of relative densities assuming gaussian distributions, binning by method and distance to the resonance (right): resonant, nearly resonant (0.001<\Delta_{MMR}<0.05), non resonant (0.05<\Delta_{MMR}), and single planets.$
$}$
$\end{center}$
$\end{figure*}$
$To compare the relative densities of planets with different radii, we newcommandined \rho_{rel} = \rho / \rho_{ref}, which is the ratio between the density measured for a planet and a reference density for a planet of the same radius, using the M-R relation from [Parc, et. al (2024)]() (black line in Fig. \ref{fig:full}). To further illustrate the effect of the proximity to MMR on the density of planets, in Fig. \ref{fig:DMMRrho} we show \rho_{rel} as a function of \Delta_{MMR}. Formally resonant planets (including resonant chains) are shown on the left, while single planets are shown on the right\footnote{The position of single planets in this figure is arbitrary, and there are possibly non-transiting planets in their system. However, given the relative rarity of planets near MMRs, we assume that they are part of the non-resonant population.}.$
$In the figure, we estimated the local mean and scatter of \rho_{rel} by fitting a Gaussian distribution in a box sliding over \log_{10}(\Delta_{MMR}) with a width of 1.$
$Most planets for which \Delta_{MMR}<0.05 have a relative density below 1, and this is even more so for the formally resonant systems, which have a mean relative density estimated at 0.63 \pm 0.05. On the contrary, planets for which \Delta_{MMR}>0.1 are more uniformly spread around \rho_{rel}=1 and single planets are on average denser, with a mean relative density of 1.38 \pm 0.10.$
$The right panel shows the envelope of the 0.16-0.84 quantiles of the Gaussian distributions of the relative density as a function of the mass measurement method used and the distance to the resonance. Only the nearly resonant population (0.001<\Delta_{MMR}<0.05) had enough measurements to be estimated by both methods. From this analysis, we have drawn three observations. First, the correlation between the distance to MMR and the relative density of sub-Neptunes is visible when using RV-characterised planets alone (grey, green, and dark green Gaussians). Second, RV- and TTV- characterised planets have similar relative density where they overlap in the nearly-resonant population (dark green and dark purple Gaussians). Third, the distribution of relative densities increase both in mean value and in spread away from the resonance, with resonant planets having similar relative densities, while single planets have a larger spread.$
$\section{Discussion}$
$\label{sec:discussion}$
$\begin{figure}[!ht]$
$\begin{center}$
$\includegraphics[width=0.99\linewidth]{distres_mutinc.pdf}$
$\caption{\label{fig:DMMRDi} Minimal mutual inclination of transiting confirmed planets. The dark grey area indicates the 1\sigma confidence interval for the local mean value of \Delta i_{min}, while the lighter grey area shows a local estimation of its scatter.}$
$\end{center}$
$\end{figure}$
$\begin{figure*}[!ht]$
$\begin{center}$
$\includegraphics[width=0.99\linewidth]{models.pdf}$
$\caption{\label{fig:models} Synthetic populations of close-in systems. Top: Population from [Izidoro, Bitsch and Raymond (2021)](), [Izidoro, Schlichting and Isella (2022)]() bottom: Population form NGPPS [Burn, Mordasini and Mishra (2024)]().  }$
$\end{center}$
$\end{figure*}$
$If the difference in density between (nearly) resonant and non-resonant planets is not due to observational or selection biases, nor to the type of star that the planets orbit, it must be a result of different formation and evolution pathways. For example, the (nearly) resonant planets could be puffier as a result of atmospheric inflation due to tidal heating  ([ and Millholland 2019](), [Millholland, Petigura and Batygin 2020]()) . This would arise if the (nearly) resonant have systematically larger eccentricities or planetary obliquities. This is supported by previous studies, which suggested that planets captured in mean-motion resonances might have their spin-axes tilted as a result of secular spin-orbit resonance capture during the orbital migration process  ([ and Millholland 2019](), [Millholland, Lara and Toomlaid 2024]()) .$
$Another hypothesis is that resonant and non-resonant planets were formed in different locations with diverse formation conditions. For example, [ and Lee (2016)]() posited that most super-Earths and sub-Neptunes formed in situ in gas-poor discs towards the end of the disc lifetime. However, they suggested that the rarer class of low-density "super-puff" planets formed outside \sim 1 AU and accreted thicker gaseous atmospheres due to more efficient cooling. Because they formed further out, they would have entered MMRs upon inward migration.$
$Alternatively, lower densities for planets in MMRs is expected by the model known as `breaking the chains' \citep[see section 4 of][]{Bean2021}.$
$In that model, close-in systems of sub-Neptunes form in resonant chains due to the migration of planets in the protoplanetary discs and the positive torque at the inner edge of the disc \citep[e.g.][]{GoTre1979,Weidenschilling1985,Masset2006,TePa2007}.$
$Resonant chains can then become dynamically unstable after the gaseous disc dissipates  ([ and Terquem 2007](), [ and Ogihara 2009](), [Cossou, et. al 2014]()) .$
$This model reproduces the observed period ratio and multiplicity distribution of the close-in sub-Neptune population if \approx 95\% of the chains become unstable after the disc dispersal  ([Izidoro, Ogihara and Raymond 2017]()) . Some of these instabilities lead to giant impacts which can eject part, or all, the primordial H/He atmospheres of the planets  ([ and Biersteker 2019]()) , resulting in non-resonant planets that are on average denser than their resonant counterpart.$
$This scenario leads to a second observable: known resonant chains tend to be remarkably coplanar \citep[e.g.][]{Agol2020,Leleu2021}, while planet-planet scattering is expected to increase the mutual inclination between planets. In Fig. \ref{fig:DMMRDi} we show the minimum mutual inclination (i.e. assuming that the ascending nodes of all the planets are aligned in the sky) of all pairs of planets that were confirmed in the exoplanet archive. Here, we can also see that pairs further away from MMRs have a larger scatter in the mutual inclination, in agreement with the 'breaking the chains' model. We note that the actual trend might be stronger, since here we only measure the minimal mutual inclination, the impact parameter of (nearly) resonant systems could be biased by unaccounted TTVs  ([ and García-Melendo 2011]())  and misaligned, spread-out systems have a lower transit probability.$
$The 'breaking the chains' mechanism \citep[e.g.][]{Izidoro2017,Izidoro2022} is also observed in different planet formation models \citep[e.g. NGPPS][]{Emsenhuber2021,Burn2024}.$
$In Fig. \ref{fig:models}, we show synthetic systems from [Izidoro, Bitsch and Raymond (2021)](), [Izidoro, Schlichting and Isella (2022)]() and NGPPS  ([Emsenhuber, Mordasini and Burn 2021](), [Burn, Mordasini and Mishra 2024]()) . These populations (described in Appendix \ref{ap:Andre} and \ref{ap:Remo}, respectively) simulate the formation of planetary systems from planet embryos in the proto-planetary discs up to \sim50 millions of years after the disc dispersal. As can be seen in the middle panels, both populations harbour lower-density planets for the (nearly) resonant sub-population, while the non-resonant populations have larger and more diverse relative densities. The right panels show that the mutual inclination between each planet pairs tend to be larger for larger distance to MMRs. In both populations, these features are linked with post-disc instabilities and collision, shown with star markers in Fig. \ref{fig:models}. These instabilities can be due to the configuration of the chain itself, but also to the existence of an outer more massive planet \citep[see Appendix \ref{ap:Remo} and also][]{Schlecker2021,Izidoro2022}. In addition, for the NGPPS population, the larger density of the non-resonant population is partially due to the accretion of more rocky embryos from the inner system during the giant impact stage after gas disk dissipation.$
$\section{Conclusion}$
$\label{sec:conclusion}$
$Our results support the idea that the apparent discrepancy between TTV- and RV-characterised planets is astrophysical and due to different formation and evolution pathways of the characterised populations, rather than a method-related bias. The significance of the controlled sample (p_{value} of \pcontrolledm) can be improved by a homogeneous analysis of the RV-characterised population, and a larger completion in the publication of mass measurement for smaller planets. For the TTVs, the population needs to be systematically checked for the mass or eccentricity degeneracies  ([ and Hadden 2017]())  and the robustness of the TTV extraction or the photo-dynamical analysis  ([Leleu, Delisle and Udry 2023]()) , as well as ensuring that small dense resonant planets are not missed due to large TTVs  ([Leleu, Chatel and Udry 2021](), [Leleu, Delisle and Mardling 2022]()) . In this study, we were also able to show that TTV and RV characterised planets had a similar relative density distribution for the nearly resonant population. A next step would be to get enough RV characterised resonant systems of sub-Neptunes, such as resonant chains, to check whether these results hold for that population. Finally, PLATO will enable the discovery and characterisation of systems by both TTVs and RVs, which should help to further alleviate the biases inherent to each method.$
$\bibliographystyle{aa}$
$\bibliography{biblio}$
$\begin{acknowledgements}$
$The authors acknowledge support from the Swiss NCCR PlanetS and the Swiss National Science Foundation. This work has been carried out within the framework of the NCCR PlanetS supported by the Swiss National Science Foundation under grants 51NF40_ 182901 and 51NF40_ 205606.$
$AL acknowledges support of the Swiss National Science Foundation under grant number  TMSGI2\_211697.$
$R.B. acknowledges the support from the German Research Foundation (DFG) under Germany’s Excellence Strategy EXC 2181/1-390900948, Exploratory project EP 8.4 (the Heidelberg STRUCTURES Excellence Cluster). This project has received funding from the European Research Council (ERC) under the European Union's Horizon 2020 research and innovation programme (project {\sc Spice Dune}, grant agreement No 947634).$
$\end{acknowledgements}$
$\begin{appendix}$
$\section{Controlled sample}$
$\label{ap:controlled}$
$To test for the RV selection bias, we used the ESO public archive\footnote{\url{https://archive.eso.org}} to get the number of HARPS and/or ESPRESSO (outside of GTO) measurements$
$of all planets with radius between 2 and 4 R_{Earth} and period between 5 and 60 days.$
$We restricted our analysis to HARPS and ESPRESSO because the ESO archive allows for a query of the number of measurements for all observed targets.$
$\input{controlled_table.txt}$
$We then checked using the exoplanet archive whether these planets had a  published mass in the literature.$
$We considered targets that had at least 20 RV points taken by HARPS and/or ESPRESSO by March 2022.$
$We chose 20 points as a threshold because some targets might have been dropped early because of stellar activity or another reason not related to the mass of the planet.$
$We only looked at measurements taken before March 2022 to only consider planets for which the observers had enough time to analyse the data and publish the mass.$
$As we can see in Fig. \ref{fig:nb_equiv}, planets whose radius is below 2.7 R_{Earth} might be affected by a selection bias, as 24\% of these have not been published. On the other hand, only 7\% of the planets with a radius above the 2.7 R_{Earth} threshold have not been not published.$
$There are also three planets with radius above  2.7 R_{Earth} for which a mass is given on the archive, but no errorbar: CoRoT-24 b (Vmag=15.38), HIP 94235 b (Vmag=8.3), and K2-290 b (Vmag=11.11, triple star system). For CoRoT-24 b, [Alonso, Moutou and Endl (2014)]() reported a 1\sigma upper limit at 5.8M_{Earth}, but could not exclude that the planet was orbiting another star. For HIP 94235 b and K2-290 b, [Zhou, Wirth and Huang (2022)]() and [Hjorth, Justesen and Hirano (2019)]() reported 3\sigma upper limit at 379M_{Earth} and 21M_{Earth}, respectively; however, the authors did note that their dataset did not have the precision to reach the expected mass range for these planets. Therefore, we did not include these planets in our analysis.$
$Finally, we checked whether this sub-sample could have been pre-selected before their observation by HARPS or ESPRESSO. To do so, we checked the reference of each planet to see if points were taken by other facilities before their monitoring by one of these telescopes. We found only 4such instances out of the 20 planets (see notes in Table \ref{tab:controlled}). We therefore consider this sub-sample to be representative of the underlying population of planets between 5 and 60 days of the orbital periods and radii in the [2.7-4]  R_{Earth} range. The controlled sample is described in Table \ref{tab:controlled}. Possible 2D correlations between the planetary and/or stellar parameters of the two populations are explored in Fig. \ref{fig:controlled_fullcomp}.$
$\begin{figure}[!ht]$
$\begin{center}$
$\includegraphics[width=0.99\linewidth]{RV_neq.pdf}$
$\caption{\label{fig:nb_equiv} Number of points taken by Mars 2022 by HARPS and ESPRESSO facilities for planets in the 2 to 4 R_{Earth} range. Each ESPRESSO measurements are counted as five HARPS measurements to account for photon noise. The horizontal grey dashed line represents the radius cut-off above which we consider that the published mass-radius population is not affected by selection biases.$
$}$
$\end{center}$
$\end{figure}$
$\begin{figure}[!ht]$
$\begin{center}$
$\includegraphics[width=0.99\linewidth]{TTV_SNRi.pdf}$
$\caption{\label{fig:SNRi}$
$Radius of the \textit{Kepler} TTV characterised planets the full sample that came either from [ and Hadden (2017)]() or [Leleu, Delisle and Udry (2023)](), as function of the S/N of individual transits (S/N_i). The vertical grey dashed line shows the S/N_i=3.5 threshold above which we consider that large TTVs do not prevent the detection of the planet  ([Leleu, Delisle and Udry 2023]()) . The horizontal grey dashed line represent the radius cut-off above which we consider that the published mass-radius population is not affected by selection biases. }$
$\end{center}$
$\end{figure}$
$\begin{figure*}[!ht]$
$\begin{center}$
$\includegraphics[width=0.99\linewidth]{controlled_P5to60_R2p7to4.png}$
$\caption{\label{fig:controlled_fullcomp}$
$Controlled sample for planets in the 2.7 to 4 R_{Earth} range. (Nearly-)resonant planets are shown by orange points, while non-resonant planets are shown by blue points. The p_{value} shown on the diagonal are 1D Kolmogorov-Smirnov test, while the p_{value} on the bottom-left triangle are 2D Kolmogorov-Smirnov tests  ([ and Peacock 1983]()) . Median and uncertainties on the p_{value} are estimated by drawing 1000 samples assuming a Gaussian distribution for the radius of each planet, then computing the 0.16, 0.5, and 0.84th quantiles of the resulting p_{value} distribution.}$
$\end{center}$
$\end{figure*}$
$\section{Full sample}$
$Possible 2D correlations between the planetary and/or stellar parameters of the two populations are explored in Fig. \ref{fig:fullcomp} for the full sample given in Fig. \ref{fig:full}. The same analysis is also performed by restricting that sample to the 2.7 to 4 Earth radii range (see Fig. \ref{fig:parfullcomp}).$
$\begin{figure*}[!ht]$
$\begin{center}$
$\includegraphics[width=0.99\linewidth]{P5to60_R2to4.png}$
$\caption{\label{fig:fullcomp}$
$Same as Fig \ref{fig:controlled_fullcomp} but for the full sample shown in Fig. \ref{fig:full}, for radius between 2 and 4 Earth radii.$
$}$
$\end{center}$
$\end{figure*}$
$\begin{figure*}[!ht]$
$\begin{center}$
$\includegraphics[width=0.99\linewidth]{P5to60_R2p7to4.png}$
$\caption{\label{fig:parfullcomp}$
$Same as Fig \ref{fig:controlled_fullcomp}, but for the full sample shown in Fig. \ref{fig:full}, for a radius between 2.7 and 4 Earth radii.$
$}$
$\end{center}$
$\end{figure*}$
$\section{Synthetic planetary populations}$
$\subsection{Breaking the chains model  ([Izidoro, Ogihara and Raymond 2017]()) }$
$\label{ap:Andre}$
$Our initial set of simulated planetary systems originates from [Izidoro, Schlichting and Isella (2022)](), building upon prior simulations of the 'breaking the chains' scenario  ([Izidoro, Ogihara and Raymond 2017](), [Izidoro, Bitsch and Raymond 2021]()) . These simulations track the formation of super-Earths and sub-Neptunes by tracing the evolution of Moon-mass planetary seeds within a circumstellar disk. Various physical processes are considered, including gas-assisted pebble accretion \citep[e.g.][]{lambrechtsjohansen12}, gas-driven planet migration \citep[e.g.][]{baruteauetal14}, gas tidal damping of orbital parameters \citep[e.g.][]{cresswellnelson08}, and mutual gravitational interactions among planetary embryos.$
$In [Izidoro, Bitsch and Raymond (2021)](), planetary seeds grow through pebble accretion and mutual collisions. Pebbles beyond the snowline are presumed to contain 50\% water ice mass. As these pebbles migrate inward and cross the water snowline, they sublimate, losing their water component and releasing silicate grains.  Collisions are modelled as perfect merging events conserving mass and momentum.$
$The planet formation simulations of [Izidoro, Bitsch and Raymond (2021)]() new planetary mass and composition but not planet size or radius. To compare the simulations outcome with observational trends involving planet sizes such as the exoplanet radius valley and the peas-in-a-pod trend, [Izidoro, Schlichting and Isella (2022)]() employ mass-radius relationships  ([Zeng, Jacobsen and Sasselov 2019]())  to convert mass into planetary radius. In addition,  in this model, giant impacts (with projectile-to-target mass ratios greater than 0.1) occurring after gas disk dispersal strip primordial atmospheres, leaving behind either bare rocky or water-rich cores  ([ and Biersteker 2019]()) . According to [Izidoro, Schlichting and Isella (2022)](), approximately 80-90\% of late impacts qualify as giant impacts in their model. Their model does not account for the formation of secondary/outgassed atmospheres but for only for primordial atmospheres accreted during the gaseous disk phase (H/He rich).$
$The stability of a primordial atmosphere for a planet that did not experience giant impacts after gas dispersal is estimated using an energy-limited escape prescription, considering stellar X-ray and ultraviolet radiation  ([ and Owen 2017]()) . The criterion by [ and Misener (2021)]() compares atmospheric binding energy to the energy received by the planet from 100 million to 1 billion years. An energy ratio smaller than unit indicates sufficient energy for atmosphere photo-evaporation. Planet sizes are computed following different planet models, which account, or not, for the presence of a primordial atmosphere following the atmospheric instability criterion of [ and Misener (2021)]().$
$\subsection{New generation planetary population synthesis (NGPPS)}$
$\label{ap:Remo}$
$The second set of theoretical planetary system calculations was obtained from the planetary population synthesis exercise conducted by [Emsenhuber, Mordasini and Burn (2021)]() and updated by [Burn, Mordasini and Mishra (2024)](). This set of simulations use the Bern model of global planet formation and evolution  ([Emsenhuber, Mordasini and Burn 2021]()) . The starting point of the simulations are protoplanetary dust and solid disks around Solar-type stars with observation-informed distributions of initial conditions (\citealp{Tychoniec2018}, see also the discussion in \citealp{Emsenhuber2023}). The gas disk viscous surface density evolution equation  (Pringle1981)  is solved assuming an \alpha-viscosity of 2\times 10^{-3}, photoevaporative mass loss, and a consistent temperature structure using opacities from [ and Bell (1994)](), viscous and irradiation heating. Apart from 1\% dust used as opacity source, the solid disk is assumed to be present in the form of planetesimals that are modelled as fluid and accreted by 100 growing seed embryos randomly distributed in the disk at initialisation. The composition of gas, embryos and planetesimals, initialised following [Marboeuf, et. al (2014)](), is tracked during planet growth. Gas accretion onto the planets proceeds by cooling and contraction of the previously accreted gas which is modelled in one dimension assuming hydrostatic equilibrium and energy release at the boundary between the solid core and the gaseous envelope. The simulations explicitly model gravitational interactions between the embryos using the \texttt{mercury} code  ([ and Chambers 1999]())  where an additional force is added due to the interaction of the planets with the gaseous disk. The force is derived from prescriptions of type I  ([Paardekooper, et. al 2010](), [Paardekooper, Baruteau and Kley 2011]())  and II  ([Dittkrist, et. al 2014]())  migration timescales, as well as eccentricity and inclination damping  ([ and Coleman 2014]()) . This will commonly lead to an inwards movement at moderate excitation of the planets during the gas disk stage. If two embryos collide, we assume a perfect merging of the solid core (including volatile species) and immediate ejection of the hydrogen-helium envelope of the smaller of the two planets. The impact energy of the impacting core is then added as a luminosity source over a smoothing timescale on the core-envelope boundary of the larger target, which can lead to radius inflation and gas loss, namely, impact stripping. For a full description of the technical implementation, we refer to [Emsenhuber, Mordasini and Burn (2021)](). Here, we note that the perfect merging of volatiles, such as water, is a current model shortcoming and impact stripping of volatiles need to be included in future versions of the model. Nevertheless, the stripping of heavier water or carbon-bearing species is less efficient than that of the lighter hydrogen and helium  ([Burger, Bazsó and Schäfer 2020]()) .$
$The evolution of the 1000 NGPPS planetary systems, following 100 Myr of N-body integration with the aforementioned model, was re-calculated by [Burn, Mordasini and Mishra (2024)]() considering an improved equation of state for water  ([Haldemann, et. al 2020]())  for each simulated planet individually. If water or other ices were present from the formation modelling, they are mixed with any H/He left at this stage. For simplicity, all volatile species are modelled as water. The revised internal structure modelling results in significantly different, larger, radii of volatile-rich planet due to the lower density of supercritical water. Over typically 0.1 Gyr timescales, the loss of the mixture is calculated using a mass-weighted mass loss rate of the two constituents due to high-energy irradiation by the star  ([Kubyshkina, Fossati and Erkaev 2018](), kubyshkina2021, Johnstone2020) . The fraction of elements in the envelope different from hydrogen and helium, Z_{\rm env}, is kept constant motivated by numerical results in the regime of efficient mass loss  (Johnstone2020) . Here, we show the planetary population at an evolutionary age of 5 Gyr after star formation; however, we caution that due to computational limitations, the dynamical state represents a 100 Myr-old system and, in some cases, slightly varying masses (prior to atmospheric mass loss).$
$\begin{figure*}$
$    \centering$
$    \includegraphics[width=\linewidth]{a_m_scatter_NGPPS_tdisk_MMR_nMMR_new_layout.pdf}$
$    \caption{Simulated planetary systems from NGPPS hosting final planets comparable to the observed sample. The criteria are a radius between 2 and 4 R_{\oplus} and an orbital period between 5 and 60 days. Those planets are marked in the semi-major axis versus total mass plots (left four panels) with larger, dark outlined circles. The rest of the simulated planetary systems containing those planets is shown with transparent circles using the same color-code showing the mass of volatile species accreted as ices compared to the total core mass. The fraction of systems with giants f_{\rm giants} differs between the two samples. The lower two panels on the left show an earlier stage of the system at the time of disk dispersal. In addition, collision partners of the planets meeting the selection criteria are connected to them with lines. For visual guidance, the mass region from 3 to 20 M_{\rm Earth}, where most of the (nearly) resonant planets reside, is marked with grey lines.}$
$    \label{fig:NGPPS_a_m_scatter}$
$\end{figure*}$
$\begin{figure}$
$    \centering$
$    \includegraphics[width=\linewidth]{a_m_scatter_NGPPS_tdisk_MMR_nMMR_hists_only_new_layout.pdf}$
$    \caption{Distributions of synthetic NGPPS planets comparable to the observed sample. Top histogram shows the volatile core mass fraction distribution, while the lower panel shows the total mass of the planets meeting the selection criteria (as in Fig. \ref{fig:NGPPS_a_m_scatter} after 100 Myr).}$
$    \label{fig:NGPPS_hists}$
$\end{figure}$
$Finally, we briefly discuss the origin of the trends with distance to mean-motion resonance. To make a comparison  with the observed systems, we selected similar-sized, close-in planets, and display their properties in Fig. \ref{fig:models} as well as in Figs. \ref{fig:NGPPS_a_m_scatter} and \ref{fig:NGPPS_hists}. From the latter figures, we can see that more collisions occur after the end of the disk lifetime for the non-resonant population and that their volatile fraction is on average lower. In addition, they also have larger masses and (in order to meet the radius selection criterion) they have higher bulk densities.$
$\end{appendix}$
$\end{document}}\end{document}\end{equation}}$
$\newcommand{\ee}{\end{equation}}$
$\newcommand$
$\newcommand{\troytitle}{{\fontfamily{pzc}\fontsize{20pt}{20pt}\fontseries{b}\selectfont TROY}}$
$\newcommand{\bysareh}[1]{\textcolor{red}{#1}}$
$\newcommand{\der}[2]{\frac{d#1}{d#2}}$
$\newcommand{\dron}[2]{\frac{\partial#1}{\partial#2}}$
$\newcommand{\tpr}[1]{{#1}}$
$\newcommand{\tap}[1]{\textcolor{blue}{#1}}$
$\newcommand{\tme}[1]{\textcolor{green}{#1}}$
$\newcommand{\red}[1]{\textcolor{red}{#1}}$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand{\citeay}[1]{\citeauthor{#1}, \citeyear{#1}}$
$\newcommand$
$\newcommand{\t}{^\mathrm{T}}$
$\newcommand{\O}{\mathcal{O}}$</div>



<div id="title">

# Resonant sub-Neptunes are puffier

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2406.18991-b31b1b.svg)](https://arxiv.org/abs/2406.18991)<mark>Appeared on: 2024-06-28</mark> - 

</div>
<div id="authors">

A. Leleu, et al. -- incl., <mark>R. Burn</mark>

</div>
<div id="abstract">

**Abstract:** A systematic, population-level discrepancy exists between the densities of exoplanets whose masses have been measured with transit timing variations (TTVs) versus those measured with radial velocities (RVs). Since the TTV planets are predominantly nearly resonant, it is still unclear whether the discrepancy is attributed to detection biases or to astrophysical differences between the nearly resonant and non resonant planet populations. We defined a controlled, unbiased sample of $\nbpcontrolled$ sub-Neptunes characterised by _Kepler_ , TESS, HARPS, and ESPRESSO. We found that their density depends mostly on the resonant state of the system, with a low probability (of $\pcontrolledm$ ) that the mass of (nearly) resonant planets is drawn from the same underlying population as the bulk of sub-Neptunes. Increasing the sample to 133 sub-Neptunes reveals finer details: the densities of resonant planets are similar and lower than non-resonant planets, and both the mean and spread in density increase for planets that are away from resonance. This trend is also present in RV-characterised planets alone. In addition, TTVs and RVs have consistent density distributions for a given distance to resonance. We also show that systems closer to resonances tend to be more co-planar than their spread-out counterparts. These observational trends are also found in synthetic populations, where planets that survived in their original resonant configuration retain a lower density; whereas less compact systems have undergone post-disc giant collisions that increased the planet's density, while expanding their orbits. Our findings reinforce the claim that resonant systems are archetypes of planetary systems at their birth.

</div>

<div id="div_fig1">

<img src="tmp_2406.18991/./MR_full.png" alt="Fig5.1" width="50%"/><img src="tmp_2406.18991/./cumu_full.png" alt="Fig5.2" width="50%"/>

**Figure 5. -**  Full sample of \nbpfull   sub-Neptunes used in this study. The top-left panel shows the mass-radius relation. The (nearly)-resonant population is defined as  $\Delta_{MMR}<$\DMMRthre(see Eq. \ref{eq:DMMR}), while the non-resonant population is defined as  $\Delta_{MMR}>$\DMMRthre . The black line is the sub-Neptune mass-radius relation from [Parc, et. al (2024)](). Other panels show the cumulative distributions for parameters of the planets or their host star. The $p_{value}$ given for each parameter is the probability that the distribution of that parameter is drawn form the same underlying distribution for the (nearly) resonant and non-resonant populations. The potential 2D correlation between parameters is explored in Fig. \ref{fig:fullcomp}.
 (*fig:full*)

</div>
<div id="div_fig2">

<img src="tmp_2406.18991/./controlled_P5to60_R2p7to4.png" alt="Fig9" width="100%"/>

**Figure 9. -** 
Controlled sample for planets in the 2.7 to 4 $R_{Earth}$ range. (Nearly-)resonant planets are shown by orange points, while non-resonant planets are shown by blue points. The $p_{value}$ shown on the diagonal are 1D Kolmogorov-Smirnov test, while the $p_{value}$ on the bottom-left triangle are 2D Kolmogorov-Smirnov tests  ([ and Peacock 1983]()) . Median and uncertainties on the $p_{value}$ are estimated by drawing 1000 samples assuming a Gaussian distribution for the radius of each planet, then computing the 0.16, 0.5, and 0.84th quantiles of the resulting $p_{value}$ distribution. (*fig:controlled_fullcomp*)

</div>
<div id="div_fig3">

<img src="tmp_2406.18991/./models.png" alt="Fig8" width="100%"/>

**Figure 8. -**  Synthetic populations of close-in systems. Top: Population from [Izidoro, Bitsch and Raymond (2021)](), [Izidoro, Schlichting and Isella (2022)]() bottom: Population form NGPPS [Burn, Mordasini and Mishra (2024)]().   (*fig:models*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2406.18991"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

106  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

8  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
