# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big: raise Pillow's pixel-count limit so that
# opening large figures does not trip its decompression-bomb safeguard.
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category reserved for affiliation-related messages."""

class AffiliationError(RuntimeError):
    """Error raised by :func:`validation` when the affiliation check fails."""

def validation(source: str):
    """Check the affiliations of a source before its TeX code gets parsed

    Meant to be used as the ``validation`` hook of ``latex.LatexDocument``.

    :param source: content to check, forwarded unchanged to
                   ``mpia.affiliation_verifications``
    :raises AffiliationError: when ``mpia.affiliation_verifications`` returns
                              anything other than ``True``
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # `check` normally carries the textual reason of the failure. str() keeps
        # the failure an AffiliationError even if a non-string value comes back
        # (concatenating it directly would raise a TypeError instead).
        raise AffiliationError("mpia.affiliation_verifications: " + str(check))

        
# Always show AffiliationWarning, even when the same warning was already issued.
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g., ``2511.08691``)
    :returns: HTML snippet (an ``<img>`` wrapped in ``<div id="qrcode">``)
              to append to the markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The quotes must enclose the whole `src` value. Opening them only after
    # `data=` leaves the attribute unquoted and makes the literal quote
    # characters part of the text encoded in the QR code.
    txt = f"""<img src="{url}https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Drop the CJK environment commands around non-western characters

    Only the ``\\begin{CJK}{UTF8}{gbsn} ... \\end{CJK}`` wrapper is handled so
    far; the enclosed text is kept. The list of patterns may need to grow.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_environment = re.compile(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})")
    # keep only the second group, i.e. what sits between \begin and \end
    return cjk_environment.sub(r"\2", text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last one is reduced to its initial(s); hyphenated
    tokens keep their hyphen (``Anna-Beth`` -> ``A.-B.``). The first
    parenthesised part of the name, if any, is moved to the end unchanged.

    Degenerate inputs no longer raise ``IndexError``: an empty name gives an
    empty string, an unbalanced ``(`` is left in place, and empty parts of a
    hyphenated token (``Jean-``, ``--``) are ignored.

    :param name: full name
    :returns: initials
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.findall(r"\((.*?)\)", name)
        if found:
            # no match means the parenthesis is never closed: keep the name as is
            suffix = found[0]
            name = name.replace(f"({suffix})", '')
    split = name.split()

    initials = []
    for token in split[:-1]:
        # empty parts come from leading, trailing or doubled hyphens
        parts = [k for k in token.split('-') if k]
        if not parts:
            continue
        initials.append('-'.join(k[0] + '.' for k in parts))
    if split:
        # the last token (family name) is kept in full
        initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff (Mitarbeiter) list webpage from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects the names containing one of the listed family names
    (IT, administration, technical staff). The test is a plain substring match.

    :param name: name
    :returns: False if name is not a scientist
    """
    excluded_names = ('Licht', 'Binroth', 'Witzel', 'Jordan',
                      'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                      'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                      'Dehen', 'Beckmann', 'Jager', 'Jäger')
    return not any(excluded in name for excluded in excluded_names)

def add_author_to_list(author_list: list) -> list:
    """ Append the hard-coded extra authors that are not in the list yet

    The list is modified in place and also returned.

    :param author_list: list of authors
    :returns: updated list of authors
    """
    extra_authors = ('T. Henning',)
    missing = [name for name in extra_authors if name not in author_list]
    author_list.extend(missing)
    return author_list

# Author names come from the MPIA website (mpia.get_mpia_mitarbeiter_list()
# already does some filtering); the name is the second field of each entry.
# Non-scientists are dropped, then the authors whose MPIA name and name on
# papers are inconsistent are added back by hand.
mpia_authors = add_author_to_list([
    entry[1]
    for entry in mpia.get_mpia_mitarbeiter_list()
    if filter_non_scientists(entry[1])
])

In [4]:
# today's new arXiv papers, as returned by `get_new_papers`
new_papers = get_new_papers()
# add manual references
# (identifiers listed here are fetched one by one and appended to `new_papers`)
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    """Apply `fn` to `value`, falling back to `value` itself if the call raises

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of `fn`, also the fallback result
    :returns: the result of the call, or `value` unchanged on any ``Exception``
    """
    try:
        result = fn(value, *args, **kwargs)
    except Exception:
        result = value
    return result

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    # (names that `mpia.get_initials` cannot process are kept unchanged)
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # A matched author is wrapped in a <mark> tag. Look for the tag itself: the
    # bare substring 'mark' would also select authors whose own name contains it.
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if '<mark>' in hl]
    # the paper now carries the normalised / highlighted author names
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

L. Xie  ->  Z.-L. Xie  |  ['L. Xie']
L. Kreidberg  ->  L. Kreidberg  |  ['L. Kreidberg']
W. Brandner  ->  W. Brandner  |  ['W. Brandner']
G. Chauvin  ->  G. Chauvin  |  ['G. Chauvin']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J. Li  ->  J. Li  |  ['J. Li']
J. Li  ->  J. Li  |  ['J. Li']
M. Zhang  ->  M. Zhang  |  ['M. Zhang']


J. Li  ->  J. Li  |  ['J. Li']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']
Arxiv has 77 new papers today
          7 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# `documents` collects (paper_id, markdown text) for every paper processed successfully,
# `failed` collects (paper, reason) for the others.
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalise the identifier: lower case, no 'arxiv:' prefix, no surrounding whitespace.
    # NOTE(review): r'\n' is a raw string, so this removes a literal backslash followed
    # by 'n', not a newline character -- confirm this is what is intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    # the source of each paper lives in its own folder; an existing folder is reused
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` raises AffiliationError when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is assigned but not used anywhere in this cell
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well:
        # when the number of parsed authors differs from the arXiv listing,
        # fall back on the (already highlighted) arXiv author list
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # fall back on the arXiv abstract when none was extracted from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # header line: arXiv badge, appearance date and (if any) the arXiv comments
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        # (best effort: on failure the text is kept as is and the issue is printed)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # any other problem (download, parsing, ...) is reported as a warning
        # and the paper is recorded as failed
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/7 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2511.08674
extracting tarball to tmp_2511.08674... done.
Retrieving document from  https://arxiv.org/e-print/2511.08691


extracting tarball to tmp_2511.08691... done.


L. Kreidberg  ->  L. Kreidberg  |  ['L. Kreidberg']


Issues with the citations
repeated bibliography entry: Bittner2018Lis
Retrieving document from  https://arxiv.org/e-print/2511.08862
extracting tarball to tmp_2511.08862... done.
Retrieving document from  https://arxiv.org/e-print/2511.09074


extracting tarball to tmp_2511.09074... done.
Retrieving document from  https://arxiv.org/e-print/2511.09107
extracting tarball to tmp_2511.09107... done.
Retrieving document from  https://arxiv.org/e-print/2511.09489


not a gzip file


Retrieving document from  https://arxiv.org/e-print/2511.09518
extracting tarball to tmp_2511.09518...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# open() does not create missing folders: make sure the log folder exists
os.makedirs(os.path.dirname(logfile), exist_ok=True)

with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    # same header in the log file and in the notebook (was misspelled "Sucessful")
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a document was generated
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that identical reasons end up under a single header
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation mismatches are expected (green), anything else is a real failure (red)
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2511.08691-b31b1b.svg)](https://arxiv.org/abs/2511.08691) | **No TiO detected in the hot Neptune-desert planet LTT-9779 b in reflected light at high spectral resolution**  |
|| S. R. Vaughan, et al. -- incl., <mark>L. Kreidberg</mark> |
|*Appeared on*| *2025-11-13*|
|*Comments*| *20 pages, 15 figures, 7 tables. Accepted to Astronomy and Astrophysics*|
|**Abstract**|            LTT-9779 b is an inhabitant of the hot Neptune desert and one of only a few planets with a measured high albedo. Characterising the atmosphere of this world is the key to understanding what processes dominate in creating the hot Neptune desert. We aim to characterise the reflected light of LTT-9779 b at high spectral resolution to break the degeneracy between clouds and atmospheric metallicity. This is key to interpreting its mass loss history which may illuminate how it kept its place in the desert. We use the high resolution cross-correlation spectroscopy technique on four half-nights of ESPRESSO observations in 4-UT mode (16.4-m effective mirror) to constrain the reflected light spectrum of LTT-9779 b. We do not detect the reflected light spectrum of LTT-9779 b despite these data having the expected sensitivity at the level 100 ppm. Injection tests on the post-eclipse data indicate that TiO should have been detected for a range of different equilibrium chemistry models. Therefore this non-detection suggests TiO depletion in the western hemisphere however, this conclusion is sensitive to temperature which impacts the chemistry in the upper atmosphere and the reliability of the line list. Additionally, we are able to constrain the top of the western cloud deck to $P_{\text{top, western}}<10^{-2.0}$ bar and the top of the eastern cloud deck $P_{\text{top, eastern}}<10^{-0.5}$ bar, which is consistent with the predicted altitude of MgSiO$_3$ and Mg$_2$SiO$_4$ clouds from JWST NIRISS/SOSS. While we do not detect the reflected light spectrum of LTT-9779 b, we have verified that this technique can be used in practice to characterise the high spectral resolution reflected light of exoplanets so long as their spectra contain a sufficient number of deep spectral lines. Therefore this technique may become an important cornerstone of exoplanet characterisation with the ELT and beyond.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2511.08674-b31b1b.svg)](https://arxiv.org/abs/2511.08674) | **Where Giants Dwell: Probing the Environments of Early Massive Quiescent Galaxies**  |
|| G. D. Lucia, <mark>L. Xie</mark>, M. Hirschmann, F. Fontanot |
|*Appeared on*| *2025-11-13*|
|*Comments*| *11 pages, 10 figures, submitted to A&A*|
|**Abstract**|            We investigate the environments of massive quiescent galaxies at 3 < z < 5 using the GAlaxy Evolution and Assembly (GAEA) theoretical model. We select galaxies with stellar mass ~10^10.8 Msun and specific star formation rate below 0.3x t_Hubble, yielding in a sample of about 5,000 galaxies within a simulated volume of ~685 Mpc. These galaxies have formation times that cover well the range inferred from recent observational data, including a few rare objects with very short formation time-scales and early formation epochs. Model high-z quiescent galaxies are alpha-enhanced and exhibit a wide range of stellar metallicity, in broad agreement with current observational estimates. Massive high-z quiescent galaxies in our model occupy a wide range of environments, from void-like regions to dense knots at the intersections of filaments. Quiescent galaxies in underdense regions typically reside in halos that collapsed early and grew rapidly at high redshift, though this trend becomes difficult to identify observationally due to large intrinsic scatter in star formation histories. The descendants of high-z massive quiescent galaxies display a broad distribution in mass and environment by z=0, reflecting the stochastic nature of mergers. About one-third of these systems remain permanently quenched, while most rejuvenation events are merger-driven and more common in overdense regions. Our results highlight the diversity of early quiescent galaxies and caution against assuming that all such systems trace the progenitors of present day most massive clusters.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2511.08862-b31b1b.svg)](https://arxiv.org/abs/2511.08862) | **CoronaGraph Instrument Reference stars for Exoplanets (CorGI-REx) I. Preliminary Vetting and Implications for the Roman Coronagraph and Habitable Worlds Observatory**  |
|| J. Hom, et al. -- incl., <mark>W. Brandner</mark>, <mark>G. Chauvin</mark> |
|*Appeared on*| *2025-11-13*|
|*Comments*| *Accepted for Publication in AJ, 27 pages, 8 figures, 6 tables*|
|**Abstract**|            The upcoming Roman Coronagraph will be the first high-contrast instrument in space capable of high-order wavefront sensing and control technologies, a critical technology demonstration for the proposed Habitable Worlds Observatory (HWO) that aims to directly image and characterize habitable exoEarths. The nominal Roman Coronagraph observing plan involves alternating observations of a science target and a bright, nearby reference star. High contrast is achieved using wavefront sensing and control, also known as "digging a dark hole", where performance depends on the properties of the reference star, requiring V<3, a resolved stellar diameter <2 mas, and no stellar multiplicity. The imposed brightness and diameter criteria limit the sample of reference star candidates to high-mass main sequence and post-main sequence objects, where multiplicity rates are high. A future HWO coronagraph may have similarly restrictive criteria in reference star selection. From an exhaustive literature review of 95 stars, we identify an initial list of 40 primary and 18 reserve reference star candidates relevant to both the Roman Coronagraph and HWO. We present results from an initial survey of these candidates with high-resolution adaptive optics imaging and speckle interferometry and identify no new companions. We discuss the need for higher-contrast observations to sufficiently vet these reference star candidates prior to Roman Coronagraph observations along with the implications of reference star criteria on observation planning for Roman and HWO.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2511.09074-b31b1b.svg)](https://arxiv.org/abs/2511.09074) | **Mock Observations for the CSST Mission: HSTDM--Synthetic Data Generation**  |
|| S. Tan, et al. -- incl., <mark>X. Zhang</mark>, <mark>J. Li</mark> |
|*Appeared on*| *2025-11-13*|
|*Comments*| *22 pages, 13 figures*|
|**Abstract**|            The High Sensitivity Terahertz Detection Module (HSTDM), a key component of the backend modules on board the China Space Station Telescope (CSST), will offer great opportunities for the discovery of Terahertz Astronomy, with implications that extend well beyond China to the global astronomical this http URL is imperative that the raw data collected by HSTDM undergoes meticulous calibration and processing through the HSTDM data processing pipeline (HSTDM pipeline for short) to ensure the accuracy and effectiveness of the final science data to be archived for further this http URL process necessitates that the HSTDM pipeline address instrumental artifacts and effects as well as the coordination of data flow of the scheduled observing sequences under all observing modes of HSTDM within the CSST automated processing this http URL the understanding of CSST HSTDM data processing develops during the pipeline development stage, it becomes essential to assess the accuracy, the robustness and the performance of the HSTDM pipeline under all observing modes of HSTDM so that components of the HSTDM pipeline be rationally added, removed, amended or extended within the modular this http URL this paper, we develop practical simulation methods to facilitate this need. The contribution of synthetic data generation of HSTDM observation includes two parts: 1. HSTDM instrumental effect simulation based on both real testing profiles and simulated models; 2. Observing data flow generation based on HSTDM observing mode scenario. The simulation methods have been implemented and shown to be practical in testing the HSTDM pipeline during the development stage.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2511.09107-b31b1b.svg)](https://arxiv.org/abs/2511.09107) | **Detection of Lensed Gravitational Waves from dark matter halos with deep learning**  |
|| M. Sun, et al. -- incl., <mark>J. Li</mark>, <mark>M. Zhang</mark> |
|*Appeared on*| *2025-11-13*|
|*Comments*| **|
|**Abstract**|            Lensed gravitational waves (GWs) provide a new window into the study of dark matter substructures, yet the faint interference signatures they produce are buried in detector noise. To address this challenge, we develop a deep learning framework based on a residual one-dimensional convolutional neural network for lensed GW identification under multiband observations. The model directly processes multiband waveforms from binary neutron star systems, covering the early inspiral observed by the DECi-hertz Interferometer Gravitational wave Observatory (DECIGO) and the late inspiral observed by the Einstein Telescope (ET), corresponding approximately to the wave-optics and geometrical-optics regimes, respectively. It enables end-to-end classification of five classes: pure noise, unlensed GWs, and three representative lensed GWs corresponding to singular isothermal sphere (SIS), cored isothermal sphere (CIS), and Navarro-Frenk-White (NFW) profiles. A dataset of 10^6 simulated samples was constructed with signal-to-noise ratios (SNR) ranging from 5 to 100. The deep learning model with multiband observations achieves an accuracy of 97.0% and a macro-averaged F1 score of 0.97, significantly exceeding the single-detector performance, where DECIGO and ET reach 72.8% and 62.3%, respectively. Even in the low-SNR regime (SNR < 20), the model maintains an accuracy above 63%, while in the high-SNR regime (SNR > 80), its accuracy approaches 99.8%. These results demonstrate that multiband GW observations effectively enhance the detection of lensed GWs within complex noise environments, providing a robust and efficient pathway for the automated identification of lensed GWs in future multiband observations.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2511.09518-b31b1b.svg)](https://arxiv.org/abs/2511.09518) | **Warped Disk Galaxies: Statistical Properties from DESI Legacy Imaging Surveys DR8**  |
|| <mark>Y. Wang</mark>, et al. |
|*Appeared on*| *2025-11-13*|
|*Comments*| *16 pages, 11 figures, submitted. Comments are welcome*|
|**Abstract**|            Warped structures are often observed in disk galaxies, yet their physical origin is still under investigation. We present a systematic study of warped edge-on disk galaxies based on imaging data from the DESI Legacy Imaging Surveys DR8, with the expectation that this large sample size, enabled by wide-area surveys, will offer new perspectives on the formation of disk warps. Using a deep learning approach, we trained an EfficientNet-B3 convolutional neural network to classify the morphology of edge-on-disk galaxies into warped and non-warped categories. Our model was trained on a curated and visually verified set of labeled galaxy images and applied to a large dataset of over 595,651 edge-on disk galaxies selected from the Galaxy Zoo DESI catalog. Our results provide the largest warp catalog to date, consisting of 23996 warped edge-on disk galaxies, and reveal statistical trends between warp occurrence and galaxy properties. Compared to their non-warped counterparts, these warped disk galaxies tend to have bluer colors, lower stellar masses, higher gas fractions and star-formation rates, smaller Sérsic indices and larger disk sizes. In addition, warped disk galaxies show higher projected number densities of neighboring galaxies than their non-warped counterparts, particularly within \( R_{\mathrm{proj}} \lesssim 50~\mathrm{kpc} \), where the local number density is roughly twice as high.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2511.09489-b31b1b.svg)](https://arxiv.org/abs/2511.09489) | **Global Distribution of the Key Species on the Surface of Europa**  |
|| <mark>J. Li</mark>, Y. Shou, C. Li, X. Jia |
|*Appeared on*| *2025-11-13*|
|*Comments*| **|
|**Abstract**|            The icy surface of Europa is continuously bombarded by ions and electrons from Jupiter's magnetosphere. The bombardment of the particles dissociates water molecules on the surface of Europa and introduces impurities to the icy surface. Such processes lead to the generation of the non-water species on the surface of Europa. Such processes lead to the generation of the nonwater species on the surface of Europa. These chemical species are closely related to the chemistry of the icy crust and the subsurface ocean, as well as Europa's habitability. However, our knowledge of the global distribution of these species is limited due to the sparse satellite and telescope observations on Europa. In this study, we combine a Europa plasma model and a chemical-transport model to simulate the global distribution of the key nonwater species on the surface of Europa. The initial results from our model agree well with the existing observations on the distributions of H2SO4 and SO2 but they show a significant discrepancy with the observed distribution of H2O2. Sensitivity tests on the reaction rate coefficients indicate that the simulated global distribution of all three species fit the observations well if the reaction rate coefficients in the ice are reduced by one order of magnitude. This finding provides a useful constraint on the rate coefficient of the chemical reactions in the ice. Furthermore, our model predicts that the O2 on the surface ice of Europa is concentrated on the leading hemisphere. The simulated global distribution of the key species on Europa may provide useful guidance for future missions to Europa, such as Europa Clipper and JUICE.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error not a gzip file</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    The figures referenced in the markdown text (``[Fig...](path)`` links and
    ``<img src="path" .../>`` tags) are copied below `directory`, keeping their
    relative path; online figures and missing files are skipped. The markdown
    text itself is written to ``directory/md_fname``.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to write
    :param directory: destination directory, created (with its parents) if needed
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs rather than mkdir: nested destinations need their parents too
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # reproduce the relative location of the figure below `directory`
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    # lists every figure found in the text, including the skipped ones
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# write one `<paper_id>.md` file (plus its local figures) per generated document
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2511.08691/./images/Injection_grid_post_c24.png', 'tmp_2511.08691/./images/inputs_to_spectra.png', 'tmp_2511.08691/./images/Injection_grid_pre_c24.png']
copying  tmp_2511.08691/./images/Injection_grid_post_c24.png to _build/html/
copying  tmp_2511.08691/./images/inputs_to_spectra.png to _build/html/
copying  tmp_2511.08691/./images/Injection_grid_pre_c24.png to _build/html/
exported in  _build/html/2511.08691.md
    + _build/html/tmp_2511.08691/./images/Injection_grid_post_c24.png
    + _build/html/tmp_2511.08691/./images/inputs_to_spectra.png
    + _build/html/tmp_2511.08691/./images/Injection_grid_pre_c24.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
# render every generated document; the trailing `;` hides the cell's own output
# (the list of None built by the comprehension)
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand$
$\newcommand{\ltt}{LTT-9779}$
$\newcommand{\lttb}{LTT-9779 b}$
$\newcommand{\arraystretch}{1.25}$</div>



<div id="title">

# No TiO detected in the hot Neptune-desert planet LTT-9779 b in reflected light at high spectral resolution

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2511.08691-b31b1b.svg)](https://arxiv.org/abs/2511.08691)<mark>Appeared on: 2025-11-13</mark> -  _20 pages, 15 figures, 7 tables. Accepted to Astronomy and Astrophysics_

</div>
<div id="authors">

S. R. Vaughan, et al. -- incl., <mark>L. Kreidberg</mark>

</div>
<div id="abstract">

**Abstract:** LTT-9779 b is an inhabitant of the hot Neptune desert and one of only a few planets with a measured high albedo. Characterising the atmosphere of this world is the key to understanding what processes dominate in reducing the number of short-period intermediate mass planets, creating the hot Neptune desert. We aim to characterise the reflected light of LTT-9779 b at high spectral resolution to break the degeneracy between clouds and atmospheric metallicity. This is key to interpreting its mass loss history which may illuminate how it kept its place in the desert. We use the high resolution cross-correlation spectroscopy technique on four half-nights of ESPRESSO observations in 4-UT mode (16.4-m effective mirror) to constrain the reflected light spectrum of $\lttb$ . We do not detect the reflected light spectrum of $\lttb$ despite these data having the expected sensitivity at the level 100 ppm. Injection tests on the post-eclipse data indicate that TiO should have been detected for a range of different equilibrium chemistry models. Therefore this non-detection suggests TiO depletion in the western hemisphere however, this conclusion is sensitive to temperature which impacts the chemistry in the upper atmosphere and the reliability of the line list. Additionally, we are able to constrain the top of the western cloud deck to $P_{\text{top, western}}<10^{-2.0}$ bar and the top of the eastern cloud deck $P_{\text{top, eastern}}<10^{-0.5}$ bar, which is consistent with the predicted altitude of $MgSiO_3$ and  $Mg_2$ $SiO_4$ clouds from JWST NIRISS/SOSS. While we do not detect the reflected light spectrum of $\lttb$ , we have verified that this technique can be used in practice to characterise the high spectral resolution reflected light of exoplanets so long as their spectra contain a sufficient number of deep spectral lines. Therefore this technique may become an important cornerstone of exoplanet characterisation with the ELT and beyond.

</div>

<div id="div_fig1">

<img src="tmp_2511.08691/./images/Injection_grid_post_c24.png" alt="Fig12" width="100%"/>

**Figure 12. -** Restrictions placed on the VMR as a function of cloud deck altitude for the post-eclipse observations. The injection recovery tests presented here rule out the models coloured in light brown. Additionally, we highlight the models in dark brown which, due to the saturation of a large number of the spectral lines present, have an average contrast ratio more than three sigma below the contrast of $\ltt$b at these wavelengths measured by JWST NIRISS/SOSS data. To compare these restrictions to more complex models, we also plot the EU$_{\text{VMRs}}$ for metallicities ranging from $0.1\times$ solar to $1000\times$ solar as a function of the altitude of the cloud deck for both the original (T.1, circles) and modified (T.2, crosses) temperature-profiles for the western-dayside. (*fig:recoverygridpost_companion*)

</div>
<div id="div_fig2">

<img src="tmp_2511.08691/./images/inputs_to_spectra.png" alt="Fig15" width="100%"/>

**Figure 15. -** The inputs used to create the `self-consistent' spectra with PICASO for each atmospheric segment (top row). The second row shows the temperature-pressure profiles of each segment with the solid line indicating the original profile and the shaded regions the one and two sigma errors. The dash line indicates the modification to the profile for the isothermal models. The dotted lines are the condensation curves for different cloud species. Previous works have favoured the presence of $Mg_2$$SiO_4$ and $MgSiO_3$ clouds over those of other types \citep{Hoyer2023, Radica2024a, Coulombe2025, Radica2025} thus we only consider $Mg_2$$SiO_4$ and $MgSiO_3$ clouds in our models. The third row shows the optical depth per atmospheric layer of the modelled $Mg_2$$SiO_4$ and $MgSiO_3$ clouds as a function of wavelength. In the eastern-dayside model, the top of the cloud deck forms at $10^{-1.5}$ bar and for the other two segments it is at $10^{-4}$ bar. The last row shows the VMRs of all the species used in these models for $10\times$ solar metallicity. These assume equilibrium chemistry with the original temperature-profile (solid lines) and modified profile (dashed lines). (*fig:spectra_inputs*)

</div>
<div id="div_fig3">

<img src="tmp_2511.08691/./images/Injection_grid_pre_c24.png" alt="Fig13" width="100%"/>

**Figure 13. -** As Figure \ref{fig:recoverygridpost_companion} but for the pre-eclipse observations and eastern-dayside EU$_{\text{VMRs}}$. (*fig:recoverygridpre_companion*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2511.08691"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

def files_changed_within(paths, days, now=None):
    """ Select the files whose ``st_ctime`` is at most `days` days old

    :param paths: iterable of file paths to inspect
    :param days: width of the look-back window, in days
    :param now: naive local reference time (defaults to ``datetime.today()``)
    :returns: list of the selected paths, sorted in descending order of path
    """
    # Imported locally so the helper does not depend on what the global name
    # `datetime` is bound to (class here, module in the next cell).
    from datetime import datetime as _datetime, timedelta as _timedelta

    if now is None:
        now = _datetime.today()
    window = _timedelta(days=days)
    recent = []
    for path in paths:
        # Naive *local* time, i.e. the same convention as `datetime.today()`.
        # Comparing a UTC-converted timestamp with a local `now` would shift
        # the window by the UTC offset of the machine.
        # NOTE(review): st_ctime is the metadata-change time on POSIX, not the
        # content-modification time (st_mtime) -- confirm which one is wanted.
        changed = _datetime.fromtimestamp(os.stat(path).st_ctime)
        if now - changed <= window:
            recent.append(path)
    return sorted(recent, reverse=True)


files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = files_changed_within(files, days, now)
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

120  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(filename, date)`` pairs. ``date`` is compared
                as a string against ``str(datetime.date)``, so it is presumably
                an ISO ``YYYY-MM-DD`` string -- verify against the producer.
    :param days: width of the look-back window, in days
    :returns: generator over the filenames whose date is on or after the
              cutoff, most recent date first
    """
    # Local import: elsewhere in this notebook the global name `datetime` is
    # bound to the *class* (`from datetime import datetime`), which would make
    # `datetime.date.today()` fail depending on the cell execution order.
    import datetime as _dt

    # The cutoff does not change while iterating: compute it once.
    cutoff = str(_dt.date.today() - _dt.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date of each document

    Each file is read line by line until the first line containing
    "Appeared on"; the rest of the file is not scanned.

    :param lst_file: iterable of paths of the files to scan
    :returns: list of ``(filename, date)`` tuples, one per file in which an
              "Appeared on" line was found (other files are left out)
    """

    def get_date(line):
        """ Text between 'Appeared on:' and '</mark>', stripped """
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument itself, not over a global list of files.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    break  # only the first occurrence matters
    return dates

from glob import glob
# Select the documents whose "Appeared on" date falls within the last week.
days = 7
lst = glob('_build/html/*md')
dates = extract_appearance_dates(lst)
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

14  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` slides

    :param npub: number of carousel cells to create
    :returns: HTML text; the cells carry the ids ``slide1`` ... ``slide<npub>``
    """
    opening = [
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    ]
    # One placeholder cell per publication, numbered from 1.
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join([*opening, *cells, "  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides

    :param npub: number of grid items to create
    :returns: HTML text; the items carry the ids ``slide1`` ... ``slide<npub>``
    """
    # One placeholder item per publication, numbered from 1.
    items = [
        f"""    <div class="grid-item"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join(["""  <div class="grid"> """, *items, "  </div>"])

In [13]:
# Build the 7-day archive page: one carousel slide per selected document.
carousel = create_carousel(npub)
# Quoted, comma-separated lists: document file names (path stripped) and slide ids.
docs = ', '.join('"' + path.rsplit('/', 1)[-1] + '"' for path in res)
slides = ', '.join('"slide{0}"'.format(idx) for idx in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill the template placeholders one after the other.
for placeholder, content in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "7-day archives"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, content)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page as the 7-day archive, restricted to the documents of the last day.
days = 1
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last day.")

carousel = create_carousel(npub)
# Quoted, comma-separated lists: document file names (path stripped) and slide ids.
docs = ', '.join('"' + path.rsplit('/', 1)[-1] + '"' for path in res)
slides = ', '.join('"slide{0}"'.format(idx) for idx in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill the template placeholders one after the other.
for placeholder, content in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "Daily"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, content)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6  # maximum number of papers shown in the grid

# Latest appearance date first. `reversed(sorted(...))` (rather than
# `sorted(..., reverse=True)`) is deliberate: it keeps the existing ordering
# among papers that share the same date. The limit follows `npub` instead of a
# second hard-coded 6.
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), npub)]
print(len(res), f" publications selected (at most {npub} requested).")

# Fewer documents than requested may be available: size the grid, the slide
# list and the title from what was actually selected so that no slide is left
# without a document.
npub = len(res)

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
