# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation issues.

    A filter registered further down in this cell makes warnings of this
    category always shown.
    """
    pass

class AffiliationError(RuntimeError):
    """Error raised by :func:`validation` when the affiliation check fails."""
    pass

def validation(source: str):
    """Check the affiliation of a paper before its TeX source is parsed

    Used as the `validation` hook given to `latex.LatexDocument`, so the
    check runs before any TeX parsing happens.

    :param source: source content forwarded to `mpia.affiliation_verifications`
    :raises AffiliationError: when `mpia.affiliation_verifications` returns
                              anything other than `True`
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # `check` carries the failure reason; format it instead of concatenating
        # so that a non-string value (e.g. False) cannot trigger a TypeError.
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")


# Never de-duplicate affiliation warnings: show every occurrence
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper, e.g. `2501.08445`
    :returns: HTML text, an `<img>` element wrapped in a `<div id="qrcode">`
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # Quote the complete `src` value. The quotes previously wrapped only the
    # arXiv link, leaving `src` unquoted and the quote characters inside the
    # data encoded by the QR code.
    txt = f"""<img src="{url}https://arxiv.org/abs/{paper_id}">"""
    return '<div id="qrcode">' + txt + '</div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Strip the LaTeX commands wrapped around non-western characters

    Only the CJK environment with the UTF8/gbsn options is handled so far;
    the characters enclosed by the environment are kept.

    :param text: the text to clean
    :return: the text without the environment begin/end commands
    """
    cjk_environment = r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})"
    # keep the middle group only, i.e. what sits between begin and end
    return re.sub(cjk_environment, lambda found: found.group(2), text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last is reduced to its initial (hyphenated tokens
    give hyphenated initials); the last token is kept in full. A part given
    in parentheses (often a non-western spelling) is appended unchanged.

    :param name: full name
    :returns: initials followed by the last token, '' for an empty name
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.search(r"\((.*?)\)", name)
        # an unclosed '(' has no match: leave the name untouched in that case
        if found is not None:
            suffix = found.group(1)
            name = name.replace(f"({suffix})", '')

    tokens = name.split()
    if not tokens:
        # nothing left to abbreviate (empty or whitespace-only name)
        return f"({suffix})" if suffix else ''

    initials = []
    for token in tokens[:-1]:
        # drop empty pieces produced by leading/trailing/double hyphens
        parts = [part for part in token.split('-') if part]
        if parts:
            initials.append('-'.join(part[0] + '.' for part in parts))
    initials.append(tokens[-1])
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff (Mitarbeiter) list from the mpia.de webpage to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects names of IT, administration and technical staff. The test is a
    plain substring match against a hand-maintained list.

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    return not any(excluded in name for excluded in remove_list)

def add_author_to_list(author_list: list) -> list:
    """ Make sure the manually maintained extra authors are in the list

    The list is extended in place and also returned.

    :param author_list: list of authors
    :returns: updated list of authors (the same list object)
    """
    for extra_author in ('T. Henning',):
        if extra_author in author_list:
            continue
        author_list.append(extra_author)
    return author_list

# Get the staff list from the MPIA website and keep the second field of each
# entry (used as the author name) when it passes the non-scientist filter.
# (mpia.get_mpia_mitarbeiter_list() does some filtering on its own)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# Add authors missing because their MPIA name and their name on papers are inconsistent
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
# Today's new papers from arXiv
new_papers = get_new_papers()
# Manual additions: put arXiv identifiers here to force extra papers into the run
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    """ Apply `fn` to `value`, falling back to `value` itself on failure

    :param fn: callable invoked as `fn(value, *args, **kwargs)`
    :param value: first argument of `fn`, also the fallback result
    :returns: the result of `fn`, or `value` unchanged if `fn` raised
    """
    try:
        outcome = fn(value, *args, **kwargs)
    except Exception:
        # best effort: keep the raw value when it cannot be processed
        return value
    else:
        return outcome

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    # (names that cannot be abbreviated are kept as they are)
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # Look for the highlighting tag itself: testing for the bare word 'mark'
    # also selects authors whose name merely contains "mark" (e.g. Newmark).
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if '<mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']
J. Li  ->  J. Li  |  ['J. Li']
P. Molliere  ->  P. Molliere  |  ['P. Molliere']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
P. Smith  ->  P. Smith  |  ['P. Smith']


Arxiv has 56 new papers today
          5 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Outputs of this cell:
#   documents: list of (paper_id, markdown text) for the papers that were processed
#   failed:    list of (paper, reason) for the papers that were rejected or crashed
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalise the identifier: lower case, no 'arxiv:' prefix, no surrounding blanks.
    # NOTE(review): r'\n' is a raw string, so it removes a literal backslash followed
    # by 'n', not a newline character -- confirm that this is what is intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # Reuse a previously extracted source folder when there is one
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` raises AffiliationError when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is built but never used (neither printed nor warned)
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well:
        # on a length mismatch, trust the (already highlighted) arXiv author list
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # Fall back to the arXiv abstract when none came out of the TeX source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # The "Appeared on: ..." text written here is what the index cells
        # search for later to date each exported document
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: a failure here does not reject the paper)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure (download, extraction, parsing...) is reported as a
        # warning and recorded so that the remaining papers are still processed
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/5 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2501.08372
Retrieving document from  https://arxiv.org/e-print/2501.08388


not a gzip file


extracting tarball to tmp_2501.08388...

 done.
Retrieving document from  https://arxiv.org/e-print/2501.08445


extracting tarball to tmp_2501.08445... done.


Issues with the citations
list index out of range
Retrieving document from  https://arxiv.org/e-print/2501.08548


extracting tarball to tmp_2501.08548... done.
Retrieving document from  https://arxiv.org/e-print/2501.08652
extracting tarball to tmp_2501.08652... done.


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# The log directory does not exist on a fresh build: create it on demand
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# Explicit UTF-8: titles and abstracts regularly contain non-ASCII characters
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        # identifiers may still carry a 'prefix:' part, compare the bare id
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # group identical reasons together so each one gets a single sub-header
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are expected (green), anything else is a real error (red)
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.08445-b31b1b.svg)](https://arxiv.org/abs/2501.08445) | **Deep high-resolution L band spectroscopy in the $\beta$ Pictoris planetary system**  |
|| M. Janson, et al. -- incl., <mark>P. Molliere</mark> |
|*Appeared on*| *2025-01-16*|
|*Comments*| *18 pages, 21 figure, accepted for publication in A&A*|
|**Abstract**|            The beta Pictoris system, with its two directly imaged planets beta Pic b and beta Pic c and its well characterised debris disk, is a prime target for detailed characterisation of young planetary systems. Here, we present high-resolution and high-contrast LM band spectroscopy with CRIRES+ of the system, primarily for the purpose of atmospheric characterisation of beta Pic b. We developed methods for determining slit geometry and wavelength calibration based on telluric absorption and emission lines, as well as methods for PSF modelling and subtraction, and artificial planet injection, in order to extract and characterise planet spectra at a high S/N and spectral fidelity. Through cross-correlation with model spectra, we detected H2O absorption for planet b in each of the 13 individual observations spanning four different spectral settings. This provides a clear confirmation of previously detected water absorption, and allowed us to derive an exquisite precision on the rotational velocity of beta Pic b, v_rot = 20.36 +/- 0.31 km/s, which is consistent within error bars with previous determinations. We also observed a tentative H2O cross-correlation peak at the expected position and velocity of planet c; the feature is however not at a statistically significant level. Despite a higher sensitivity to SiO than earlier studies, we do not confirm a tentative SiO feature previously reported for planet b. When combining data from different epochs and different observing modes for the strong H2O feature of planet b, we find that the S/N grows considerably faster when sets of different spectral settings are combined, compared to when multiple data sets of the same spectral setting are combined. This implies that maximising spectral coverage is often more important than maximising integration depth when investigating exoplanetary atmospheres using cross-correlation techniques.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.08388-b31b1b.svg)](https://arxiv.org/abs/2501.08388) | **Phasing the Giant Magellan Telescope: Lab Experiments and First On-sky Demonstration**  |
|| M. Y. Kautz, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2025-01-16*|
|*Comments*| *34 pages, 26 figures*|
|**Abstract**|            The large apertures of the upcoming generation of Giant Segmented Mirror Telescopes will enable unprecedented angular resolutions that scale as $\propto$ $\lambda$/D and higher sensitivities that scale as $D^4$ for point sources corrected by adaptive optics. However, all will have pupil segmentation caused by mechanical struts holding up the secondary mirror [European Extremely Large Telescope and Thirty Meter Telescope] or intrinsically, by design, as in the Giant Magellan Telescope. These gaps will be separated by more than a typical atmospheric coherence length (Fried Parameter). The pupil fragmentation at scales larger than the typical atmospheric coherence length, combined with wavefront sensors with weak or ambiguous sensitivity to differential piston, can introduce differential piston areas of the wavefront known as "petal modes". Commonly used wavefront sensors, such as a pyramid WFS, also struggle with phase wrapping caused by >$\lambda$/2 differential piston WFE. We have developed the holographic dispersed fringe sensor, a single pupil-plane optic that employs holography to interfere the dispersed light from each segment onto different spatial locations in the focal plane to sense and correct differential piston between the segments. This allows for a very high and linear dynamic piston sensing range of approximately $\pm$10 $\mu$m. We have begun the initial attempts at phasing a segmented pupil utilizing the HDFS on the High Contrast Adaptive optics phasing Testbed and the Extreme Magellan Adaptive Optics instrument (MagAO-X) at the University of Arizona. Additionally, we have demonstrated use of the HDFS as a differential piston sensor on-sky for the first time. We were able to phase each segment to within $\pm\lambda$/11.3 residual piston WFE ($\lambda$ = 800 nm) of a reference segment and achieved ~50 nm RMS residual piston WFE across the aperture in poor seeing conditions.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.08548-b31b1b.svg)](https://arxiv.org/abs/2501.08548) | **LAMOST medium-resolution spectroscopic survey of Galactic Open Clusters (LAMOST-MRS-O): An overview of survey plan and preliminary results**  |
|| <mark>X. Zhang</mark>, et al. |
|*Appeared on*| *2025-01-16*|
|*Comments*| *16 pages, 11 figures. Accepted for publication in RAA*|
|**Abstract**|            As part of the LAMOST medium-resolution spectroscopic survey, the LAMOST-MRS-O is a non-time domain survey that aims to perform medium-resolution spectral observations for member stars in the open cluster area. This survey plans to obtain the spectroscopic parameters such as radial velocity and metal abundances of member stars and provide data support for further study on the chemical and dynamical characteristics and evolution of open clusters in combination with Gaia data. We have completed the observations on ten open cluster fields and obtained 235184 medium-resolution spectra of 133792 stars. Based on the data analyzed of LAMOST DR11V1.1, for some clusters of particular concern, it is found that the sampling ratio of members stars with Gmag < 15 mag can reach 70%, which indicates that the LAMOST-MRS-O has reached our initial design goal.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.08652-b31b1b.svg)](https://arxiv.org/abs/2501.08652) | **The polarisation behaviour of OJ 287 viewed through radio, millimetre and optical observations between 2015 and 2017**  |
|| J. Jormanainen, et al. -- incl., <mark>P. Smith</mark> |
|*Appeared on*| *2025-01-16*|
|*Comments*| *Accepted for publication in A&A*|
|**Abstract**|            OJ 287 is a bright blazar with century-long observations, and one of the strongest candidates to host a supermassive black hole binary. Its polarisation behaviour between 2015 and 2017 (MJD 57300-58000) contains several interesting events that we re-contextualise in this study. We collected optical photometric and polarimetric data from several telescopes and obtained high-cadence light curves from this period. In the radio band, we collected mm-wavelength polarisation data from the AMAPOLA program. We combined these with existing multifrequency polarimetric radio results and the results of very-long-baseline-interferometry imaging with the Global mm-VLBI Array at 86 GHz. In December 2015, an optical flare was seen according to the general relativistic binary black hole model. We suggest that the overall activity near the accretion disk and the jet base during this time may be connected to the onset of a new moving component K seen in the jet in March 2017. With the additional optical data, we find a fast polarisation angle rotation of 210 degrees coinciding with the December 2015 flare, hinting at a possible link between these events. Based on the 86-GHz images, we calculated a new speed of 0.12 mas/yr for K, which places it inside the core at the time of the 2015 flare. This speed also supports the scenario where the passage of K through the quasi-stationary feature S1 could have been the trigger for the very-high-energy gamma-ray flare of OJ 287 seen in February 2017. With the mm-polarisation data, we established that these bands follow the cm-band data but show a difference during the time of K passing through S1. This indicates that the mm-bands trace the substructures of the jet still unresolved in the cm-bands.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.08372-b31b1b.svg)](https://arxiv.org/abs/2501.08372) | **Euclid preparation LX. The use of HST images as input for weak-lensing image simulations**  |
|| E. Collaboration, et al. -- incl., <mark>K. Jahnke</mark> |
|*Appeared on*| *2025-01-16*|
|*Comments*| *Accepted by A&A. 23 pages, 20 figures, Euclid pre-launch key paper*|
|**Abstract**|            Data from the Euclid space telescope will enable cosmic shear measurements with very small statistical errors, requiring corresponding systematic error control level. A common approach to correct for shear biases involves calibrating shape measurement methods using image simulations with known input shear. Given their high resolution, Hubble Space Telescope (HST) galaxies can, in principle, be utilised to emulate Euclid observations. In this work, we employ a GalSim-based testing environment to investigate whether uncertainties in the HST point spread function (PSF) model or in data processing techniques introduce significant biases in weak-lensing (WL) shear calibration. We used single Sérsic galaxy models to simulate both HST and Euclid observations. We then `Euclidised' our HST simulations and compared the results with the directly simulated Euclid-like images. For this comparison, we utilised a moment-based shape measurement algorithm and galaxy model fits. Through the Euclidisation procedure, we effectively reduced the residual multiplicative biases in shear measurements to sub-percent levels. This achievement was made possible by employing either the native pixel scales of the instruments, utilising the Lanczos15 interpolation kernel, correcting for noise correlations, and ensuring consistent galaxy signal-to-noise ratios between simulation branches. However, the Euclidisation procedure requires further analysis on the impact of the correlated noise, to estimate calibration bias. Additionally, we conducted an in-depth analysis of the accuracy of TinyTim HST PSF models using star fields observed in the F606W and F814W filters. We observe that F606W images exhibit a broader scatter in the recovered best-fit focus, compared to those in the F814W filter.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error not a gzip file</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Figures referenced in `md` (markdown `[Fig...](path)` links and
    `<img src="path" .../>` tags) are copied below `directory`, keeping their
    relative path; online figures and missing files are skipped.

    :param md: markdown text to export
    :param md_fname: name of the markdown file created in `directory`
    :param directory: output directory, created (with its parents) if missing
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs rather than mkdir: nested targets such as `_build/html/`
        # must also work when the parent does not exist yet
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # mirror the figure's relative folder below the output directory
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)

    md_path = os.path.join(directory, md_fname)
    # explicit UTF-8: the text regularly contains non-ASCII characters
    with open(md_path, 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", md_path)
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write one `<paper_id>.md` file per processed paper (and copy its figures)
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2501.08445/./inj_h2o.png', 'tmp_2501.08445/./mcmc_cornerplot.png', 'tmp_2501.08445/./mainmolecules.png']
copying  tmp_2501.08445/./inj_h2o.png to _build/html/
copying  tmp_2501.08445/./mcmc_cornerplot.png to _build/html/
copying  tmp_2501.08445/./mainmolecules.png to _build/html/
exported in  _build/html/2501.08445.md
    + _build/html/tmp_2501.08445/./inj_h2o.png
    + _build/html/tmp_2501.08445/./mcmc_cornerplot.png
    + _build/html/tmp_2501.08445/./mainmolecules.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render every generated document inline; each entry is (paper_id, markdown text)
for doc_entry in documents:
    display(Markdown(doc_entry[1]))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$</div>



<div id="title">

# Deep high-resolution L band spectroscopy in the $\beta$ Pictoris planetary system$\thanks{Based on observations from the European Southern Observatory, Chile (Programme 0110.C-4301(A)).}$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2501.08445-b31b1b.svg)](https://arxiv.org/abs/2501.08445)<mark>Appeared on: 2025-01-16</mark> -  _18 pages, 21 figure, accepted for publication in A&A_

</div>
<div id="authors">

M. Janson, et al. -- incl., <mark>P. Molliere</mark>

</div>
<div id="abstract">

**Abstract:** The $\beta$ Pictoris system, with its two directly imaged planets $\beta$ Pic b and $\beta$ Pic c and its well characterised debris disk, is a prime target for detailed characterisation of young planetary systems. Here, we present high-resolution and high-contrast LM band spectroscopy with CRIRES+ of the system, primarily for the purpose of atmospheric characterisation of $\beta$ Pic b. We developed methods for determining slit geometry and wavelength calibration based on telluric absorption and emission lines, as well as methods for point spread function (PSF) modelling and subtraction, and artificial planet injection, in order to extract and characterise planet spectra at a high signal-to-noise ratio ( $S/N$ ) and spectral fidelity. Through cross-correlation with model spectra, we detected $H_2$ O absorption for planet b in each of the 13 individual observations spanning four different spectral settings. This provides a clear confirmation of previously detected water absorption, and allowed us to derive an exquisite precision on the rotational velocity of $\beta$ Pic b, $v_{\rm rot} = 20.36 \pm 0.31$ km/s, which is consistent within error bars with previous determinations. We also observed a tentative $H_2$ O cross-correlation peak at the expected position and velocity of planet c; the feature is however not at a statistically significant level. Despite a higher sensitivity to SiO than earlier studies, we do not confirm a tentative SiO feature previously reported for planet b. When combining data from different epochs and different observing modes for the strong $H_2$ O feature of planet b, we find that the $S/N$ grows considerably faster when sets of different spectral settings are combined, compared to when multiple data sets of the same spectral setting are combined. 
This implies that maximising spectral coverage is often more important than maximising integration depth when investigating exoplanetary atmospheres using cross-correlation techniques.

</div>

<div id="div_fig1">

<img src="tmp_2501.08445/./inj_h2o.png" alt="Fig11" width="100%"/>

**Figure 11. -** Left: CCFs in units of $S/N$ for $H_2$O in the atmosphere of $\beta$ Pic b. Thick red line: Actual CCF for the location of the planet. Thick black line: CCF for the location at the opposite side of the star. Thin lines: CCF for injected $H_2$O, colour coded by different volume mixing ratios as shown in the colour bar. Right: $S/N$ of injected signals as function of VMR. The green shaded area shows at which range of VMRs the molecule would have been marginally detectable. (*f:injh2o*)

</div>
<div id="div_fig2">

<img src="tmp_2501.08445/./mcmc_cornerplot.png" alt="Fig5" width="100%"/>

**Figure 5. -** Results from MCMC fitting of the spin velocity and limb darkening parameter. Upper left: Histogram for the spin velocity. Upper right: Evolution of the spin velocity (blue) and limb darkening parameter (gold) along the MCMC random walk. Lower left: Correlation between spin velocity and limb darkening. Lower right: Histogram for the limb darkening parameter. (*f:broadening*)

</div>
<div id="div_fig3">

<img src="tmp_2501.08445/./mainmolecules.png" alt="Fig6" width="100%"/>

**Figure 6. -** Model spectra after continuum subtraction, showing where strong lines occur for four different molecules: $H_2$O in blue, CO in red, $CH_4$ in green, and SiO in orange. The grey regions are the wavelength ranges covered by our observations. (*f:mainmol*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2501.08445"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Select the exported markdown files whose status changed in the last `days` days
files = glob('_build/html/*.md')
days = 7
# Timezone-aware reference time: comparing a local naive "now" with UTC-based
# file times shifts the window by the local UTC offset.
now = datetime.now(timezone.utc)
res = []
for fk in files:
    # st_ctime: time of the last status change on Unix (creation time on Windows)
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= timedelta(days=days):
        # total_seconds(): `.seconds` alone drops the days part of the age
        res.append((delta.total_seconds(), fk))
# newest names first (file names sort like their arXiv identifiers)
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

311  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    """ Read the appearance date written in each exported markdown file

    Only the first line containing "Appeared on" is used; files without such
    a line are left out of the result.

    :param lst_file: iterable of markdown file names
    :returns: list of (filename, date string) pairs
    """

    def get_date(line):
        # text between 'Appeared on:' and the closing </mark> tag
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # iterate over the argument (the global `lst` was used here by mistake)
    for fname in lst_file:
        # the marker is plain ASCII: undecodable bytes elsewhere must not abort the scan
        with open(fname, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    break
    return dates

from glob import glob
# All exported documents. NOTE(review): the pattern is '*md' (no dot), so any
# name ending in "md" matches -- presumably '*.md' was meant, confirm.
lst = glob('_build/html/*md')
days = 7
# (filename, appearance date) pairs, reused by the index cells below
dates = extract_appearance_dates(lst)
# Documents of the last `days` days; `res` and `npub` feed the 7-day index page
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

16  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel made of `npub` slides

    Slides are numbered from 1 and carry the ids `slide1` ... `slide<npub>`.

    :param npub: number of slides
    :returns: HTML text
    """
    opening = ["""  <div class="carousel" """,
               """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
               ]
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{num}" class="md_view">Content {num}</div> </div>"""
        for num in range(1, npub + 1)
    ]
    return '\n'.join(opening + cells + ["  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid made of `npub` slides

    Slides are numbered from 1 and carry the ids `slide1` ... `slide<npub>`.

    :param npub: number of slides
    :returns: HTML text
    """
    items = [
        f"""    <div class="grid-item"> <div id="slide{num}" class="md_view">Content {num}</div> </div>"""
        for num in range(1, npub + 1)
    ]
    return '\n'.join(["""  <div class="grid"> """] + items + ["  </div>"])

In [13]:
# 7-day archive page: one carousel slide per selected document
carousel = create_carousel(npub)
# quoted, comma-separated file names (without their folder) and slide ids
docs = ', '.join('"' + k.split('/')[-1] + '"' for k in res)
slides = ', '.join('"slide%d"' % num for num in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# fill the template placeholders one after the other
page = page.replace("{%-- carousel:s --%}", carousel)
page = page.replace("{%-- suptitle:s --%}",  "7-day archives" )
page = page.replace("{%-- docs:s --%}", docs)
page = page.replace("{%-- slides:s --%}", slides)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Daily page: same construction as the 7-day archive, restricted to the last day
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), " publications in the last day.")

carousel = create_carousel(npub)
# quoted, comma-separated file names (without their folder) and slide ids
docs = ', '.join('"' + k.split('/')[-1] + '"' for k in res)
slides = ', '.join('"slide%d"' % num for num in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# fill the template placeholders one after the other
page = page.replace("{%-- carousel:s --%}", carousel)
page = page.replace("{%-- suptitle:s --%}",  "Daily" )
page = page.replace("{%-- docs:s --%}", docs)
page = page.replace("{%-- slides:s --%}", slides)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

4  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# maximum number of papers shown in the grid
npub = 6
# most recent first, keep at most `npub` entries (the limit was hardcoded twice)
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), npub))]
# fewer documents than requested may exist: size the grid, the slide ids and
# the title on what was actually found so that no empty slide is created
npub = len(res)
print(f"{npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
