# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big: raise PIL's `Image.MAX_IMAGE_PIXELS` limit
# to 1e9 pixels so that very large paper figures can still be opened.
# NOTE(review): Pillow documents this attribute as its "decompression bomb"
# guard -- confirm that relaxing it is acceptable for sources fetched from arXiv.
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Raised by :func:`validation` when the MPIA affiliation check fails."""

def validation(source: str):
    """Check the MPIA affiliation of a paper from its TeX source.

    Allows checks before parsing TeX code: the function is handed to
    :class:`latex.LatexDocument` as its ``validation`` callback.

    :param source: TeX source to check
    :raises AffiliationError: when :func:`mpia.affiliation_verifications`
        returns anything but ``True``; the returned value (the reason of the
        rejection) is appended to the error message.
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # Format rather than concatenate: a non-string result (e.g. ``False``)
        # must still surface as an AffiliationError, not as a TypeError.
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# Always show AffiliationWarning, instead of Python's default of reporting a
# given warning only once per location.
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: Arxiv identifier of the paper (e.g. ``2401.12276``)
    :returns: HTML snippet (usable in markdown) embedding the QR code image
    """
    from urllib.parse import quote

    api = "https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The target URL travels as a query-string value: percent-encode it so its
    # own ':' and '/' (or any unusual identifier character) cannot break the
    # request.
    target = quote(f"https://arxiv.org/abs/{paper_id}", safe="")
    # Quote the whole `src` attribute. The previous markup left `src` unquoted
    # and wrapped only the target in double quotes, which then became part of
    # the data encoded in the QR code.
    return f'<div id="qrcode"><img src="{api}{target}"></div>'

## Get the list of arXiv paper candidates

We use the MPIA staff ("Mitarbeiter") list from the mpia.de website to get the author names.
We then get all the new papers from arXiv and match their authors against that list.

In [3]:
# Staff names come from the MPIA website; identified non-scientists are
# filtered out automatically (:func:`mpia.filter_non_scientists`).
mpia_authors = mpia.get_mpia_mitarbeiter_list()
# second field of each entry: initials + fullname
normed_mpia_authors = [staff[1] for staff in mpia_authors]

new_papers = get_new_papers()
# manual references: identifiers of papers to add to today's list
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(ref) for ref in add_paper_refs])

candidates = []
for entry in new_papers:
    # Compare the author list through initials, like the MPIA list
    initials = [mpia.get_initials(name) for name in entry['authors']]
    highlighted = highlight_authors_in_list(initials, normed_mpia_authors, verbose=True)
    # a highlighted (i.e. matched) author carries a 'mark' tag
    has_mpia_author = any('mark' in hl for hl, _ in zip(highlighted, entry['authors']))
    entry['authors'] = highlighted
    # only select the paper if an author matched our list
    if has_mpia_author:
        candidates.append(entry)

n_new, n_candidates = len(new_papers), len(candidates)
print("Arxiv has {0:,d} new papers today".format(n_new))
print("          {0:,d} with possible author matches".format(n_candidates))

S. Zieba  ->  S. Zieba  |  ['S. Zieba']
L. Kreidberg  ->  L. Kreidberg  |  ['L. Kreidberg']
L. Boogaard  ->  L. Boogaard  |  ['L. Boogaard']
R. v. Boekel  ->  R. v. Boekel  |  ['R. v. Boekel']
J. Li  ->  J. Li  |  ['J. Li']
T. Müller  ->  T. Müller  |  ['T. Müller']
Arxiv has 67 new papers today
          5 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
documents = []   # (paper_id, markdown text) of each successfully processed paper
failed = []      # (paper, reason) of each rejected or crashed paper
for paper in tqdm(candidates):
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    folder = f'tmp_{paper_id}'

    try:
        # Reuse the sources when the folder of a previous run is still there
        if not os.path.isdir(folder):
            folder = retrieve_document_source(paper_id, folder)

        try:
            # `validation` is given to the parser to check the affiliations
            doc = latex.LatexDocument(folder, validation=validation)
        except AffiliationError as affilerror:
            # Not an MPIA paper: record the reason and move on to the next one
            failed.append((paper, "affiliation error: " + str(affilerror)))
            continue

        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            # fall back on the arXiv author list (already highlighted)
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors],
                normed_mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            # no abstract extracted from the TeX source: use the arXiv one
            doc._abstract = paper['abstract']

        doc.comment = (get_markdown_badge(paper_id) +
                       "<mark>Appeared on: " + paper['date'] + "</mark>")
        if paper['comments']:
            # The separator is only written when comments follow it, so that
            # papers without comments do not end with a dangling " - ".
            doc.comment += " -  _" + paper['comments'] + "_"

        full_md = doc.generate_markdown_text()

        full_md += get_markdown_qrcode(paper_id)

        # replace citations (best effort: the document is exported with its
        # raw citations when the bibliography cannot be processed)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)

        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure must not stop the loop: warn, record and continue
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/5 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2401.12276
extracting tarball to tmp_2401.12276...

 done.




Found 120 bibliographic references in tmp_2401.12276/main.bbl.
Retrieving document from  https://arxiv.org/e-print/2401.12289
extracting tarball to tmp_2401.12289... done.




✘ → 0:header
  ↳ 5543:\section{Introduction}


✔ → 5543:\section{Introduction}
  ↳ 12296:\section{Data: A2744 viewed by MUSE and \textit{JWST}/NIRCam}
✔ → 12296:\section{Data: A2744 viewed by MUSE and \textit{JWST}/NIRCam}
  ↳ 18622:\section{SED fitting with \cg}


✔ → 18622:\section{SED fitting with \cg}
  ↳ 21014:\section{Results}


✔ → 21014:\section{Results}
  ↳ 36662:\section{Conclusions}
✔ → 36662:\section{Conclusions}
  ↳ 42196:\begin{appendix}
✔ → 42196:\begin{appendix}
  ↳ 42213:\section{\cg Fitting Details}


✔ → 42213:\section{\cg Fitting Details}
  ↳ 46010:end
Retrieving document from  https://arxiv.org/e-print/2401.12404


list index out of range


extracting tarball to tmp_2401.12404... done.


Found 26 bibliographic references in tmp_2401.12404/betelgeuse.bbl.
Retrieving document from  https://arxiv.org/e-print/2401.12429
extracting tarball to tmp_2401.12429... done.
Retrieving document from  https://arxiv.org/e-print/2401.12679
extracting tarball to tmp_2401.12679...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper: see `_build/html/logs/log-{today's date}.md`.

In [5]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"
# The log folder is not created anywhere else: make sure it exists
os.makedirs(os.path.dirname(logfile), exist_ok=True)


# utf-8: author names and abstracts are not restricted to ASCII
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    success = {k[0] for k in documents}
    for candid in candidates:
        # same identifier normalisation as the one used to fill `documents`
        if candid['identifier'].lower().replace('arxiv:', '') in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that each reason gets a single sub-section in the log
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are expected (green), the rest are real errors (red)
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')

        # Every failure is displayed here as well as logged; restrict the
        # display to `color in ('red',)` to show only the important errors.
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2401.12276-b31b1b.svg)](https://arxiv.org/abs/arXiv:2401.12276) | **Characterization of K2-167 b and CALM, a new stellar activity mitigation  method**  |
|| Z. L. d. Beurs, et al. -- incl., <mark>S. Zieba</mark>, <mark>L. Kreidberg</mark> |
|*Appeared on*| *2024-01-24*|
|*Comments*| *20 pages, 16 figures, accepted for publication in MNRAS*|
|**Abstract**| We report precise radial velocity (RV) observations of HD 212657 (= K2-167), a star shown by K2 to host a transiting sub-Neptune-sized planet in a 10 day orbit. Using Transiting Exoplanet Survey Satellite (TESS) photometry, we refined the planet parameters, especially the orbital period. We collected 74 precise RVs with the HARPS-N spectrograph between August 2015 and October 2016. Although this planet was first found to transit in 2015 and validated in 2018, excess RV scatter originally limited mass measurements. Here, we measure a mass by taking advantage of reductions in scatter from updates to the HARPS-N Data Reduction System (2.3.5) and our new activity mitigation method called CCF Activity Linear Model (CALM), which uses activity-induced line shape changes in the spectra without requiring timing information. Using the CALM framework, we performed a joint fit with RVs and transits using EXOFASTv2 and find $M_p = 6.3_{-1.4}^{+1.4}$ $M_{\oplus}$ and $R_p = 2.33^{+0.17}_{-0.15}$ $R_{\oplus}$, which places K2-167 b at the upper edge of the radius valley. We also find hints of a secondary companion at a $\sim$ 22 day period, but confirmation requires additional RVs. Although characterizing lower-mass planets like K2-167 b is often impeded by stellar variability, these systems especially help probe the formation physics (i.e. photoevaporation, core-powered mass loss) of the radius valley. In the future, CALM or similar techniques could be widely applied to FGK-type stars, help characterize a population of exoplanets surrounding the radius valley, and further our understanding of their formation. |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2401.12404-b31b1b.svg)](https://arxiv.org/abs/arXiv:2401.12404) | **Images of Betelgeuse with VLTI/MATISSE across the Great Dimming**  |
|| J. Drevon, et al. -- incl., <mark>R. v. Boekel</mark> |
|*Appeared on*| *2024-01-24*|
|*Comments*| **|
|**Abstract**| From Nov. 2019 to May 2020, the red supergiant star Betelgeuse experienced an unprecedented drop of brightness in the visible domain called the great dimming event. Large atmospheric dust clouds and large photospheric convective features are suspected to be responsible for it. To better understand the dimming event, we used mid-infrared long-baseline spectro-interferometric measurements of Betelgeuse taken with the VLTI/MATISSE instrument before (Dec. 2018), during (Feb. 2020), and after (Dec. 2020) the GDE. We present data in the 3.98 to 4.15\,$\mu$m range to cover SiO spectral features molecules as well as adjacent continuum. We have employed geometrical models, image reconstruction, as well as radiative transfer models to monitor the spatial distribution of SiO over the stellar surface. We find a strongly in-homogeneous spatial distribution of SiO that appears to be looking very different between our observing epochs, indicative of a vigorous activity in the stellar atmosphere. The contrast of our images is small in the pseudo-continuum for all epochs, implying that our MATISSE observations support both cold spot and dust cloud model. |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2401.12429-b31b1b.svg)](https://arxiv.org/abs/arXiv:2401.12429) | **A new route to massive hot subdwarfs: common envelope ejection from  asymptotic giant branch stars**  |
|| Z. Li, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2024-01-24*|
|*Comments*| *9 pages, 5 figures, accepted for publication in ApJ*|
|**Abstract**| The hot subdwarf O/B stars (sdO/Bs) are known as extreme horizontal branch stars, which is of great importance in stellar evolution theory. The sdO/Bs are generally thought to have a helium-burning core and a thin hydrogen envelope $(M_{\rm env }<0.02M_\odot)$. In the canonical binary evolution scenario, sdO/Bs are considered to be the stripped cores of red giants. However, such a scenario cannot explain the recently discovered sdO/B binary, SMSS J1920, where the strong Ca H$\&$K lines in the spectrum are found. It suggests that this binary is likely originated from the recent ejection of common envelope (CE). In this {work}, we proposed a new formation channel of massive sdO/Bs, namely sdO/Bs produced from a CE ejection process with an asymptotic giant branch (AGB) star (hereafter AGB CE channel). We constructed the evolutionary model of sdO/Bs and successfully explained most of the important observed parameters of the sdO/B star in SMSS J1920, including the evolutionary age, sdO/B mass, effective temperature, surface gravity and surface helium abundance. The minimum sdO/B mass produced from the AGB CE channel is about $0.48M_\odot$. The evolutionary tracks in $\log T_{\rm eff}-\log g$ plane {may explain a fraction of the observational samples} with high-$\log T_{\rm eff}$ and low-$\log g$. Considering wind mass-loss of sdO/Bs, the model could produce helium-rich hot subdwarfs with $\log (n_{\rm He}/n_{\rm H})\gtrsim-1$. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2401.12679-b31b1b.svg)](https://arxiv.org/abs/arXiv:2401.12679) | **On the visible and thermal light curve of the large Kuiper belt object  (50000) Quaoar**  |
|| C. Kiss, et al. -- incl., <mark>T. Müller</mark> |
|*Appeared on*| *2024-01-24*|
|*Comments*| *Accepted for publication in Astronomy and Astrohysics*|
|**Abstract**| Recent stellar occultations allowed accurate instantaneous size and apparent shape determinations of the large Kuiper belt object (50000) Quaoar and detected two rings with spatially variable optical depth. In this paper we present new visible range light curve data of Quaoar from the Kepler/K2 mission, and thermal light curves at 100 and 160\,$\mu$m obtained with Herschel/PACS. K2 data provide a single-peaked period of 8.88 h, very close to the previously determined 8.84 h, and it favours an asymmetric double-peaked light curve with 17.76 h period. We clearly detected a thermal light curve with relative amplitudes of $\sim$10% both at 100 and 160 $\mu$m. A detailed thermophysical modeling of the system shows that the measurements can be best fitted with a triaxial ellipsoid shape, with a volume-equivalent diameter of 1090 km, and axis ratios of a/b = 1.19, and b/c = 1.16. This shape matches the published occultation shape, as well as visual and thermal light curve data. The radiometric size uncertainty remains relatively large ($\pm$40 km) as the ring and satellite contributions to the system-integrated flux densities are unknown. In the less likely case of negligible ring/satellite contributions, Quaoar would have a size above 1100 km and a thermal inertia $\leq$ 10 Jm$^{-2}$K$^{-1}$s$^{-1/2}$. A large and dark Weywot in combination with a possible ring contribution would lead to a size below 1080 km in combination with a thermal inertia $\gtrsim$ 10 Jm$^{-2}$K$^{-1}$s$^{-1/2}$, notably higher than that of smaller Kuiper belt objects with similar albedo and colours. We find that Quaoar's density is in the range 1.67-1.77 g/cm$^3$ significantly lower than previous estimates. This density value fits very well to the relationship observed between the size and density of the largest Kuiper belt objects. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2401.12289-b31b1b.svg)](https://arxiv.org/abs/arXiv:2401.12289) | **Galaxy main sequence and properties of low-mass Lyman-alpha Emitters  towards reionisation viewed by VLT/MUSE and JWST/NIRCam**  |
|| I. Goovaerts, et al. -- incl., <mark>L. Boogaard</mark> |
|*Appeared on*| *2024-01-24*|
|*Comments*| *10 pages, 5 figures, accepted for publication in Astronomy and Astrophysics*|
|**Abstract**| Faint, star-forming galaxies likely play a dominant role in cosmic reionisation. Strides have been made in recent years to characterise these populations at high redshifts ($z>3$). Now for the first time, with JWST photometry beyond 1$\,\mu m$ in the rest frame, we can derive accurate stellar masses and position these galaxies on the galaxy main sequence. We seek to assess the place of 96 individual Lyman-alpha emitters (LAEs) selected behind the A2744 lensing cluster with MUSE spectroscopy on the galaxy main sequence. We also compare derived stellar masses to Lyman-alpha luminosities and equivalent widths to better quantify the relationship between the Lyman-alpha emission and the host galaxy. These 96 LAEs lie in the redshift range $2.9<z<6.7$, and their range of masses extends down to $10^6\,\mathrm{M_{\odot}}$ (over half with $\mathrm{M_{\star}}<10^8\,\mathrm{M_{\odot}}$). We use the JWST/NIRCam and HST photometric catalogs from the UNCOVER project, giving us excellent wavelength coverage from $450\,\mathrm{nm}$ to $4.5\,\mu m$. We find a main sequence relation for these low mass LAEs of the form: $\mathrm{log\,SFR}=(0.88\pm0.07 - 0.030\pm0.027\times t)\,\mathrm{log\,M_{\star}} - ( 6.31\pm0.41 - 0.08\pm0.37\times t)$. This is in approximate agreement with best-fits of previous collated studies, however, with a steeper slope and a higher normalisation. This indicates that low-mass LAEs towards the epoch of reionisation lie above typical literature main sequence relations derived at lower redshift and higher masses. Additionally, comparing our results to UV-selected samples, we see that while low-mass LAEs lie above these typical main sequence relations, they are likely not singular in this respect at these masses and redshifts. While low-mass galaxies have been shown to play a significant role in cosmic reionisation, our results point to no special position for LAEs in this regard. |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error list index out of range</p> |

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    The local figures referenced in `md` (markdown links labelled
    ``[Fig...](path)`` and ``<img src="path" .../>`` tags) are copied below
    `directory` under the same relative path, then `md` itself is written.

    :param md: markdown text of the document
    :param md_fname: name of the markdown file to write in `directory`
    :param directory: export directory (created if missing)
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs (not mkdir): the export directory may be nested
        os.makedirs(directory, exist_ok=True)

    # The link pattern must not be greedy: with `.*` two figure links on the
    # same line were captured as a single (non-existent) filename.
    fig_fnames = (re.findall(r'\[Fig[^\]]*\]\(([^)]*)\)', md) +
                  re.findall(r'<img src="([^>\s]*)"[^>]*/>', md))
    copied = []
    for fname in fig_fnames:
        # No need to copy online figures. Test the URL scheme only: a local
        # file may well have "http" somewhere in its name.
        if fname.startswith(('http://', 'https://')):
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
        copied.append(destfname)
    # utf-8: the documents are not restricted to ASCII (names, symbols)
    with open(os.path.join(directory, md_fname), 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    # report only what was actually copied
    for destfname in copied:
        print("    + " + destfname)

In [7]:
# One markdown file (plus its figures) per successfully parsed paper
for identifier, summary in documents:
    export_markdown_summary(summary, f"{identifier:s}.md", '_build/html/')

exported in  _build/html/2401.12276.md
    + _build/html/tmp_2401.12276/./Figures_used_mnras/paper_plots_fig4_withletters.png
    + _build/html/tmp_2401.12276/./Figures_updated_referee/Fig8_cropped.png
    + _build/html/tmp_2401.12276/./Figures_updated_referee/Periodogram_shifted5CCFs__all_data23_11_30_12_00_09PM_cropped.png
exported in  _build/html/2401.12404.md
    + _build/html/tmp_2401.12404/./Images/2018-12_PAPER_mod.png
    + _build/html/tmp_2401.12404/./Images/2020-02_PAPER_mod.png
    + _build/html/tmp_2401.12404/./Images/2020-12_PAPER_mod.png
    + _build/html/tmp_2401.12404/./Images/TOTAL_LM_FLUX.png
    + _build/html/tmp_2401.12404/./Images/2018-12_PAPER_modified1.png
    + _build/html/tmp_2401.12404/./Images/2020-02_PAPER_modified2.png
    + _build/html/tmp_2401.12404/./Images/2020-12_PAPER_modified3.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
# Render each generated summary; the markdown text is the second field
for document in documents:
    display(Markdown(document[1]))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\bjdtdb}{\ensuremath{\rm{BJD_{TDB}}}}$
$\newcommand{\feh}{\ensuremath{\left[{\rm Fe}/{\rm H}\right]}}$
$\newcommand{\teff}{\ensuremath{T_{\rm eff}}}$
$\newcommand{\teq}{\ensuremath{T_{\rm eq}}}$
$\newcommand{\ecosw}{\ensuremath{e\cos{\omega_*}}}$
$\newcommand{\esinw}{\ensuremath{e\sin{\omega_*}}}$
$\newcommand{\msun}{\ensuremath{ M_\Sun}}$
$\newcommand{\rsun}{\ensuremath{ R_\Sun}}$
$\newcommand{\lsun}{\ensuremath{ L_\Sun}}$
$\newcommand{\mj}{\ensuremath{ M_{\rm J}}}$
$\newcommand{\mp}{\ensuremath{M_{p}}}$
$\newcommand{\rj}{\ensuremath{ R_{\rm J}}}$
$\newcommand{\me}{\ensuremath{ M_{\oplus}}}$
$\newcommand{\re}{\ensuremath{ R_{\oplus}}}$
$\newcommand{\fave}{\langle F \rangle}$
$\newcommand{\fluxcgs}{10^9 erg s^{-1} cm^{-2}}$
$\newcommand{\thisstar}{K2-167}$
$\newcommand{\thisplanet}{K2-167 b}$
$\newcommand{\olddrsrmsRAW}{\ensuremath{4.02}}$
$\newcommand{\olddrssigmaRAW}{\ensuremath{3.95}}$
$\newcommand{\olddrssigmaAC}{\ensuremath{2.75}}$
$\newcommand{\olddrssigmaUS}{\ensuremath{2.04}}$
$\newcommand{\newdrsrmsRAW}{\ensuremath{3.01}}$
$\newcommand{\newdrssigmaRAW}{\ensuremath{3.01}}$
$\newcommand{\newdrssigmaAC}{\ensuremath{2.78}}$
$\newcommand{\newdrssigmaUS}{\ensuremath{2.41}}$
$\newcommand{\litperiod}{\ensuremath{9.978543}}$
$\newcommand{\bonomomass}{\ensuremath{6.5_{-1.5}^{+1.6}}}$
$\newcommand{\erikamass}{\ensuremath{7.0^{+1.7}_{-1.7}}}$
$\newcommand{\calmmass}{\ensuremath{6.3_{-1.4}^{+1.4}}}$
$\newcommand{\erikaradius}{\ensuremath{2.33^{+0.17}_{-0.15}}}$
$\newcommand{\bonomoK}{\ensuremath{1.97_{-0.55}^{+1.1}}}$
$\newcommand{\erikaK}{\ensuremath{1.88^{+0.49}_{-0.48}}}$
$\newcommand{\calmK}{\ensuremath{1.94^{+0.44}_{-0.44}}}$
$\newcommand{\GenevaObservatory}{Observatoire de Genève, Université de Genève, 51 chemin des Maillettes, 1290 Versoix, Switzerland}$
$\newcommand{\kms}{\ensuremath{\rm km s^{-1}}}$
$\newcommand{\cms}{\ensuremath{\rm cm s^{-1}}}$
$\newcommand{\ms}{\ensuremath{\rm m s^{-1}}}$
$\newcommand{\TESS}{\emph{TESS}}$
$\newcommand{\Ktwo}{\emph{K2}}$
$\newcommand{\calm}{\emph{CALM}}$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand{\thefigure}{A.\arabic{figure}}$
$\newcommand{\thetable}{A.\arabic{table}}$
$\newcommand{\thebibliography}{\DeclareRobustCommand{\VAN}[3]{##3}\VANthebibliography}$</div>



<div id="title">

# Characterization of $\thisplanet$ and $\rredit{CALM,}$ a new stellar activity mitigation method

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2401.12276-b31b1b.svg)](https://arxiv.org/abs/2401.12276)<mark>Appeared on: 2024-01-24</mark> -  _20 pages, 16 figures, accepted for publication in MNRAS_

</div>
<div id="authors">

Z. L. d. Beurs, et al. -- incl., <mark>S. Zieba</mark>, <mark>L. Kreidberg</mark>

</div>
<div id="abstract">

**Abstract:** We report precise radial velocity (RV) observations of HD 212657 (= $\thisstar$ ), a star shown by $\Ktwo$ to host a transiting sub-Neptune-sized planet in a 10 day orbit. Using Transiting Exoplanet Survey Satellite ( $\TESS$ ) $\redit{photometry}$ , we refined $\redit{the}$ planet parameters, especially the orbital period. We collected 74 precise RVs with the HARPS-N spectrograph between August 2015 and October 2016. Although this planet was first found $\redit{to transit}$ in 2015 and validated in 2018, excess RV scatter originally limited $\redit{mass measurements. Here,}$ we $\redit{measure}$ a mass $\redit{by}$ taking advantage of reductions in scatter from updates to the HARPS-N Data Reduction System (2.3.5) and $\redit{our}$ new activity mitigation method called CCF Activity Linear Model ( $\calm$ ) $\redit{, which}$ uses activity-induced line shape changes in the spectra without requiring timing information. Using the $\calm$ framework, we performed a joint fit with RVs and transits using EXOFASTv2 and find $M_p = $ $\calmmass$ $\me$ and $R_p = $ $\erikaradius$ $\re$ , which places $\thisplanet$ at the upper edge of the radius valley. We also find hints of a secondary companion at a $\sim$ 22 day period, but $\redit{confirmation requires additional RVs. Although characterizing lower-mass planets like \thisplanet is often impeded by stellar variability, these systems}$ especially help probe the $\redit{formation}$ physics (i.e. photoevaporation, core-powered mass loss) of the radius valley. In the future, $\redit{\calm}$ or similar techniques could be widely applied to FGK-type stars, $\redit{help}$ characterize a population of exoplanets $\redit{surrounding}$ the radius valley, and $\redit{further our understanding}$ of their formation.

</div>

<div id="div_fig1">

<img src="tmp_2401.12276/./Figures_used_mnras/paper_plots_fig4_withletters.png" alt="Fig9" width="100%"/>

**Figure 9. -** Overfitting diagnostics for stellar activity RV analysis using the new DRS (2.3.5) \bedit{HARPS-N $\Delta$CCFs}. $\re$dit{The three columns in this Figure show the $\Delta$CCFs, the weight parameters corresponding to the $\Delta$CCFs, and the $\re$dit{phase-folded} RVs for three scenarios respectively. For each of the Figures in the third column \rredit{(c, f, i)}, we provide a legend which includes the scatter of the radial velocities from the HARPS-N pipeline (raw rvs, std) in $\ms$, the scatter of the activity corrected radial velocities (corr rvs, std) in $\ms$, and the predicted semi-amplitude of the planet (planet preds, K) in $\ms$. The first two rows corresponds to two different overfitting concerns and the last row demonstrates a case where both of these concerns are addressed.}  In the first row \rredit{(a, b, c)}, we plot the $\Delta$CCFs that have not been shifted to be centered at the median velocity. Without centering the $\Delta$CCFs, the algorithm will be able to access translational shift \bedit{(i.e. \beditr{D}oppler shifts)} information and attribute potential planet signals to stellar activity signals\beditrr{, significantly attenuating their amplitude}.
    In the second row\rredit{(d, e, f)}, we show a case where the $\Delta$CCFs are shifted but the number of indexes is too large\beditrr{. T}his results in overfitting as seen in the weights plot for the second row. In the third row\rredit{(g, h, i)}, we have only fed in the $\Delta$CCF $\re$dit{indexes} that are considered significant (as described in Section $\re$f{sign_indexes}) and use the \bedit{properly} shifted $\Delta$CCFs.  (*fig:overfitting diagnostics*)

</div>
<div id="div_fig2">

<img src="tmp_2401.12276/./Figures_updated_referee/Fig8_cropped.png" alt="Fig11" width="100%"/>

**Figure 11. -** Old DRS (3.7) Periodograms as a function of number of CCF $\re$dit{indexes} used in the stellar activity correction model. HARPS-N Raw (a) and Corrected (b) RVs in Fourier space for 5 CCF $\re$dit{indexes}. \rredit{In each panel, the 1.0\% and 10.0\% false alarm probabilities (FAP) computed using the bootstrap method are indicated with green and black dotted lines, respectively.} The long-term activity signals in panel (a) decrease in magnitude after applying the activity correction in panel (b). The suspected rotation period and half the rotation period are indicated in grey. The planet period is indicated in yellow. As we increase the number of included CCF $\re$dit{indexes} to N=15 (c) and N=25 (d), the peak corresponding to the star's rotation period ($P_{\rm rot}$) decreases significantly in magnitude after applying the stellar activity corrections and a planet signal emerges at $\litperiod$ days. (*fig:old_drs_correctedperiodogram*)

</div>
<div id="div_fig3">

<img src="tmp_2401.12276/./Figures_updated_referee/Periodogram_shifted5CCFs__all_data23_11_30_12_00_09PM_cropped.png" alt="Fig12" width="100%"/>

**Figure 12. -** New DRS (2.3.5) Periodograms for 5 CCF $\re$dit{indexes}. HARPS-N Raw (a) and Corrected (b) RVs in Fourier space for 5 CCF $\re$dit{indexes}. \rredit{The 1.0\% and 10.0\% false alarm probabilities (FAP) computed using the bootstrap method are indicated with green and black dotted lines, respectively.} The long-term activity signals in panel (a) decrease in magnitude after applying the activity correction in panel (b). The suspected rotation period and half the rotation period are indicated in grey. The planet period is indicated in yellow. From (a) to (b), the peak corresponding to a planet signal at $\litperiod$ days increases slightly in magnitude. (*fig:new_drs_correctedperiodogram*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2401.12276"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand{\angstrom}{\textup{Å}}$
$\newcommand{\thebibliography}{\DeclareRobustCommand{\VAN}[3]{##3}\VANthebibliography}$</div>



<div id="title">

# Images of Betelgeuse with VLTI/MATISSE across the Great Dimming

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2401.12404-b31b1b.svg)](https://arxiv.org/abs/2401.12404)<mark>Appeared on: 2024-01-24</mark> - 

</div>
<div id="authors">

J. Drevon, et al. -- incl., <mark>R. v. Boekel</mark>

</div>
<div id="abstract">

**Abstract:** From Nov. 2019 to May 2020, the red supergiant star Betelgeuse experienced an unprecedented drop of brightness in the visible domain called the great dimming event. Large atmospheric dust clouds and large photospheric convective features are suspected to be responsible for it. To better understand the dimming event, we used mid-infrared long-baseline spectro-interferometric measurements of Betelgeuse taken with the VLTI/MATISSE instrument before (Dec. 2018), during (Feb. 2020), and after (Dec. 2020) the GDE. We present data in the 3.98 to 4.15 $\mu$ m range to cover SiO spectral features molecules as well as adjacent continuum. We have employed geometrical models, image reconstruction, as well as radiative transfer models to monitor the spatial distribution of SiO over the stellar surface.  We find a strongly in-homogeneous spatial distribution of SiO that appears to be looking very different between our observing epochs, indicative of a vigorous activity in the stellar atmosphere. The contrast of our images is small in the pseudo-continuum for all epochs, implying that our MATISSE observations support both cold spot and dust cloud model.

</div>

<div id="div_fig1">

<img src="tmp_2401.12404/./Images/2018-12_PAPER_mod.png" alt="Fig4.1" width="33%"/><img src="tmp_2401.12404/./Images/2020-02_PAPER_mod.png" alt="Fig4.2" width="33%"/><img src="tmp_2401.12404/./Images/2020-12_PAPER_mod.png" alt="Fig4.3" width="33%"/>

**Figure 4. -** Each rows correspond to a given epoch precised in the top-left corner of the first panel. From left to right: 1) Reconstructed map in the pseudo-continuum, 2) in the SiO (2--0) absorption band (both scaled in Jy/pix with a squared pixel size of 0.78 mas), 3) deduced map of the optical depth for the SiO (2--0), 4) (u,v)-plane coverage of the used measurements. The dashed white circles on the maps enclose intensities higher than 70\% of the maximum (42 mas in diameter). The small white disks at the bottom left corners of the reconstructed maps corresponds to an equivalent circular interferometric beam used for the convolution of all the reconstructed images ($\approx$ 4 mas in diameter). The angular size has been estimated using the Feb. 2020 observations which has the poorest (u,v)-plane coverage among the three epochs. (*fig:IMAGES*)

</div>
<div id="div_fig2">

<img src="tmp_2401.12404/./Images/TOTAL_LM_FLUX.png" alt="Fig1" width="100%"/>

**Figure 1. -** Top panel: VLTI/MATISSE absolute spectra for the three epochs with the identification of the main features.  The filled area close to the data point corresponds to the error bars associated to the given quantities. The red area represent the pseudo-continuum range used in this work. Second panel: Relative flux with respect to the continuum for the three epochs. Third panel: visibility squared plotted versus wavelengths for the various epochs of the observations. Bottom panel: fitted uniform disk diameter versus wavelengths for the various epochs. (*fig:FIT*)

</div>
<div id="div_fig3">

<img src="tmp_2401.12404/./Images/2018-12_PAPER_modified1.png" alt="Fig5.1" width="33%"/><img src="tmp_2401.12404/./Images/2020-02_PAPER_modified2.png" alt="Fig5.2" width="33%"/><img src="tmp_2401.12404/./Images/2020-12_PAPER_modified3.png" alt="Fig5.3" width="33%"/>

**Figure 5. -** Each row correspond to a given epoch precised in the top-left corner of the first panel. From left to right: 1) model of the uniform disk convolved with the same interferometric beam as used in Figure \ref{fig:IMAGES}, 2) reconstructed image using \texttt{IRBIS} of a simulated interferometric data of an uniform disk with an angular diameter determined using the values fitted on the visibility squared showed in Figure \ref{fig:FIT} for the SiO first overtone wavelength, 3) residuals between the model and the image reconstructed, 4) simulated (u,v)-plane coverage used for the image reconstruction of the simulated data. (*fig:IRBIS_test*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2401.12404"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Count the markdown pages under _build/html whose `st_ctime` timestamp falls
# within the last `days` days, and list them in reverse lexicographic order.
files = glob('_build/html/*.md')
days = 7
# Both sides of the age computation are timezone-aware UTC datetimes.
# Mixing a naive local "now" with a naive UTC timestamp would shift every
# age by the machine's UTC offset.
now = datetime.now(timezone.utc)
max_age = timedelta(days=days)
res = []
for fk in files:
    changed = datetime.fromtimestamp(os.stat(fk).st_ctime, tz=timezone.utc)
    if now - changed <= max_age:
        res.append(fk)
# Reverse lexicographic order of the paths.
res.sort(reverse=True)
npub = len(res)
print(npub, f" publications files modified in the last {days:d} days.")

359  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(filename, date)`` pairs. Dates are compared as
                strings against ``str(datetime.date)``, so they are expected
                to be ISO formatted (``YYYY-MM-DD``).
    :param days: how many days back from today the window extends (inclusive)
    :returns: generator over the filenames inside the window, most recent
              date first
    """
    # The cutoff does not depend on the loop variable: compute it once.
    cutoff = str(datetime.date.today() - datetime.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date < cutoff:
            # Entries are sorted newest first, so everything after this one
            # is older as well.
            break
        yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date of each file

    Only the first line containing "Appeared on" is used for a given file.
    Files without such a line are left out of the result.

    :param lst_file: iterable of paths of the text files to scan
    :returns: list of ``(filename, date)`` tuples in input order, where
              ``date`` is the stripped text found between 'Appeared on:' and
              '</mark>' on the matching line
    """
    def get_date(line):
        """ Text after the last 'Appeared on:' and before the next '</mark>' """
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument, not over a module-level list, so the function
    # works with whatever file list the caller passes in.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    break  # one date per file is enough
    return dates

from glob import glob
# Gather the generated markdown pages and keep those whose "Appeared on" date
# lies within the last `days` days.
# The pattern is '*.md', the same one used when counting recently changed
# files, so a name that merely ends in "md" is not picked up.
lst = glob('_build/html/*.md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

5  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel made of `npub` slides

    The container `div` carries the `data-flickity` options and the
    `asyncTypeset` id. Each cell wraps a `div` whose id is `slide<k>`
    (k = 1..npub) and whose placeholder text is `Content <k>`.

    :param npub: number of slides to create
    :returns: the markup as a single newline-joined string
    """
    opening = (
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    )
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join([*opening, *cells, "  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid made of `npub` slides

    Each grid item wraps a `div` whose id is `slide<k>` (k = 1..npub) and
    whose placeholder text is `Content <k>`.

    :param npub: number of grid items to create
    :returns: the markup as a single newline-joined string
    """
    rows = ["""  <div class="grid"> """]
    rows += [
        f"""    <div class="grid-item"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    rows.append("  </div>")
    return '\n'.join(rows)

In [12]:
# Render the 7-day archive page: fill the placeholders of the daily template
# with the carousel markup, the document file names and the slide ids.
carousel = create_carousel(npub)
# Quoted, comma-separated base names of the selected documents.
docs = ', '.join(f'"{path.rsplit("/", 1)[-1]:s}"' for path in res)
# Quoted, comma-separated slide ids, matching those created by the carousel.
slides = ', '.join(f'"slide{idx}"' for idx in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Substitutions are applied one after the other, in this order.
for placeholder, value in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "7-day archives"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, value)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# Same page as the 7-day archive, restricted to a one-day window.
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
# Quoted, comma-separated base names of the selected documents.
docs = ', '.join(f'"{path.rsplit("/", 1)[-1]:s}"' for path in res)
# Quoted, comma-separated slide ids, matching those created by the carousel.
slides = ', '.join(f'"slide{idx}"' for idx in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Substitutions are applied one after the other, in this order.
for placeholder, value in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "Daily"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, value)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

3  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# Number of papers requested for the grid.
npub = 6
# Latest appearance date first; keep at most `npub` documents.
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), npub)]
# When fewer documents are available than requested, shrink the grid so the
# number of slots always equals the number of documents handed to the page.
npub = len(res)
print(npub, " publications selected.")

grid = create_grid(npub)
# Quoted, comma-separated base names of the selected documents.
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
# Quoted, comma-separated slide ids, matching those created by the grid.
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
