# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Wolf', 'Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

E. Bañados  ->  E. Bañados  |  ['E. Bañados']
S. Belladitta  ->  S. Belladitta  |  ['S. Belladitta']
F. Walter  ->  F. Walter  |  ['F. Walter']
A. Kospal  ->  A. Kospal  |  ['A. Kospal']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']


Arxiv has 64 new papers today
          3 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/3 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2406.07612


extracting tarball to tmp_2406.07612...

 done.


Found 108 bibliographic references in tmp_2406.07612/main.bbl.
Retrieving document from  https://arxiv.org/e-print/2406.07689
extracting tarball to tmp_2406.07689...

 done.


Found 150 bibliographic references in tmp_2406.07689/example.bbl.
Retrieving document from  https://arxiv.org/e-print/2406.07896


extracting tarball to tmp_2406.07896... done.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.07612-b31b1b.svg)](https://arxiv.org/abs/2406.07612) | **Optical and near-infrared spectroscopy of quasars at $z>6.5$: public data release and composite spectrum**  |
|| S. Onorato, et al. -- incl., <mark>E. Bañados</mark>, <mark>S. Belladitta</mark>, <mark>F. Walter</mark> |
|*Appeared on*| *2024-06-13*|
|*Comments*| *18 pages, 10 figures, submitted to MNRAS*|
|**Abstract**|            We present optical and near-infrared (NIR) spectroscopic observations for a sample of $45$ quasars at $6.50 < z \leq 7.64$ with absolute magnitudes at $1450$ Å in the range $-28.82 \leq M_{1450} \leq -24.13$ and their composite spectrum. The median redshift and $M_{1450}$ of the quasars in the sample are $z_{\rm{median}}=6.71$ and $M_{1450,\rm{median}} \simeq -26.1$, respectively. The NIR spectra are taken with echelle spectrographs, complemented with additional data from optical long slit instruments, and then reduced consistently using the open-source Python-based spectroscopic data reduction pipeline PypeIt. The median value of the mean signal-to-noise ratios of the spectra in J, H, and K band (median $\langle \rm{SNR}_{\lambda} \rangle$) is: median $\langle \rm{SNR}_{J} \rangle=9.7$, median $\langle \rm{SNR}_{H} \rangle=10.3$, and median $\langle \rm{SNR}_{K} \rangle=11.7$; demonstrating the good data quality. This work presents the largest medium/moderate-resolution sample of quasars at $z>6.5$ from ground-based instruments. Its homogeneity and reproducibility make it ideally suited for several scientific goals, i.e., the study of the quasar proximity zones and damping wings, the Ly$\alpha$ forest, the intergalactic medium's metal content, as well as other properties such as the distribution of SMBH masses and Eddington ratios. Our composite spectrum is compared to others at both high and low-$z$ from the literature, showing differences in the strengths of many emission lines, probably due to differences in luminosity among the samples, but a consistent continuum slope, which proves that the same spectral features are preserved in quasars at different redshift ranges.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.07689-b31b1b.svg)](https://arxiv.org/abs/2406.07689) | **Evidence for Non-zero Turbulence in the Protoplanetary disc around IM Lup**  |
|| K. Flaherty, et al. -- incl., <mark>A. Kospal</mark> |
|*Appeared on*| *2024-06-13*|
|*Comments*| *Accepted by MNRAS, 17 pages, 12 figures*|
|**Abstract**|            The amount of turbulence in protoplanetary discs around young stars is critical for determining the efficiency, timeline, and outcomes of planet formation. It is also difficult to measure. Observations are still limited, but direct measurements of the non-thermal, turbulent gas motion are possible with the Atacama Large Millimeter/submillimeter Array (ALMA). Using CO(2-1)/$^{13}$CO(2-1)/C$^{18}$O(2-1) ALMA observations of the disc around IM Lup at ~0.4" (~60 au) resolution we find evidence of significant turbulence, at the level of $\delta v_{\rm turb}=(0.18-0.30)$c$_s$. This result is robust against systematic uncertainties (e.g., amplitude flux calibration, midplane gas temperature, disc self-gravity). We find that gravito-turbulence as the source of the gas motion is unlikely based on the lack of an imprint on the rotation curve from a massive disc, while magneto-rotational instabilities and hydrodynamic instabilities are still possible, depending on the unknown magnetic field strength and the cooling timescale in the outer disc.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.07896-b31b1b.svg)](https://arxiv.org/abs/2406.07896) | **Investigating Sulfur Chemistry in the HD 163296 disk**  |
|| R. Ma, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2024-06-13*|
|*Comments*| *16 pages, 7 figures*|
|**Abstract**|            Sulfur chemistry in the formation process of low-mass stars and planets remains poorly understood. The protoplanetary disks (PPDs) are the birthplace of planets and its distinctive environment provides an intriguing platform for investigating models of sulfur chemistry. We analyzed the ALMA observations of CS 7-6 transitions in the HD 163296 disk and perform astrochemical modeling to explore its sulfur chemistry. We simulated the distribution of sulfur-containing molecules and compared it with observationally deduced fractional column densities. We have found that the simulated column density of CS is consistent with the observationally deduced fractional column densities, while the simulated column density of C$_2$S is lower than the observationally deduced upper limits on column densities. This results indicate that we have a good understanding of the chemical properties of CS and C$_2$S in the disk. We also investigated the influence of the C/O ratio on sulfur-containing molecules and found that the column densities of SO, SO$_2$, and H$_2$S near the central star are dependent on the C/O ratio. Additionally, we found that the $N$[CS]/$N$[SO] ratio can serve as a promising indicator of the disk's C/O ratio in the HD 163296. Overall, the disk of HD 163296 provides a favorable environment for the detection of sulfur-containing molecules.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

exported in  _build/html/2406.07612.md
    + _build/html/tmp_2406.07612/./figures/composite_spectrum_new_1450.png
    + _build/html/tmp_2406.07612/./figures/J-Mag-z_plot_hist.png
    + _build/html/tmp_2406.07612/./figures/J0910_bal-composite_2000-0.30.png
    + _build/html/tmp_2406.07612/./figures/J0923_bal-composite_2000-0.80.png
exported in  _build/html/2406.07689.md
    + _build/html/tmp_2406.07689/figures/Temp_Z_fiducial.png
    + _build/html/tmp_2406.07689/figures/imspec+noturb.png
    + _build/html/tmp_2406.07689/figures/HR_alpha.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\angstrom}{\textup{Å}}$
$\newcommand{\thebibliography}{\DeclareRobustCommand{\VAN}[3]{##3}\VANthebibliography}$</div>



<div id="title">

# Optical and near-infrared spectroscopy of quasars at $z>6.5$: public data release and composite spectrum

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2406.07612-b31b1b.svg)](https://arxiv.org/abs/2406.07612)<mark>Appeared on: 2024-06-13</mark> -  _18 pages, 10 figures, submitted to MNRAS_

</div>
<div id="authors">

S. Onorato, et al. -- incl., <mark>E. Bañados</mark>, <mark>S. Belladitta</mark>, <mark>F. Walter</mark>

</div>
<div id="abstract">

**Abstract:** We present optical and near-infrared (NIR) spectroscopic observations for a sample of $45$ quasars at $6.50 < z \leq 7.64$ with absolute magnitudes at $1450$ ${\angstrom}$ in the range $-28.82 \leq M_{1450} \leq -24.13$ and their composite spectrum. The median redshift and $M_{1450}$ of the quasars in the sample are $z_{\rm{median}}=6.71$ and $M_{1450,\rm{median}} \simeq -26.1$ , respectively.The NIR spectra are taken with echelle spectrographs, complemented with additional data from optical long slit instruments, and then reduced consistently using the open-source Python-based spectroscopic data reduction pipeline \texttt{PypeIt} . The median value of the mean signal-to-noise ratios of the spectra in J, H, and K band (median $\langle \rm{SNR}_{\lambda} \rangle$ ) is: median $\langle \rm{SNR}_{J} \rangle=9.7$ , median $\langle \rm{SNR}_{H} \rangle=10.3$ , and median $\langle \rm{SNR}_{K} \rangle=11.7$ ; demonstrating the good data quality.This work presents the largest medium/moderate-resolution sample of quasars at $z>6.5$ from ground-based instruments. Its homogeneity and reproducibility make it ideally suited for several scientific goals, i.e., the study of the quasar proximity zones and damping wings, the Ly $\alpha$ forest, the intergalactic medium's metal content, as well as other properties such as the distribution of SMBH masses and Eddington ratios. Our composite spectrum is compared to others at both high and low- $z$ from the literature, showing differences in the strengths of many emission lines, probably due to differences in luminosity among the samples, but a consistent continuum slope, which proves that the same spectral features are preserved in quasars at different redshift ranges.

</div>

<div id="div_fig1">

<img src="tmp_2406.07612/./figures/composite_spectrum_new_1450.png" alt="Fig9" width="100%"/>

**Figure 9. -** _Bottom panel_: Composite spectrum of the $33$ non-BAL quasars in the sample (black) with its noise vector (grey), compared with several other composites from the literature (colored curves). All the composites are normalized to the continuum flux at 1450 {$\angstrom$}. The grey band for $\lambda<1225${$\angstrom$} highlights the mask-free region, where none of the masks described in the main text is applied. The main emission lines are shown as dashed blue lines. The inset panel shows a zoom-in of the region [1175, 1580]{$\angstrom$}, where the composites differ more. _Middle panel_: Number of spectra that are contributing to the composite at each wavelength for this work and the only two available from the literature (same colors as in the bottom panel). _Top panel_: Mean redshift that contributes to the composite at each wavelength for this work. (*fig:composite*)

</div>
<div id="div_fig2">

<img src="tmp_2406.07612/./figures/J-Mag-z_plot_hist.png" alt="Fig1" width="100%"/>

**Figure 1. -** _Panel (a)_: Distribution of J-band photometry used to scale the spectra as a function of $z$ for all the $45$ quasars in this sample. The legend is unique and split between the two plots, showing the sources color-coded with decreasing $z$. The full symbol marks the quasars for which J-band photometry is available, while the open symbol marks those for which it is not (we report Y-band photometry for J1243$+$0100 and J0923$+$0753, and $\rm{K_{p}}$-band photometry for J1058$+$2930). The error bars show the uncertainties on both $z$ and the photometric measurements. _Panel (b)_: Distribution of $M_{1450}$ as a function of $z$ for all the $45$ quasars in this sample. The error bars show the uncertainties on $z$. The circle marks the $M_{1450}$ computed from the spectrum scaled with J, Y, or $\rm{K_{p}}$-band photometry. The star marks the quasars for what $M_{1450}$ from spectroscopy is not reliable because of the low SNR of the spectrum or appearance of BAL features. In the case of J1243$+$0100, $M_{1450}$ is from the discovery paper ( ([ and Matsuoka 2019](https://ui.adsabs.harvard.edu/abs/2019ApJ...872L...2M)) ); while for J0910$-$0414 and J0923$+$0402, we follow the method described in Appendix \ref{app:bal}. _Panel (c)_: Histogram of the redshift distribution of the sample, with bins of size 0.05. The dashed red line represents the median redshift ($z_{\rm{median}}=6.71$). _Panel (d)_: Histogram of the $M_{1450}$ distribution of the sample, with bins of size 0.2. The dashed red line represents the median $M_{1450}$($M_{1450,\rm{median}} \simeq -26.1$). (*fig:M1450*)

</div>
<div id="div_fig3">

<img src="tmp_2406.07612/./figures/J0910_bal-composite_2000-0.30.png" alt="Fig4.1" width="50%"/><img src="tmp_2406.07612/./figures/J0923_bal-composite_2000-0.80.png" alt="Fig4.2" width="50%"/>

**Figure 4. -** _Top_: Match between the spectrum of J0910$-$0414 and the composite spectrum obtained from the sample excluding the BALs already corrected to get a better estimate of the $M_{1450}$. The spectrum of the BAL quasar is shown in blue, while the composite is in orange, with their noise vectors reported at the bottom of the plot. The back dashed line falls at $\lambda=1450${$\angstrom$} and the green dashed line is the normalization wavelength at $2000${$\angstrom$}. The magenta star is the "new" continuum level assumed for the BAL quasar at $1450${$\angstrom$}. _Bottom_: Same, but for J0923$+$0402. (*fig:matchbal*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2406.07612"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\thebibliography}{\DeclareRobustCommand{\VAN}[3]{##3}\VANthebibliography}$</div>



<div id="title">

# Evidence for Non-zero Turbulence in the Protoplanetary disc around IM Lup

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2406.07689-b31b1b.svg)](https://arxiv.org/abs/2406.07689)<mark>Appeared on: 2024-06-13</mark> -  _Accepted by MNRAS, 17 pages, 12 figures_

</div>
<div id="authors">

K. Flaherty, et al. -- incl., <mark>A. Kospal</mark>

</div>
<div id="abstract">

**Abstract:** The amount of turbulence in protoplanetary discs around young stars is critical for determining the efficiency, timeline, and outcomes of planet formation. It is also difficult to measure. Observations are still limited, but direct measurements of the non-thermal, turbulent gas motion are possible with the Atacama Large Millimeter/submillimeter Array (ALMA). Using CO(2--1)/ $^{13}$ CO(2--1)/C $^{18}$ O(2--1) ALMA observations of the disc around IM Lup at $\sim0\farcs4$ ( $\sim$ 60 au) resolution we find evidence of significant turbulence, at the level of $\delta v_{\rm turb}=(0.18-0.30)$ $c_s$ . This result is robust against systematic uncertainties (e.g., amplitude flux calibration, midplane gas temperature, disc self-gravity). We find that gravito-turbulence as the source of the gas motion is unlikely based on the lack of an imprint on the rotation curve from a massive disc, while magneto-rotational instabilities and hydrodynamic instabilities are still possible, depending on the unknown magnetic field strength and the cooling timescale in the outer disc.

</div>

<div id="div_fig1">

<img src="tmp_2406.07689/figures/Temp_Z_fiducial.png" alt="Fig2" width="100%"/>

**Figure 2. -** Temperature (top panel) and height of the emitting region (bottom panel) vs radius for our fiducial model (red band) and as derived directly from the data by [ and Law (2021)](https://ui.adsabs.harvard.edu/abs/2021ApJS..257....4L)(points). In the top panel the dashed line indicates a CO freeze-out temperature of 19 K, while in the bottom panel the dashed line indicates the location of the CO condensation front in our fiducial model. The extended emitting height for CO(2--1) comes from the fact that we see both the near and far side of the disc; the top of this region is seen on the near side of the disc, while the bottom is seen on the far side of the disc. The red Gaussian indicates the beam size in the data we analyze.  (*figure:Temp_Z_fiducial*)

</div>
<div id="div_fig2">

<img src="tmp_2406.07689/figures/imspec+noturb.png" alt="Fig1" width="100%"/>

**Figure 1. -** CO(2--1) spectra of the disc around IM Lup (black line) and the median of the PDFs from the fiducial model (red dashed line). The region that is subject to absorption by the molecular cloud, and is excluded from the MCMC process, is marked with a grey band. Despite the limited spectral range, we are able to place strong constraints on the non-thermal linewidth ($\delta v_{\rm turb}$=0.237$^{+0.017}_{-0.012}$$c_s$), with a significantly better fit to the data than with zero turbulence (green dotted line).  (*figure:spec*)

</div>
<div id="div_fig3">

<img src="tmp_2406.07689/figures/HR_alpha.png" alt="Fig4" width="100%"/>

**Figure 4. -** (Top): Height of the scattered light surface, normalized to the radius, for the dust rings identified in the scattered light image from [ and Avenhaus (2018)](https://ui.adsabs.harvard.edu/abs/2018ApJ...863...44A)(black dots), along with models with different dust scale heights. Ratios of dust to gas scale height between 0.7 and 1.0 match the height of the scattered light rings.
(Bottom): Constraints on $\alpha$ based on the dust scale height, as a function of the Stokes number. The dark grey band is the constraint on turbulence from the CO observations with CO depletion. The constraint on $\alpha$ from the dust scale height (solid line with arrows) is a lower limit given the asymptotic behavior of $\alpha$ as $H_d$/$H_g$ approaches 1, and is consistent with the results from [Franceschi, et. al (2022)](https://ui.adsabs.harvard.edu/abs/2022arXiv221201291F), who simultaneously constrain $\alpha$ and the maximum grain size. These constraints on the dust settling are also consistent with a decrease in the turbulence between the surface layers probed by CO, and the midplane probed by dust settling.  (*figure:HR_alpha*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2406.07689"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

94  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

10  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

4  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
