# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# get list from MPIA website
# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [k[1] for k in mpia_authors]   # initials + fullname
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

L. Kreidberg  ->  L. Kreidberg  |  ['L. Kreidberg']


J. Mah  ->  J. Mah  |  ['J. Mah']
Arxiv has 54 new papers today
          3 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
documents = []
failed = []
for paper in tqdm(candidates):
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                normed_mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/3 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2309.00036


extracting tarball to tmp_2309.00036...

 done.
Retrieving document from  https://arxiv.org/e-print/2309.00039


extracting tarball to tmp_2309.00039...

 done.
Retrieving document from  https://arxiv.org/e-print/2309.00509


extracting tarball to tmp_2309.00509...

 done.


Found 73 bibliographic references in tmp_2309.00509/Heavy.bbl.
syntax error in line 511: premature end of file


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [5]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2309.00509-b31b1b.svg)](https://arxiv.org/abs/arXiv:2309.00509) | **Enriching inner discs and giant planets with heavy elements**  |
|| B. Bitsch, <mark>J. Mah</mark> |
|*Appeared on*| *2023-09-04*|
|*Comments*| *Accepted by A&A, 10 pages, 9 figures*|
|**Abstract**| Giant exoplanets seem to have on average a much larger heavy element content than the solar system giants. Past attempts to explain these heavy element contents include collisions between planets, accretion of volatile rich gas and accretion of gas enriched in micro-metre sized solids. However, these different theories individually could not explain the heavy element content of giants and the volatile to refractory ratios in atmospheres of giant planets at the same time. Here we combine the approaches of gas accretion enhanced with vapor and small micro-meter sized dust grains. As pebbles drift inwards, the volatile component evaporates and enriches the disc, while the smaller silicate core of the pebble continues to move inwards. The smaller silicate pebbles drift slower, leading to a pile-up of material interior to the water ice line, increasing the dust-to-gas ratio interior to the ice line. Under the assumption that these small dust grains follow the motion of the gas, gas accreting giants accrete large fractions of small solids in addition to the volatile vapor. The effectiveness of the solid enrichment requires a large disc radius to maintain the pebble flux for a long time and a large viscosity that reduces the size and inward drift of the small dust grains. However, this process depends crucially on the debated size difference of the pebbles interior and exterior of the water ice line. On the other hand, the volatile component released by the inward drifting pebbles can lead to a large enrichment with heavy element vapor, independently of a size difference of pebbles interior and exterior to the water ice line. Our model stresses the importance of the disc's radius and viscosity on the enrichment of dust and vapor. Consequently we show how our model could explain the heavy element content of the majority of giant planets by using combined estimates of dust and vapor enrichment. |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2309.00036-b31b1b.svg)](https://arxiv.org/abs/arXiv:2309.00036) | **Detection of Carbon Monoxide in the Atmosphere of WASP-39b Applying  Standard Cross-Correlation Techniques to JWST NIRSpec G395H Data**  |
|| E. Esparza-Borges, et al. -- incl., <mark>L. Kreidberg</mark> |
|*Appeared on*| *2023-09-04*|
|*Comments*| *Accepted for publication in The Astrophysical Journal Letters*|
|**Abstract**| Carbon monoxide was recently reported in the atmosphere of the hot Jupiter WASP-39b using the NIRSpec PRISM transit observation of this planet, collected as part of the JWST Transiting Exoplanet Community Early Release Science (JTEC ERS) Program. This detection, however, could not be confidently confirmed in the initial analysis of the higher resolution observations with NIRSpec G395H disperser. Here we confirm the detection of CO in the atmosphere of WASP-39b using the NIRSpec G395H data and cross-correlation techniques. We do this by searching for the CO signal in the unbinned transmission spectrum of the planet between 4.6 and 5.0 $\mu$m, where the contribution of CO is expected to be higher than that of other anticipated molecules in the planet's atmosphere. Our search results in a detection of CO with a cross-correlation function (CCF) significance of $6.6 \sigma$ when using a template with only ${\rm ^{12}C^{16}O}$ lines. The CCF significance of the CO signal increases to $7.5 \sigma$ when including in the template lines from additional CO isotopologues, with the largest contribution being from ${\rm ^{13}C^{16}O}$. Our results highlight how cross-correlation techniques can be a powerful tool for unveiling the chemical composition of exoplanetary atmospheres from medium-resolution transmission spectra, including the detection of isotopologues. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: '69117' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2309.00039-b31b1b.svg)](https://arxiv.org/abs/arXiv:2309.00039) | **Fuzzy dark matter dynamics in tidally perturbed dwarf spheroidal galaxy  satellites**  |
|| A. Widmark, T. D. Yavetz, X. Li |
|*Appeared on*| *2023-09-04*|
|*Comments*| *22 pages, 12 figure; appendix adds 10 pages, 5 figures*|
|**Abstract**| Fuzzy dark matter (FDM) has dynamical properties that differ significantly from cold dark matter (CDM). These dynamical differences are strongly manifested on the spatial scale of dwarf spheroidal galaxies (dSphs), which roughly corresponds to the de Broglie wavelength of a canonical mass FDM particle. We study simulations of a dSph satellite which is tidally perturbed by its host galaxy, in order to identify dynamical signatures that are unique to FDM, and to quantify the imprints of such perturbations on an observable stellar tracer population. We find that a perturbed FDM soliton develops a long-standing breathing mode, whereas for CDM such a breathing mode quickly phase-mixes and disappears. We also demonstrate that such signatures become imprinted on the dynamics of a stellar tracer population, making them observable with sufficiently precise astrometric measurements. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [7]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

exported in  _build/html/2309.00509.md
    + _build/html/tmp_2309.00509/./1_dtg.png
    + _build/html/tmp_2309.00509/./2_heavy.png
    + _build/html/tmp_2309.00509/./3_integrated.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$</div>



<div id="title">

# Enriching inner discs and giant planets with heavy elements

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2309.00509-b31b1b.svg)](https://arxiv.org/abs/2309.00509)<mark>Appeared on: 2023-09-04</mark> -  _Accepted by A&A, 10 pages, 9 figures_

</div>
<div id="authors">

B. Bitsch, <mark>J. Mah</mark>

</div>
<div id="abstract">

**Abstract:** Giant exoplanets seem to have on average a much larger heavy element content than the solar system giants. Past attempts to explain these heavy element contents include collisions between planets, accretion of volatile rich gas and accretion of gas enriched in micro-metre sized solids. However, these different theories individually could not explain the heavy element content of giants and the volatile to refractory ratios in atmospheres of giant planets at the same time. Here we want to combine the approaches of gas accretion enhanced with vapor and small micro-meter sized dust grains within one model. To this end, we present detailed models of inward drifting and evaporating pebbles and how these influence the dust-to-gas ratio and the heavy element content of the disc. As pebbles drift inwards, the volatile component evaporates and enriches the disc. At the same time, the smaller silicate core of the pebble continues to move inwards. As the silicate pebbles are presumably smaller than the ice grains, they drift slower, leading to a pile-up of material interior to the water ice line, increasing the dust-to-gas ratio in this region. Under the assumption that these small dust grains follow the motion of the gas even through the pressure bumps generated by the gaps of planets, gas accreting giants can accrete large fractions of small solids in addition to the volatile vapor. We find that the effectiveness of the solid enrichment requires a large disc radius to maintain the pebble flux for a long time and a large viscosity that reduces the size and inward drift of the small dust grains. However, this process depends crucially on the debated size difference of the pebbles interior and exterior of the water ice line. On the other hand, the volatile component released by the inward drifting pebbles can lead to a large enrichment with heavy element vapor, independently of a size difference of pebbles interior and exterior to the water ice line. Our model stresses the importance of the disc's radius and viscosity on the enrichment of dust and vapor. Consequently we show how our model could explain the heavy element content of the majority of giant planets by using combined estimates of dust and vapor enrichment.

</div>

<div id="div_fig1">

<img src="tmp_2309.00509/./1_dtg.png" alt="Fig3" width="100%"/>

**Figure 3. -** Time evolution of the dust-to-gas ratio in protoplanetary discs with $\alpha = 10^{-4}$(top) and $\alpha=10^{-3}$(bottom) and disc radii of $R_{\rm c}=100$ AU (left) and $R_{\rm c}=250$ AU (right). The vertical lines mark the evaporation fronts of the different chemical species, where inward drifting pebbles evaporate and recondense, leading to pile-ups in the solid density.
    (*fig:dtg*)

</div>
<div id="div_fig2">

<img src="tmp_2309.00509/./2_heavy.png" alt="Fig4" width="100%"/>

**Figure 4. -** Time evolution of the heavy element content in the gas phase in protoplanetary discs with $\alpha = 10^{-4}$(top) and $\alpha=10^{-3}$(bottom) and disc radii of $R_{\rm c}=100$AU (left) and $R_{\rm c}=250$AU (right). The vertical lines mark the evaporation fronts of the different chemical species, where inward drifting pebbles evaporate, leading to increases in the heavy element content of the gas phase.
    (*fig:heavy*)

</div>
<div id="div_fig3">

<img src="tmp_2309.00509/./3_integrated.png" alt="Fig5" width="100%"/>

**Figure 5. -** Time evolution integration of the heavy element content of the gas phase at 0.5 AU either from the solids (see Fig. \ref{fig:dtg}) or the vapor (see Fig. \ref{fig:heavy}) for discs with $R_{\rm c}=250$ AU and $\alpha=10^{-4}$(left) and $\alpha=10^{-3}$(right).
    (*fig:integral*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2309.00509"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

260  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

7  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [12]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

1  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
