# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# get list from MPIA website
# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [k[1] for k in mpia_authors]   # initials + fullname
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

H. Beuther  ->  H. Beuther  |  ['H. Beuther']
S. Scheithauer  ->  S. Scheithauer  |  ['S. Scheithauer']
M. Zhai  ->  M. Zhai  |  ['M. Zhai']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
G. Perotti  ->  G. Perotti  |  ['G. Perotti']
J. Bouwman  ->  J. Bouwman  |  ['J. Bouwman']
M. Samland  ->  M. Samland  |  ['M. Samland']
R. Franceschi  ->  R. Franceschi  |  ['R. Franceschi']
K. Schwarz  ->  K. Schwarz  |  ['K. Schwarz']
M. Güdel  ->  M. Güdel  |  ['M. Güdel']
S. Scheithauer  ->  S. Scheithauer  |  ['S. Scheithauer']
J. Schreiber  ->  J. Schreiber  |  ['J. Schreiber']


Arxiv has 79 new papers today
          4 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
documents = []
failed = []
for paper in tqdm(candidates):
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                normed_mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/4 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2307.11817


extracting tarball to tmp_2307.11817...

 done.


list index out of range


Retrieving document from  https://arxiv.org/e-print/2307.11821


extracting tarball to tmp_2307.11821...

 done.










Found 71 bibliographic references in tmp_2307.11821/polarimetry_and_astrometry_of_NIR_Flares.bbl.
syntax error in line 5: unbalanced braces
Retrieving document from  https://arxiv.org/e-print/2307.12013


extracting tarball to tmp_2307.12013...

 done.
Retrieving document from  https://arxiv.org/e-print/2307.12040


extracting tarball to tmp_2307.12040...

 done.


G. Perotti  ->  G. Perotti  |  ['G. Perotti']
J. Bouwman  ->  J. Bouwman  |  ['J. Bouwman']
M. Samland  ->  M. Samland  |  ['M. Samland']
R. Franceschi  ->  R. Franceschi  |  ['R. Franceschi']
K. Schwarz  ->  K. Schwarz  |  ['K. Schwarz']
M. Güdel  ->  M. Güdel  |  ['M. Güdel']
S. Scheithauer  ->  S. Scheithauer  |  ['S. Scheithauer']
J. Schreiber  ->  J. Schreiber  |  ['J. Schreiber']




[Errno 2] No such file or directory: 'gs'


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [5]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2307.11821-b31b1b.svg)](https://arxiv.org/abs/arXiv:2307.11821) | **Polarimetry and Astrometry of NIR Flares as Event Horizon Scale,  Dynamical Probes for the Mass of Sgr A***  |
|| G. Collaboration, et al. -- incl., <mark>S. Scheithauer</mark> |
|*Appeared on*| *2023-07-25*|
|*Comments*| *10 pages, 12 figures. Submitted to A&A*|
|**Abstract**| We present new astrometric and polarimetric observations of flares from Sgr A* obtained with GRAVITY, the near-infrared interferometer at ESO's Very Large Telescope Interferometer (VLTI), bringing the total sample of well-covered astrometric flares to four and polarimetric ones to six, where we have for two flares good coverage in both domains. All astrometric flares show clockwise motion in the plane of the sky with a period of around an hour, and the polarization vector rotates by one full loop in the same time. Given the apparent similarities of the flares, we present a common fit, taking into account the absence of strong Doppler boosting peaks in the light curves and the EHT-measured geometry. Our results are consistent with and significantly strengthen our model from 2018: We find that a) the combination of polarization period and measured flare radius of around nine gravitational radii ($9 R_g \approx 1.5 R_{ISCO}$, innermost stable circular orbit) is consistent with Keplerian orbital motion of hot spots in the innermost accretion zone. The mass inside the flares' radius is consistent with the $4.297 \times 10^6 \; \text{M}_\odot$ measured from stellar orbits at several thousand $R_g$. This finding and the diameter of the millimeter shadow of Sgr A* thus support a single black hole model. Further, b) the magnetic field configuration is predominantly poloidal (vertical), and the flares' orbital plane has a moderate inclination with respect to the plane of the sky, as shown by the non-detection of Doppler-boosting and the fact that we observe one polarization loop per astrometric loop. Moreover, c) both the position angle on sky and the required magnetic field strength suggest that the accretion flow is fueled and controlled by the winds of the massive, young stars of the clockwise stellar disk 1-5 arcsec from Sgr A*, in agreement with recent simulations. |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2307.12013-b31b1b.svg)](https://arxiv.org/abs/arXiv:2307.12013) | **The Tianlin Mission: a 6m UV/Opt/IR space telescope to explore the  habitable worlds and the universe**  |
|| W. Wang, et al. -- incl., <mark>M. Zhai</mark>, <mark>X. Zhang</mark> |
|*Appeared on*| *2023-07-25*|
|*Comments*| *15 pages, 5 figures, accepted for publication in RAA and is available online*|
|**Abstract**| [Abridged] It is expected that the ongoing and future space-borne planet survey missions including TESS, PLATO, and Earth 2.0 will detect thousands of small to medium-sized planets via the transit technique, including over a hundred habitable terrestrial rocky planets. To conduct a detailed study of these terrestrial planets, particularly the cool ones with wide orbits, the exoplanet community has proposed various follow-up missions. The currently proposed ESA mission ARIEL is capable of characterization of planets down to warm super-Earths mainly using transmission spectroscopy. The NASA 6m UV/Opt/NIR mission proposed in the Astro2020 Decadal Survey may further tackle down to habitable rocky planets, and is expected to launch around 2045. In the meanwhile, China is funding a concept study of a 6-m class space telescope named Tianlin (A UV/Opt/NIR Large Aperture Space Telescope) that aims to start its operation within the next 10-15 years and last for 5+ years. Tianlin will be primarily aimed to the discovery and characterization of rocky planets in the habitable zones (HZ) around nearby stars and to search for potential biosignatures mainly using the direct imaging method. Transmission and emission spectroscopy at moderate to high resolution will be carried out as well on a population of exoplanets to strengthen the understanding of the formation and evolution of exoplanets. It will also carry out in-depth studies of the cosmic web and early galaxies, and constrain the nature of the dark matter and dark energy. We describe briefly the primary scientific motivations and main technical considerations based on our preliminary simulation results. We find that a monolithic off-axis space telescope with a primary mirror diameter larger than 6m equipped with a high contrast chronograph can identify water in the atmosphere of a habitable-zone Earth-like planet around a Sun-like star. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2307.12040-b31b1b.svg)](https://arxiv.org/abs/arXiv:2307.12040) | **Water in the terrestrial planet-forming zone of the PDS 70 disk**  |
|| <mark>G. Perotti</mark>, et al. -- incl., <mark>J. Bouwman</mark>, <mark>M. Samland</mark>, <mark>R. Franceschi</mark>, <mark>K. Schwarz</mark>, <mark>M. Güdel</mark>, <mark>S. Scheithauer</mark>, <mark>J. Schreiber</mark> |
|*Appeared on*| *2023-07-25*|
|*Comments*| *To appear in Nature on 24 July 2023. 21 pages, 10 figures; includes extended data. Part of the JWST MINDS Guaranteed Time Observations program's science enabling products. Spectra downloadable on Zenodo at this https URL*|
|**Abstract**| Terrestrial and sub-Neptune planets are expected to form in the inner ($<10~$AU) regions of protoplanetary disks. Water plays a key role in their formation, although it is yet unclear whether water molecules are formed in-situ or transported from the outer disk. So far Spitzer Space Telescope observations have only provided water luminosity upper limits for dust-depleted inner disks, similar to PDS 70, the first system with direct confirmation of protoplanet presence. Here we report JWST observations of PDS 70, a benchmark target to search for water in a disk hosting a large ($\sim54~$AU) planet-carved gap separating an inner and outer disk. Our findings show water in the inner disk of PDS 70. This implies that potential terrestrial planets forming therein have access to a water reservoir. The column densities of water vapour suggest in-situ formation via a reaction sequence involving O, H$_2$, and/or OH, and survival through water self-shielding. This is also supported by the presence of CO$_2$ emission, another molecule sensitive to UV photodissociation. Dust shielding, and replenishment of both gas and small dust from the outer disk, may also play a role in sustaining the water reservoir. Our observations also reveal a strong variability of the mid-infrared spectral energy distribution, pointing to a change of inner disk geometry. |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error [Errno 2] No such file or directory: 'gs'</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2307.11817-b31b1b.svg)](https://arxiv.org/abs/arXiv:2307.11817) | **The diverse chemistry of protoplanetary disks as revealed by JWST**  |
|| E. F. v. Dishoeck, et al. -- incl., <mark>H. Beuther</mark> |
|*Appeared on*| *2023-07-25*|
|*Comments*| *17 pages, 8 figures. Author's version of paper submitted to Faraday Discussions January 18 2023, Accepted March 16 2023*|
|**Abstract**| Early results from the JWST-MIRI guaranteed time programs on protostars (JOYS) and disks (MINDS) are presented. Thanks to the increased sensitivity, spectral and spatial resolution of the MIRI spectrometer, the chemical inventory of the planet-forming zones in disks can be investigated with unprecedented detail across stellar mass range and age. Here data are presented for five disks, four around low-mass stars and one around a very young high-mass star. The mid-infrared spectra show some similarities but also significant diversity: some sources are rich in CO2, others in H2O or C2H2. In one disk around a very low-mass star, booming C2H2 emission provides evidence for a ``soot'' line at which carbon grains are eroded and sublimated, leading to a rich hydrocarbon chemistry in which even di-acetylene (C4H2) and benzene (C6H6) are detected (Tabone et al. 2023). Together, the data point to an active inner disk gas-phase chemistry that is closely linked to the physical structure (temperature, snowlines, presence of cavities and dust traps) of the entire disk and which may result in varying CO2/H2O abundances and high C/O ratios >1 in some cases. Ultimately, this diversity in disk chemistry will also be reflected in the diversity of the chemical composition of exoplanets. |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error list index out of range</p> |

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [7]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

exported in  _build/html/2307.11821.md
    + _build/html/tmp_2307.11821/././figures/Fig3_combined_pol_astro.png
    + _build/html/tmp_2307.11821/././figures/Fig6_Astrometry_and_Fractional_Polarimetry_v2.png
    + _build/html/tmp_2307.11821/././figures/Fig7-centroid_motion_fit_with_fig3_binned_data.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand$</div>



<div id="title">

# Polarimetry and Astrometry of NIR Flares as Event Horizon Scale, Dynamical Probes for the Mass of Sgr A*

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2307.11821-b31b1b.svg)](https://arxiv.org/abs/2307.11821)<mark>Appeared on: 2023-07-25</mark> -  _10 pages, 12 figures. Submitted to A&A_

</div>
<div id="authors">

G. Collaboration, et al. -- incl., <mark>S. Scheithauer</mark>

</div>
<div id="abstract">

**Abstract:** We present new astrometric and polarimetric observations of flares from Sgr A* obtained with GRAVITY, the near-infrared interferometer at ESO’s Very Large Telescope Interferometer (VLTI), bringing the total sample of well-covered astrometric flares to four and polarimetric ones to six, where we have for two flares good coverage in both domains. All astrometric flares show clockwise motion in the plane of the sky with a period of around an hour, and the polarization vector rotates by one full loop in the same time. Given the apparent similarities of the flares, we present a common fit, taking into account the absence of strong Doppler boosting peaks in the light curves and the EHT-measured geometry. Our results are consistent with and significantly strengthen our model from 2018: We find that a) the combination of polarization period and measured flare radius of around nine gravitational radii ( $9 R_g \approx 1.5 R_{ISCO}$ , innermost stable circular orbit) is consistent with Keplerian orbital motion of hot spots in the innermost accretion zone. The mass inside the flares’ radius is consistent with the $\SI{4.297e6}{\solarmass}$ measured from stellar orbits at several thousand $R_g$ . This finding and the diameter of the millimeter shadow of Sgr A* thus support a single black hole model. Further, b) the magnetic field configuration is predominantly poloidal (vertical), and the flares’ orbital plane has a moderate inclination with respect to the plane of the sky, as shown by the non-detection of Doppler-boosting and the fact that we observe one polarization loop per astrometric loop. Moreover, c) both the position angle on sky and the required magnetic field strength suggest that the accretion flow is fueled and controlled by the winds of the massive, young stars of the clockwise stellar disk 1-5 $\arcsec$ from Sgr A*, in agreement with recent simulations.

</div>

<div id="div_fig1">

<img src="tmp_2307.11821/././figures/Fig3_combined_pol_astro.png" alt="Fig9" width="100%"/>

**Figure 9. -** Combined astrometric (left half) and polarimetric (right half) data. The outer left panels show \SI{R.A}, \SI{Dec.} and position angle on sky as a function of time. The full data are shown in gray, the colored points are bins of five minutes, and the color indicates time. The outer right panels show $Q/I$, $U/I$ and polarization angle on sky versus time. Overplotted in the angle plots are slopes of $\SI{6}{◦ee/\minute} = \SI{360}{◦ee/hour}$. The top panels in the middle illustrate the loops on sky (left) and in the $Q-U$ plane (right). The bottom middle panel shows the rotation of the polarization for the corresponding astrometric points -- one polarization rotation per astrometric orbit. The electric field vector rotates clockwise in the plane of the sky, as well as in the $Q-U$ plane.
	 (*fig:averaged_flares*)

</div>
<div id="div_fig2">

<img src="tmp_2307.11821/././figures/Fig6_Astrometry_and_Fractional_Polarimetry_v2.png" alt="Fig2" width="100%"/>

**Figure 2. -**  Comparison of the polarization model with $R = 9 R_g$ and $i = \SI{157}{◦ee}$ with the data from Fig. \ref{fig:averaged_flares}.
	 (*fig:polarization_fit*)

</div>
<div id="div_fig3">

<img src="tmp_2307.11821/././figures/Fig7-centroid_motion_fit_with_fig3_binned_data.png" alt="Fig10" width="100%"/>

**Figure 10. -** Combined fit of the astrometric flare data, taking into account the constraints from polarimetry. Left: On-sky motion. The gray disk corresponds to the shadow size of a Schwarzschild black hole $3 \sqrt{3}R_g$. Middle and right panels: The individual coordinates as a function of time. The gray data points are the full data set and the colored points are bins of five minutes. (*fig:astrometry_fit*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2307.11821"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

233  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

5  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [12]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
