# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# get list from MPIA website
# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [k[1] for k in mpia_authors]   # initials + fullname
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

H. Linz  ->  H. Linz  |  ['H. Linz']
H. Linz  ->  H. Linz  |  ['H. Linz']
A. Frank  ->  A. Frank  |  ['A. Frank']
Arxiv has 44 new papers today
          3 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
documents = []
failed = []
for paper in tqdm(candidates[:-1]):
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                normed_mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/2 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2304.14739


extracting tarball to tmp_2304.14739...

 done.


list index out of range
Retrieving document from  https://arxiv.org/e-print/2304.14740


extracting tarball to tmp_2304.14740...

 done.


list index out of range


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [5]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2304.14739-b31b1b.svg)](https://arxiv.org/abs/arXiv:2304.14739) | **A heat-wave of accretion energy traced by masers in the G358-MM1  high-mass protostar**  |
|| R. A. Burns, et al. -- incl., <mark>H. Linz</mark> |
|*Appeared on*| *2023-05-01*|
|*Comments*| *Published in Nature Astronomy in 2020*|
|**Abstract**| High-mass stars are thought to accumulate much of their mass via short, infrequent bursts of disk-aided accretion. Such accretion events are rare and difficult to observe directly but are known to drive enhanced maser emission. In this Letter we report high-resolution, multi-epoch methanol maser observations toward G358.93-0.03 which reveal an interesting phenomenon; the sub-luminal propagation of a thermal radiation "heat-wave" emanating from an accreting high-mass proto-star. The extreme transformation of the maser emission implies a sudden intensification of thermal infrared radiation from within the inner (40 mas, 270 au) region. Subsequently, methanol masers trace the radial passage of thermal radiation through the environment at $\geq$ 4-8\% the speed of light. Such a high translocation rate contrasts with the $\leq$ 10 km s$^{-1}$ physical gas motions of methanol masers typically observed using very long baseline interferometry (VLBI). The observed scenario can readily be attributed to an accretion event in the high-mass proto-star G358.93-0.03-MM1. While being the third case in its class, G358.93-0.03-MM1 exhibits unique attributes hinting at a possible `zoo' of accretion burst types. These results promote the advantages of maser observations in understanding high-mass star formation, both through single-dish maser monitoring campaigns and via their international cooperation as VLBI arrays. |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2304.14740-b31b1b.svg)](https://arxiv.org/abs/arXiv:2304.14740) | **A Keplerian disk with a four-arm spiral birthing an episodically  accreting high-mass protostar**  |
|| R. A. Burns, et al. -- incl., <mark>H. Linz</mark> |
|*Appeared on*| *2023-05-01*|
|*Comments*| *Published in Nature Astronomy in 2023*|
|**Abstract**| High-mass protostars (M$_{\star} >$ 8 M$_{\odot}$) are thought to gain the majority of their mass via short, intense bursts of growth. This episodic accretion is thought to be facilitated by gravitationally unstable and subsequently inhomogeneous accretion disks. Limitations of observational capabilities, paired with a lack of observed accretion burst events has withheld affirmative confirmation of the association between disk accretion, instability and the accretion burst phenomenon in high-mass protostars. Following its 2019 accretion burst, a heat-wave driven by a burst of radiation propagated outward from the high-mass protostar G358.93-0.03-MM1. Six VLBI (very long baseline interferometry) observations of the raditively pumped 6.7 GHz methanol maser were conducted during this period, tracing ever increasing disk radii as the heat-wave propagated outward. Concatenating the VLBI maps provided a sparsely sampled, milliarcsecond view of the spatio-kinematics of the accretion disk covering a physical range of $\sim$ 50 - 900 AU. We term this observational approach `heat-wave mapping'. We report the discovery of a Keplerian accretion disk with a spatially resolved four-arm spiral pattern around G358.93-0.03-MM1. This result positively implicates disk accretion and spiral arm instabilities into the episodic accretion high-mass star formation paradigm. |

## Failed papers

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [7]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

exported in  _build/html/2304.14739.md
    + _build/html/tmp_2304.14739/./Schematic_5.png
    + _build/html/tmp_2304.14739/./VX026AC_QD22.png
    + _build/html/tmp_2304.14739/./G358_MOMNT_COL_ed.png
    + _build/html/tmp_2304.14739/./G358_MOMNT_newEd2.png
    + _build/html/tmp_2304.14739/./AutoCross_Zoom_Together11.png
exported in  _build/html/2304.14740.md
    + _build/html/tmp_2304.14740/./Fig4.png
    + _build/html/tmp_2304.14740/./Fig5.png
    + _build/html/tmp_2304.14740/./Fig3.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\bibinfo}[2]{#2}$
$\newcommand{\eprint}[2][]{\url{#2}}$
$\newcommand{\bibinfo}[2]{#2}$
$\newcommand{\eprint}[2][]{\url{#2}}$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand{\includegraphics}[2][]$
$\newcommand{\}{url}$
$\newcommand{\urlprefix}{URL }$
$\newcommand{\}{url}$
$\newcommand{\urlprefix}{URL }$</div>



<div id="title">

# A "Heatwave" from the G358 accretion event

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2304.14739-b31b1b.svg)](https://arxiv.org/abs/2304.14739)<mark>Appeared on: 2023-05-01</mark> -  _Published in Nature Astronomy in 2020_

</div>
<div id="authors">

R. A. Burns, et al. -- incl., <mark>H. Linz</mark>

</div>
<div id="abstract">

**Abstract:** High-mass stars are thought to accumulate much of their mass via short, infrequent bursts of disk-aided accretion \cite{Stamatellos11,Meyer17} . Such accretion events are rare and difficult to observe directly but are known to drive enhanced maser emission \cite{Hunter18,Gordon18,Szymczak18,Moscadelli17} . In this Letter we report high-resolution, multi-epoch methanol maser observations toward G358.93-0.03 which reveal an interesting phenomenon; the sub-luminal propagation of a thermal radiation "heat-wave" emanating from an accreting high-mass proto-star.The extreme transformation of the maser emission implies a sudden intensification of thermal infrared radiation from within the inner (40 mas, 270 au) region. Subsequently, methanol masers trace the radial passage of thermal radiation through the environment at $\geq$ 4-8 \% the speed of light. Such a high translocation rate contrasts with the $\leq$ 10 km s $^{-1}$ physical gas motions of methanol masers typically observed using very long baseline interferometry (VLBI).The observed scenario can readily be attributed to an accretion event in the high-mass proto-star G358.93-0.03-MM1. While being the third case in its class, G358.93-0.03-MM1 exhibits unique attributes hinting at a possible `zoo' of accretion burst types.These results promote the advantages of maser observations in understanding high-mass star formation, both through single-dish maser monitoring campaigns and via their international cooperation as VLBI arrays.

</div>

<div id="div_fig1">

<img src="tmp_2304.14739/./Schematic_5.png" alt="Fig2.1" width="50%"/><img src="tmp_2304.14739/./VX026AC_QD22.png" alt="Fig2.2" width="50%"/>

**Figure 2. -** ** Schematic illustration of the observational data.**(*Left*) A schematic model of the maser distribution and evolution in an accreting star-disk system. *Right* Spot maps of emission above 5 $\sigma$ detailing the evolution of methanol maser emission in G358-MM1. Colours indicate the velocity in the frame of the local standard of rest, and symbol sizes are arbitrary. The upper and lower panels illustrate the data of vx026a (2nd Feb 2019) and vx026c (28th Feb 2019), respectively. Directional offsets are stated with respect to the coordinate (RA, DEC) = (17:43:10.1014, -29:51:45.693) which correspond to the position of G358-MM1 where the symbol size indicates the 40 mas absolute positional uncertainty in the continuum source position from \cite{Brogan19a}. The dark rings delineate the fits to each epoch, while the grey ring indicates the extent of the vx026a masers at the epoch of vx026c.  (*RINGS*)

</div>
<div id="div_fig2">

<img src="tmp_2304.14739/./G358_MOMNT_COL_ed.png" alt="Fig3.1" width="50%"/><img src="tmp_2304.14739/./G358_MOMNT_newEd2.png" alt="Fig3.2" width="50%"/>

**Figure 3. -** ** Methanol maser distributions in G358-MM1.** Zero'th (contours) and first (colours) moment maps of the 6.7 GHz methanol maser emission in G358-MM1. *Left* shows the distribution of emission during the vx026a epoch while *right* shows that of vx026c, taken 26 days later. Moment image cubes were produced for emission above a 5 $\sigma$ cutoff and contours increase by factors of 2 multiples of the first contour at 2 Jy beam$^{-1}$ km s$^{-1}$. The white cross indicates the position of the brightest millimeter continuum peak of the G358-MM1 region \cite{Brogan19a}.  (*MOMNT*)

</div>
<div id="div_fig3">

<img src="tmp_2304.14739/./AutoCross_Zoom_Together11.png" alt="Fig1" width="100%"/>

**Figure 1. -** ** Spectral profiles of the 6.7 GHz methanol maser emission in G358-MM1.** Solid shapes and lines indicate the auto- and cross-correlation spectra respectively. Magnifications are shown in the lower insets to display low flux density components. The dashed line indicates the source systemic velocity.  (*SPECTRA*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2304.14739"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\bibinfo}[2]{#2}$
$\newcommand{\micro}{\fontsize{4pt}{4pt}\selectfont}$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand{\thefootnote}{\fnsymbol{footnote}}$
$\newcommand{\includegraphics}[2][]$
$\newcommand{\kms}{km s^{-1}}$
$\newcommand{\nh}{NH_3}$
$\newcommand{\HII}{H \emissiontype{II} }$
$\newcommand{\ho}{H_2O}$
$\newcommand{\red}{\textcolor{red}}$
$\newcommand{\blue}{\textcolor{blue}}$
$\newcommand{\fdg}{.\!\!^\circ}$
$\newcommand{\}{url}$</div>



<div id="title">

# A Keplerian disk with a four-arm spiral birthing an episodically accreting high-mass protostar

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2304.14740-b31b1b.svg)](https://arxiv.org/abs/2304.14740)<mark>Appeared on: 2023-05-01</mark> -  _Published in Nature Astronomy in 2023_

</div>
<div id="authors">

R. A. Burns, et al. -- incl., <mark>H. Linz</mark>

</div>
<div id="abstract">

**Abstract:** High-mass protostars (M $_{\star} >$ 8 M $_{\odot}$ ) are thought to gain the majority of their mass via short, intense bursts of growth. This episodic accretion is thought to be facilitated by gravitationally unstable and subsequently inhomogeneous accretion disks. Limitations of observational capabilities, paired with a lack of observed accretion burst events has withheld affirmative confirmation of the association between disk accretion, instability and the accretion burst phenomenon in high-mass protostars.Following its 2019 accretion burst, a heat-wave driven by a burst of radiation propagated outward from the high-mass protostar G358.93-0.03-MM1.Six VLBI (very long baseline interferometry) observations of the raditively pumped 6.7 GHz methanol maser were conducted during this period, tracing ever increasing disk radii as the heat-wave propagated outward. Concatenating the VLBI maps provided a sparsely sampled, milliarcsecond view of the spatio-kinematics of the accretion disk covering a physical range of $\sim$ 50 - 900 AU. We term this observational approach `heat-wave mapping'.We report the discovery of a Keplerian accretion disk with a spatially resolved four-arm spiral pattern around G358.93-0.03-MM1. This result positively implicates disk accretion and spiral arm instabilities into the episodic accretion high-mass star formation paradigm.

</div>

<div id="div_fig1">

<img src="tmp_2304.14740/./Fig4.png" alt="Fig4" width="100%"/>

**Figure 4. -** ** Identification of spiral arms A and B**(*Upper left*) shows the AIC, BIC and $\chi$ values for 200,000 trials of each residual threshold in the RANSAC procedure for arm A. (*Lower left*) shows the posterior distribution for pitch angles fit during 10,000 samples in the MCMC procedure for arm A. (*Upper and lower right*) show the same for arm B. (*Center*) shows the six combined spotmap data set in $\phi-ln(R)$ space where spots determined by RANSAC to be associated with arms A and B, and best fit arm functions determined by MCMC, are shown in green and blue, respectively. The nomial locations of spiral arms C and D are shown as dashed lines. Colours of arms are consistent with Figure \ref{Fig5}. Error bars express each maser's astrometric uncertainty. (*Fig4*)

</div>
<div id="div_fig2">

<img src="tmp_2304.14740/./Fig5.png" alt="Fig5" width="100%"/>

**Figure 5. -** ** 4-arm spiral identification***Below* shows the results of the spatial 2D cross-correlation as a function of azimuth angle. The green and blue regions indicate the full-width half-maximum range for the correlation peaks associated with arms A and B, respectively. Red and yellow regions highlight the disk regions at $\pm 180^{\circ}$ opposite to the green and blue regions where symmetric arm pairs were searched. The grey line shows a $5\sigma$ detection criteria derived from the 5 times the standard deviation of correlation coefficients at each azimuth acquired from 10 sets of random data of the same size and variable ranges as the maser data. (*Above*) shows the spiral structure model in G358-MM1 plotted on the flux density map (grey-scale). Green and blue arms represent arms A and B respectively, parameterised by RANSAC and MCMC. The red and yellow lines illustrate arms C and D, which represent the symmetric pairs of arms A and B, respectively, detected using 2D cross-correlation.  (*Fig5*)

</div>
<div id="div_fig3">

<img src="tmp_2304.14740/./Fig3.png" alt="Fig3" width="100%"/>

**Figure 3. -** ** Analyses of maser data**(*Left*) Shows the Position-velocity diagram for the maser data an best fit line to the Keplerian profile. Bootrapping trials are shown as semi-opaque lines, indicating the fit uncertainty. *Right* shows the maser spot positions for the outer 50\% of masers in the 5th VLBI epoch (blue circles) where the point size represents the 2.60 milliarcsecond positional uncertainty in the astrometric positions. Spots omitted from the ellipse fitting are shown in grey. The black line shows the ellipse fit to the maser data to determine the system inclination.   (*Fig3*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2304.14740"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

200  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

4  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [12]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
