# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# get list from MPIA website
# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [k[1] for k in mpia_authors]   # initials + fullname
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

E. Schinnerer  ->  E. Schinnerer  |  ['E. Schinnerer']
A. Hughes  ->  A. Hughes  |  ['A. Hughes']
Arxiv has 37 new papers today
          2 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
documents = []
failed = []
for paper in tqdm(candidates):
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                normed_mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/2 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2403.19843


extracting tarball to tmp_2403.19843...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


Found 364 bibliographic references in tmp_2403.19843/araa_arxiv_version.bbl.
syntax error in line 599: '=' expected
Retrieving document from  https://arxiv.org/e-print/2403.20057


extracting tarball to tmp_2403.20057...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [5]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2403.19843-b31b1b.svg)](https://arxiv.org/abs/arXiv:2403.19843) | **Molecular Gas and the Star Formation Process on Cloud Scales in Nearby  Galaxies**  |
|| <mark>E. Schinnerer</mark>, A. K. Leroy |
|*Appeared on*| *2024-04-01*|
|*Comments*| *70 pages, 15 figures, 9 tables. Authors' version of an article to appear in Annual Reviews of Astronomy and Astrophysics 2024, Vol 62*|
|**Abstract**| Observations that resolve nearby galaxies into individual regions across multiple phases of the gas-star formation-feedback ``matter cycle'' have provided a sharp new view of molecular clouds, star formation efficiencies, timescales for region evolution, and stellar feedback. We synthesize these results, cover aspects relevant to the interpretation of observables, and conclude that: (1) The observed cloud-scale molecular gas surface density, line width, and internal pressure all reflect the large-scale galactic environment while also appearing mostly consistent with properties of a turbulent medium strongly affected by self-gravity. (2) Cloud-scale data allow for statistical inference of both evolutionary and physical timescales. These suggest that clouds collapse on timescale of order the free-fall or turbulent crossing time ($\sim 10{-}30$~Myr) followed by the formation of massive stars and subsequent rapid ($\lesssim$ 5 Myr) gas clearing. The star formation efficiency per free-fall time is well determined over thousands of regions to be $\epsilon_{\rm ff}\approx 0.5_{-0.3}^{+0.7}\%$. (3) The role of stellar feedback is now measured using multiple observational approaches. The net momentum yield is constrained by the requirement to support the vertical weight of the galaxy disk. Meanwhile, the short gas clearing timescales suggest a large role for pre-supernova feedback in cloud disruption. This leaves the supernovae free to exert a large influence on the larger scale galaxy, including driving turbulence, launching galactic-scale winds, and carving superbubbles. |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2403.20057-b31b1b.svg)](https://arxiv.org/abs/arXiv:2403.20057) | **Bias versus variance when fitting multi-species molecular lines with a  non-LTE radiative transfer model**  |
|| A. Roueff, et al. -- incl., <mark>A. Hughes</mark> |
|*Appeared on*| *2024-04-01*|
|*Comments*| *Astronomy and Astrophysics - A\&A, In press*|
|**Abstract**| Robust radiative transfer techniques are requisite for efficiently extracting the physical and chemical information from molecular rotational lines.We study several hypotheses that enable robust estimations of the column densities and physical conditions when fitting one or two transitions per molecular species. We study the extent to which simplifying assumptions aimed at reducing the complexity of the problem introduce estimation biases and how to detect them.We focus on the CO and HCO+ isotopologues and analyze maps of a 50 square arcminutes field. We used the RADEX escape probability model to solve the statistical equilibrium equations and compute the emerging line profiles, assuming that all species coexist. Depending on the considered set of species, we also fixed the abundance ratio between some species and explored different values. We proposed a maximum likelihood estimator to infer the physical conditions and considered the effect of both the thermal noise and calibration uncertainty. We analyzed any potential biases induced by model misspecifications by comparing the results on the actual data for several sets of species and confirmed with Monte Carlo simulations. The variance of the estimations and the efficiency of the estimator were studied based on the Cram{\'e}r-Rao lower bound.Column densities can be estimated with 30% accuracy, while the best estimations of the volume density are found to be within a factor of two. Under the chosen model framework, the peak 12CO(1--0) is useful for constraining the kinetic temperature. The thermal pressure is better and more robustly estimated than the volume density and kinetic temperature separately. Analyzing CO and HCO+ isotopologues and fitting the full line profile are recommended practices with respect to detecting possible biases.Combining a non-local thermodynamic equilibrium model with a rigorous analysis of the accuracy allows us to obtain an efficient estimator and identify where the model is misspecified. We note that other combinations of molecular lines could be studied in the future. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [7]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

exported in  _build/html/2403.19843.md
    + _build/html/tmp_2403.19843/./figures/araa_densetracer.png
    + _build/html/tmp_2403.19843/./figures/araa_sun20.jpg
    + _build/html/tmp_2403.19843/./figures/Molecular_Cloud_Evolution_v2.png
    + _build/html/tmp_2403.19843/./figures/NGC628_nobox.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\hii}{\textsc{Hii}}$
$\newcommand{\ha}{\rm H{\alpha}}$
$\newcommand{\hbeta}{\rm H{\beta}}$
$\newcommand{\paa}{\rm Pa{\alpha}}$
$\newcommand{\pab}{\rm Pa{\beta}}$
$\newcommand{\bra}{\rm Br{\alpha}}$
$\newcommand{\brg}{\rm Br{\gamma}}$
$\newcommand{\htwo}{\rm H_2}$
$\newcommand{\arcsec}{^{\prime\prime}}$
$\newcommand{\msun}{M_{\odot}}$
$\newcommand{\mstar}{M_{\star}}$
$\newcommand{\Sstar}{\Sigma_{\star}}$
$\newcommand{\Ssfr}{\Sigma_{SFR}}$
$\newcommand{\Smol}{\Sigma_{mol}}$
$\newcommand{\alphavir}{\rm \alpha_{vir}}$
$\newcommand{\alphaCO}{\rm \alpha_{CO}}$
$\newcommand{\acounits}{\textup{M\ensuremath{_\odot}~pc\ensuremath{^{-2}} (K~km~s\ensuremath{^{-1}})\ensuremath{^{-1}}}}$
$\newcommand{\sigsfrunits}{\textup{M\ensuremath{_\odot}~yr\ensuremath{^{-1}}~kpc\ensuremath{^{-2}}}}$
$\newcommand{\xcounits}{\textup{cm\ensuremath{^{-2}} (K~km~s\ensuremath{^{-1}})\ensuremath{^{-1}}}}$
$\newcommand{\xco}{\mbox{X_{\rm CO}}}$
$\newcommand{\aco}{\mbox{\alpha_{\rm CO}}}$
$\newcommand{\msunperpcsq}{\mbox{M_\odot pc^{-2}}}$
$\newcommand{\pasa}{Pub.~Astron.~Soc.~of Australia}$
$\newcommand{\aj}{AJ}$
$\newcommand{\araa}{ARA\&A}$
$\newcommand{\apj}{ApJ}$
$\newcommand{\apjl}{ApJ}$
$\newcommand{\apjs}{ApJS}$
$\newcommand{\ao}{Appl.~Opt.}$
$\newcommand{\apss}{Ap\&SS}$
$\newcommand{\aap}{A\&A}$
$\newcommand{\aapr}{A\&A~Rev.}$
$\newcommand{\aaps}{A\&AS}$
$\newcommand{\azh}{AZh}$
$\newcommand{\baas}{BAAS}$
$\newcommand{\jrasc}{JRASC}$
$\newcommand{\memras}{MmRAS}$
$\newcommand{\mnras}{MNRAS}$
$\newcommand{\pra}{Phys.~Rev.~A}$
$\newcommand{\prb}{Phys.~Rev.~B}$
$\newcommand{\prc}{Phys.~Rev.~C}$
$\newcommand{\prd}{Phys.~Rev.~D}$
$\newcommand{\pre}{Phys.~Rev.~E}$
$\newcommand{\prl}{Phys.~Rev.~Lett.}$
$\newcommand{\pasp}{PASP}$
$\newcommand{\pasj}{PASJ}$
$\newcommand{\qjras}{QJRAS}$
$\newcommand{\skytel}{S\&T}$
$\newcommand{\solphys}{Sol.~Phys.}$
$\newcommand{\sovast}{Soviet~Ast.}$
$\newcommand{\ssr}{Space~Sci.~Rev.}$
$\newcommand{\zap}{ZAp}$
$\newcommand{\nat}{Nature}$
$\newcommand{\iaucirc}{IAU~Circ.}$
$\newcommand{\aplett}{Astrophys.~Lett.}$
$\newcommand{\apspr}{Astrophys.~Space~Phys.~Res.}$
$\newcommand{\bain}{Bull.~Astron.~Inst.~Netherlands}$
$\newcommand{\fcp}{Fund.~Cosmic~Phys.}$
$\newcommand{\gca}{Geochim.~Cosmochim.~Acta}$
$\newcommand{\grl}{Geophys.~Res.~Lett.}$
$\newcommand{\jcp}{J.~Chem.~Phys.}$
$\newcommand{\jgr}{J.~Geophys.~Res.}$
$\newcommand{\jqsrt}{J.~Quant.~Spec.~Radiat.~Transf.}$
$\newcommand{\memsai}{Mem.~Soc.~Astron.~Italiana}$
$\newcommand{\nphysa}{Nucl.~Phys.~A}$
$\newcommand{\physrep}{Phys.~Rep.}$
$\newcommand{\physscr}{Phys.~Scr}$
$\newcommand{\planss}{Planet.~Space~Sci.}$
$\newcommand{\procspie}{Proc.~SPIE}$
$\newcommand{\nar}{NewAR}$
$\newcommand{\rmxaa}{{Rev. Mexicana Astron. Astrofis. }}$</div>



<div id="title">

# Molecular Gas and the Star Formation Process on Cloud Scales in Nearby Galaxies

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2403.19843-b31b1b.svg)](https://arxiv.org/abs/2403.19843)<mark>Appeared on: 2024-04-01</mark> -  _70 pages, 15 figures, 9 tables. Authors' version of an article to appear in Annual Reviews of Astronomy and Astrophysics 2024, Vol 62_

</div>
<div id="authors">

<mark>E. Schinnerer</mark>, A. K. Leroy

</div>
<div id="abstract">

**Abstract:** * The role of stellar feedback is now measured using multiple observational approaches. The net yield is constrained by the requirement to support the vertical weight of the galaxy disk. Meanwhile the short gas clearing timescales suggest a large role for pre-supernova feedback in cloud disruption. This leaves the supernovae free to exert a large influence on the larger galaxy, including stirring turbulence, launching galactic-scale winds, and carving superbubbles.\end{itemize}\end{minipage}$

</div>

<div id="div_fig1">

<img src="tmp_2403.19843/./figures/araa_densetracer.png" alt="Fig6" width="100%"/>

**Figure 6. -** Dense gas tracers and gas density. _Left:_ Illustration of how the density-dependent emissivity of a transition, here HCN (1-0) in blue, convolves with the physical gas density distribution, in red, to produce emission from a range of densities, in purple \citep[following][]{LEROY17DENSE}. Because HCN and similar lines often sample a steeply falling part of the density distribution, a sizeable fraction of  HCN emission often arises from gas below the critical density (black vertical line). A bulk gas tracer like CO would be sensitive to almost the full range of physical densities here. _Middle_: Correlation between HCN (1-0)/CO (1-0) and $N_2$H$^+$ (1-0)/CO (1-0) for regions within galaxies. The blue points from \citet{JIMENEZ23DENSE} show regions in NGC 6946 and a literature compilation. The red points show the binned trend from mapping of M51 by \citet{STUBER23DENSE}, with gray dots showing individual M51 sight lines and red contours indicating data density. $N_2$H$^+$ emerges primarily from cold, dense regions where CO freezes out, but is too faint to be surveyed in a wide range of extragalactic systems. The good correspondence shows that the brightest, most accessible extragalactic dense gas tracer, HCN, yields consistent results with $N_2$H$^+$. _Right_: Correlation between HCN/CO, a spectroscopic tracer of gas density, and the mean molecular gas surface density at 150 pc scales inferred from high resolution CO imaging \citep[from][]{NEUMANN23DENSE}. The excellent correspondence between these two extragalactic tracers of density supports the interpretation of HCN/CO as a density-sensitive line ratio and demonstrates a close link between cloud-scale mean density and physical density.
 (*fig:densetracing*)

</div>
<div id="div_fig2">

<img src="tmp_2403.19843/./figures/araa_sun20.jpg" alt="Fig3" width="100%"/>

**Figure 3. -** 
Cloud-scale surface density and line width across the local galaxy population, from \citet{SUN20GMCS}. _Left:_ Cloud-scale line width, $\sigma_{\rm mol, 150pc}$, as a function of CO (2-1) intensity. _Right:_$\sigma_{\rm mol, 150pc}$ as a function of cloud-scale $\Sigma_{\rm mol, 150pc}$. Both panels show $\sim 100,000$ independent cloud-scale sight lines across $66$ star-forming disk galaxies with measurements at fixed $150$ pc scale. Orange lines in the left panel indicate the sensitivity limits of the observations. Dashed and dashed-dotted lines show the _Heyer-Keto relation_ expected for clouds with fixed dynamical state (Eq. \ref{eq:heyerketo} with $\alpha_{\rm vir}=1$ and $2$). Dotted lines from top left to bottom right show isobars, indicating fixed internal pressure $P_{\rm int}$ at $P_{\rm int} / k_B = 10^3$, $10^4$, ..., $10^8$ cm$^{-3}$ K following Eq. \ref{eq:isobar}. Both panels illustrate a wide range of $\Sigma_{\rm mol}$, $\sigma_{\rm mol}$, and $P_{\rm int}$ with a narrower range of dynamical state, and the coloration by galactocentric radius illustrates that these are systematic variations.
 (*fig:heyerketo*)

</div>
<div id="div_fig3">

<img src="tmp_2403.19843/./figures/Molecular_Cloud_Evolution_v2.png" alt="Fig1.1" width="50%"/><img src="tmp_2403.19843/./figures/NGC628_nobox.png" alt="Fig1.2" width="50%"/>

**Figure 1. -** _Top:_ Visible separation of tracers of recent star formation (gold; VLT/MUSE H$\alpha$) and cold gas (blue; ALMA CO (2-1)) at "cloud-scale" resolution, here in NGC 628 from \citet{KRECKEL18SFR}.
_Bottom:_ Schematic view of the evolution of a molecular cloud from formation to star cluster. The cloud begins as an over-density of cold, predominantly molecular gas. A subset of the gas achieves high column and volume densities. Stars form from this dense material. Newly formed massive stars rapidly impact their surrounding birth material via radiation and winds, reshaping or even disrupting the cloud. Over time, the continued energy and momentum input from these young massive stars disperses the gas cloud. As a result, many core-collapse supernovae explode in relatively low density, pre-cleared surroundings. In the sketch, gas density increases from blue to white, attenuation of stellar light decreases from red to yellow, light red colors indicate gas ionized by massive stars, and the cyan object in the right panel represents a supernova. (*fig:sketch_cloud*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2403.19843"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

394  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

2  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [12]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

1  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
