# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related notices.

    This notebook registers an 'always' filter for it, so such warnings
    are never silenced as duplicates.
    """

class AffiliationError(RuntimeError):
    """Error raised when the affiliation check of a paper source fails."""

def validation(source: str):
    """Check the affiliations of a TeX source before it gets parsed.

    Used in this notebook as the ``validation`` callback of
    :class:`latex.LatexDocument`, so that papers failing the check are
    rejected before any further processing.

    :param source: TeX source, passed through to
                   :func:`mpia.affiliation_verifications`
    :raises AffiliationError: if :func:`mpia.affiliation_verifications`
                              returns anything other than ``True``; the
                              returned value is appended to the message
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # `check` carries the failure reason; str() keeps the message
        # construction safe should it not be a plain string
        raise AffiliationError("mpia.affiliation_verifications: " + str(check))

        
# Always report AffiliationWarning, even if the same warning was already issued
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: Arxiv paper identifier (e.g. ``2406.18274``)
    :returns: HTML snippet (``<div id="qrcode">`` wrapping an ``<img>``)
              that can be appended to the markdown text
    """
    api_url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    arxiv_url = f"https://arxiv.org/abs/{paper_id}"
    # The quotes must wrap the whole `src` value. Quoting only the arXiv URL
    # leaves `src` unquoted and puts literal '"' characters in the QR payload.
    return f'<div id="qrcode"><img src="{api_url}{arxiv_url}"></div>'

## get list of arxiv paper candidates

We use the MPIA mitarbeiter (staff) list webpage from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loosely decide whether a staff name is an expected paper author

    Names containing one of the listed fragments (IT, administration,
    technical staff) are rejected. The comparison is a plain substring test.

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    excluded_fragments = (
        'Wolf', 'Licht', 'Binroth', 'Witzel', 'Jordan',
        'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
        'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
        'Dehen', 'Beckmann', 'Jager', 'Jäger',
    )
    return not any(fragment in name for fragment in excluded_fragments)

def add_author_to_list(author_list: list) -> list:
    """ Make sure a fixed set of extra authors is present in the list

    The list is extended in place and also returned.

    :param author_list: list of authors
    :returns: updated list of authors (same object as ``author_list``)
    """
    extra_authors = ('T. Henning',)
    missing = [name for name in extra_authors if name not in author_list]
    author_list += missing
    return author_list

# Author names come from the MPIA staff list (second field of each entry;
# mpia.get_mpia_mitarbeiter_list() already does some filtering). Non-scientists
# are dropped, then authors whose paper name differs from their MPIA name are added.
mpia_authors = add_author_to_list(
    [entry[1] for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# Today's listing, optionally extended with manually added references
new_papers = get_new_papers()
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(ref) for ref in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Normalize the author names with mpia.get_initials before matching
    normed_author_list = list(map(mpia.get_initials, paperk['authors']))
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # A highlighted entry contains 'mark'; pair it with the original spelling
    matches = [pair for pair in zip(hl_authors, paperk['authors']) if 'mark' in pair[0]]
    paperk['authors'] = hl_authors
    if not matches:
        # no author matched our list: not a candidate
        continue
    candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

X. Zhang  ->  X. Zhang  |  ['X. Zhang']
K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']
J. Li  ->  J. Li  |  ['J. Li']
M. lehmitz  ->  M. Lehmitz  |  ['M. Lehmitz']
Arxiv has 69 new papers today
          4 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handle `\graphicspath` if provided
* Generate the .md document.

In [5]:
# documents: (paper_id, markdown text) of every successfully processed paper
# failed:    (paper, reason) of every paper that could not be processed
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalize the identifier: lower-case, no 'arxiv:' prefix, and neither a
    # real newline nor a literal backslash-n sequence left in the string
    paper['identifier'] = (paper['identifier'].lower()
                           .replace('arxiv:', '')
                           .replace(r'\n', '')
                           .replace('\n', '')
                           .strip())
    paper_id = paper['identifier']

    folder = f'tmp_{paper_id}'

    try:
        # Reuse an already extracted source folder when there is one
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", folder)

        try:
            doc = latex.LatexDocument(folder, validation=validation)
        except AffiliationError as affilerror:
            # Not an MPIA paper according to `validation`: record and move on
            failed.append((paper, "affiliation error: " + str(affilerror)))
            continue

        # Hack because sometimes author parsing does not work well:
        # fall back to the arXiv author list when the counts disagree
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors],
                mpia_authors, verbose=True)
        # Use the arXiv abstract when none could be extracted from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']

        doc.comment = (get_markdown_badge(paper_id) +
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"

        full_md = doc.generate_markdown_text()

        full_md += get_markdown_qrcode(paper_id)

        # replace citations (best effort: the document is kept even if it fails)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)

        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure (download, extraction, parsing...) is reported as
        # a warning and recorded so that the remaining papers still run
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/4 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2406.18169


extracting tarball to tmp_2406.18169... done.
Retrieving document from  https://arxiv.org/e-print/2406.18274


extracting tarball to tmp_2406.18274...

 done.


Found 106 bibliographic references in tmp_2406.18274/main-euclid.bbl.
Retrieving document from  https://arxiv.org/e-print/2406.18303


extracting tarball to tmp_2406.18303...

 done.
Retrieving document from  https://arxiv.org/e-print/2406.18317


not a gzip file


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# open() does not create missing folders: make sure the log directory exists
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# Explicit UTF-8 so that non-ASCII author names can always be written
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a document was generated
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that identical reasons share a single sub-heading
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are expected (green); anything else is red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.18274-b31b1b.svg)](https://arxiv.org/abs/2406.18274) | **Euclid preparation. Sensitivity to non-standard particle dark matter model**  |
|| E. Collaboration, et al. -- incl., <mark>K. Jahnke</mark> |
|*Appeared on*| *2024-06-27*|
|*Comments*| *31 pages, 21 figures*|
|**Abstract**|            The Euclid mission of the European Space Agency will provide weak gravitational lensing and galaxy clustering surveys that can be used to constrain the standard cosmological model and its extensions, with an opportunity to test the properties of dark matter beyond the minimal cold dark matter paradigm. We present forecasts from the combination of these surveys on the parameters describing four interesting and representative non-minimal dark matter models: a mixture of cold and warm dark matter relics; unstable dark matter decaying either into massless or massive relics; and dark matter experiencing feeble interactions with relativistic relics. We model these scenarios at the level of the non-linear matter power spectrum using emulators trained on dedicated N-body simulations. We use a mock Euclid likelihood to fit mock data and infer error bars on dark matter parameters marginalised over other parameters. We find that the Euclid photometric probe (alone or in combination with CMB data from the Planck satellite) will be sensitive to the effect of each of the four dark matter models considered here. The improvement will be particularly spectacular for decaying and interacting dark matter models. With Euclid, the bounds on some dark matter parameters can improve by up to two orders of magnitude compared to current limits. We discuss the dependence of predicted uncertainties on different assumptions: inclusion of photometric galaxy clustering data, minimum angular scale taken into account, modelling of baryonic feedback effects. We conclude that the Euclid mission will be able to measure quantities related to the dark sector of particle physics with unprecedented sensitivity. This will provide important information for model building in high-energy physics. Any hint of a deviation from the minimal cold dark matter paradigm would have profound implications for cosmology and particle physics.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.18169-b31b1b.svg)](https://arxiv.org/abs/2406.18169) | **Timing and Scintillation Studies of Pulsars in Globular Cluster M3 (NGC 5272) with FAST**  |
|| B. Li, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2024-06-27*|
|*Comments*| *14 pages, 4 figures, accepted for publication in The Astrophysical Journal*|
|**Abstract**|            We present the phase-connected timing solutions of all the five pulsars in globular cluster (GC) M3 (NGC 5272), namely PSRs M3A to F (PSRs J1342+2822A to F), with the exception of PSR M3C, from FAST archival data. In these timing solutions, those of PSRs M3E, and F are obtained for the first time. We find that PSRs M3E and F have low mass companions, and are in circular orbits with periods of 7.1 and 3.0 days, respectively. For PSR M3C, we have not detected it in all the 41 observations. We found no X-ray counterparts for these pulsars in archival Chandra images in the band of 0.2-20 keV. We noticed that the pulsars in M3 seem to be native. From the Auto-Correlation Function (ACF) analysis of the M3A's and M3B's dynamic spectra, the scintillation timescale ranges from $7.0\pm0.3$ min to $60.0\pm0.6$ min, and the scintillation bandwidth ranges from $4.6\pm0.2$ MHz to $57.1\pm1.1$ MHz. The measured scintillation bandwidths from the dynamic spectra indicate strong scintillation, and the scattering medium is anisotropic. From the secondary spectra, we captured a scintillation arc only for PSR M3B with a curvature of $649\pm23 {\rm m}^{-1} {\rm mHz}^{-2}$.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.18303-b31b1b.svg)](https://arxiv.org/abs/2406.18303) | **A population of mid-infrared large-amplitude variable young stellar objects from unTimely**  |
|| <mark>J. Li</mark>, T. Wang |
|*Appeared on*| *2024-06-27*|
|*Comments*| *22 pages, 20 figures*|
|**Abstract**|            Utilizing a decade-long unTimely dataset, supplemented by multi-band data from archives, we search for young stellar objects (YSOs) with variations larger than one magnitude in W1 band within a region of 110 square degrees in the Galactic plane, covered by VISTA Variables in the Via Lactea (VVV). A total of 641 candidate YSOs have been identified. We classified them into bursts, dips, faders, seculars, and irregulars. Within the burst category, 18 sources were identified as FUor candidates and 1 as an EXor candidate. Irregulars are the most prevalent in the sample. In both bursts and faders, the redder sources tend to show a pattern of bluer when brighter, whereas the bluer sources display the opposite trend, possibly related to the accretion structure of YSOs at different stages. Finally, we obtained the recurrence time scale for FUor eruptions at various stages of YSO evolution. Our findings indicate that younger YSOs generally experience more frequent eruptions compared to older ones.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.18317-b31b1b.svg)](https://arxiv.org/abs/2406.18317) | **ANDES, the high-resolution spectrograph for the ELT: RIZ Spectrograph preliminary design**  |
|| B. Chazelas, et al. -- incl., <mark>M. lehmitz</mark> |
|*Appeared on*| *2024-06-27*|
|*Comments*| *Paper submitted to the SPIE astronomical telescope and instrumentation 2024, conference title : Ground-based and Airborne Instrumentation for Astronomy X, paper reference number : 13096-171*|
|**Abstract**|            We present here the preliminary design of the RIZ module, one of the visible spectrographs of the ANDES instrument 1. It is a fiber-fed high-resolution, high-stability spectrograph. Its design follows the guidelines of successful predecessors such as HARPS and ESPRESSO. In this paper we present the status of the spectrograph at the preliminary design stage. The spectrograph will be a warm, vacuum-operated, thermally controlled and fiber-fed echelle spectrograph. Following the phase A design, the huge etendue of the telescope will be reformed in the instrument with a long slit made of smaller fibers. We discuss the system design of the spectrographs system.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error not a gzip file</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Figures referenced in ``md`` (markdown ``[Fig...](path)`` links and
    ``<img src="path" .../>`` tags) are copied below ``directory`` keeping
    their relative path; online figures and missing files are skipped.

    :param md: markdown text to export
    :param md_fname: file name of the markdown document inside ``directory``
    :param directory: destination folder, created (with parents) if missing
    :raises RuntimeError: if ``directory`` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs (not mkdir): nested targets such as '_build/html/' must work
        # even when the parent folder does not exist yet
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # keep the figure's relative path below the destination folder
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    # list every referenced figure (including skipped ones) at its export path
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Export every generated document as '<paper_id>.md' (plus its figures) into the HTML build folder
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2406.18274/./plots/notebook_triangle_plots/CWDM/probe_comparison.png', 'tmp_2406.18274/./plots/notebook_triangle_plots/CWDM/BF_comparison.png', 'tmp_2406.18274/./plots/linear/1b.png']
copying  tmp_2406.18274/./plots/notebook_triangle_plots/CWDM/probe_comparison.png to _build/html/
copying  tmp_2406.18274/./plots/notebook_triangle_plots/CWDM/BF_comparison.png to _build/html/
copying  tmp_2406.18274/./plots/linear/1b.png to _build/html/
exported in  _build/html/2406.18274.md
    + _build/html/tmp_2406.18274/./plots/notebook_triangle_plots/CWDM/probe_comparison.png
    + _build/html/tmp_2406.18274/./plots/notebook_triangle_plots/CWDM/BF_comparison.png
    + _build/html/tmp_2406.18274/./plots/linear/1b.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render each generated document (second field of the tuple) for a quick check
for doc_entry in documents:
    display(Markdown(doc_entry[1]))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\cosmicfish}{\texttt{CosmicFish}}$
$\newcommand{\montepython}{\texttt{MontePython}}$
$\newcommand{\class}{\texttt{CLASS}}$
$\newcommand{\camb}{\texttt{CAMB}}$
$\newcommand{\bcemu}{\texttt{BCemu}}$
$\newcommand{\AP}{Alcock--Paczy\'nski}$
$\newcommand{\niof}{w_{n(z),i}}$
$\newcommand{\de}{{\rm d}}$
$\newcommand{\loga}{\log_{10}(a_{\rm dark} / {\rm Mpc}^{-1})}$
$\newcommand{\logaxi}{\log_{10}(a_{\rm dark} \xi_{\rm idr}^4 / {\rm Mpc}^{-1})}$
$\newcommand{\istfisher}{\citetalias{Blanchard:2019oqi}}$
$\newcommand{\orcid}[1]{\orcidlink{#1}}$</div>



<div id="title">

# $\Euclid$ preparation

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2406.18274-b31b1b.svg)](https://arxiv.org/abs/2406.18274)<mark>Appeared on: 2024-06-27</mark> -  _31 pages, 21 figures_

</div>
<div id="authors">

E. Collaboration, et al. -- incl., <mark>K. Jahnke</mark>

</div>
<div id="abstract">

**Abstract:** The $\Euclid$ mission of the European Space Agency will provide weak gravitational lensing and galaxy clustering surveys that can be used to constrain the standard cosmological model and its extensions, with an opportunity to test the properties of dark matter beyond the minimal cold dark matter paradigm.We present forecasts from the combination of the $\Euclid$ weak lensing and photometric galaxy clustering data on the parameters describing four interesting and representative non-minimal dark matter models: a mixture of cold and warm dark matter relics; unstable dark matter decaying either into massless or massive relics; and dark matter experiencing feeble interactions with relativistic relics.We model these scenarios at the level of the non-linear matter power spectrum using emulators trained on dedicated $N$ -body simulations. We use a mock $\Euclid$ likelihood and Monte Carlo Markov Chains to fit mock data and infer error bars on dark matter parameters marginalised over other parameters.We find that the $\Euclid$ photometric probe (alone or in combination with cosmic microwave background data from the $\Planck$ satellite) will be sensitive to the effect of each of the four dark matter models considered here. The improvement will be particularly spectacular for decaying and interacting dark matter models. With $\Euclid$ , the bounds on some dark matter parameters can improve by up to two orders of magnitude compared to current limits. We discuss the dependence of predicted uncertainties on different assumptions: inclusion of photometric galaxy clustering data, minimum angular scale taken into account, modelling of baryonic feedback effects.We conclude that the $\Euclid$ mission will be able to measure quantities related to the dark sector of particle physics with unprecedented sensitivity. This will provide important information for model building in high-energy physics. 
Any hint of a deviation from the minimal cold dark matter paradigm would have profound implications for cosmology and particle physics.

</div>

<div id="div_fig1">

<img src="tmp_2406.18274/./plots/notebook_triangle_plots/CWDM/probe_comparison.png" alt="Fig12" width="100%"/>

**Figure 12. -** *Left*: edges of the 95\% credible interval on the WDM mass $m_{\rm wdm}$ and fraction $f_{\rm wdm}$ for the CWDM model, with pessimistic assumptions and three data combinations: weak lensing (WL) alone, weak lensing plus galaxy clustering from the photometric survey (3\texttimes2pt), and 3\texttimes2pt combined with \Planck  CMB data. For the 3\texttimes2pt and 3\texttimes2pt + \Planck  data sets, baryonic feedback has been assumed to affect the WL power spectrum but not the GC power spectrum. The posterior is marginalised over other cosmological parameters, baryonic feedback parameters, and nuisance parameters (accounting for bias uncertainty and intrinsic alignment). The model is equivalent to pure $\Lambda$CDM towards the lower horizontal axis (small $f_{\rm wdm}$) and right vertical axis (large $m_{\rm wdm}$). The forecast assumes a flat prior on the mass of thermal WDM (lower axis) and a logarithmic prior on the WDM fraction (left axis), but we show the relation to Dodelson--Widrow masses in the upper axis (see Sect. \ref{sec:theo_CWDM} for definitions).
    *Right*: same with optimistic assumptions.
     (*fig:CWDM_pess_opt*)

</div>
<div id="div_fig2">

<img src="tmp_2406.18274/./plots/notebook_triangle_plots/CWDM/BF_comparison.png" alt="Fig13" width="100%"/>

**Figure 13. -** *Left*: same as Fig. \ref{fig:CWDM_pess_opt} but only for the 3\texttimes2pt dataset and with different assumptions on baryonic feedback (BF): fixed BF (magenta), BF affecting only the weak lensing (WL) power spectrum (orange), or BF affecting both the WL and galaxy clustering  (GC) power spectra (grey). The "truth" is expected to lay between the latter two cases (orange and grey), which give anyway very similar results.
    *Right*: same with optimistic assumptions.
     (*fig:CWDM_BF_comparison*)

</div>
<div id="div_fig3">

<img src="tmp_2406.18274/./plots/linear/1b.png" alt="Fig5" width="100%"/>

**Figure 5. -** 
    Ratio of the linear (solid lines) and non-linear (dashed lines) power spectra of several 1b-DDM models to that of a pure $\Lambda$CDM model with the same cosmological parameters, parameterised by the fraction $f_{\rm ddm}^{\rm ini}$ and the decay rate $\Gamma_{\rm ddm}$. We work in the basis $(f_{\rm ddm}^{\rm ini},\Gamma_{\rm ddm}   f_{\rm ddm}^{\rm ini})$ to show that only the product of the two DDM parameters affects the linear power spectrum. The other parameters ($\Omega_{\rm dm}^{\rm ini}$, $\Omega_{{\rm b}}$, $h$, $A_{\rm s}$, $n_{\rm s}$) are kept fixed, and the spectra are computed today ($z=0$). The non-linear spectra are predicted by the emulator introduced in Sect. \ref{sec:nl_1bddm} and plotted up to the maximum wavenumber at which this emulator is trusted.
     (*fig:lin_1b*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2406.18274"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
# Use timezone-aware UTC datetimes on both sides of the subtraction: mixing a
# naive local "now" with a naive UTC file time skews the age by the UTC offset
now = datetime.now(timezone.utc)
res = []
for fk in files:
    # NOTE: st_ctime is the metadata-change time on Unix (creation time on Windows)
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= timedelta(days=days):
        # total_seconds() keeps the days component (timedelta.seconds drops it)
        res.append((delta.total_seconds(), fk))
# keep the file names only, in reverse lexicographic order
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

105  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Yield the documents whose date falls within the last ``days`` days

    :param lst: sequence of ``(fname, date)`` pairs; dates are compared as
                strings against ``str(datetime.date)`` values
    :param days: size of the window, counted back from today
    :returns: generator of file names, most recent date first
    """
    def cutoff():
        # oldest accepted date, as a string
        return str(datetime.date.today() - datetime.timedelta(days=days))

    newest_first = sorted(lst, key=lambda entry: entry[1], reverse=True)
    yield from (fname for fname, date in newest_first if date >= cutoff())

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date of each exported document

    Only the first line containing "Appeared on" is used in each file;
    files without such a line are left out of the result.

    :param lst_file: iterable of paths of the documents to scan
    :returns: list of ``(fname, date)`` tuples, where ``date`` is the text
              found between ``Appeared on:`` and ``</mark>``
    """
    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # iterate over the argument, not over a notebook-level global
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # first occurrence only: stop reading this file
                    break
    return dates

from glob import glob
# Scan the exported markdown files of the build folder for their appearance date
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
# `res` / `npub` (documents of the last `days` days) feed the index pages built below
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

8  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML of a carousel holding `npub` slides

    :param npub: number of slides; slide ids run from ``slide1`` to ``slide<npub>``
    :returns: HTML text, one element per line
    """
    opening = ["""  <div class="carousel" """,
               """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""]
    cell_template = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    cells = [cell_template.format(k=index) for index in range(1, npub + 1)]
    return '\n'.join(opening + cells + ["  </div>"])

def create_grid(npub=4):
    """ Build the HTML of a flat grid holding `npub` slides

    :param npub: number of slides; slide ids run from ``slide1`` to ``slide<npub>``
    :returns: HTML text, one element per line
    """
    item_template = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    items = [item_template.format(k=index) for index in range(1, npub + 1)]
    return '\n'.join(["""  <div class="grid"> """] + items + ["  </div>"])

In [13]:
# 7-day archive page: one carousel slide per selected document
carousel = create_carousel(npub)
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
slides = ', '.join(f'"slide{index}"' for index in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# template placeholders, substituted in this order
for placeholder, value in (("{%-- carousel:s --%}", carousel),
                           ("{%-- suptitle:s --%}", "7-day archives"),
                           ("{%-- docs:s --%}", docs),
                           ("{%-- slides:s --%}", slides)):
    page = page.replace(placeholder, value)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page as the 7-day archive, restricted to a one-day window
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(path.split('/')[-1]) for path in res])
slides = ', '.join([f'"slide{index}"' for index in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    template_text = tpl.read()

# fill the template placeholders one after the other
page = template_text.replace("{%-- carousel:s --%}", carousel)
page = page.replace("{%-- suptitle:s --%}",  "Daily" )
page = page.replace("{%-- docs:s --%}", docs)
page = page.replace("{%-- slides:s --%}", slides)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

3  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# maximum number of papers shown in the grid
npub = 6
# most recent appearance dates first; keep at most `npub` documents
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), npub))]
# fewer documents than requested may exist: size the grid (slides, title)
# to what was actually selected so that every slide has a document
npub = len(res)
print(len(res), " publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
