# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category reserved for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised when a paper does not pass the affiliation check."""

def validation(source: str):
    """Check the affiliation of a paper before its TeX source is parsed.

    Intended to be used as a pre-parsing hook (it is passed as the
    ``validation`` argument of ``latex.LatexDocument``).

    :param source: content handed over to ``mpia.affiliation_verifications``
    :raises AffiliationError: when ``mpia.affiliation_verifications`` returns
        anything other than ``True``; the returned value is included in the
        error message.
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # Formatting (rather than concatenating) keeps the message identical
        # for string results and avoids a TypeError for any other value.
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# Always report AffiliationWarning, even if the same warning was already issued.
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a QR code pointing to the arXiv abstract page, using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g., ``2501.13152``)
    :returns: HTML snippet, an ``<img>`` wrapped in a ``<div id="qrcode">``
    """
    from urllib.parse import quote

    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # Percent-encode the target so that it is a valid query value, and quote
    # the whole ``src`` attribute.  Previously only the arXiv URL was quoted,
    # which left literal double quotes inside the image URL.
    target = quote(f"https://arxiv.org/abs/{paper_id}", safe="")
    txt = f"""<img src="{url}{target}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Strip the LaTeX CJK environment wrapper and keep the wrapped text

    Only the CJK environment with the UTF8 / gbsn arguments is handled;
    more patterns may have to be added.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_environment = re.compile(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})")
    return cjk_environment.sub(r"\2", text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last one is reduced to its initial(s); hyphenated
    tokens keep their hyphen (``Anna-Barbara`` -> ``A.-B.``).  The last token
    is kept in full.  A parenthesised part (often the name written in a
    non-western script) is moved to the end, unchanged.

    :param name: full name
    :returns: initials followed by the last token; an empty string when
              `name` contains no usable token
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.findall(r"\((.*?)\)", name)
        # an unbalanced "(" yields no match; keep the name as it is then
        if found:
            suffix = found[0]
            name = name.replace(f"({suffix})", '')

    tokens = name.split()
    initials = []
    for token in tokens[:-1]:
        # ignore empty pieces left by stray hyphens ("Jean-", "--")
        pieces = [piece for piece in token.split('-') if piece]
        if pieces:
            initials.append('-'.join(piece[0] + '.' for piece in pieces))
    if tokens:
        initials.append(tokens[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff (Mitarbeiter) list from the mpia.de webpage to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Discards IT, administration and technical staff, identified by a
    substring match of `name` against a fixed list of surnames.

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    non_scientists = (
        'Licht', 'Binroth', 'Witzel', 'Jordan',
        'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
        'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
        'Dehen', 'Beckmann', 'Jager', 'Jäger',
    )
    return not any(surname in name for surname in non_scientists)

def add_author_to_list(author_list: list) -> list:
    """ Make sure a fixed set of extra authors is part of the list

    The list is extended in place; names already present are not duplicated.

    :param author_list: list of authors
    :returns: the same list object, updated
    """
    for extra_author in ('T. Henning',):
        if extra_author in author_list:
            continue
        author_list.append(extra_author)
    return author_list

# Build the reference author list:
#  1. get the staff list from the MPIA website
#     (mpia.get_mpia_mitarbeiter_list() already does some filtering),
#  2. keep the names that pass filter_non_scientists,
#  3. add the authors whose MPIA name and paper name are inconsistent.
mpia_authors = add_author_to_list(
    [entry[1]
     for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# Today's arXiv listing, plus any identifier added by hand below.
new_papers = get_new_papers()
add_paper_refs = []
new_papers.extend(list(map(get_paper_from_identifier, add_paper_refs)))

def robust_call(fn, value, *args, **kwargs):
    """Best-effort call: return ``fn(value, *args, **kwargs)``, or `value`
    unchanged when the call raises an ``Exception``."""
    try:
        outcome = fn(value, *args, **kwargs)
    except Exception:
        outcome = value
    return outcome

# Select the papers that have at least one author matching the MPIA list.
candidates = []
for paperk in new_papers:
    # Check author list with their initials
    # (robust_call keeps the original name when mpia.get_initials fails)
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # A match is an entry containing 'mark' -- presumably the <mark> tag added
    # by highlight_authors_in_list (see the rendered author lists); confirm.
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    # NOTE: the author list of every paper is replaced by the normalised /
    # highlighted one, whether or not the paper is selected.
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

N. Martin  ->  N. Martin  |  ['N. Martin']
F. Zagaria  ->  F. Zagaria  |  ['F. Zagaria']
H. Jiang  ->  H. Jiang  |  ['H. Jiang']
Arxiv has 58 new papers today
          3 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Parse every candidate paper and collect:
#   documents: (paper_id, markdown text) for each paper that was fully processed
#   failed:    (paper, reason) for each paper that was rejected or raised
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalise the identifier: lower case, no 'arxiv:' prefix, no surrounding
    # whitespace.
    # NOTE(review): r'\n' is a raw string, i.e. it removes a literal backslash
    # followed by 'n', not a newline character -- confirm this is intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # Retrieve the source only when the folder is not already on disk
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` raises AffiliationError when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is built but never used afterwards.
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        # (different author counts: use the authors of the arXiv record)
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # Use the abstract of the arXiv record when none came out of the TeX source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # Comment line: arXiv badge, appearance date and the arXiv comments (if any)
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        # (best effort: on failure the issue is printed and the text is kept as is)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure: warn, record the paper as failed, go on with the next one
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/3 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2501.13148


extracting tarball to tmp_2501.13148... done.
Retrieving document from  https://arxiv.org/e-print/2501.13152


extracting tarball to tmp_2501.13152...

 done.


N. Martin  ->  N. Martin  |  ['N. Martin']




Found 266 bibliographic references in tmp_2501.13152/Main.bbl.
Issues with the citations
syntax error in line 1229: '=' expected
Retrieving document from  https://arxiv.org/e-print/2501.13877
extracting tarball to tmp_2501.13877...

 done.


F. Zagaria  ->  F. Zagaria  |  ['F. Zagaria']
H. Jiang  ->  H. Jiang  |  ['H. Jiang']


Unable to get page count.
Syntax Error: Couldn't find trailer dictionary
Syntax Error: Couldn't find trailer dictionary
Syntax Error: Couldn't read xref table



### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today date}.md`.

In [6]:
import datetime
# Write the daily log (and display the same content in the notebook):
# first the papers that were processed, then the failed ones grouped by reason.
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# open(..., 'w') does not create missing directories
os.makedirs(os.path.dirname(logfile), exist_ok=True)

with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    # heading spelled like the displayed one (it used to read "Sucessful")
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    success = {k[0] for k in documents}
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sorted by reason so that identical reasons share one sub-heading
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are expected (green); anything else is a real error (red)
        color = 'green' if 'affiliation' in reason else 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # every failure is displayed; filter on `color` here to show only the
        # important (red) errors in the notebook
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.13152-b31b1b.svg)](https://arxiv.org/abs/2501.13152) | **The Hubble Space Telescope Survey of M31 Satellite Galaxies IV. Survey Overview and Lifetime Star Formation Histories**  |
|| A. Savino, et al. -- incl., <mark>N. Martin</mark> |
|*Appeared on*| *2025-01-24*|
|*Comments*| *Accepted for publication on ApJ. 47 pages, 24 figures, 12 tables. Corresponding HLSP data can be retrieved at: this https URL*|
|**Abstract**|            From $>1000$ orbits of HST imaging, we present deep homogeneous resolved star color-magnitude diagrams that reach the oldest main sequence turnoff and uniformly measured star formation histories (SFHs) of 36 dwarf galaxies ($-6 \ge M_V \ge -17$) associated with the M31 halo, and for 10 additional fields in M31, M33, and the Giant Stellar Stream. From our SFHs we find: i) the median stellar age and quenching epoch of M31 satellites correlate with galaxy luminosity and galactocentric distance. Satellite luminosity and present-day distance from M31 predict the satellite quenching epoch to within $1.8$ Gyr at all epochs. This tight relationship highlights the fundamental connection between satellite halo mass, environmental history, and star formation duration. ii) There is no difference between the median SFH of galaxies on and off the great plane of Andromeda satellites. iii) $\sim50$\% of our M31 satellites show prominent ancient star formation ($>12$ Gyr ago) followed by delayed quenching ($8-10$ Gyr ago), which is not commonly observed among the MW satellites. iv) A comparison with TNG50 and FIRE-2 simulated satellite dwarfs around M31-like hosts show that some of these trends (dependence of SFH on satellite luminosity) are reproduced in the simulations while others (dependence of SFH on galactocentric distance, presence of the delayed-quenching population) are weaker or absent. We provide all photometric catalogs and SFHs as High-Level Science Products on MAST.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.13148-b31b1b.svg)](https://arxiv.org/abs/2501.13148) | **Using simulation based inference on tidally perturbed dwarf galaxies: the dynamics of NGC205**  |
|| A. Widmark, K. V. Johnston |
|*Appeared on*| *2025-01-24*|
|*Comments*| *18 pages, 8 figures; appendix adds 2 pages, 2 figures*|
|**Abstract**|            We develop a novel approach to performing precision inference on tidally perturbed dwarf galaxies. We use a Bayesian inference framework of implicit likelihood inference, previously applied mainly in the field of cosmology, based on forward simulation, data compression, and likelihood emulation with neural density estimators. We consider the case of NGC205, a satellite of M31. NGC205 exhibits an S-shape in the mean line-of-sight velocity along its semi-major spatial axis, suggestive of tidal perturbation. We demonstrate that this velocity profile can be qualitatively reproduced even if NGC205 was in a spherically symmetric and isotropic state before its most recent pericenter passage. We apply our inference method to mock data and show that the precise shape of a perturbed satellite's sky-projected internal velocity field can be highly informative of both its orbit and total mass density profile, even in the absence of proper motion information. For the actual NGC205, our method is hampered because the available data only covers a line along its semi-major axis, rather than the full sky-projected field. This shortcoming could be addressed with another round of observations.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.13877-b31b1b.svg)](https://arxiv.org/abs/2501.13877) | **Dust characterization of protoplanetary disks: a guide to multi-wavelength analyses and accurate dust mass measurements**  |
|| E. M. Viscardi, et al. -- incl., <mark>F. Zagaria</mark>, <mark>H. Jiang</mark> |
|*Appeared on*| *2025-01-24*|
|*Comments*| **|
|**Abstract**|            Multi-wavelength dust continuum observations of protoplanetary disks are essential for accurately measuring two key ingredients of planets formation theories: the dust mass and grain size. Unfortunately, they are also extremely time-expensive. We aim to investigate the most economic way of performing this analysis. We benchmark the dust characterization analysis on multi-wavelength observations of two disk models. We test three different combinations of bands (in the 0.45 mm $\to$ 7.46 mm range) to see how optically thick and thin observations aid the reconstruction of the dust properties for different morphologies and in three different dust mass regimes. We also test different spatial resolutions. Dust properties are robustly measured in a multi-band analysis if optically thin observations are included. For typical disks, this requires wavelengths longer than 3 mm. High-resolution (< 0.03"-0.05") is fundamental to resolve the changes in dust content of substructures. However, lower-resolution results still provide an accurate measurement of the total dust mass and of the level of grain growth of rings. Additionally, we propose a new approach that successfully combines lower and higher resolution observations in the multi-wavelength analysis without losing spatial information. We also test individually enhancing the resolution of each radial intensity profile with Frank but we note the presence of artifacts. Finally, we discuss on the total dust mass that we derive from the SED analyses and compare it with the traditional method of deriving dust masses from millimeter fluxes. Accurate dust mass measurements from the SED analysis can be derived by including optically thin tracers. On the other hand, single-wavelength flux-based masses are always underestimated by even more than one order of magnitude.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error Unable to get page count.
Syntax Warning: May not be a PDF file (continuing anyway)
Syntax Error: Couldn't find trailer dictionary
Syntax Error: Couldn't find trailer dictionary
Syntax Error: Couldn't read xref table
</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Local figures referenced in `md` (markdown ``[Fig...](path)`` links and
    ``<img src="path" .../>`` tags) are copied below `directory`, keeping
    their relative path; figures whose path contains 'http' are not copied.

    :param md: markdown text to export
    :param md_fname: file name of the exported markdown document
    :param directory: destination directory, created if it does not exist
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs: also works when the parent directories are missing
        os.makedirs(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    # all referenced figures are listed, including those that were not copied
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write every processed paper (markdown and figures) into the site folder.
for paper_id, md in documents:
    export_markdown_summary(md=md,
                            md_fname=f"{paper_id:s}.md",
                            directory='_build/html/')

found figures ['tmp_2501.13152/./SFH_Distance.png', 'tmp_2501.13152/./A11_Culling.png', 'tmp_2501.13152/./Mosaic_SFHs.png']
copying  tmp_2501.13152/./SFH_Distance.png to _build/html/
copying  tmp_2501.13152/./A11_Culling.png to _build/html/
copying  tmp_2501.13152/./Mosaic_SFHs.png to _build/html/
exported in  _build/html/2501.13152.md
    + _build/html/tmp_2501.13152/./SFH_Distance.png
    + _build/html/tmp_2501.13152/./A11_Culling.png
    + _build/html/tmp_2501.13152/./Mosaic_SFHs.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render the markdown text of each processed paper.
for document in documents:
    display(Markdown(document[1]))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand{\A}[1]{And~{\sc #1}}$
$\newcommand{\refs}{\textbf{REFS}}$</div>



<div id="title">

# The Hubble Space Telescope Survey of M31 Satellite Galaxies IV. \ Survey Overview and Lifetime Star Formation Histories

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2501.13152-b31b1b.svg)](https://arxiv.org/abs/2501.13152)<mark>Appeared on: 2025-01-24</mark> -  _Accepted for publication on ApJ. 47 pages, 24 figures, 12 tables. Corresponding HLSP data can be retrieved at: this https URL_

</div>
<div id="authors">

A. Savino, et al. -- incl., <mark>N. Martin</mark>

</div>
<div id="abstract">

**Abstract:** From $>1000$ orbits of HST imaging, we present deep homogeneous resolved star color-magnitude diagrams that reach the oldest main sequence turnoff and uniformly measured star formation histories (SFHs) of 36 dwarf galaxies ( $-6 \ge M_V \ge -17$ ) associated with the M31 halo, and for 10 additional fields in M31, M33, and the Giant Stellar Stream.From our SFHs we find: i) the median stellar age and quenching epoch of M31 satellites correlate with galaxy luminosity and galactocentric distance. Satellite luminosity and present-day distance from M31 predict the satellite quenching epoch to  within $1.8$ Gyr at all epochs. This tight relationship highlights the fundamental connection between satellite halo mass, environmental history, and star formation duration. ii) There is no difference between the median SFH of galaxies on and off the great plane of Andromeda satellites. iii) $\sim50$ \% of our M31 satellites show prominent ancient star formation ( $>12$ Gyr ago) followed by delayed quenching ( $8-10$ Gyr ago), which is not commonly observed among the MW satellites. iv) A comparison withTNG50 and FIRE-2 simulated satellite dwarfs around M31-like hosts show that some of these trends (dependence of SFH on satellite luminosity) are reproduced in the simulations while others (dependence of SFH on galactocentric distance, presence of the delayed-quenching population) are weaker or absent. We provide all photometric catalogs and SFHs as High-Level Science Products on MAST.

</div>

<div id="div_fig1">

<img src="tmp_2501.13152/./SFH_Distance.png" alt="Fig14" width="100%"/>

**Figure 14. -** Median star formation epoch ($\tau_{50}$, left), and quenching epoch ($\tau_{q}$, right) as function of distance from M31 (taken from \citealt{Savino22}). The symbols are color-coded by the value of absolute luminosity. Star forming galaxies are shown as star symbols. Notable outliers from the general trend are highlighted by their name. (*Fig:Tq_DM31*)

</div>
<div id="div_fig2">

<img src="tmp_2501.13152/./A11_Culling.png" alt="Fig8" width="100%"/>

**Figure 8. -** The ACS CMD of $\A${XI}($M_V = -6.4$), at different steps of our contaminant cleaning procedure. A) Raw photometric catalog, as output from DOLPHOT. B) Photometric catalog, after the quality cuts of \S \ref{Sec:Culling} have been applied. C) Photometric catalog, after the bright-star mask (\S \ref{Sec:Masks}) has been applied. D) Final photometric catalog, after the spatial cut of \S \ref{Sec:SpatialCut} has been applied.  (*Fig:Culling*)

</div>
<div id="div_fig3">

<img src="tmp_2501.13152/./Mosaic_SFHs.png" alt="Fig12" width="100%"/>

**Figure 12. -** Cumulative SFHs for the 36 dwarf galaxies in our sample, measured from the ACS fields. The black line shows the best-fit SFH, the yellow region show the statistical uncertainties and the grey region show the systematic uncertainties. The epoch of reionization is highlighted by the red shaded region \citep[$6 < z < 10$, corresponding to $13.33 Gyr < t < 12.87 Gyr$ with Planck cosmological parameters,][]{Planck20,Robertson22}. The galaxies are ordered by absolute luminosity. (*Fig:SFHs*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2501.13152"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Count the exported .md files whose status-change time (st_ctime) lies
# within the last `days` days.
files = glob('_build/html/*.md')
days = 7
# Timezone-aware UTC on both sides of the subtraction: the previous code
# compared the local time with a UTC timestamp stripped of its timezone.
now = datetime.now(tz=timezone.utc)
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= timedelta(days=days):
        # total_seconds(): `.seconds` ignores the whole days of the difference
        res.append((delta.total_seconds(), fk))
# file names in reverse alphabetical order
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

315  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(fname, date)`` pairs; `date` is compared as a
                string with an ISO ``YYYY-MM-DD`` date, so it is expected to
                use that format
    :param days: number of days to look back from today
    :returns: generator of the selected file names, most recent date first
    """
    # Local import: another cell of this notebook rebinds the global name
    # `datetime` to the `datetime.datetime` class.
    import datetime as _dt

    # the cutoff does not depend on the item: compute it once
    cutoff = str(_dt.date.today() - _dt.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date of each exported document

    Only the first line containing "Appeared on" is used for each file.

    :param lst_file: iterable of file names to scan
    :returns: list of ``(fname, date)`` pairs; files without an
              "Appeared on" line are left out
    """

    def get_date(line):
        # text between 'Appeared on:' and the closing </mark> tag
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # iterate over the argument (the global `lst` was used by mistake)
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    break
    return dates

from glob import glob
# Appearance dates of all exported documents, then the selection of the
# documents of the last `days` days.
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = [*get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

4  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML of a carousel made of `npub` slides

    Slides are numbered from 1 and carry the ids ``slide1`` ... ``slide<npub>``.
    """
    opening = '\n'.join((
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    ))
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join([opening, *cells, "  </div>"])

def create_grid(npub=4):
    """ Build the HTML of a flat grid made of `npub` slides

    Slides are numbered from 1 and carry the ids ``slide1`` ... ``slide<npub>``.
    """
    rows = ["""  <div class="grid"> """]
    rows.extend(
        f"""    <div class="grid-item"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    )
    rows.append("  </div>")
    return '\n'.join(rows)

In [13]:
# Fill the daily template with the documents of the last 7 days.
carousel = create_carousel(npub)
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
slides = ', '.join(f'"slide{idx}"' for idx in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# placeholders are substituted in this order
page = page.replace("{%-- carousel:s --%}", carousel)
page = page.replace("{%-- suptitle:s --%}", "7-day archives")
page = page.replace("{%-- docs:s --%}", docs)
page = page.replace("{%-- slides:s --%}", slides)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
# Same page as the 7-day one, restricted to the last day.
days = 1
res = [*get_last_n_days(dates, days)]
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
slides = ', '.join(f'"slide{idx}"' for idx in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# placeholders are substituted in this order
page = page.replace("{%-- carousel:s --%}", carousel)
page = page.replace("{%-- suptitle:s --%}", "Daily")
page = page.replace("{%-- docs:s --%}", docs)
page = page.replace("{%-- slides:s --%}", slides)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

3  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# Maximum number of papers shown in the grid
npub = 6
# Most recent appearance dates first; at most `npub` documents
# (the limit used to be hard-coded a second time in the islice call).
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), npub))]
# With fewer documents than requested, shrink the grid so that every slide
# is paired with a document.
npub = len(res)
print(f"{npub:d} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
