# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Figures shipped with papers can be very large: raise Pillow's pixel-count
# limit to one billion pixels so that such images can still be opened.
Image.MAX_IMAGE_PIXELS = 1_000_000_000

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category dedicated to affiliation-check messages."""

class AffiliationError(RuntimeError):
    """Error raised by :func:`validation` when the affiliation check fails."""

def validation(source: str):
    """Affiliation check run on a source before its TeX code is parsed.

    Delegates to :func:`mpia.affiliation_verifications` (verbose mode).

    :param source: source passed on to the affiliation verification
    :raises AffiliationError: when the verification returns anything other
        than ``True``; the returned value is appended to the error message.
    """
    outcome = mpia.affiliation_verifications(source, verbose=True)
    if outcome is True:
        return
    raise AffiliationError("mpia.affiliation_verifications: " + outcome)

        
# Never suppress repeated affiliation warnings: show each occurrence.
warnings.simplefilter(action='always', category=AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: Arxiv paper identifier (e.g. ``2402.01844``)
    :returns: markdown text (an HTML ``<div id="qrcode">`` wrapping the image)
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    target = f"https://arxiv.org/abs/{paper_id}"
    # The quotes must enclose the whole `src` value. Quoting only the arXiv
    # link produced an unquoted attribute whose value (and therefore the
    # QR-code payload) contained literal double quotes.
    return f'<div id="qrcode"><img src="{url}{target}"></div>'

## get list of arxiv paper candidates

We use the MPIA staff ("Mitarbeiter") list webpage from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# Author names come from the MPIA website; the helper automatically filters
# identified non-scientists (:func:`mpia.filter_non_scientists`).
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [entry[1] for entry in mpia_authors]   # initials + fullname

# Today's arXiv listing, plus any manually added references.
new_papers = get_new_papers()
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(ref) for ref in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Compare author names through their initials
    normed_author_list = [mpia.get_initials(name) for name in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list,
                                           normed_mpia_authors,
                                           verbose=True)
    # An entry containing 'mark' is treated as a highlighted (matched) author
    matches = [(hl, orig)
               for hl, orig in zip(hl_authors, paperk['authors'])
               if 'mark' in hl]
    paperk['authors'] = hl_authors
    if not matches:
        continue
    # at least one author matched our list: keep the paper
    candidates.append(paperk)

print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

S. Belladitta  ->  S. Belladitta  |  ['S. Belladitta']
E. Bañados  ->  E. Bañados  |  ['E. Bañados']
S. Bosman  ->  S. Bosman  |  ['S. Bosman']
F. Walter  ->  F. Walter  |  ['F. Walter']
S. Deshmukh  ->  S. Deshmukh  |  ['S. Deshmukh']


Arxiv has 74 new papers today
          3 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
# `documents` collects (paper_id, markdown) for every paper that went through;
# `failed` collects (paper, reason) for every paper that did not.
documents = []
failed = []
for paper in tqdm(candidates):
    # Identifier without its "arxiv:" prefix (lower-cased)
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    folder = f'tmp_{paper_id}'

    try:
        # Only fetch the source when the folder is not already on disk
        if not os.path.isdir(folder):
            folder = retrieve_document_source(paper_id, folder)
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # `validation` rejected the paper: record the reason and move on
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            # fall back on the (already highlighted) arXiv author list
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                normed_mpia_authors, verbose=True)
        # Fall back on the arXiv abstract when none came out of the TeX source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: a failure here only gets printed
        # and the document is kept with its citations untouched)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure: warn, record the reason and carry on with the
        # remaining candidates.
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/3 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2402.01835


extracting tarball to tmp_2402.01835... done.


S. Belladitta  ->  S. Belladitta  |  ['S. Belladitta']
Retrieving document from  https://arxiv.org/e-print/2402.01844


Unable to locate Ghostscript on paths


extracting tarball to tmp_2402.01844...

 done.


E. Bañados  ->  E. Bañados  |  ['E. Bañados']
S. Bosman  ->  S. Bosman  |  ['S. Bosman']
F. Walter  ->  F. Walter  |  ['F. Walter']


list index out of range
Retrieving document from  https://arxiv.org/e-print/2402.02476


extracting tarball to tmp_2402.02476...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today date}.md`.

In [5]:
import datetime
import os

today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# The log directory may not exist yet (e.g. on a fresh build tree)
os.makedirs(os.path.dirname(logfile), exist_ok=True)

with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers that produced a document
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # Sort by reason so that identical reasons share a single heading below
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, other failures in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2402.01844-b31b1b.svg)](https://arxiv.org/abs/arXiv:2402.01844) | **A Massive Protocluster Anchored by a Luminous Quasar at $z=6.63$**  |
|| F. Wang, et al. -- incl., <mark>E. Bañados</mark>, <mark>S. Bosman</mark>, <mark>F. Walter</mark> |
|*Appeared on*| *2024-02-06*|
|*Comments*| *Accepted for publication in ApJL*|
|**Abstract**| Protoclusters, the progenitors of galaxy clusters, trace large scale structures in the early Universe and are important to our understanding of structure formation and galaxy evolution. To date, only a handful of protoclusters have been identified in the Epoch of Reionization (EoR). As one of the rarest populations in the early Universe, distant quasars that host active supermassive black holes are thought to reside in the most massive dark matter halos at that cosmic epoch, and could thus potentially pinpoint some of the earliest protoclusters. In this letter, we report the discovery of a massive protocluster around a luminous quasar at $z=6.63$. This protocluster is anchored by the quasar, and includes three [CII] emitters at $z\sim6.63$, 12 spectroscopically confirmed Ly$\alpha$ emitters (LAEs) at $6.54<z\le6.64$, and a large number of narrow-band imaging selected LAE candidates at the same redshift. This structure has an overall overdensity of $\delta=3.3^{+1.1}_{-0.9}$ within $\sim35\times74$ cMpc$^2$ on the sky and an extreme overdensity of $\delta>30$ in its central region (i.e., $R\lesssim2$ cMpc). We estimate that this protocluster will collapse into a galaxy cluster with a mass of $6.9^{+1.2}_{-1.4}\times10^{15}~M_\odot$ at the current epoch, more massive than the most massive clusters known in the local Universe such as Coma. In the quasar vicinity, we discover a double-peaked LAE which implies that the quasar has a UV lifetime greater than 0.8 Myrs and has already ionized its surrounding intergalactic medium. |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2402.02476-b31b1b.svg)](https://arxiv.org/abs/arXiv:2402.02476) | **Constraints on Triton atmospheric evolution from occultations: 1989-2022**  |
|| B. Sicardy, et al. -- incl., <mark>S. Deshmukh</mark> |
|*Appeared on*| *2024-02-06*|
|*Comments*| *8 pages, 4 figures, accepted for publication in Astronomy and Astrophysics*|
|**Abstract**| Context - Around the year 2000, Triton's south pole experienced an extreme summer solstice that occurs every about 650 years, when the subsolar latitude reached about 50{\deg}. Bracketing this epoch, a few occultations probed Triton's atmosphere in 1989, 1995, 1997, 2008 and 2017. A recent ground-based stellar occultation observed on 6 October 2022 provides a new measurement of Triton's atmospheric pressure which is presented here. Aims- The goal is to constrain the Volatile Transport Models (VTMs) of Triton's atmosphere that is basically in vapor pressure equilibrium with the nitrogen ice at its surface. Methods - Fits to the occultation light curves yield Triton's atmospheric pressure at the reference radius 1400 km, from which the surface pressure is induced. Results - The fits provide a pressure p_1400= 1.211 +/- 0.039 microbar at radius 1400 km (47 km altitude), from which a surface pressure of p_surf= 14.54 +/- 0.47 microbar is induced (1-sigma error bars). To within error bars, this is identical to the pressure derived from the previous occultation of 5 October 2017, p_1400 = 1.18 +/- 0.03 microbar and p_surf= 14.1 +/- 0.4 microbar, respectively. Based on recent models of Triton's volatile cycles, the overall evolution over the last 30 years of the surface pressure is consistent with N2 condensation taking place in the northern hemisphere. However, models typically predict a steady decrease in surface pressure for the period 2005-2060, which is not confirmed by this observation. Complex surface-atmosphere interactions, such as ice albedo runaway and formation of local N2 frosts in the equatorial regions of Triton could explain the relatively constant pressure between 2017 and 2022. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2402.01835-b31b1b.svg)](https://arxiv.org/abs/arXiv:2402.01835) | **Obscuration in high redshift jetted QSO**  |
|| A. Caccianiga, et al. -- incl., <mark>S. Belladitta</mark> |
|*Appeared on*| *2024-02-06*|
|*Comments*| *14 pages, 5 figures. Accepted for publication on A&A*|
|**Abstract**| Obscuration in high-redshift quasi-stellar objects (QSO) has a profound impact on our understanding of the evolution of supermassive black holes across the cosmic time. An accurate quantification of its relevance is therefore mandatory. We present a study aimed at evaluating the importance of obscuration in high redshift jetted QSO, i.e. those active nuclei characterized by the presence of powerful relativistic jets. We compare the observed number of radio detected QSO at different radio flux density limits with the value predicted by the beaming model on the basis of the number of oriented sources (blazars). Any significant deficit of radio-detected QSO compared to the predictions can be caused by the presence of obscuration along large angles from the jet direction. We apply this method to two sizable samples characterized by the same optical limit (mag=21) but significantly different radio density limits (30 mJy and 1 mJy respectively) and containing a total of 87 independent radio-loud 4<z<6.8 QSO, 31 of which classified as blazars. We find a general good agreement between the numbers predicted by the model and those actually observed, with only a marginal discrepancy at 0.5 mJy that could be caused by the lack of completeness of the sample. We conclude that we have no evidence of obscuration within angles 10-20deg from the relativistic jet direction. We also show how the ongoing deep wide-angle radio surveys will be instrumental to test the presence of obscuration at much larger angles, up to 30-35deg. We finally suggest that, depending on the actual fraction of obscured QSO, relativistic jets could be much more common at high redshifts compared to what is usually observed in the local Universe |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error Unable to locate Ghostscript on paths</p> |

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    The figure paths referenced in `md` (markdown ``[Fig...](path)`` links and
    ``<img src="path" .../>`` tags) are copied below `directory`, keeping
    their relative sub-directories; paths containing ``http`` are treated as
    online resources and skipped. The markdown text itself is written to
    ``directory/md_fname``.

    :param md: markdown content to export
    :param md_fname: name of the markdown file to create in `directory`
    :param directory: destination directory (created, with any missing
        parents, when it does not exist)
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs (not mkdir): the target may be nested, e.g. '_build/html/'
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, os.path.join(destdir, os.path.basename(fname)))

    md_path = os.path.join(directory, md_fname)
    with open(md_path, 'w') as fout:
        fout.write(md)
    print("exported in ", md_path)
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [7]:
# One markdown file (plus its figures) per successfully processed paper.
for paper_id, md in documents:
    export_markdown_summary(md, paper_id + ".md", '_build/html/')

exported in  _build/html/2402.01844.md
    + _build/html/tmp_2402.01844/figures/fig_map.png
    + _build/html/tmp_2402.01844/figures/fig_surface_density.png
    + _build/html/tmp_2402.01844/figures/fig_overdensity.png
    + _build/html/tmp_2402.01844/./figures/fig_doublepeak_Lya.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
# Render the markdown of every exported document for a quick visual check.
for _shown_id, _shown_md in documents:
    display(Markdown(_shown_md))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\url}[1]{\href{#1}{#1}}$
$\newcommand{\dodoi}[1]{doi:~\href{http://doi.org/#1}{\nolinkurl{#1}}}$
$\newcommand{\doeprint}[1]{\href{http://ascl.net/#1}{\nolinkurl{http://ascl.net/#1}}}$
$\newcommand{\doarXiv}[1]{\href{https://arxiv.org/abs/#1}{\nolinkurl{https://arxiv.org/abs/#1}}}$
$\newcommand{\}{natexlab}$</div>



<div id="title">

# A Massive Protocluster Anchored by a Luminous Quasar at $z=6.63$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2402.01844-b31b1b.svg)](https://arxiv.org/abs/2402.01844)<mark>Appeared on: 2024-02-06</mark> -  _Accepted for publication in ApJL_

</div>
<div id="authors">

F. Wang, et al. -- incl., <mark>E. Bañados</mark>, <mark>S. Bosman</mark>, <mark>F. Walter</mark>

</div>
<div id="abstract">

**Abstract:** Protoclusters, the progenitors of galaxy clusters, trace large scale structures in the early Universe and are important to our understanding of structure formation and galaxy evolution. To date, only a handful of protoclusters have been identified in the Epoch of Reionization (EoR). As one of the rarest populations in the early Universe, distant quasars that host active supermassive black holes are thought to reside in the most massive dark matter halos at that cosmic epoch, and could thus potentially pinpoint some of the earliest protoclusters. In this letter, we report the discovery of a massive protocluster around a luminous quasar at $z=6.63$ . This protocluster is anchored by the quasar, and includes three [ $\ion{C}{2}$ ] emitters at $z\sim6.63$ , 12 spectroscopically confirmed Ly $\alpha$ emitters (LAEs) at $6.54<z\le6.64$ , and a large number of narrow-band imaging selected LAE candidates at the same redshift. This structure has an overall overdensity of $\delta=3.3^{+1.1}_{-0.9}$ within $\sim35\times74$ cMpc $^2$ on the skyand an extreme overdensity of $\delta>30$ in its central region (i.e., $R\lesssim2$ cMpc). We estimate that this protocluster will collapse into a galaxy cluster with a mass of $6.9^{+1.2}_{-1.4}\times10^{15} M_\odot$ at the current epoch, more massive than the most massive clusters known in the local Universe such as Coma. In the quasar vicinity, we discover a double-peaked LAE which implies that the quasar has a UV lifetime greater than 0.8 Myrs and has already ionized its surrounding intergalactic medium.

</div>

<div id="div_fig1">

<img src="tmp_2402.01844/figures/fig_map.png" alt="Fig2" width="100%"/>

**Figure 2. -** \small** a,** Large scale overdensity of LAEs around quasar J0910--0414.
The background image is HSC $z$-band image.
The red dot denotes the position of the quasar.
The blue dots represent LAE candidates identified from deep Subaru imaging,
the green dots represent spectroscopically confirmed LAEs at $z>6.5$,
and the orange point represents a spectroscopically confirmed low-$z$ galaxy.
The overdensity of LAEs within the yellow box (14$\times$30 arcmin$^2$, or $\sim35\times74$ cMpc$^2$) is $\delta=4.3^{+1.1}_{-0.9}$.
The colored contours are overdensity isolines of LAEs which were estimated using a quartic kernel and a grid size of 1 arcmin. The blue, light blue, pink, and red lines denote
overdensity $\delta=3, 4, 5,$ and $6$, respectively.
The black dashed and solid lines represent $r=50$ cMpc (at $z=6.63$) and $r=40'$($\sim 99$ cMpc at $z=6.63$), respectively. The average LAE surface density was estimated using galaxies located between these two lines.
** b,** Small scale overdensity of [$\ion${C}{2}] emitting galaxies identified from ALMA observations.
** c,**
The zoom-in [$\ion${C}{2}] flux integrated map of the quasar host galaxy and a satellite galaxy (C) with [$\ion${C}{2}] emission.
 (*fig:map*)

</div>
<div id="div_fig2">

<img src="tmp_2402.01844/figures/fig_surface_density.png" alt="Fig4.1" width="50%"/><img src="tmp_2402.01844/figures/fig_overdensity.png" alt="Fig4.2" width="50%"/>

**Figure 4. -** \small**Left,**
 Surface density of LAE candidates as a function of narrow band magnitude.
 The density of LAE in the central $\sim14\times30$ arcmin$^2$ region (solid red points) is $4.3^{+1.1}_{-0.9}$ times higher than that measured in the outskirt ($r>50  {\rm cMpc}$) of our HSC field and that measured in other LAE surveys \citep{Ouchi10, Shibuya18, Ono21}.
**Right,**
Cumulative overdensity profile of galaxies in quasar fields. The orange squares and red dots denote the overdensity profiles measured in J0910--0414 field for [$\ion${C}{2}] emitters and LAEs, respectively. The black open squares and open circles denote the overdensity profiles of [$\ion${C}{2}] emitters and LAEs measured from three quasar fields with [$\ion${C}{2}] companion galaxies.
The black dotted line represents the expected overdensity profile of LAEs in quasar fields \citep{Shen07,Ouchi18,Garcia19}. The magenta dashed line denotes the exptected overdensity profile of CO emitters in quasar fields \citep{Garcia22}. The blue dot-dashed line shows the expected overdensity profile of LAEs in average protocluster fields \citep{Hennawi15}.
 (*fig:density*)

</div>
<div id="div_fig3">

<img src="tmp_2402.01844/./figures/fig_doublepeak_Lya.png" alt="Fig1" width="100%"/>

**Figure 1. -** **A double peaked LAE identified in the highly ionized quasar vicinity.**
The red asterisk indicates the position of the quasar while the black circle highlight the relative position of the LAE-11 to the quasar.
The black line shows the spectrum of the LAE and the gray line denotes the error vector.
LAE-11 shows a widely separated double-peaked $\rm Ly\alpha$ line. The detection of the blue peak of the double peaked Ly$\alpha$ line in this galaxy indicates that the strong quasar radiation has ionized its vicinity.
 (*fig:escape*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2402.01844"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
# Both sides of the age computation are timezone-aware UTC datetimes; mixing a
# local-time "now" with a UTC file timestamp shifts the window by the local
# UTC offset.
now = datetime.now(timezone.utc)
max_age = timedelta(days=days)
res = []
for fk in files:
    # NOTE(review): st_ctime is platform dependent (metadata change on Unix,
    # creation on Windows); st_mtime may be what "modified" means here.
    changed = datetime.fromtimestamp(os.stat(fk).st_ctime, tz=timezone.utc)
    if now - changed <= max_age:
        res.append(fk)
# file names in reverse lexicographic order
res = sorted(res, reverse=True)
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

367  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(filename, date)`` pairs, where ``date`` is a
        string compared lexicographically against ``str(datetime.date)``
        (i.e. expected in ``YYYY-MM-DD`` form)
    :param days: size of the window, in days, counted back from today
    :returns: generator over the filenames whose date is within the window,
        most recent date first
    """
    # Local import: this notebook also runs `from datetime import datetime`,
    # which rebinds the global name `datetime` to the class and would break
    # `datetime.date.today()` depending on the order cells were executed.
    import datetime as _dt

    # The threshold does not depend on the item: compute it once.
    threshold = str(_dt.date.today() - _dt.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date >= threshold:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date of each file

    Each file is read line by line until the first line containing
    ``Appeared on``; files without such a line are left out of the result.

    :param lst_file: iterable of file names to scan
    :returns: list of ``(filename, date string)`` pairs, in input order
    """
    def get_date(line):
        # text between 'Appeared on:' and the closing '</mark>' tag
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument (not over a global list of the same content)
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # only the first occurrence matters
                    break
    return dates

from glob import glob

# Scan the exported documents, read their appearance dates and keep those
# falling within the last `days` days.
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

5  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Build the HTML of a carousel holding `npub` slides

    Slides are numbered from 1 and carry the ids ``slide1`` ... ``slide<npub>``.

    :param npub: number of slides
    :returns: HTML code, one element per line
    """
    opening = [
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    ]
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
        for k in range(1, npub + 1)
    ]
    return '\n'.join(opening + cells + ["  </div>"])

def create_grid(npub=4):
    """ Build the HTML of a flat grid holding `npub` slides

    Slides are numbered from 1 and carry the ids ``slide1`` ... ``slide<npub>``.

    :param npub: number of slides
    :returns: HTML code, one element per line
    """
    items = [
        f"""    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
        for k in range(1, npub + 1)
    ]
    return '\n'.join(["""  <div class="grid"> """] + items + ["  </div>"])

In [12]:
# 7-day archive page: one carousel slide per selected document.
carousel = create_carousel(npub)
docs = ', '.join(f'"{path.rsplit("/", 1)[-1]}"' for path in res)
slides = ', '.join(f'"slide{num}"' for num in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Template placeholders, substituted in this fixed order.
for placeholder, value in (("{%-- carousel:s --%}", carousel),
                           ("{%-- suptitle:s --%}", "7-day archives"),
                           ("{%-- docs:s --%}", docs),
                           ("{%-- slides:s --%}", slides)):
    page = page.replace(placeholder, value)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# Same page as the 7-day archive, restricted to the last day.
days = 1
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(f'"{path.rsplit("/", 1)[-1]}"' for path in res)
slides = ', '.join(f'"slide{num}"' for num in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Template placeholders, substituted in this fixed order.
for placeholder, value in (("{%-- carousel:s --%}", carousel),
                           ("{%-- suptitle:s --%}", "Daily"),
                           ("{%-- docs:s --%}", docs),
                           ("{%-- slides:s --%}", slides)):
    page = page.replace(placeholder, value)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

1  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
# Latest appearance dates first; take at most `npub` documents (the count is
# driven by `npub`, not by a second hard-coded literal).
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), npub)]
# Fewer documents than requested may be available: size the grid on what was
# actually selected so that every slide has a document.
npub = len(res)
print(len(res), " publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
