# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big: set the pixel-count limit on PIL's Image
# module to 10^9 pixels so that very large figures can still be handled.
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised when the affiliation check of a paper fails."""

def validation(source: str):
    """Check the affiliations found in a paper source before it is parsed.

    This function is handed to ``latex.LatexDocument`` as its ``validation``
    callback, which allows a paper to be rejected before parsing the TeX code.

    :param source: source text passed on to ``mpia.affiliation_verifications``
    :raises AffiliationError: if ``mpia.affiliation_verifications`` returns
        anything other than ``True``; the returned value is appended to the
        error message.
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # Format instead of concatenating so that a non-string result cannot
        # turn the intended AffiliationError into a TypeError.
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# Always show AffiliationWarning, including repeated occurrences
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g. ``2408.00063``)
    :returns: HTML snippet (an ``<img>`` wrapped in ``<div id="qrcode">``)
              that can be appended to the markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The whole URL is the quoted value of `src`. Quoting only the arXiv link
    # leaves the attribute unquoted and puts literal quote characters into the
    # data encoded by the QR code.
    txt = f"""<img src="{url}https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the staff ("Mitarbeiter") list from the MPIA website (mpia.de) to get the author names.
We then retrieve all new papers from arXiv and match their authors against this list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects names of IT, administration and technical staff. A name is
    rejected as soon as it contains one of the listed surnames as a substring.

    :param name: name to check
    :returns: False if name is not a scientist, True otherwise
    """
    excluded_surnames = (
        'Wolf', 'Licht', 'Binroth', 'Witzel', 'Jordan',
        'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
        'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
        'Dehen', 'Beckmann', 'Jager', 'Jäger',
    )
    return not any(surname in name for surname in excluded_surnames)

def add_author_to_list(author_list: list) -> list:
    """ Append the hard-coded extra authors that are not already listed

    The list is modified in place and also returned.

    :param author_list: list of authors
    :returns: updated list of authors (the same list object)
    """
    extra_authors = ('T. Henning',)

    # The membership test runs lazily, i.e. against the list as it grows
    author_list.extend(
        extra for extra in extra_authors if extra not in author_list
    )
    return author_list

# Staff list from the MPIA website
# (mpia.get_mpia_mitarbeiter_list() already does some filtering)
staff_rows = mpia.get_mpia_mitarbeiter_list()
# Keep the second field of each row when it passes the non-scientist filter,
# then add the authors whose MPIA name and paper author name are inconsistent.
mpia_authors = add_author_to_list(
    [row[1] for row in staff_rows if filter_non_scientists(row[1])]
)

In [4]:
# New arXiv papers, plus any manually listed references
new_papers = get_new_papers()
add_paper_refs = []
new_papers.extend(list(map(get_paper_from_identifier, add_paper_refs)))

candidates = []
for entry in new_papers:
    original_authors = entry['authors']
    # Compare author names in their "initials" form against the MPIA list
    initials = [mpia.get_initials(author) for author in original_authors]
    highlighted = highlight_authors_in_list(initials, mpia_authors, verbose=True)
    # A highlighted entry containing 'mark' counts as a match
    has_match = any('mark' in hl for hl, _ in zip(highlighted, original_authors))
    # The paper keeps the highlighted author list whether or not it matched
    entry['authors'] = highlighted
    if not has_match:
        continue
    # only papers with at least one matched author become candidates
    candidates.append(entry)

print(f"Arxiv has {len(new_papers):,d} new papers today")
print(f"          {len(candidates):,d} with possible author matches")

F. Walter  ->  F. Walter  |  ['F. Walter']
S. Li  ->  S. Li  |  ['S. Li']
K. El-Badry  ->  K. El-Badry  |  ['K. El-Badry']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
Arxiv has 55 new papers today
          6 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Process every candidate paper: fetch its source, check the affiliations,
# parse the TeX and build the markdown summary.
# `documents` collects (paper_id, markdown text) for papers that went through;
# `failed` collects (paper, reason) for papers that were rejected or raised.
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalise the identifier: lower case, no 'arxiv:' prefix, no surrounding
    # whitespace.
    # NOTE(review): r'\n' is a raw string, so this removes a literal backslash
    # followed by 'n', not a newline character -- confirm which one is intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    # Local folder holding the paper source; reused if it already exists
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            # `folder` is rebound to whatever retrieve_document_source returns
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` raises AffiliationError when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is assigned but never used in this cell
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well:
        # if the parsed author count differs from the arXiv listing, fall back
        # to the (already highlighted) arXiv author list.
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # Fall back to the arXiv abstract when none was extracted from the TeX
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # The "Appeared on: ...</mark>" marker written here is the text that
        # extract_appearance_dates() later searches for in the exported files.
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: on failure the error is printed and
        # the markdown is kept without replaced citations)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure (download, extraction, parsing, ...) is reported as
        # a warning and recorded, so that the loop continues with the next paper
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/6 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2408.00063


extracting tarball to tmp_2408.00063...

 done.
Retrieving document from  https://arxiv.org/e-print/2408.00065



  exec(code_obj, self.user_global_ns, self.user_ns)
'PosixPath' object is not subscriptable


extracting tarball to tmp_2408.00065...

 done.
Retrieving document from  https://arxiv.org/e-print/2408.00078


extracting tarball to tmp_2408.00078...

 done.
Retrieving document from  https://arxiv.org/e-print/2408.00268


extracting tarball to tmp_2408.00268...

 done.
Retrieving document from  https://arxiv.org/e-print/2408.00609


extracting tarball to tmp_2408.00609...

 done.
Retrieving document from  https://arxiv.org/e-print/2408.00625


not a gzip file


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# open() does not create missing parent directories: make sure the log folder
# exists so that the export also works on a fresh build tree.
os.makedirs(os.path.dirname(logfile), exist_ok=True)

with open(logfile, 'w') as logs:
    # Header and summary counts
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))

    # Successful papers: candidates whose identifier is among the exported documents
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    # Failed papers, sorted by reason so that one heading is written per
    # distinct reason
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # Affiliation rejections are shown in green, any other failure in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # All failures are displayed here as well as written to the log file
        display(data)

## Successful papers

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2408.00065-b31b1b.svg)](https://arxiv.org/abs/2408.00065) | **Tip of the Red Giant Branch Distances with JWST. II. I-band Measurements in a Sample of Hosts of 9 SN Ia Match HST Cepheids**  |
|| <mark>S. Li</mark>, et al. |
|*Appeared on*| *2024-08-02*|
|*Comments*| *15 pages, 5 figures, 4 tables, submitted to ApJ, comments welcome*|
|**Abstract**|            The Hubble Tension, a >5 sigma discrepancy between direct and indirect measurements of the Hubble constant (H0), has persisted for a decade and motivated intense scrutiny of the paths used to infer H0. Comparing independently-derived distances for a set of galaxies with different standard candles, such as the tip of the red giant branch (TRGB) and Cepheid variables, can test for systematics in the middle rung of the distance ladder. The I band is the preferred filter for measuring the TRGB due to constancy with color, a result of low sensitivity to population differences in age and metallicity supported by stellar models. We use James Webb Space Telescope (JWST) observations with the maser host NGC 4258 as our geometric anchor to measure I-band (F090W vs F090W-F150W) TRGB distances to 7 hosts of 9 Type Ia supernovae (SNe Ia) within 27 Mpc: NGC 1448, NGC 1559, NGC 2525, NGC 3370, NGC 3447, NGC 5584, and NGC 5643. We compare these with Hubble Space Telescope (HST) Cepheid-based relative distance moduli for the same galaxies and anchor. We find no evidence of a difference between their weighted means, 0.01 +/- 0.04 (stat) +/- 0.04 (sys) mag. We produce fourteen variants of the TRGB analysis, altering the smoothing level and color range used to measure the tips to explore their impact. For some hosts, this changes the identification of the strongest peak, but this causes little change to the sample mean difference producing a full range of 0.01 to 0.03 mag, all consistent at 1 sigma with no difference. The result matches past comparisons of I-band TRGB and Cepheids when both use HST. SNe and anchor samples observed with JWST are too small to yield a measure of H0 that is competitive with the HST sample of 42 SNe Ia and 4 anchors; however, they already provide a vital systematic crosscheck to HST measurements of the distance ladder.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2408.00078-b31b1b.svg)](https://arxiv.org/abs/2408.00078) | **Searching for New Cataclysmic Variables in the Chandra Source Catalog**  |
|| I. Galiullin, et al. -- incl., <mark>K. El-Badry</mark> |
|*Appeared on*| *2024-08-02*|
|*Comments*| *20 pages, 15 figures and 8 tables. Accepted for publication in Astronomy & Astrophysics*|
|**Abstract**|            Cataclysmic variables (CVs) are compact binary systems in which a white dwarf accretes matter from a Roche-lobe-filling companion star. In this study, we searched for new CVs in the Milky Way in the Chandra Source Catalog v2.0, cross-matched with Gaia Data Release 3 (DR3). We identified new CV candidates by combining X-ray and optical data in a color-color diagram called the ``X-ray Main Sequence". We used two different cuts in this diagram to compile pure and optically variable samples of CV candidates. We undertook optical spectroscopic follow-up observations with the Keck and Palomar Observatories to confirm the nature of these sources. We assembled a sample of 25,887 Galactic X-ray sources and found 14 new CV candidates. Seven objects show X-ray and/or optical variability. All sources show X-ray luminosity in the $\rm 10^{29}-10^{32}$ $\rm erg\ s^{-1}$ range, and their X-ray spectra can be approximated by a power-law model with photon indices in the $\rm \Gamma \sim 1-3$ range or an optically thin thermal emission model in the $\rm kT \sim 1-70$ keV range. We spectroscopically confirmed four CVs, discovering two new polars, one low accretion rate polar and a WZ~Sge-like low accretion rate CV. X-ray and optical properties of the other 9 objects suggest that they are also CVs (likely magnetic or dwarf novae), and one other object could be an eclipsing binary, but revealing their true nature requires further observations. These results show that a joint X-ray and optical analysis can be a powerful tool for finding new CVs in large X-ray and optical catalogs. X-ray observations such as those by Chandra are particularly efficient at discovering magnetic and low accretion rate CVs, which could be missed by purely optical surveys.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2408.00268-b31b1b.svg)](https://arxiv.org/abs/2408.00268) | **Prospects for Cosmological Research with the FAST Array: 21-cm Intensity Mapping Survey Observation Strategies**  |
|| J.-D. Pan, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2024-08-02*|
|*Comments*| *9 pages, 5 figures*|
|**Abstract**|            Precise cosmological measurements are essential for understanding the evolution of the universe and the nature of dark energy. The Five-hundred-meter Aperture Spherical Telescope (FAST), the most sensitive single-dish radio telescope, has the potential to provide the precise cosmological measurements through neutral hydrogen 21-cm intensity mapping sky survey. This paper primarily explores the potential of technological upgrades for FAST in cosmology. The most crucial upgrade begins with equipping FAST with a wide-band receiver ($0 < z < 2.5$). This upgrade can enable FAST to achieve higher precision in cosmological parameter estimation than the Square Kilometre Array Phase-1 Mid frequency. On this basis, expanding to a FAST array (FASTA) consisting of six identical FAST would offer significant improvements in precision compared to FAST. Additionally, compared with the current results from the data combination of cosmic microwave background, baryon acoustic oscillations (optical galaxy surveys), and type Ia supernovae, FASTA can provide comparable constraints. Specifically, for the dark-energy equation-of-state parameters, FASTA can achieve $\sigma(w_0) = 0.09$ and $\sigma(w_a) = 0.33$.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2408.00609-b31b1b.svg)](https://arxiv.org/abs/2408.00609) | **Nanohertz gravitational waves from a quasar-based supermassive black hole binary population model as dark sirens**  |
|| S.-R. Xiao, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2024-08-02*|
|*Comments*| *15 pages, 7 figures*|
|**Abstract**|            Recently, several pulsar timing array (PTA) projects have detected evidence of the existence of a stochastic gravitational wave background (SGWB) in the nanohertz frequency band, providing confidence in detecting individual supermassive black hole binaries (SMBHBs) in the future. Nanohertz GWs emitted by inspiraling SMBHBs encode the luminosity distances of SMBHBs. They can serve as dark sirens to explore the cosmic expansion history via a statistical method to obtain the redshift information of GW sources' host galaxies using galaxy catalogs. The theoretical analysis of the dark siren method relies on the modeling of the population of SMBHBs. Using a population model consistent with the latest SGWB observations is essential, as the SGWB provides significant information about the distribution of SMBHBs. In this work, we employ a quasar-based model, which can self-consistently account for the SGWB amplitude, to estimate the population of SMBHBs. We constrain the Hubble constant using the mock GW data from different detection cases of PTAs in the future. Our results show that a PTA consisting of 100 pulsars with a white noise level of 20 ns could measure the Hubble constant with a precision close to $1\%$ over a 10-year observation period, and a PTA with 200 pulsars may achieve this goal over a 5-year observation period. The results indicate that modeling the SMBHB population significantly influences the analysis of dark sirens, and SMBHB dark sirens have the potential to be developed as a valuable cosmological probe.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2408.00063-b31b1b.svg)](https://arxiv.org/abs/2408.00063) | **An ALMA survey of submillimetre galaxies in the Extended Chandra Deep Field South: an unbiased study of SMG environments measured with narrowband imaging**  |
|| T. M. Cornish, et al. -- incl., <mark>F. Walter</mark> |
|*Appeared on*| *2024-08-02*|
|*Comments*| *21 pages, 12 figures. Accepted for publication in MNRAS*|
|**Abstract**|            Submillimetre galaxies (SMGs) are some of the most extreme star-forming systems in the Universe, whose place in the framework of galaxy evolution is as yet uncertain. It has been hypothesised that SMGs are progenitors of local early-type galaxies, requiring that SMGs generally reside in galaxy cluster progenitors at high redshift. We test this hypothesis and explore SMG environments using a narrowband VLT/HAWK-I+GRAAL study of H$\alpha$ and [OIII] emitters around an unbiased sample of three ALMA-identified and spectroscopically-confirmed SMGs at $z \sim 2.3$ and $z \sim 3.3$, where these SMGs were selected solely on spectroscopic redshift. Comparing with blank-field observations at similar epochs, we find that one of the three SMGs lies in an overdensity of emission-line sources on the $\sim4$ Mpc scale of the HAWK-I field of view, with overdensity parameter $\delta_{g} = 2.6^{+1.4}_{-1.2}$. A second SMG is significantly overdense only on $\lesssim 1.6$ Mpc scales and the final SMG is consistent with residing in a blank field environment. The total masses of the two overdensities are estimated to be $\log(M_{h}/{\rm M}_{\odot}) =$12.1--14.4, leading to present-day masses of $\log(M_{h,z=0}/{\rm M}_{\odot}) =$12.9--15.9. These results imply that SMGs occupy a range of environments, from overdense protoclusters or protogroups to the blank field, suggesting that while some SMGs are strong candidates for the progenitors of massive elliptical galaxies in clusters, this may not be their only possible evolutionary pathway.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error 'PosixPath' object is not subscriptable</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2408.00625-b31b1b.svg)](https://arxiv.org/abs/2408.00625) | **Nitrogen Loss from Pluto's Birth to the Present Day via Atmospheric Escape, Photochemical Destruction, and Impact Erosion**  |
|| P. E. Johnson, L. A. Young, D. Nesvorny, <mark>X. Zhang</mark> |
|*Appeared on*| *2024-08-02*|
|*Comments*| *accepted for publication in the Planetary Science Journal*|
|**Abstract**|            We estimate the loss of nitrogen from Pluto over its lifetime, including the giant planet instability period, which we term the "Wild Years." We analyze the orbital migration of 53 simulated Plutinos, which are Kuiper Belt Objects (KBOs) captured into 3:2 mean-motion resonance with Neptune during the instability. This orbital migration brought the Plutinos from 20 to 30 au to their present-day orbits near 40 au along a nonlinear path that includes orbits with semimajor axes from 10 to 100 au. We model the thermal history that results from this migration and estimate the volatile loss rates due to the ever-changing thermal environment. Due to the early Sun's enhanced ultraviolet radiation, the photochemical destruction rate during the Wild Years was a factor of 100 higher than the present-day rate, but this only results in a loss of ~10 m global equivalent layer (GEL). The enhanced Jeans escape rate varies wildly with time, and a net loss of ~100 cm GEL is predicted. Additionally, we model the impact history during the migration and find that impacts are a net source, not loss, of N2, contributing ~100 cm GEL. The 100 cm GEL is 0.1% of the amount of N2 in Sputnik Planitia. We therefore conclude that Pluto did not lose an excessive amount of volatiles during the Wild Years, and its primordial volatile inventory can be approximated as its present-day inventory. However, significant fractions of this small total loss of N2 occurred during the Wild Years, so estimates made using present-day rates will be underestimates.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error not a gzip file</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Figure paths are collected from ``[Fig...](path)`` links and from
    ``<img src="path" ... />`` tags in the markdown text. Each local figure is
    copied below ``directory`` under its original relative path. Entries
    containing ``http`` and files that do not exist are skipped.

    :param md: markdown text to export
    :param md_fname: file name of the exported markdown document
    :param directory: output directory (created, with parents, if missing)
    :raises RuntimeError: if ``directory`` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs (not mkdir) so that a missing parent such as `_build`
        # does not make the export fail
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # Keep the figure's relative path below the output directory
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    # Lists every figure reference found, including the skipped ones
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write one markdown file (plus its figures) per successfully processed paper
for exported_id, exported_md in documents:
    export_markdown_summary(exported_md, "{0:s}.md".format(exported_id), '_build/html/')

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render the markdown text of every exported document for a quick visual check
for document in documents:
    display(Markdown(document[1]))

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# List the exported markdown files whose timestamp is at most `days` days old.
files = glob('_build/html/*.md')
days = 7
# Timezone-aware "now": file timestamps are converted to UTC below, and
# subtracting them from a naive local-time "now" would shift every age by the
# local UTC offset.
now = datetime.now(timezone.utc)
max_age = timedelta(days=days)
res = []
for fk in files:
    # NOTE(review): st_ctime is the metadata-change time on Unix and the
    # creation time on Windows; confirm st_mtime is not what is wanted here.
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= max_age:
        # total_seconds() is the full age; `.seconds` is only the seconds
        # component of the timedelta and ignores whole days
        res.append((delta.total_seconds(), fk))
# Keep the file names only, in reverse alphabetical order
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

150  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(fname, date)`` pairs; dates are compared as
                strings against ``str(datetime.date)`` values (``YYYY-MM-DD``)
    :param days: number of days to go back from today
    :returns: generator over the file names dated on or after the cut-off,
              most recent date first
    """
    # Imported locally under a private alias: the cells of this notebook bind
    # the global name `datetime` to the class in one place and to the module
    # in another, so the global cannot be relied upon here.
    import datetime as _dt

    # The cut-off does not depend on the entry: compute it once
    cutoff = str(_dt.date.today() - _dt.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date from each of the given files

    Only the first line containing ``Appeared on`` is used in each file;
    files without such a line are left out of the result.

    :param lst_file: iterable of file names to read
    :returns: list of ``(fname, date)`` tuples, where ``date`` is the stripped
              text between ``Appeared on:`` and ``</mark>`` on that line
    """
    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument, not over a global that happens to be in scope
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # one date per file: stop reading at the first match
                    break
    return dates

from glob import glob
# Every file in the build folder whose name ends in 'md'
lst = glob('_build/html/*md')
# Length of the archive window, in days
days = 7
# (file name, appearance date) pairs; `dates` is reused by the cells below
dates = extract_appearance_dates(lst)
# Files dated within the window, and how many there are
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

4  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML of a carousel made of `npub` placeholder slides

    Slides are numbered from 1 and carry the ids ``slide1`` ... ``slide{npub}``.

    :param npub: number of slides
    :returns: HTML text, one element per line
    """
    opening = (
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    )
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join([*opening, *cells, "  </div>"])

def create_grid(npub=4):
    """ Build the HTML of a flat grid made of `npub` placeholder slides

    Slides are numbered from 1 and carry the ids ``slide1`` ... ``slide{npub}``.

    :param npub: number of grid items
    :returns: HTML text, one element per line
    """
    items = [
        f"""    <div class="grid-item"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join(["""  <div class="grid"> """, *items, "  </div>"])

In [13]:
# Build the "7-day archives" index page from the daily template
carousel = create_carousel(npub)
# Quoted, comma-separated file names (without their folder) and slide ids
docs = ', '.join(f'"{path.split("/")[-1]}"' for path in res)
slides = ', '.join(f'"slide{idx}"' for idx in range(1, npub + 1))

with open("daily_template.html", "r") as template_file:
    template_html = template_file.read()

# Fill the template placeholders, in this order
page = template_html
for placeholder, value in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "7-day archives"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, value)

with open("_build/html/index_7days.html", 'w') as index_file:
    index_file.write(page)

In [14]:
# Same page again, restricted to the last day
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), " publications in the last day.")

carousel = create_carousel(npub)
# Quoted, comma-separated file names (without their folder) and slide ids
docs = ', '.join(f'"{path.split("/")[-1]}"' for path in res)
slides = ', '.join(f'"slide{idx}"' for idx in range(1, npub + 1))

with open("daily_template.html", "r") as template_file:
    template_html = template_file.read()

# Fill the template placeholders, in this order
page = template_html
for placeholder, value in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "Daily"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, value)

with open("_build/html/index_daily.html", 'w') as index_file:
    index_file.write(page)

1  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# Upper bound on the number of papers shown in the grid
max_papers = 6
# Sort by appearance date and take the last entries first, up to `max_papers`
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), max_papers)]
# Fewer than `max_papers` dated documents may exist: size the grid from what
# was actually selected so that every slide has a document to show.
npub = len(res)
print(npub, f" publications selected (at most {max_papers}).")

grid = create_grid(npub)
# Quoted, comma-separated file names (without their folder) and slide ids
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
