# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category reserved for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised by :func:`validation` when the affiliation check fails."""

def validation(source: str):
    """Check the affiliations of a source before its TeX code is parsed

    Delegates to ``mpia.affiliation_verifications`` (called with
    ``verbose=True``); any result other than ``True`` is treated as the
    failure message.

    :param source: source text handed over by the caller
                   (presumably the TeX content -- confirm against the parser)
    :raises AffiliationError: when the verification does not return ``True``
    """
    outcome = mpia.affiliation_verifications(source, verbose=True)
    if outcome is True:
        return
    raise AffiliationError("mpia.affiliation_verifications: " + outcome)

        
# Always report AffiliationWarning, even when the same warning was already emitted
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g. ``2512.17976``)
    :returns: HTML snippet (usable in markdown) embedding the QR-code image
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    target = f"https://arxiv.org/abs/{paper_id}"
    # Quote the whole ``src`` value: the quotes used to wrap only the arXiv
    # link, which left the attribute unquoted and put the quote characters
    # inside the QR-code payload.
    txt = f'<img src="{url}{target}">'
    return '<div id="qrcode">' + txt + '</div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Strip the LaTeX CJK environment commands wrapped around a text

    Only the ``\\begin{CJK}{UTF8}{gbsn} ... \\end{CJK}`` wrapper is handled:
    the commands are removed and the enclosed characters are kept.
    List may need to grow.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_wrapper = re.compile(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})")
    return cjk_wrapper.sub(lambda found: found.group(2), text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last one is reduced to its initial(s); hyphenated
    given names keep their hyphen ("Anna-Beth" -> "A.-B."). A parenthesised
    part (often the non western spelling of the name) is removed from the
    name and appended unchanged at the end.

    :param name: full name
    :returns: initials followed by the last token (and the parenthesised
              part, if any); an empty string for an empty or blank name
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.findall(r"\((.*?)\)", name)
        # an unbalanced "(" yields no match: keep the name untouched then
        if found:
            suffix = found[0]
            name = name.replace(f"({suffix})", '')
    tokens = name.split()
    if not tokens:
        # empty / blank name, or nothing left besides the parenthesised part
        return f"({suffix})" if suffix else ''
    initials = []
    for token in tokens[:-1]:
        # empty pieces produced by stray hyphens ("-", "Jean-") are skipped
        # instead of raising an IndexError
        pieces = [piece for piece in token.split('-') if piece]
        if pieces:
            initials.append('-'.join(piece[0] + '.' for piece in pieces))
    initials.append(tokens[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the staff (*Mitarbeiter*) list published on the mpia.de website to get the author names.
We then fetch all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Tell whether a staff name is kept as a potential paper author

    Loose filter: a name containing one of the listed family names
    (IT, administration, technical staff) is rejected.

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    non_scientists = (
        'Licht', 'Binroth', 'Witzel', 'Jordan',
        'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
        'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
        'Dehen', 'Beckmann', 'Jager', 'Jäger',
    )
    return not any(family_name in name for family_name in non_scientists)

def add_author_to_list(author_list: list) -> list:
    """ Make sure the hand-picked extra authors are part of the list

    The list is modified in place: each extra author is appended only
    when not already present.

    :param author_list: list of authors
    :returns: the same list object, updated
    """
    for extra_author in ('T. Henning',):
        if extra_author in author_list:
            continue
        author_list.append(extra_author)
    return author_list

# Author names come from the MPIA staff list (mpia.get_mpia_mitarbeiter_list()
# already does some filtering); filter_non_scientists drops further entries and
# add_author_to_list appends the authors whose name on papers differs from
# their MPIA name.
mpia_authors = add_author_to_list(
    [entry[1]
     for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# New papers listed on arXiv
new_papers = get_new_papers()
# Identifiers of extra papers to process as well (manual references)
add_paper_refs = []
new_papers.extend(list(map(get_paper_from_identifier, add_paper_refs)))

def robust_call(fn, value, *args, **kwargs):
    """ Best-effort call of `fn` on `value`

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of the call, also the fallback result
    :returns: the result of the call, or `value` unchanged when the call
              raises an ``Exception``
    """
    try:
        outcome = fn(value, *args, **kwargs)
    except Exception:
        outcome = value
    return outcome

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # the paper keeps the normalised + highlighted author names from here on
    paperk['authors'] = hl_authors
    # only select paper if an author matched our list: look for the opening
    # <mark tag itself, a bare 'mark' would also hit surnames such as "Newmark"
    if any('<mark' in hl for hl in hl_authors):
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

S. Kraus  ->  S. Kraus  |  ['S. Kraus']
E. Bañados  ->  E. Bañados  |  ['E. Bañados']
M. Benisty  ->  M. Benisty  |  ['M. Benisty']
J. Shi  ->  J. Shi  |  ['J. Shi']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']


S. Kumar  ->  S. Kumar  |  ['S. Kumar']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
Arxiv has 104 new papers today
          7 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalise the identifier: lower case, no "arxiv:" prefix, no line break
    paper['identifier'] = (paper['identifier'].lower()
                           .replace('arxiv:', '')
                           .replace(r'\n', '')   # literal backslash + n
                           .replace('\n', '')    # actual newline character
                           .strip())
    paper_id = paper['identifier']
    
    # folder holding the extracted sources, reused when it already exists
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", folder)
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # rejected by `validation`: record the reason and skip the paper
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # fall back on the arXiv abstract when none was parsed from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: the document is kept if this fails)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # any other failure: warn, record the reason and go on with the next paper
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/7 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2512.17976


extracting tarball to tmp_2512.17976... done.
Retrieving document from  https://arxiv.org/e-print/2512.18421


not a gzip file


Retrieving document from  https://arxiv.org/e-print/2512.18439


extracting tarball to tmp_2512.18439...

 done.
Retrieving document from  https://arxiv.org/e-print/2512.18543


extracting tarball to tmp_2512.18543...

 done.
Retrieving document from  https://arxiv.org/e-print/2512.18839


extracting tarball to tmp_2512.18839... done.
Retrieving document from  https://arxiv.org/e-print/2512.19000
extracting tarball to tmp_2512.19000... done.
Retrieving document from  https://arxiv.org/e-print/2512.19218


extracting tarball to tmp_2512.19218...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# make sure the log directory exists (e.g. first run in a fresh build tree)
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# explicit UTF-8: titles and author names contain non-ASCII characters
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # group identical reasons together so each one gets a single heading
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are expected (green); anything else is red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # every failure is displayed here as well as logged
        # (restricting the display to the 'red' ones is currently disabled)
        display(data)

## Successful papers

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.17976-b31b1b.svg)](https://arxiv.org/abs/2512.17976) | **Bridging stellar evolution and planet formation: from birth, to survivors of the fittest, to the second generation of planets**  |
|| A. Corporaal, et al. -- incl., <mark>S. Kraus</mark> |
|*Appeared on*| *2025-12-23*|
|*Comments*| *Science white paper submitted in the context of ESO's Expanding Horizons call*|
|**Abstract**|            Stars and planets form, live, and evolve in unison. Throughout the life of a star, dusty circumstellar discs and stellar outflows influence the further evolution of both the star(s) and their orbiting planet(s). Planet-forming discs, winds of red giant branch (RGB) or asymptotic giant branch (AGB) stars, and post-RGB/post-AGB discs are examples of such host environments where dust physics plays a key role. The physical processes that occur during each of these stages establishes how the Solar System as well as exoplanetary systems were formed, are evolving, and will eventually die. This White Paper aims to bridge the fields of stellar evolution and planet formation by peering into the dust kinematics and macrostructure formation, and its effect on planet-host interaction, in dusty environments from stellar birth to death. Near-future advancements in the 2030s will enable the detection, orbital monitoring and atmospheric/mineralogical characterisation of close-in (proto)planets across diverse stages of stellar evolution. To take full advantage of these developments by the 2040s, we should develop the capabilities required to image the varied dusty environments in which planets are entrained over their lifetime. This will enable extensive testing of current theoretical understandings - from the micro-scales of dust assembly to the deeply interlinked macro-scales of planet-host interactions - across diverse settings often too small, distant, and faint to be resolved in the next decade, simultaneously providing valuable constraints on the two-way interplay of dusty host environments and planetary formation/evolution.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.18439-b31b1b.svg)](https://arxiv.org/abs/2512.18439) | **Imaging the LkCa 15 system in polarimetry and total intensity without self-subtraction artefacts**  |
|| C. Swastik, et al. -- incl., <mark>M. Benisty</mark> |
|*Appeared on*| *2025-12-23*|
|*Comments*| *Accepted for publication in Astronomy & Astrophysics*|
|**Abstract**|            Studying young protoplanetary disks is essential for understanding planet formation, but traditional angular differential imaging can introduce self-subtraction artefacts that hinder interpretation of small-scale structures. We present high-resolution total- and polarized-intensity Ks-band images of the LkCa~15 system obtained with SPHERE using near-simultaneous reference-star differential imaging (star-hopping), yielding self-subtraction-free images beyond 0.1 arcsec. LkCa~15 hosts a ~160 au protoplanetary disk and has previously been reported to harbour candidate protoplanets at separations of 15--18 au. We analyse the disk morphology and dust properties and search for super-Jupiter planets beyond 20 au. We first model the near-infrared scattered-light images together with ALMA submillimetre continuum data using RADMC-3D and a two grain-size (micron and millimetre) compact olivine model. While this model broadly reproduces the disk geometry, it overpredicts the degree of forward scattering in the near-infrared. To investigate this discrepancy, we extract the scattering phase function S(theta) and polarized fraction P(theta) from the SPHERE data and compare them with aggregate-scattering models. The observed phase functions disfavour compact Mie spheres and are better matched by porous aggregates (CAHP). Recomputing the scattered-light models with porous CAHP grains in the disk surface layer significantly improves agreement with the observed Ks-band morphology and polarization, while retaining compact millimetre grains to reproduce the ALMA continuum. No new planetary companions are detected; we place upper mass limits of ~1.5 MJ beyond 200 au and ~3.6 MJ in the inner disk. Our results demonstrate that combining star-hopping imaging with phase-function diagnostics provides strong constraints on dust grain properties in protoplanetary disks.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.18543-b31b1b.svg)](https://arxiv.org/abs/2512.18543) | **Searches for Prompt Low-Frequency Radio Counterparts to Gravitational Wave Event S250206dm with the OVRO-LWA Time Machine**  |
|| N. Kosogorov, et al. -- incl., <mark>J. Shi</mark> |
|*Appeared on*| *2025-12-23*|
|*Comments*| *Accepted for publication in The Astrophysical Journal (ApJ). 19 pages, 6 figures*|
|**Abstract**|            We report on a search for prompt, low-frequency radio emission from the gravitational-wave (GW) merger S250206dm using the Owens Valley Radio Observatory Long Wavelength Array (OVRO-LWA). Early alerts favored a neutron-star-containing merger, making this a compelling target. Motivated by theoretical predictions of coherent radio bursts from mergers involving a neutron star, we utilized the OVRO-LWA Time Machine system to analyze voltage data recorded around the time of the event. The Time Machine is a two-stage voltage buffer and processing pipeline that continuously buffers raw data from all antennas across the array's nearly full-hemisphere instantaneous field of view, enabling retrospective beamforming, dedispersion, and fast-transient candidate identification. For this event, we analyzed a 30-minute interval beginning 3.5 minutes after the merger, which included two minutes of pre-alert data recovered by the ring buffer. We searched the 50% localization probability region with millisecond time resolution in the 69-86 MHz frequency band. No radio counterpart was detected above a 7-sigma fluence detection threshold of ~150 Jy ms. Using Bayesian analysis, we place a 95% confidence upper limit on the source luminosity of L95 = 4 x 10^41 erg s^-1. These constraints start to probe the bright end of the coherent-emission parameter space predicted by jet-ISM shock processes, magnetar and blitzar-like mechanisms, and recent simulation-based scenarios for neutron-star-containing mergers. This study presents the first sensitive, large-area, millisecond-timescale search for prompt low-frequency radio emission from a GW merger with the OVRO-LWA, establishing a framework in which about ten additional events will yield stringent population-level constraints.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.18839-b31b1b.svg)](https://arxiv.org/abs/2512.18839) | **Accretion geometry in neutron star low-mass X-ray binaries during the hard spectral state**  |
|| E. Meyer-Hofmeister, <mark>Y. Wang</mark>, B. F. Liu |
|*Appeared on*| *2025-12-23*|
|*Comments*| *8 pages, 5 figures; Accepted for publication in MNRAS*|
|**Abstract**|            We investigate the accretion geometry in neutron star low-mass X-ray binaries (LMXBs) in the hard spectral state. It is commonly accepted that, for low mass transfer rates, an advection-dominated accretion flow (ADAF) is present in the inner region. But the observed relativistically broadened emission lines in the reflection spectra clearly indicate the existence of discs near the innermost stable circular orbit $(R_{\rm{ISCO}})$. We investigate the interaction between the coronal flow and the disc in neutron star LMXBs, and find that gas condensation from the dominant, coronal accretion flow to an inner disc is enhanced as compared to that in black hole LMXBs as a consequence of irradiation of the corona by the neutron star surface. Computations show that for low mass transfer rates ($\sim 0.005-0.02$ Eddington rate) a persistent weak disc can coexist with a coronal flow in the innermost region, where a pure ADAF would have been expected. The inner disc extends outwards from $R_{\rm{ISCO}}$ to $\sim 10 R_{\rm{ISCO}}$ for Eddington ratios ($L/L_{\rm{Edd}}$) as low as $\sim 0.002$, covers a larger region for higher Eddington ratios, and eventually connects to the outer disc at $L/L_{\rm{Edd}} \sim 0.02$, thereby transiting to a soft state. We demonstrate that the observationally inferred region of the broad iron lines in the hard-state sources generally lies within the extension of the inner discs predicted by the condensation model. Disappearance of the broad iron lines is predicted at very low luminosities, either caused by very low accretion rates or disc truncation by strong magnetic fields.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.19000-b31b1b.svg)](https://arxiv.org/abs/2512.19000) | **$Ω_1Ω_2$-$Λ$CDM: A promising natural extension of the standard model of cosmology**  |
|| <mark>S. Kumar</mark> |
|*Appeared on*| *2025-12-23*|
|*Comments*| *8 pages, 2 figures*|
|**Abstract**|            We investigate a natural extension of the standard $\Lambda$CDM framework, the $\Omega_1\Omega_2$-$\Lambda$CDM model, in which the total energy density of the universe is expanded in powers of $1+z$. This parameterization recovers the standard $\Lambda$CDM scenario and introduces two additional, observationally testable contributions to the dark energy sector, $\Omega_1(1+z)$ and $\Omega_2(1+z)^2$, alongside the cosmological constant. Using Planck CMB and DESI BAO data, we find that this framework is suitable for relaxing the Hubble tension. The Planck CMB data alone allow substantial freedom in late-time dynamics, yielding $H_0 = 75.4^{+3.9}_{-2.3}\;\mathrm{km\;s^{-1}\;Mpc^{-1}}$, fully consistent with distance-ladder measurements from the SH0ES collaboration. When DESI BAO data are included in the analysis, the late-time expansion history becomes more tightly anchored, reducing the $H_0$ discrepancy to $\sim 2.5\sigma$ level. This highlights the limited constraining power of currently available low-redshift data measurements, especially in the context of the $\Omega_1\Omega_2$-$\Lambda$CDM model, where dynamical dark energy dominates the background expansion over a relatively large redshift range. The model naturally exhibits a smooth quintessence--phantom transition followed by asymptotic de Sitter behavior of the dark energy equation of state, alters late-time cosmic dynamics, and preserves standard early-universe physics. Overall, our results demonstrate that controlled late-time deviations from $\Lambda$CDM can improve cosmological concordance.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.19218-b31b1b.svg)](https://arxiv.org/abs/2512.19218) | **Multi-wavelength study of the pre-eruption dip in the recurrent nova T Coronae Borealis preceding imminent nova eruption**  |
|| S. Pei, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2025-12-23*|
|*Comments*| *12 pages, 4 figures. Accepted by Astronomy and Astrophysics*|
|**Abstract**|            We present a multi-wavelength study of the symbiotic recurrent nova (RN) T Coronae Borealis (T CrB) using Swift Burst Alert Telescope (BAT) / X-Ray Telescope (XRT) / UltraViolet Optical Telescope (UVOT) and American Association of Variable Stars Observers (AAVSO) observations from 2005 to 2025. Our analysis spans quiescent, high, and pre-eruption dip states. We find that brightening amplitudes increase toward shorter wavelengths in both optical and UV bands, while the UV and X-ray fluxes are generally anti-correlated throughout all phases. During the 2023-2024 pre-eruption dip, soft and hard X-rays increased as optical and ultraviolet (UV) brightness declined, consistent with a transition from an optically thick to thin boundary layer driven by a reduction in the accretion rate. We also report, for the first time, a second, lower-amplitude dip occurring between September 2024 and February 2025 following the primary 2023-2024 pre-eruption dip. The observed variability supports an accretion-variation scenario as a unifying explanation for both the high and dip states, and may signal an imminent nova eruption.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.18421-b31b1b.svg)](https://arxiv.org/abs/2512.18421) | **The evolution of obscured AGN across cosmic time -- A large quasar survey for the 2040s**  |
|| T. Urrutia, et al. -- incl., <mark>E. Bañados</mark> |
|*Appeared on*| *2025-12-23*|
|*Comments*| *White paper in response to ESO's "Expanding Horizons" call*|
|**Abstract**|            We propose a large quasar demographic optical multi-object spectroscopic (MOS) survey targeting over 50 million AGN candidates up to the highest redshifts possible in the optical (z~6.5), with repeat visits, using a variety of selection criteria available by 2040. A large MOS survey combining all AGN selection methods is the only way to unify a diverse range of different obscured AGN populations within a single, variability- and spectroscopy-based framework, rather than as disjoint classes selected by different methods.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error not a gzip file</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Figures referenced in the markdown (``[Fig...](path)`` links and
    ``<img src="path" .../>`` tags) that exist locally are copied below
    `directory`, keeping their relative path; online figures (``http`` in
    the path) and missing files are skipped.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to write inside `directory`
    :param directory: destination directory, created when missing
    :raises RuntimeError: when `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs: also creates missing parents (e.g. "_build" of "_build/html/")
        os.makedirs(directory, exist_ok=True)

    # non-greedy matching: with greedy `.*` several figures on one line were
    # merged into a single bogus match and all but the last one got lost
    fig_fnames = (re.findall(r'\[Fig.*?\]\((.*?)\)', md) + 
                  re.findall(r'\<img src="([^>\s]*)"[^>]*/>', md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    # explicit UTF-8: the markdown contains non-ASCII characters
    with open(os.path.join(directory, md_fname), 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# One markdown file per processed paper, named after its identifier
for paper_id, md in documents:
    export_markdown_summary(md=md, md_fname=f"{paper_id}.md", directory='_build/html/')

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render each generated markdown document for a visual check
for paper_id, md in documents:
    display(Markdown(md))

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
# Compare timezone-aware UTC datetimes on both sides: a naive local "now"
# against a UTC file timestamp shifted the window by the local UTC offset.
now = datetime.now(timezone.utc)
res = []
for fk in files:
    # NOTE(review): st_ctime is the inode-change time on Unix and the creation
    # time on Windows; st_mtime may be what is meant by "modified" -- confirm
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= timedelta(days=days):
        # total_seconds(): `.seconds` alone ignores the days part of the delta
        res.append((delta.total_seconds(), fk))
# file names in descending order
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

133  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Yield the documents dated within the last `days` days, newest first

    :param lst: sequence of (file name, date) pairs; dates are compared as
                strings against ``str(datetime.date)`` values
    :param days: size of the window, counted back from today
    :returns: generator over the selected file names
    """
    def is_recent(date):
        return date >= str(datetime.date.today() - datetime.timedelta(days=days))

    newest_first = sorted(lst, key=lambda item: item[1], reverse=True)
    yield from (fname for fname, date in newest_first if is_recent(date))

def extract_appearance_dates(lst_file):
    """ Read the "Appeared on" date of each exported markdown file

    Only the first line containing "Appeared on" is used in each file;
    files without such a line are left out of the result.

    :param lst_file: iterable of markdown file names
    :returns: list of (file name, date string) pairs
    """
    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # iterate over the argument (the global `lst` used to be read instead)
    for fname in lst_file:
        # errors='replace': only the date line matters, undecodable bytes
        # elsewhere in the file must not abort the scan
        with open(fname, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    break
    return dates

from glob import glob

# Select the exported documents that appeared during the last `days` days
days = 7
lst = glob('_build/html/*md')
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days=days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

11  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML code of a carousel holding `npub` slides

    :param npub: number of slides; they get the ids ``slide1`` .. ``slide<npub>``
    :returns: HTML text, one element per line
    """
    opening = [
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    ]
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join(opening + cells + ["  </div>"])

def create_grid(npub=4):
    """ Build the HTML code of a flat grid holding `npub` slides

    :param npub: number of slides; they get the ids ``slide1`` .. ``slide<npub>``
    :returns: HTML text, one element per line
    """
    items = [
        f"""    <div class="grid-item"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join(["""  <div class="grid"> """] + items + ["  </div>"])

In [13]:
# Fill the daily template with the documents of the last 7 days
carousel = create_carousel(npub)
# quoted, comma-separated file names (without folder) and slide ids
docs = ', '.join(f'"{k.split("/")[-1]}"' for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

page = page.replace("{%-- carousel:s --%}", carousel)
page = page.replace("{%-- suptitle:s --%}",  "7-day archives" )
page = page.replace("{%-- docs:s --%}", docs)
page = page.replace("{%-- slides:s --%}", slides)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page as the 7-day archive, restricted to the last day
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
# quoted, comma-separated file names (without folder) and slide ids
docs = ', '.join(f'"{k.split("/")[-1]}"' for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

page = page.replace("{%-- carousel:s --%}", carousel)
page = page.replace("{%-- suptitle:s --%}",  "Daily" )
page = page.replace("{%-- docs:s --%}", docs)
page = page.replace("{%-- slides:s --%}", slides)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

0  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
# most recent dates first; the slice follows `npub` instead of a second,
# independent hard-coded 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), npub))]
# fewer documents than requested: shrink the grid so that every slide
# still has a matching document
npub = len(res)
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
# quoted, comma-separated file names (without folder) and slide ids
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
