# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)

In [3]:
# additional CSS to consider. 
# TODO: Current into each exported file. 
#       This should be set once into the webpages directly.
#       With only the class/id definitions in the .md files.

debug_html = """
<style>
#wrap{ overflow:auto; }
#fig1{ background:yellow; width:100%; float:left; padding:5px;  }
#fig2{ background:red; width:50%; float:left; clear:left; padding:5px;  }
#fig3{ background:green; width:50%; float:left; padding:5px;   }
.macros{ background:yellow; visibility:visible;}
h1 {margin: 0 0 0 0;}
mark {background-color:#fff3b6;}
img {object-fit:contain; max-height:250px; display:inline-block; text-align: center;}
</style>
""" 
html = """
<style>
#wrap{ overflow:auto; }
#fig1{ width:100%; float:left; padding: 5px;  }
#fig2{ width:50%; float:left; clear:left; padding: 5px;  }
#fig3{ width:50%; float:left; padding: 5px;  }
.macros{ visibility:hidden; height:0px; }
h1 {margin: 0em 0 0 0;}
mark {background-color:#fff3b6;}
img {object-fit:contain; max-height:250px; display:inline-block; text-align: center;}
</style>
"""

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [4]:
# get list from MPIA website
# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`
mpia_authors = mpia.get_mpia_mitarbeiter_list()
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

# select only papers with matching author names and highlight authors
hl_list = [k[0] for k in mpia_authors]

candidates = []
for paperk in new_papers:
    hl_authors = highlight_authors_in_list(paperk['authors'], hl_list)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

Arxiv has 49 new papers today
          8 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates[:-1]):
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - " +
                       "_" + paper['comments'] + "_")
        doc.highlight_authors_in_list(hl_list)

        full_md = doc.generate_markdown_text()
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/7 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2303.04827


extracting tarball to tmp_2303.04827...

 done.
Retrieving document from  https://arxiv.org/e-print/2303.04830


extracting tarball to tmp_2303.04830...

 done.
Retrieving document from  https://arxiv.org/e-print/2303.04834



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2303.04834...

 done.
Retrieving document from  https://arxiv.org/e-print/2303.04889


extracting tarball to tmp_2303.04889...

 done.
Retrieving document from  https://arxiv.org/e-print/2303.04954



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2303.04954...

 done.
Retrieving document from  https://arxiv.org/e-print/2303.05014


extracting tarball to tmp_2303.05014...

 done.
Retrieving document from  https://arxiv.org/e-print/2303.05083


extracting tarball to tmp_2303.05083...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2303.04827-b31b1b.svg)](https://arxiv.org/abs/arXiv:2303.04827) | **Efficient Formation of Massive Galaxies at Cosmic Dawn by Feedback-Free  Starbursts**  |
|| Avishai Dekel, et al. -- incl., <mark>Zhaozhou Li</mark> |
|*Appeared on*| *2023-03-10*|
|*Comments*| *15 pages, 6 figures*|
|**Abstract**| JWST observations reveal a surprising excess of luminous galaxies at $z\sim 10$, consistent with efficient conversion of the accreted gas into stars, unlike the suppression of star formation by feedback at later times. We show that the high densities and low metallicities at this epoch guarantee a high star-formation efficiency in the most massive dark-matter haloes. Feedback-free starbursts (FFBs) occur when the free-fall time is shorter than $\sim 1$ Myr, below the time for low-metallicity massive stars to develop winds and supernovae. This corresponds to a characteristic density of $\sim 3\times 10^3$cm$^{-3}$. A comparable threshold density permits a starburst by allowing cooling to star-forming temperatures in a free-fall time. The galaxies within $\sim 10^{11} M_\odot$ haloes at $z \sim 10$ are expected to have FFB densities. The halo masses allow efficient gas supply by cold streams in a halo crossing time $\sim 80$ Myr. The FFBs gradually turn all the accreted gas into stars in clusters of $\sim 10^{4-7.5} M_\odot$ within galaxies that are rotating discs or shells. The starbursting clouds are shielded against feedback from earlier stars. We predict high star-formation efficiency above thresholds in redshift and halo mass, where the density is $10^{3-4}$cm$^{-3}$. The $z\sim 10$ haloes of $\sim 10^{10.8} M_\odot$ are predicted to host galaxies of $\sim 10^{10} M_\odot$ with SFR $\sim 65 M_\odot$ yr$^{-1}$ and sub-kpc sizes. The metallicity is $\leq 0.1 Z_\odot$ with little gas, dust, outflows and hot circumgalactic gas, allowing a top-heavy IMF but not requiring it. The post-FFB evolution of compact galaxies with thousands of young clusters may have implications on black-hole growth and globular clusters at later times. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2303.04830-b31b1b.svg)](https://arxiv.org/abs/arXiv:2303.04830) | **Bright Extragalactic ALMA Redshift Survey (BEARS) III: Detailed study of  emission lines from 71 Herschel targets**  |
|| M. Hagimoto, et al. -- incl., <mark>A. I. Harris</mark>, <mark>D. H. Hughes</mark>, <mark>M. W. L. Smith</mark> |
|*Appeared on*| *2023-03-10*|
|*Comments*| *30 pages, 17 figures, accepted for publication in Monthly Notices of the Royal Astronomical Society Main Journal. Comments are warmly welcomed*|
|**Abstract**| We analyse the molecular and atomic emission lines of 71 bright Herschel-selected galaxies between redshifts 1.4 to 4.6 detected by the Atacama Large Millimetre/submillimetre Array. These lines include a total of 156 CO, [C I], and H2O emission lines. For 46 galaxies, we detect two transitions of CO lines, and for these galaxies we find gas properties similar to those of other dusty star-forming galaxy (DSFG) samples. A comparison to photo-dissociation models suggests that most of Herschel-selected galaxies have similar interstellar medium conditions as local infrared-luminous galaxies and high-redshift DSFGs, although with denser gas and more intense far-ultraviolet radiation fields than normal star-forming galaxies. The line luminosities agree with the luminosity scaling relations across five orders of magnitude, although the star-formation and gas surface density distributions (i.e., Schmidt-Kennicutt relation) suggest a different star-formation phase in our galaxies (and other DSFGs) compared to local and low-redshift gas-rich, normal star-forming systems. The gas-to-dust ratios of these galaxies are similar to Milky Way values, with no apparent redshift evolution. Four of 46 sources appear to have CO line ratios in excess of the expected maximum (thermalized) profile, suggesting a rare phase in the evolution of DSFGs. Finally, we create a deep stacked spectrum over a wide rest-frame frequency (220-890 GHz) that reveals faint transitions from HCN and CH, in line with previous stacking experiments. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2303.04834-b31b1b.svg)](https://arxiv.org/abs/arXiv:2303.04834) | **Measurement of the angular momenta of pre-main-sequence stars: early  evolution of slow and fast rotators and empirical constraints on spin-down  torque mechanisms**  |
|| Marina Kounkel, et al. -- incl., <mark>Jason Lee Curtis</mark> |
|*Appeared on*| *2023-03-10*|
|*Comments*| *Accepted to AJ, 17 pages, 11 figures*|
|**Abstract**| We use TESS full-frame imaging data to investigate the angular momentum evolution of young stars in Orion Complex. We confirm recent findings that stars with rotation periods faster than 2 d are overwhelmingly binaries, with typical separations of tens of AU; such binaries quickly clear their disks, leading to a tendency for rapid rotators to be diskless. Among (nominally single) stars with rotation periods slower than 2 d, we observe the familiar, gyrochronological horseshoe-shaped relationship of rotation period versus $T_{\rm eff}$, indicating that the processes which govern the universal evolution of stellar rotation on Gyr timescales are already in place within the first few Myr. Using spectroscopic $v\sin i$ we determine the distribution of $\sin i$, revealing that the youngest stars are biased toward more pole-on orientations, which may be responsible for the systematics between stellar mass and age observed in star-forming regions. We are also able for the first time to make empirical, quantitative measurements of angular momenta and their time derivative as functions of stellar mass and age, finding these relationships to be much simpler and monotonic as compared to the complex relationships involving rotation period alone; evidently, the relationship between rotation period and $T_{\rm eff}$ is largely a reflection of mass-dependent stellar structure and not of angular momentum per se. Our measurements show that the stars experience spin-down torques in the range ~$10^{37}$ erg at ~1 Myr to ~$10^{35}$ erg at ~10 Myr, which provide a crucial empirical touchstone for theoretical mechanisms of angular momentum loss in young stars. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2303.04889-b31b1b.svg)](https://arxiv.org/abs/arXiv:2303.04889) | **Effective two-body scatterings around a massive object**  |
|| Yihan Wang, et al. -- incl., <mark>Bing Zhang</mark> |
|*Appeared on*| *2023-03-10*|
|*Comments*| *Comments are Welcome*|
|**Abstract**| Two-body scatterings under the potential of a massive object are very common in astrophysics. If the massive body is far enough away that the two small bodies are in their own gravitational sphere of influence, the gravity of the massive body can be temporarily ignored. However, this requires the scattering process to be fast enough that the small objects do not spend too much time at distances near the surface of the sphere of influence. In this paper, we derive the validation criteria for effective two-body scattering and establish a simple analytical solution for this process, which we verify through numerical scattering experiments. We use this solution to study star-black hole scatterings in the disks of Active Galactic Nuclei and planet-planet scatterings in planetary systems, and calculate their one-dimensional cross-section analytically. Our solution will be valuable in reducing computational time when treating two-body scatterings under the potential of a much more massive third body, provided that the problem settings are in the valid parameter space region identified by our study. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2303.04954-b31b1b.svg)](https://arxiv.org/abs/arXiv:2303.04954) | **Age-Divided Mean Stellar Populations from Full Spectrum Fitting as the  Simplified Star Formation and Chemical Evolution History of a Galaxy:  Methodology and Reliability**  |
|| <mark>Joon Hyeop Lee</mark>, Mina Pak, Hyunjin Jeong, Sree Oh |
|*Appeared on*| *2023-03-10*|
|*Comments*| *26 pages, 24 figures, accepted for publication in MNRAS*|
|**Abstract**| We introduce a practical methodology for investigating the star formation and chemical evolution history of a galaxy: age-divided mean stellar populations (ADPs) from full spectrum fitting. In this method, the mass-weighted mean stellar populations and mass fractions (f_mass) of young and old stellar components in a galaxy are separately estimated, which are divided with an age cut (selected to be 10^9.5 yr ~ 3.2 Gyr in this paper). To examine the statistical reliability of ADPs, we generate 10,000 artificial galaxy spectra, each of which consists of five random simple stellar population components. Using the Penalized PiXel-Fitting (pPXF) package, we conduct full spectrum fitting to the artificial spectra with noise as a function of wavelength, imitating the real noise of Sydney-Australian Astronomical Observatory Multi-object Integral field spectrograph (SAMI) galaxies. As a result, the \Delta (= output - input) of age and metallicity appears to significantly depend on not only signal-to-noise ratio (S/N), but also luminosity fractions (f_lum) of young and old components. At given S/N and f_lum, \Delta of young components tends to be larger than \Delta of old components; e.g., \sigma(\Delta [M/H]) ~ 0.40 versus 0.23 at S/N = 30 and f_lum = 50 per cent. The age-metallicity degeneracy appears to be insignificant, but \Delta log(age/yr) shows an obvious correlation with \Delta f_mass for young stellar components (R ~ 0.6). The impact of dust attenuation and emission lines appears to be mostly insignificant. We discuss how this methodology can be applied to spectroscopic studies of the formation histories of galaxies, with a few examples of SAMI galaxies. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2303.05014-b31b1b.svg)](https://arxiv.org/abs/arXiv:2303.05014) | **Advection-dominated accretion flow for the varied transition  luminosities in black hole X-ray binaries**  |
|| <mark>Jiaqi Li</mark>, Erlin Qiao |
|*Appeared on*| *2023-03-10*|
|*Comments*| *10 pages, 7 figures, 4 tables, accepted for publication in MNRAS*|
|**Abstract**| Observationally, two main spectral states, i.e., the low/hard state and the high/soft state, are identified in black hole X-ray binaries (BH-XRBs). Meanwhile, the transitions between the two states are often observed. In this paper, we re-investigate the transition luminosities in the framework of the self-similar solution of the advection-dominated accretion flow (ADAF). Specifically, we search for the critical mass accretion rate $\dot m_{\rm crit}$ of ADAF for different radii $r$ respectively. It is found that $\dot m_{\rm crit}$ decreases with decreasing $r$. By testing the effects of BH mass $m$, the magnetic parameter $\beta$ and the viscosity parameter $\alpha$, it is found that only $\alpha$ has significant effects on $\dot m_{\rm crit}-r$ relation. We define the minimum $\dot m_{\rm crit}$ (roughly at the innermost stable circular orbit) as the hard-to-soft transition rate $\dot m_{\rm tr: H\rightarrow S}$, above which BH will gradually transit from the low/hard state to the high/soft state, and $\dot m_{\rm crit}$ at $30$ Schwarzschild radii as the soft-to-hard transition rate $\dot m_{\rm tr: S\rightarrow H}$, below which BH will gradually transit from the high/soft state to the low/hard state. We derive fitting formulae of $\dot m_{\rm tr: H\rightarrow S}$ and $\dot m_{\rm tr: S\rightarrow H}$ as functions of $\alpha$ respectively. By comparing with observations, it is found that the mean value of $\alpha$ are $\alpha \sim 0.85$ and $\alpha \sim 0.33$ for the hard-to-soft transition and the soft-to-hard transition respectively, which indicates that two classes of $\alpha$ are needed for explaining the hysteresis effect during the state transition. Finally, we argue that such a constrained $\alpha$ may provide valuable clues for further exploring the accretion physics in BH-XRBs. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2303.05083-b31b1b.svg)](https://arxiv.org/abs/arXiv:2303.05083) | **Evolution of the post merger remnants from the coalescence of  oxygen-neon and carbon-oxygen white dwarf pairs**  |
|| Chengyuan Wu, et al. -- incl., <mark>Yunlang Guo</mark> |
|*Appeared on*| *2023-03-10*|
|*Comments*| *22 pages, 10 figures, 8 tables, accepted for publication in ApJL*|
|**Abstract**| Although multidimensional simulations have investigated the processes of double WD mergers, post-merger evolution only focused on the carbon-oxygen (CO) WD or helium (He) WD merger remnants. In this work, we investigate for the first time the evolution of the remnants stemmed from the merger of oxygen-neon (ONe) WDs with CO WDs. Our simulation results indicate that the merger remnants can evolve to hydrogen- and helium-deficient giants with maximum radius of about 300Rsun. Our models show evidence that merger remnants more massive than 1.95Msun can ignite Ne before significant mass-loss ensues, and they thus would become electron-capture supernovae (ECSNe). However, remnants with initial masses less than 1.90Msun will experience further core contraction and longer evolutionary time before reaching at the conditions for Ne-burning. Therefore their fates are more dependent on mass-loss rates due to stellar winds, and thus more uncertain. Relatively high mass-loss rates would cause such remnants to end their lives as ONe WDs. Our evolutionary models can naturally explain the observational properties of the double WD merger remnant IRAS 00500+6713 (J005311). As previously suggested in the literature, we propose and justify that J005311 may be the remnant from the coalescence of an ONe WD and an CO WD. We deduce that the final outcome of J005311 would be a massive ONe WD rather than a supernova explosion. Our investigations may be able to provide possible constraints on the wind mass-loss properties of the giants which have CO-dominant envelopes. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")
[ print('\t', k) for k in res ];

194  publications in the last 7 days.
	 _build/html/2303.04727.md
	 _build/html/2303.04067.md
	 _build/html/2303.03420.md
	 _build/html/2303.02816.md
	 _build/html/2303.01528.md
	 _build/html/2303.00044.md
	 _build/html/2303.00012.md
	 _build/html/2302.14137.md
	 _build/html/2302.12805.md
	 _build/html/2302.10943.md
	 _build/html/2302.10528.md
	 _build/html/2302.10008.md
	 _build/html/2302.08962.md
	 _build/html/2302.08628.md
	 _build/html/2302.07916.md
	 _build/html/2302.07880.md
	 _build/html/2302.07497.md
	 _build/html/2302.07277.md
	 _build/html/2302.07256.md
	 _build/html/2302.07234.md
	 _build/html/2302.07057.md
	 _build/html/2302.05694.md
	 _build/html/2302.05465.md
	 _build/html/2302.04507.md
	 _build/html/2302.04239.md
	 _build/html/2302.04138.md
	 _build/html/2302.03699.md
	 _build/html/2302.03576.md
	 _build/html/2302.03042.md
	 _build/html/2302.02611.md
	 _build/html/2302.02429.md
	 _build/html/2302.01678.md
	 _build/html/2302.00450.md
	 _build/html/2301.13766.md
	 _build/h

In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

3  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
print(carousel, docs, slides)
print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

1  publications in the last day.
  <div class="carousel" 
       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">
    <div class="carousel-cell"> <div id="slide1" class="md_view">Content 1</div> </div>
  </div> "2303.04727.md" "slide1"
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <!-- Remove caching as much as possible -->
    <meta http-equiv="cache-control" content="no-cache" />
    <meta http-equiv="Pragma" content="no-cache" />
    <meta http-equiv="Expires" content="-1" />
    <!-- flickity bootstrap CSS -->
    <link rel="stylesheet" href="https://unpkg.com/flickity@2/dist/flickity.min.css">
    <!-- Bootstrap CSS -->
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
        integrity="sha384-EVSTQN3/azprG1Anm