# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised when the affiliation check of a paper fails."""

def validation(source: str) -> None:
    """Check the affiliation of a paper before its TeX code is parsed.

    Meant to be handed over as the ``validation`` callback of
    :class:`latex.LatexDocument`, so that the check runs before parsing.

    :param source: source text to check
    :raises AffiliationError: when :func:`mpia.affiliation_verifications`
        returns anything other than ``True``; the returned value is
        appended to the error message.
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # str() keeps the error meaningful even if the check returns a
        # non-string failure value (e.g. False or None).
        raise AffiliationError("mpia.affiliation_verifications: " + str(check))

        
# Always show AffiliationWarning, even when an identical one was already issued.
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g. ``2401.17764``)
    :returns: markdown-compatible HTML text: an ``<img>`` pointing to the
              QR-code service, wrapped in a ``<div id="qrcode">``
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # Quote the whole ``src`` value. With an unquoted attribute the quote
    # characters that used to surround the arXiv link became part of the URL,
    # and therefore part of the data encoded in the QR code.
    txt = f"""<img src="{url}https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA staff ("Mitarbeiter") list webpage from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# Staff list from the MPIA website.
# Identified non-scientists are filtered out automatically, see :func:`mpia.filter_non_scientists`
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [entry[1] for entry in mpia_authors]   # initials + fullname
new_papers = get_new_papers()
# Papers to add by hand, given by their identifiers
add_paper_refs = []
manual_papers = [get_paper_from_identifier(ref) for ref in add_paper_refs]
new_papers.extend(manual_papers)

candidates = []
for paperk in new_papers:
    # Compare the author list with the staff list, using initials
    normed_author_list = [mpia.get_initials(name) for name in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)
    # a matched author is wrapped in a <mark> tag by the highlighting step
    has_match = any('mark' in hl for hl in hl_authors)
    paperk['authors'] = hl_authors
    if has_match:
        # keep the paper only when at least one author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

J. Li  ->  J. Li  |  ['J. Li']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
S. Scheithauer  ->  S. Scheithauer  |  ['S. Scheithauer']
Arxiv has 49 new papers today
          3 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
# `documents` collects (paper_id, markdown) for every paper that went through;
# `failed` collects (paper, reason) for the others.
documents = []
failed = []
for paper in tqdm(candidates):
    # Strip the "arxiv:" prefix from the (lower-cased) identifier
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    # Local folder holding the extracted sources of this paper
    folder = f'tmp_{paper_id}'

    try:
        # Download the sources only when no local copy exists yet
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # Affiliation check failed: record the reason and skip the paper
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                normed_mpia_authors, verbose=True)
        # Fall back on the arXiv abstract when none was parsed from the TeX
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: a failure here does not discard the paper)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure is reported as a warning and logged with the paper
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/3 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2401.17525


extracting tarball to tmp_2401.17525...

 done.
Retrieving document from  https://arxiv.org/e-print/2401.17683


extracting tarball to tmp_2401.17683... done.
Retrieving document from  https://arxiv.org/e-print/2401.17764


extracting tarball to tmp_2401.17764...

 done.




✔ → 0:header
  ↳ 7596:\section{Introduction}


✔ → 7596:\section{Introduction}
  ↳ 13994:\section{Observations and data reduction}\label{sec:Observations}
✔ → 13994:\section{Observations and data reduction}\label{sec:Observations}
  ↳ 19431:\section{Results derived from the continuum interferometric data}\label{sec:continuum_analysis}


✔ → 19431:\section{Results derived from the continuum interferometric data}\label{sec:continuum_analysis}
  ↳ 41074:\section{Results on the Br$\gamma$-line interferometric data}\label{sec:brg_analysis}


✔ → 41074:\section{Results on the Br$\gamma$-line interferometric data}\label{sec:brg_analysis}
  ↳ 51128:\section{Physical properties of the disk}


✔ → 51128:\section{Physical properties of the disk}
  ↳ 71029:\section{Discussion}\label{sec:Discussion}


✔ → 71029:\section{Discussion}\label{sec:Discussion}
  ↳ 88350:\section{Summary}
✔ → 88350:\section{Summary}
  ↳ 94611:\section{Logs and observation data}\label{apx:dataset}


✔ → 94611:\section{Logs and observation data}\label{apx:dataset}
  ↳ 107721:\section{Global fit MCMC posterior distribution functions and azimuthal modulation parameters' $\chi_r^2$ maps}


✔ → 107721:\section{Global fit MCMC posterior distribution functions and azimuthal modulation parameters' $\chi_r^2$ maps}
  ↳ 112889:\section{Spectrum wavelength calibration and star photospheric absorption model}
✔ → 112889:\section{Spectrum wavelength calibration and star photospheric absorption model}
  ↳ 117581:\section{Photometric data}


✔ → 117581:\section{Photometric data}
  ↳ 120975:\section{Interferometric variability and UV coverage}\label{sec:appendix_variability}


✔ → 120975:\section{Interferometric variability and UV coverage}\label{sec:appendix_variability}
  ↳ 134698:\section{Visualisation of the continuum geometrical models}\label{sec:visualisation}
✔ → 134698:\section{Visualisation of the continuum geometrical models}\label{sec:visualisation}
  ↳ 135914:\section{Pure-line photocenter displacements}\label{apx:brg-visualisation}
✔ → 135914:\section{Pure-line photocenter displacements}\label{apx:brg-visualisation}
  ↳ 137605:end


Found 99 bibliographic references in tmp_2401.17764/aa46926-23_corr.bbl.


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [5]:
import datetime
import os

today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# The log folder may not exist yet (e.g. on a fresh build tree)
os.makedirs(os.path.dirname(logfile), exist_ok=True)

with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers that produced a document
    success = {k[0] for k in documents}
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that identical reasons share one sub-heading below
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, other errors in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # every failure is displayed here as well as written to the logs
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2401.17764-b31b1b.svg)](https://arxiv.org/abs/arXiv:2401.17764) | **The GRAVITY young stellar object survey XIII. Tracing the time-variable  asymmetric disk structure in the inner AU of the Herbig star HD98922**  |
|| G. Collaboration, et al. -- incl., <mark>S. Scheithauer</mark> |
|*Appeared on*| *2024-02-01*|
|*Comments*| *45 pages, 20 figures, accepted by and to be published in Astronomy & Astrophysics (A&A)*|
|**Abstract**| Temporal variability in the photometric and spectroscopic properties of protoplanetary disks is common in YSO. However, evidence pointing toward changes in their morphology over short timescales has only been found for a few sources, mainly due to a lack of high cadence observations at mas resolution. We combine GRAVITY multi-epoch observations of HD98922 at mas resolution with PIONIER archival data covering a total time span of 11 years. We interpret the interferometric visibilities and spectral energy distribution with geometrical models and through radiative transfer techniques. We investigated high-spectral-resolution quantities to obtain information on the properties of the HI BrG-line-emitting region. The observations are best fitted by a model of a crescent-like asymmetric dust feature located at 1 au and accounting for 70% of the NIR emission. The feature has an almost constant magnitude and orbits the central star with a possible sub-Keplerian period of 12 months, although a 9 month period is another, albeit less probable, solution. The radiative transfer models show that the emission originates from a small amount of carbon-rich (25%) silicates, or quantum-heated particles located in a low-density region. Among different possible scenarios, we favor hydrodynamical instabilities in the inner disk that can create a large vortex. The high spectral resolution differential phases in the BrG-line show that the hot-gas component is offset from the star and in some cases is located between the star and the crescent feature. The scale of the emission does not favor magnetospheric accretion as a driving mechanism. The scenario of an asymmetric disk wind or a massive accreting substellar or planetary companion is discussed. With this unique observational data set for HD98922, we reveal morphological variability in the innermost 2 au of its disk region. |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2401.17525-b31b1b.svg)](https://arxiv.org/abs/arXiv:2401.17525) | **Molecular Bubble and Outflow in S Mon Revealed by Multiband Datasets**  |
|| D. Liu, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2024-02-01*|
|*Comments*| *34 pages,19 figures, 5 tables, Accepted for publication in ApJ*|
|**Abstract**| We identify a molecular bubble, and study the star formation and its feedback in the S Mon region, using multiple molecular lines, young stellar objects (YSOs), and infrared data. We revisit the distance to S Mon, ~722+/-9 pc, using Gaia Data Release 3 parallaxes of the associated Class II YSOs. The bubble may be mainly driven by a massive binary system (namely 15 Mon), the primary of which is an O7V-type star. An outflow is detected in the shell of the bubble, suggesting ongoing star formation activities in the vicinity of the bubble. The total wind energy of the massive binary star is three orders of magnitude higher than the sum of the observed turbulent energy in the molecular gas and the kinetic energy of the bubble, indicating that stellar winds help to maintain the turbulence in the S Mon region and drive the bubble. We conclude that the stellar winds of massive stars have an impact on their surrounding environment. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2401.17683-b31b1b.svg)](https://arxiv.org/abs/arXiv:2401.17683) | **Very blue-shifted broad H$α$ in a low redshift Type-1.9 AGN: a disk  emitter or a recoiling black hole scenario**  |
|| <mark>X. Zhang</mark> |
|*Appeared on*| *2024-02-01*|
|*Comments*| *11 pages, 2 tables, 6 figures, Accepted to be published in MNRAS*|
|**Abstract**| In this manuscript, very blue-shifted broad H$\alpha$ with shifted velocity $\sim$2200km/s is reported in the low redshift Type-1.9 AGN SDSS J1052+1036. Blue-shifted broad emission lines may arise due to the presence of a rotating gas disk around central black hole (BH), but may also be a signature of rare phenomena such as gravitational wave recoil of a supermassive BH (rSMBH) or the presence of a binary BH (BBH) system. Here, due to larger shifted velocity of stronger and wider blue-shifted broad H$\alpha$, the BBH system is disfavoured. Meanwhile, if this object contained a rSMBH, intrinsic obscuration with E(B-V)$\le$0.6 should lead to a detectable broad H$\beta$, indicating the rSMBH scenario not preferred. We find that the blue-shifted broad H$\alpha$ can be well explained by emission from an AGN disk, indicating that SDSS J1052+1036 is likely a disk-emitting AGN. In order to determine which scenario, a rSMBH or a disk emitter, is more preferred, a re-observed spectrum in 2025 can provide robust clues, with a disk emitter probably leading to clear variations of peak positions, peak separations and/or peak intensity ratios in broad H$\alpha$, but with a rSMBH scenario probably leading to no variations of peak separations in broad H$\alpha$. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    The figures referenced in `md` (markdown ``[Fig...](path)`` links and
    ``<img src="path" .../>`` tags) are copied below `directory`, keeping
    their relative path; online figures are not copied. The markdown text
    itself is written to ``directory/md_fname``.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to write
    :param directory: output directory (created if missing, parents included)
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs rather than mkdir: nested targets such as '_build/html/'
        # must also work when the parent folder does not exist yet
        os.makedirs(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [7]:
# Write each generated summary (and its figures) to the build folder
for doc_id, doc_md in documents:
    export_markdown_summary(doc_md, doc_id + ".md", '_build/html/')

exported in  _build/html/2401.17764.md
    + _build/html/tmp_2401.17764/./Figures/Gas/GRAVITY_SC-Data/HD98922_SC-data_2017-02-22_PL_L.jpg
    + _build/html/tmp_2401.17764/./Figures/Gas/GRAVITY_SC-Data/HD98922_SC-data_2017-03-19_PL_L.jpg
    + _build/html/tmp_2401.17764/./Figures/Gas/GRAVITY_SC-Data/HD98922_SC-data_2019-03-19_PL_L.jpg
    + _build/html/tmp_2401.17764/./Figures/Gas/GRAVITY_SC-Data/HD98922_SC-data_2019-05-23_PL_L.jpg
    + _build/html/tmp_2401.17764/./Figures/Gas/GRAVITY_SC-Data/HD98922_SC-data_2019-06-05_PL.jpg
    + _build/html/tmp_2401.17764/./Figures/Gas/GRAVITY_SC-Data/HD98922_SC-data_2019-07-11_PL_L.jpg
    + _build/html/tmp_2401.17764/./Figures/Continuum/FT-model/HD98922_Continuum_Model.png
    + _build/html/tmp_2401.17764/./Figures/Continuum/RT/HD98922_RT-Model_CS_Final.jpg


## Display the papers

Not necessary but allows for a quick check.

In [8]:
# Render every generated summary inline
for _doc_id, doc_text in documents:
    display(Markdown(doc_text))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\kp}[1]{\textcolor{orange}{\textbf{Karine: } #1}}$</div>



<div id="title">

# The GRAVITY young stellar object survey

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2401.17764-b31b1b.svg)](https://arxiv.org/abs/2401.17764)<mark>Appeared on: 2024-02-01</mark> -  _45 pages, 20 figures, accepted by and to be published in Astronomy & Astrophysics (A&A)_

</div>
<div id="authors">

G. Collaboration, et al. -- incl., <mark>S. Scheithauer</mark>

</div>
<div id="abstract">

**Abstract:** Temporal variability in the photometric and spectroscopic properties of protoplanetary disks is common in young stellar objects. However, evidence pointing toward changes in their morphology over shorttimescales has only been found for a few sources, mainly due to a lack of high-cadence observations at high angular resolution. Understanding this type of variation could be important for our understanding of phenomena related to disk evolution. We study the morphological variability of the innermost   circumstellar environment of $\object{HD 98922}$ , focusing on its dust and gas content. Multi-epoch observations of $\object{HD 98922}$ at milliarcsecond resolution with VLTI/GRAVITY in the K-band at low (R=20) and high (R=4000) spectral resolution are combined with VLTI/PIONIER archival data covering a total time span of 11 years. We interpret the interferometric visibilities and spectral energy distribution with geometrical models and through radiative transfer techniques using the code MCMax. We investigated high-spectral-resolution quantities (visibilities and differential phases)  to obtain information on the properties of the HI Brackett- $\gamma$ (Br $\gamma$ )-line-emitting region. Comparing observations taken with similar _(u,$\varv$)_ plane coverage, we find that the squared visibilities do not vary significantly, whereas we find strong variability in the closure phases, suggesting temporal variations in the asymmetric brightness distribution associated to the disk.   Our observations are best fitted by a model of a crescent-like asymmetric dust feature located at $\sim$ 1 au and accounting for $\sim$ 70 \% of the near-infrared (NIR) emission. The feature has an almost constant magnitude and orbits the central star with a possible sub-Keplerian period of $\sim$ 12 months, although a 9 month period is another, albeit less probable, solution. 
The radiative transfer models show that the emission originates from a small amount of carbon-rich ( $25\%$ ) silicates, or quantum-heated particles located in a low-density region. Among different possible scenarios, we favor hydrodynamical instabilities in the inner disk that can create a large vortex.   The high spectral resolution differential phases in the Br $\gamma$ line show that the hot-gas compact component is offset from the star and in some cases is located between the star and the crescent feature. The scale of the emission does not favor magnetospheric accretion as a driving mechanism. The scenario of an asymmetric disk wind or a massive accreting substellar or planetary companion is discussed. With this unique observational data set for $\object{HD 98922}$ , we reveal morphological variability in the innermost 2 au of its disk region. This property is possibly common to many other protoplanetary disks, but is not commonly observed due to a lack of high-cadence observation. It is therefore important to pursue this approach with other sources for which an extended dataset with PIONIER, GRAVITY, and possibly MATISSE is available.

</div>

<div id="div_fig1">

<img src="tmp_2401.17764/./Figures/Gas/GRAVITY_SC-Data/HD98922_SC-data_2017-02-22_PL_L.jpg" alt="Fig16.1" width="16%"/><img src="tmp_2401.17764/./Figures/Gas/GRAVITY_SC-Data/HD98922_SC-data_2017-03-19_PL_L.jpg" alt="Fig16.2" width="16%"/><img src="tmp_2401.17764/./Figures/Gas/GRAVITY_SC-Data/HD98922_SC-data_2019-03-19_PL_L.jpg" alt="Fig16.3" width="16%"/><img src="tmp_2401.17764/./Figures/Gas/GRAVITY_SC-Data/HD98922_SC-data_2019-05-23_PL_L.jpg" alt="Fig16.4" width="16%"/><img src="tmp_2401.17764/./Figures/Gas/GRAVITY_SC-Data/HD98922_SC-data_2019-06-05_PL.jpg" alt="Fig16.5" width="16%"/><img src="tmp_2401.17764/./Figures/Gas/GRAVITY_SC-Data/HD98922_SC-data_2019-07-11_PL_L.jpg" alt="Fig16.6" width="16%"/>

**Figure 16. -** $\object${HD 98922} GRAVITY SC data for the different epochs. For each epoch, top plots show the wavelength-calibrated and continuum-normalized spectrum, left plots show the total squared visibilities, and right plots show the total differential phases. Circles represent the pure-line quantities. Colors refer to the different baselines. (*fig:GRAVITY-SC-Data*)

</div>
<div id="div_fig2">

<img src="tmp_2401.17764/./Figures/Continuum/FT-model/HD98922_Continuum_Model.png" alt="Fig6" width="100%"/>

**Figure 6. -** Peak-normalized GRAVITY (top row) and PIONIER (bottom row) continuum model images. The dashed white lines represent the $\pm$3$\sigma$ uncertainty on the PA of the azimuthal modulation.
The central object is not displayed but is marked with a star to enhance the circumstellar emission. North is up, east is to the left.
See Appendix \ref{sec:visualisation} for the full data set. (*fig:Continuum-imgs*)

</div>
<div id="div_fig3">

<img src="tmp_2401.17764/./Figures/Continuum/RT/HD98922_RT-Model_CS_Final.jpg" alt="Fig8" width="100%"/>

**Figure 8. -** Radiative transfer modeling corresponding to Table \ref{tab:RT-BestModel}. The top left panel shows the SED for Model CS, with the blue dashed line representing the stellar black-body function and the red line showing the modeled total emission. The black bars represent the photometric data. The top right panel shows the derived dust surface density profile as a function of the distance from the star. The bottom left plot shows the dust density structure, where the black dashed line represents the $\tau$$=$ 1 surface at 2.2 $\mu$m, and the red lines represent, from left to right and for both components, the density contours at $10^{-15}$, $10^{-16}$, $10^{-17}$, and $10^{-18}$ g cm$^{-3}$, respectively. The bottom right plot shows the dust temperature structure, where the black lines represent, from left to right, the isothermal contours at 2300, 2000, 1700, 1500, and 1300 K, respectively.
     (*fig:RT-BestModel*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2401.17764"></div>

# Create HTML index

In [9]:
# Import the module itself: rebinding the name `datetime` to the class would
# break other cells that call `datetime.date.today()`.
import datetime
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
# Timezone-aware UTC on both sides of the subtraction; mixing a naive local
# "now" with a naive UTC file timestamp skews the age by the UTC offset.
now = datetime.datetime.now(datetime.timezone.utc)
res = []
for fk in files:
    # NOTE: st_ctime is the metadata-change time on Unix and the creation
    # time on Windows, not strictly the last content modification.
    changed = datetime.datetime.fromtimestamp(os.stat(fk).st_ctime,
                                              tz=datetime.timezone.utc)
    if now - changed <= datetime.timedelta(days=days):
        res.append(fk)
# file names in descending order
res = sorted(res, reverse=True)
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

366  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: sequence of ``(fname, date)`` pairs; dates are compared as
                text, so they are expected to be ISO ``YYYY-MM-DD`` strings
    :param days: size of the window, counted back from today
    :returns: generator over the file names whose date falls in the window,
              most recent date first
    """
    # Local import: the function keeps working even if the global name
    # `datetime` was rebound to the `datetime.datetime` class by another cell.
    import datetime

    # The cutoff does not depend on the item: compute it once
    cutoff = str(datetime.date.today() - datetime.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the appearance date recorded in each exported document

    Only the first line containing "Appeared on" is used in each file;
    files without such a line are left out of the result.

    :param lst_file: iterable of file names to scan
    :returns: list of ``(fname, date)`` pairs, `date` being the text found
              between "Appeared on:" and "</mark>"
    """
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    # Iterate over the argument, not over a global list that happens to exist
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # the first occurrence is enough
                    break
    return dates

from glob import glob
# Scan all exported summaries and keep those that appeared within the window
days = 7
lst = glob('_build/html/*md')
dates = extract_appearance_dates(lst)
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

7  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel made of `npub` slides

    Each slide is a ``carousel-cell`` holding a ``slide<k>`` placeholder
    div, with k running from 1 to `npub`.
    """
    cell_tpl = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    opening = ["""  <div class="carousel" """,
               """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""]
    cells = [cell_tpl.format(k=idx) for idx in range(1, npub + 1)]
    closing = ["  </div>"]
    return '\n'.join(opening + cells + closing)

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid made of `npub` slides

    Each item is a ``grid-item`` holding a ``slide<k>`` placeholder div,
    with k running from 1 to `npub`.
    """
    item_tpl = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    items = [item_tpl.format(k=idx) for idx in range(1, npub + 1)]
    return '\n'.join(["""  <div class="grid"> """] + items + ["  </div>"])

In [12]:
# Fill the carousel template with the documents of the last 7 days
carousel = create_carousel(npub)
docs = ', '.join(f'"{path.split("/")[-1]:s}"' for path in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Substitute the placeholders one after the other
for placeholder, content in (("{%-- carousel:s --%}", carousel),
                             ("{%-- suptitle:s --%}", "7-day archives"),
                             ("{%-- docs:s --%}", docs),
                             ("{%-- slides:s --%}", slides)):
    page = page.replace(placeholder, content)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# Same page as the 7-day archive, restricted to the last day
days = 1
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(f'"{path.split("/")[-1]:s}"' for path in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Substitute the placeholders one after the other
for placeholder, content in (("{%-- carousel:s --%}", carousel),
                             ("{%-- suptitle:s --%}", "Daily"),
                             ("{%-- docs:s --%}", docs),
                             ("{%-- slides:s --%}", slides)):
    page = page.replace(placeholder, content)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

1  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
# Most recent appearance dates first; take `npub` of them so that the
# selection always follows the number of grid cells defined above.
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), npub))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
