# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# get list from MPIA website
# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [k[1] for k in mpia_authors]   # initials + fullname
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

H.-W. Rix  ->  H.-W. Rix  |  ['H.-W. Rix']
K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']


X. Zhang  ->  X. Zhang  |  ['X. Zhang']
Arxiv has 70 new papers today
          3 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
documents = []
failed = []
for paper in tqdm(candidates[:-1]):
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                normed_mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/2 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2306.03120


extracting tarball to tmp_2306.03120...

 done.


H.-W. Rix  ->  H.-W. Rix  |  ['H.-W. Rix']


Found 125 bibliographic references in tmp_2306.03120/main.bbl.
Retrieving document from  https://arxiv.org/e-print/2306.03205


extracting tarball to tmp_2306.03205...

 done.


K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']


Found 50 bibliographic references in tmp_2306.03205/ms_v2.bbl.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [5]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2306.03120-b31b1b.svg)](https://arxiv.org/abs/arXiv:2306.03120) | **JADES: Detecting [OIII]$λ4363$ Emitters and Testing Strong Line  Calibrations in the High-$z$ Universe with Ultra-deep JWST/NIRSpec  Spectroscopy up to $z \sim 9.5$**  |
|| I. H. Laseter, et al. -- incl., <mark>H.-W. Rix</mark> |
|*Appeared on*| *2023-06-07*|
|*Comments*| *28 pages, 13 figures*|
|**Abstract**| We present 10 novel [OIII]$\lambda 4363$ auroral line detections up to $z\sim 9.5$ measured from ultra-deep JWST/NIRSpec MSA spectroscopy from the JWST Advanced Deep Extragalactic Survey (JADES). We leverage the deepest spectroscopic observations yet taken with NIRSpec to determine electron temperatures and oxygen abundances using the direct T$_e$ method. We directly compare against a suite of locally calibrated strong-line diagnostics and recent high-$z$ calibrations. We find the calibrations fail to simultaneously match our JADES sample, thus warranting a self-consistent revision of these calibrations for the high-$z$ Universe. We find weak dependence between R2 and O3O2 with metallicity, thus suggesting these line-ratios are ineffective in the high-$z$ Universe as metallicity diagnostics and degeneracy breakers. We find R3 and R23 still correlate with metallicity, but we find tentative flattening of these diagnostics, thus suggesting future difficulties when applying these strong-line ratios as metallicity indicators in the high-$z$ Universe. We also propose and test an alternative diagnostic based on a different combination of R3 and R2 with a higher dynamic range. We find a reasonably good agreement (median offset of 0.002 dex, median absolute offset of 0.13 dex) with the JWST sample at low metallicity. Our sample demonstrates higher ionization/excitation ratios than local galaxies with rest-frame EWs(H$\beta$) $\approx 200 -300$ Angstroms. However, we find the median rest-frame EWs(H$\beta$) of our sample to be $\sim 2\text{x}$ less than the galaxies used for the local calibrations. This EW discrepancy combined with the high ionization of our galaxies does not present a clear description of [OIII]$\lambda 4363$ production in the high-$z$ Universe, thus warranting a much deeper examination into the factors affecting production. |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2306.03205-b31b1b.svg)](https://arxiv.org/abs/arXiv:2306.03205) | **Resolving galactic-scale obscuration of X-ray AGN at $z\gtrsim1$ with  COSMOS-Web**  |
|| J. D. Silverman, et al. -- incl., <mark>K. Jahnke</mark> |
|*Appeared on*| *2023-06-07*|
|*Comments*| *12 pages, 8 figures, Accepted for publication in ApJL*|
|**Abstract**| A large fraction of the accreting supermassive black hole population is shrouded by copious amounts of gas and dust, particularly in the distant ($z\gtrsim1$) Universe. While much of the obscuration is attributed to a parsec-scale torus, there is a known contribution from the larger-scale host galaxy. Using JWST/NIRCam imaging from the COSMOS-Web survey, we probe the galaxy-wide dust distribution in X-ray selected AGN up to $z\sim2$. Here, we focus on a sample of three AGNs with their host galaxies exhibiting prominent dust lanes, potentially due to their edge-on alignment. These represent 27% (3 out of 11 with early NIRCam data) of the heavily obscured ($N_H>10^{23}$ cm$^{-2}$) AGN population. With limited signs of a central AGN in the optical and near-infrared, the NIRCam images are used to produce reddening maps $E(B-V)$ of the host galaxies. We compare the mean central value of $E(B-V)$ to the X-ray obscuring column density along the line-of-sight to the AGN ($N_H\sim10^{23-23.5}$ cm$^{-2}$). We find that the extinction due to the host galaxy is present ($0.6\lesssim E(B-V) \lesssim 0.9$; $1.9 \lesssim A_V \lesssim 2.8$) and significantly contributes to the X-ray obscuration at a level of $N_H\sim10^{22.5}$ cm$^{-2}$ assuming an SMC gas-to-dust ratio which amounts to $\lesssim$30% of the total obscuring column density. These early results, including three additional cases from CEERS, demonstrate the ability to resolve such dust structures with JWST and separate the different circumnuclear and galaxy-scale obscuring structures. |

## Failed papers

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [7]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

exported in  _build/html/2306.03120.md
    + _build/html/tmp_2306.03120/./R2_Strong_line_calibration_comparison_PYNEB.png
    + _build/html/tmp_2306.03120/./R23_Strong_line_calibration_comparison_PYNEB.png
    + _build/html/tmp_2306.03120/./O3O2_Strong_line_calibration_comparison_PYNEB.png
exported in  _build/html/2306.03205.md
    + _build/html/tmp_2306.03205/./f2.png
    + _build/html/tmp_2306.03205/./f4a.png
    + _build/html/tmp_2306.03205/./f4b.png
    + _build/html/tmp_2306.03205/./f4c.png
    + _build/html/tmp_2306.03205/./f5a.png
    + _build/html/tmp_2306.03205/./f5b.png
    + _build/html/tmp_2306.03205/./f5c.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$</div>



<div id="title">

# **JADES: Detecting [OIII]$\lambda 4363$ Emitters and Testing Strong Line Calibrations in the High-_z** Universe with Ultra-deep JWST/NIRSpec Spectroscopy up to $z \sim 9.5$_

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2306.03120-b31b1b.svg)](https://arxiv.org/abs/2306.03120)<mark>Appeared on: 2023-06-07</mark> -  _28 pages, 13 figures_

</div>
<div id="authors">

I. H. Laseter, et al. -- incl., <mark>H.-W. Rix</mark>

</div>
<div id="abstract">

**Abstract:** We present 10 novel [ OIII ] $\lambda 4363$ auroral line detections up to $z\sim 9.5$ measured from ultra-deep JWST/NIRSpec MSA spectroscopy from the JWST Advanced Deep Extragalactic Survey (JADES). We leverage the deepest spectroscopic observations yet taken with NIRSpec to determine electron temperatures and oxygen abundances using the direct $T_e$ method. We directly compare against a suite of locally calibrated strong-line diagnostics and recent high- _z_ calibrations. We find the calibrations fail to simultaneously match our JADES sample, thus warranting a _self-consistent_ revision of these calibrations for the high- _z_ Universe. We find weak dependence between R2 and O3O2 with metallicity, thus suggesting these line-ratios are ineffective in the high- _z_ Universe as metallicity diagnostics and degeneracy breakers. We find R3 and R23 still correlate with metallicity, but we find tentative flattening of these diagnostics, thus suggesting future difficulties when applying these strong-line ratios as metallicity indicators in the high- _z_ Universe. We also propose and test an alternative diagnostic based on a different combination of R3 and R2 with a higher dynamic range. We find a reasonably good agreement (median offset of 0.002 dex, median absolute offset of 0.13 dex) with the JWST sample at low metallicity, but future investigation is required on larger samples to probe past the turnover point. At a given metallicity, our sample demonstrates higher ionization/excitation ratios than local galaxies with rest-frame EWs(H $\beta$ ) $\approx 200 -300$ Å. However, we find the median rest-frame EWs(H $\beta$ ) of our sample to be $\sim 2\text{x}$ less than the galaxies used for the local calibrations. This EW discrepancy combined with the high ionization of our galaxies does not present a clear description of [ OIII ] $\lambda 4363$ production in the high- _z_ Universe, thus warranting a much deeper examination into the factors affecting production.

</div>

<div id="div_fig1">

<img src="tmp_2306.03120/./R2_Strong_line_calibration_comparison_PYNEB.png" alt="Fig9" width="100%"/>

**Figure 9. -** The relationship between $T_e$ metallicity and R2 for our JADES sample compared with strong-line calibrations from [Maiolino, Nagao and Grazian (2008)](), [Curti, Cresci and Mannucci (2017)](), [Curti, et. al (2020)](), and the "All", "Large Equivalent Width (EW)", and "Small EW" calibrations from [Nakajima, Ouchi and Xu (2022)](). [Bian, Kewley and Dopita (2018)]() does not include a calibration for R2, but we include their calibrations for O3O2, R3, and R23 in Figures \ref{fig:O3O2 Strong Line Comparison} - \ref{fig:R23 Strong Line Comparison}. Solid lines indicate calibrated ranges whereas dotted lines indicate the extrapolation of the calibration over the metallicity range $6.9 \leq 12 + \log(\text{O/H}) \leq 9.0$. The six subplots demonstrate the change between $T_e$ derived metallicities and calibration derived metallicities for our individual galaxies. The vertical lines represent the failure of a strong-line calibration to account for the measured line ratios at the given metallicity. (*fig:R2 Strong Line Comparison*)

</div>
<div id="div_fig2">

<img src="tmp_2306.03120/./R23_Strong_line_calibration_comparison_PYNEB.png" alt="Fig12" width="100%"/>

**Figure 12. -** Identical to Figure \ref{fig:R2 Strong Line Comparison} except for the relationship between $T_e$ metallicity and R23. (*fig:R23 Strong Line Comparison*)

</div>
<div id="div_fig3">

<img src="tmp_2306.03120/./O3O2_Strong_line_calibration_comparison_PYNEB.png" alt="Fig10" width="100%"/>

**Figure 10. -** Identical to Figure \ref{fig:R2 Strong Line Comparison} except the relationship is between $T_e$ metallicity and O3O2. (*fig:O3O2 Strong Line Comparison*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2306.03120"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand$
$\newcommand$
$\newcommand{\sersic}{Sérsic}$
$\newcommand{\lenstronomy}{\texttt{lenstronomy}}$
$\newcommand{\galight}{\texttt{galight}}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand{\myemail}{john.silverman@ipmu.jp}$
$\newcommand{\ss}{\textit{\rm S\acute{e}rsic}}$</div>



<div id="title">

# Resolving galactic-scale obscuration of X-ray AGN at $z\gtrsim 1$ with COSMOS-Web

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2306.03205-b31b1b.svg)](https://arxiv.org/abs/2306.03205)<mark>Appeared on: 2023-06-07</mark> -  _12 pages, 8 figures, Accepted for publication in ApJL_

</div>
<div id="authors">

J. D. Silverman, et al. -- incl., <mark>K. Jahnke</mark>

</div>
<div id="abstract">

**Abstract:** A large fraction of the accreting supermassive black hole population is shrouded by copious amounts of gas and dust, particularly in the distant ( $z\gtrsim1$ ) Universe. While much of the obscuration is attributed to a parsec-scale torus, there is a known contribution from the larger-scale host galaxy. Using JWST/NIRCam imaging from the COSMOS-Web survey, we probe the galaxy-wide dust distribution in X-ray selected AGN up to $z\sim2$ . Here, we focus on a sample of three AGNs with their host galaxies exhibiting prominent dust lanes, potentially due to their edge-on alignment. These represent 27 \% (3 out of 11 with early NIRCam data) of the heavily obscured ( $N_H>10^{23}$ cm $^{-2}$ ) AGN population. With limited signs of a central AGN in the optical and near-infrared, the NIRCam images are used to produce reddening maps $E(B-V)$ of the host galaxies. We compare the mean central value of $E(B-V)$ to the X-ray obscuring column density along the line-of-sight to the AGN ( $N_H\sim10^{23-23.5}$ cm $^{-2}$ ). We find that the extinction due to the host galaxy is present ( $0.6\lesssim E(B-V) \lesssim 0.9$ ; $1.9 \lesssim A_V\lesssim 2.8$ ) and significantly contributes to the X-ray obscuration at a level of $N_H\sim10^{22.5}$ cm $^{-2}$ assuming an SMC gas-to-dust ratio which amounts to $\lesssim$ 30 \% of the total obscuring column density. These early results, including three additional cases from CEERS, demonstrate the ability to resolve such dust structures with JWST and separate the different circumnuclear and galaxy-scale obscuring structures.

</div>

<div id="div_fig1">

<img src="tmp_2306.03205/./f2.png" alt="Fig5" width="100%"/>

**Figure 5. -** HST/ACS F814W and JWST/NIRCam (F115W, F150W, F277W, F444W) images of three X-ray AGN in COSMOS-Web exhibiting galaxy-scale dust lanes. The axes are labeled in units of arcsecs while the physical scale is also shown in the top panels. The galaxies are ordered by increasing redshift (shown in the top panel) from left to right. (*fig:cw_images*)

</div>
<div id="div_fig2">

<img src="tmp_2306.03205/./f4a.png" alt="Fig1.1" width="33%"/><img src="tmp_2306.03205/./f4b.png" alt="Fig1.2" width="33%"/><img src="tmp_2306.03205/./f4c.png" alt="Fig1.3" width="33%"/>

**Figure 1. -** Host galaxy fluxes and best-fit SEDs using MICHI2 (red) and CIGALE (black). The observed fluxes are given in red with 1$\sigma$ uncertainties. The unattenuated model SED from CIGALE is shown in blue. The JWST/NIRCam filters are shown in the top panel. (*fig:galaxy_sed-fits*)

</div>
<div id="div_fig3">

<img src="tmp_2306.03205/./f5a.png" alt="Fig2.1" width="33%"/><img src="tmp_2306.03205/./f5b.png" alt="Fig2.2" width="33%"/><img src="tmp_2306.03205/./f5c.png" alt="Fig2.3" width="33%"/>

**Figure 2. -** Decomposed AGN JWST fluxes (circles) and best-fit SED (blue=unattenuated; black=attenuated) from [Lyu, Rieke and Shi (2017)](). Open symbols indicated upper limits on AGN emission. (*fig:agn_sed-fits*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2306.03205"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

218  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

7  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [12]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

5  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
