# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# get list from MPIA website
# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [k[1] for k in mpia_authors]   # initials + fullname
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

J. He  ->  J. He  |  ['J. He']
K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']
Arxiv has 59 new papers today
          2 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
documents = []
failed = []
for paper in tqdm(candidates):
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                normed_mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/2 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2308.12379


extracting tarball to tmp_2308.12379...

 done.


Found 65 bibliographic references in tmp_2308.12379/aanda.bbl.
syntax error in line 65: '}' expected
Retrieving document from  https://arxiv.org/e-print/2308.12823


extracting tarball to tmp_2308.12823... done.


K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']


Found 77 bibliographic references in tmp_2308.12823/j1000104_photoz_paper.bbl.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [5]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2308.12379-b31b1b.svg)](https://arxiv.org/abs/arXiv:2308.12379) | **Water ice: temperature-dependent refractive indexes and their  astrophysical implications**  |
|| W. R. M. Rocha, et al. -- incl., <mark>J. He</mark> |
|*Appeared on*| *2023-08-25*|
|*Comments*| *Accepted for publication in A&A, 12 pages, 15 figures*|
|**Abstract**| Interstellar ices are largely composed of frozen water. It is important to derive fundamental parameters for H$_2$O ice such as absorption and scattering opacities for which accurate complex refractive indexes are needed. The primary goal of this work is to derive ice-grain opacities based on accurate H$_2$O ice complex refractive indexes and to assess their impact on the derivation of ice column densities and porosity in space. We use the \texttt{optool} code to derive ice-grain opacities values based on new mid-IR complex refractive index measurements of H$_2$O ice. Next, we use those opacities in the \texttt{RADMC-3D} code to run a radiative transfer simulation of a protostellar envelope containing H$_2$O ice. This is used to calculate water ice column densities. We find that the real refractive index in the mid-IR of H$_2$O ice at 30~K is $\sim$14\% lower than previously reported in the literature. This has a direct impact on the ice column densities derived from the simulations of embedded protostars. We find that ice porosity plays a significant role in the opacity of icy grains and that the H$_2$O libration mode can be used as a diagnostic tool to constrain the porosity level. Finally, the refractive indexes presented here allow us to estimate a grain size detection limit of 18~$\mu$m based on the 3~$\mu$m band whereas the 6~$\mu$m band allows tracing grain sizes larger than 20~$\mu$m. Based on radiative transfer simulations using new mid-IR refractive indexes, we conclude that H$_2$O ice leads to more absorption of infrared light than previously estimated. This implies that the 3 and 6~$\mu$m bands remain detectable in icy grains with sizes larger than 10~$\mu$m. Finally, we propose that also the H$_2$O ice libration band can be a diagnostic tool to constrain the porosity level of the interstellar ice, in addition to the OH dangling bond, which is routinely used for this purpose. |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2308.12823-b31b1b.svg)](https://arxiv.org/abs/arXiv:2308.12823) | **Uncovering a Massive z~7.65 Galaxy Hosting a Heavily Obscured Radio-Loud  QSO Candidate in COSMOS-Web**  |
|| E. Lambrides, et al. -- incl., <mark>K. Jahnke</mark> |
|*Appeared on*| *2023-08-25*|
|*Comments*| *Submitted to ApJL, Comments welcome*|
|**Abstract**| In this letter, we report the discovery of the highest redshift, heavily obscured, radio-loud QSO candidate selected using JWST NIRCam/MIRI, mid-IR, sub-mm, and radio imaging in the COSMOS-Web field. Using multi-frequency radio observations and mid-IR photometry, we identify a powerful, radio-loud (RL), growing supermassive black hole (SMBH) with significant spectral steepening of the radio SED ($f_{1.32 \mathrm{GHz}} \sim 2$ mJy, $q_{24\mu m} = -1.1$, $\alpha_{1.32-3\mathrm{GHz}}=-1.2$, $\Delta \alpha = -0.4$). In conjunction with ALMA, deep ground-based observations, ancillary space-based data, and the unprecedented resolution and sensitivity of JWST, we find no evidence of QSO contribution to the UV/optical/NIR data and thus infer heavy amounts of obscuration (N$_{\mathrm{H}} > 10^{23}$ cm$^{-2}$). Using the wealth of deep UV to sub-mm photometric data, we report a singular solution photo-z of $z_\mathrm{phot}$ = 7.65$^{+0.4}_{-0.3}$ and estimate an extremely massive host-galaxy ($\log M_{\star} = 11.92 \pm 0.06\,\mathrm{M}_{\odot}$). This source represents the furthest known obscured RL QSO candidate, and its level of obscuration aligns with the most representative but observationally scarce population of QSOs at these epochs. |

## Failed papers

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [7]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

exported in  _build/html/2308.12379.md
    + _build/html/tmp_2308.12379/./Figures/opac_mie.png
    + _build/html/tmp_2308.12379/./Figures/AllNKsV4.png
    + _build/html/tmp_2308.12379/./Figures/NiceV3.png
exported in  _build/html/2308.12823.md
    + _build/html/tmp_2308.12823/./photo-z_cosw-106725-1.png
    + _build/html/tmp_2308.12823/./global_sed_cosw.png
    + _build/html/tmp_2308.12823/./updated_cosw_radio_sed.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\arraystretch}{1.5}$</div>



<div id="title">

# Water ice: temperature-dependent refractive indexes and their astrophysical implications

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2308.12379-b31b1b.svg)](https://arxiv.org/abs/2308.12379)<mark>Appeared on: 2023-08-25</mark> -  _Accepted for publication in A&A, 12 pages, 15 figures_

</div>
<div id="authors">

W. R. M. Rocha, et al. -- incl., <mark>J. He</mark>

</div>
<div id="abstract">

**Abstract:** Inter- and circumstellar ices are largely composed of frozen water. Therefore, it is important to derive fundamental parameters for $H_2$ O ice such as absorption and scattering opacities for which accurate complex refractive indexes are needed. The primary goal of the work presented here is to derive ice-grain opacities based on accurate $H_2$ O ice complex refractive indexes at low temperatures and to assess the impact this has on the derivation of ice column densities and porosity in space. We use the \texttt{optool} code to derive ice-grain scattering and absorption opacity values based on new and previously reported mid-IR complex refractive index measurements of $H_2$ O ice, primarily in its amorphous form, but not exclusively. Next, we use those opacities in the \texttt{RADMC-3D} code to run a radiative transfer simulation of a protostellar envelope containing $H_2$ O ice, which is then used to calculate water ice column densities. We find that the real refractive index in the mid-IR of $H_2$ O ice at 30 K is $\sim$ 14 \% lower than previously reported in the literature. This has a direct impact on the ice column densities derived from the simulations of embedded protostars. Additionally, we find that ice porosity plays a significant role in the opacity of icy grains and that the $H_2$ O libration mode can be used as a diagnostic tool to constrain the porosity level. Finally, the refractive indexes presented here allow us to estimate a grain size detection limit of 18 $\mu$ m based on the 3 $\mu$ m band whereas the 6 $\mu$ m band allows tracing grain sizes larger than 20 $\mu$ m. Based on radiative transfer simulations using new mid-IR refractive indexes, we conclude that $H_2$ O ice leads to more absorption of infrared light than previously estimated. This implies that the 3 and 6 $\mu$ m bands remain detectable in icy grains with sizes larger than 10 $\mu$ m. Finally, we propose that also the $H_2$ O ice libration band can be used as a diagnostic tool to constrain the porosity level of the interstellar ice, in addition to the OH dangling bond, which is now routinely used for this purpose.

</div>

<div id="div_fig1">

<img src="tmp_2308.12379/./Figures/opac_mie.png" alt="Fig3" width="100%"/>

**Figure 3. -** Absorption (upper panels) and scattering (lower panels)  opacities of ice-dust grains assuming compact (left) and porous ices (right). Lines in black are the opacities assuming the water ice refractive index derived in this paper, i.e., assuming $n_{700nm} = 1.16$(NK1), whereas curves in red show the opacities calculated using $n_{700nm} = 1.32$(NK2). The small panels below the large ones show the variation between the two opacities in percentage values. A small offset is performed on the absorption opacities for better readability. No offset is applied to the scattering opacity. The rectangular regions indicated by the grey dotted boxes are zoomed-in in Figure \ref{opac_mie_zoom}. (*opac_mie*)

</div>
<div id="div_fig2">

<img src="tmp_2308.12379/./Figures/AllNKsV4.png" alt="Fig15" width="100%"/>

**Figure 15. -** Wavelength-dependent refractive index of $H_2$O ice at different temperatures. Panels _ a_ and _ b_ show the real and imaginary parts of the complex refractive index in a broad-range perspective (0.3$-$20 $\mu$m). Panels _ c$-$h_ show zoom-ins of selected wavelengths in panels _ a_ and _ b_ indicated by the hatched areas. (*nkallT*)

</div>
<div id="div_fig3">

<img src="tmp_2308.12379/./Figures/NiceV3.png" alt="Fig8" width="100%"/>

**Figure 8. -** Effects of opacity values derived for grains under the DHS approach by assuming the NK1 and NK2 values. Panel $a$ shows the synthetic protostellar spectrum with $H_2$O ice and silicate absorption bands calculated from opacity models based on NK1 (black) and NK2 (red) refractive index values. The black and red and dashed lines over the 3 $\mu$m feature are the continuum. The blue box around 13 $\mu$m highlights the absence of the $H_2$O libration band in the spectrum derived from NK2 values. Panel $b$ displays the same as panel $a$ but for porous ice. Panels $c$ and $d$ present the $H_2$O ice column density derived from the optical depth spectrum using compact and porous ices, respectively. Finally, Panels $e$ and $f$ compare the ice column densities from both cases (grey: NK1; red: NK2). (*Nice*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2308.12379"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$</div>



<div id="title">

# Uncovering a Massive z$\sim$7.65 Galaxy Hosting a Heavily Obscured Radio-Loud QSO Candidate in COSMOS-Web

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2308.12823-b31b1b.svg)](https://arxiv.org/abs/2308.12823)<mark>Appeared on: 2023-08-25</mark> -  _Submitted to ApJL, Comments welcome_

</div>
<div id="authors">

E. Lambrides, et al. -- incl., <mark>K. Jahnke</mark>

</div>
<div id="abstract">

**Abstract:** In this letter, we report the discovery of the highest redshift, heavily obscured, radio-loud QSO candidate selected using JWST NIRCam/MIRI, mid-IR, sub-mm, and radio imaging in the COSMOS-Web field. Using multi-frequency radio observations and mid-IR photometry, we identify a powerful, radio-loud (RL), growing supermassive black hole (SMBH) with significant spectral steepening of the radio SED ( $f_{1.32 \mathrm{GHz}} \sim 2$ mJy, $q_{24\micron} = -1.1$ , $\alpha_{1.32-3\mathrm{GHz}}=-1.2$ , $\Delta \alpha = -0.4$ ). In conjunction with ALMA, deep ground-based observations, ancillary space-based data, and the unprecedented resolution and sensitivity of JWST, we find no evidence of QSO contribution to the UV/optical/NIR data and thus infer heavy amounts of obscuration (N $_{\mathrm{H}} > 10^{23}$ cm $^{-2}$ ). Using the wealth of deep UV to sub-mm photometric data, we report a singular solution photo-z of $z_\mathrm{phot}$ = 7.65 $^{+0.4}_{-0.3}$ and estimate an extremely massive host-galaxy ( $\log M_{\star} = 11.92 \pm 0.06 \mathrm{M}_{\odot}$ ). This source represents the furthest known obscured RL QSO candidate, and its level of obscuration aligns with the most representative but observationally scarce population of QSOs at these epochs.

</div>

<div id="div_fig1">

<img src="tmp_2308.12823/./photo-z_cosw-106725-1.png" alt="Fig3" width="100%"/>

**Figure 3. -** Results from fitting the optical, NIR and MIR with {*\texttt{EAzY*py}}. Non-detections with 27 mag upper limits: HSC $g$, HSC $r$, HSC $i$, HSC $z$, HST _F814W_, HSC $y$. $>3 \sigma$ detections: JWST _F115W_, JWST _F150W_, HST _F160W_, JWST _F277W_, IRAC Channel 1, JWST _F444W_, IRAC Channel 2, IRAC Channel 3, JWST MIRI 7.7 \micron. The redshift is constrained to $z = 7.65^{+0.4}_{-0.3}$ fit with combinations of SSP template from  ([Bruzual and Charlot 2003]()) . Inset: We show the p(z) via EAzY and BAG (*fig:photoz*)

</div>
<div id="div_fig2">

<img src="tmp_2308.12823/./global_sed_cosw.png" alt="Fig4" width="100%"/>

**Figure 4. -** _Left Panel:_ Optical-IR-radio SED fitting with BC03 stellar  ([Bruzual and Charlot 2003]()) , mid-IR AGN  ([Mullaney, et. al 2011]()) , Draine \& Li dust  ([Draine and Li 2007]())  and power-law radio templates (using the MICHI2 code;  ([Liu, Daddi and Schinnerer 2021]()) ). The black line indicates the composite best-fit model and the blue symbols are photometric data points, with upper limits shown as downward arrows. The stellar, mid-IR AGN, dust, and radio components are indicated by the cyan, yellow, red, and magenta dashed lines, respectively.
    _Right panels:_ The 1/$\chi^2$ distributions from the fitting for the four parameters: stellar mass, dust attenuation $E(B-V)$, QSO component's luminosity integrated over 10-1000 $\mu$m, and dust component's luminosity integrated over 8-1000 $\mu$m. The yellow highlighted regions correspond to the 95\% confidence intervals. (*fig:Opt-IR-radio SED*)

</div>
<div id="div_fig3">

<img src="tmp_2308.12823/./updated_cosw_radio_sed.png" alt="Fig1" width="100%"/>

**Figure 1. -** Radio SED: All fluxes and associated errors are listed in Table \ref{tab:phot}. We measure the spectral slope between two sets of radio frequencies (blue line, orange line) and find significant spectral steepening indicative of high-$z$ RL QSO  ([Saxena, Jagannathan and Röttgering 2018](), [Endsley, Stark and Lyu 2022](), [Broderick, Drouart and Seymour 2022]()) . In the upper-right corner inset, we show the radio SED for the z$_{spec}=6.8$ heavily obscured RL AGN from  ([Endsley, Stark and Lyu 2022]())  for reference. (*fig:radio_sed*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2308.12823"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

253  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

6  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [12]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

4  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
