In [1]:
from s2ag.librarian import Librarian, WebResearcher
from s2ag.persistence.database_catalogue import DatabaseCatalogue, test_connection
from s2ag.requester import ThrottledRequester
import jinja2
import tagme
import requests
from secrets.tag_me import TOKEN
from io import StringIO

In [2]:
# Hand the project's token to the tagme client module.
tagme.GCUBE_TOKEN = TOKEN

# Build the paper lookup service: a catalogue on top of the connection
# returned by test_connection(), plus a web researcher that goes through
# the throttled requester.
catalogue = DatabaseCatalogue(test_connection())
requester = ThrottledRequester()
librarian = Librarian(
    WebResearcher(requester),
    catalogue,
)

In [3]:
def apply(text, annotations, min_rho=0.1):
    """Return ``text`` with every annotated span rewritten as a Markdown link.

    Each annotation contributes ``[<span text>](<uri>)``, where the URI comes
    from ``tagme.title_to_uri(annotation.entity_title)``. Text between spans,
    before the first span and after the last span is copied through unchanged.

    Args:
        text: The string the annotations refer to.
        annotations: Object exposing ``get_annotations(threshold)``, which
            yields items with ``begin``, ``end`` and ``entity_title``
            attributes. ``None`` is accepted and means "no annotations".
        min_rho: Threshold forwarded to ``get_annotations``. Defaults to the
            previously hard-coded ``0.1``.

    Returns:
        The annotated Markdown string, or ``text`` itself when
        ``annotations`` is ``None``.
    """
    if annotations is None:
        # Nothing to link (e.g. the annotation request yielded no response).
        return text

    pieces = []
    cursor = 0
    # Process spans left to right regardless of the order they arrive in.
    spans = sorted(annotations.get_annotations(min_rho), key=lambda a: a.begin)
    for annotation in spans:
        if annotation.begin < cursor:
            # Overlaps a span that is already linked; emitting it as well
            # would duplicate part of the text.
            continue
        pieces.append(text[cursor:annotation.begin])
        label = text[annotation.begin:annotation.end]
        link = tagme.title_to_uri(annotation.entity_title)
        pieces.append(f'[{label}]({link})')
        cursor = annotation.end
    # Keep whatever follows the last annotated span.
    pieces.append(text[cursor:])
    return ''.join(pieces)


In [4]:
# Shared Jinja2 environment (default settings); make_page() uses it to
# compile template_string into a renderable template.
environment = jinja2.Environment()

In [23]:
# Markdown layout of a paper page, written as a Jinja2 template.
# make_page() supplies the placeholders: title, year, pub_date, journal,
# icc, abstract, authors, url and pdf_url.
template_string = """
**Title: {{ title }}**

*Year:  {{ year  }}*

*publication date {{ pub_date }}*

*Journal: {{ journal }}*

*influential citations: {{ icc }}*

**Abstract:** {{ abstract }}

**Authors:** {{ authors }}

{{ url }}

**PDF:** [{{ title }}]({{ pdf_url }})
"""

In [24]:
def make_page(pid: str):
    """Fetch paper ``pid`` and write a Markdown summary page for it.

    The paper is obtained from ``librarian.get_paper(pid)``, its abstract is
    annotated through ``tagme.annotate`` and turned into Markdown links with
    ``apply``, and the result is rendered through ``template_string``. The
    page is written to ``"<title> (<pid>).md"`` in the current working
    directory, encoded as UTF-8.

    Optional metadata is tolerated: a missing abstract, journal, open-access
    PDF or author list renders as an empty value instead of raising.
    NOTE(review): assumes these attributes can be ``None`` when the upstream
    record lacks them -- confirm against the Librarian's paper type.

    Args:
        pid: Identifier passed to ``librarian.get_paper``.
    """
    paper = librarian.get_paper(pid)
    # A '/' in the title would be treated as a directory separator by open().
    page = f'{paper.title} ({pid})'.replace('/', '-')
    template = environment.from_string(template_string)

    abstract = paper.abstract or ''
    # Skip the annotation request entirely when there is nothing to annotate.
    abstract_annotation = tagme.annotate(abstract) if abstract else None
    if abstract_annotation is not None:
        abstract = apply(abstract, abstract_annotation)
    # Otherwise keep the plain abstract: no annotation response was available.

    author_names = [
        entry['name'] for entry in (paper.authors or []) if entry['name']
    ]

    md = template.render(
        title=paper.title,
        year=paper.year,
        pub_date=paper.publication_date,
        journal=_item_or_default(paper.journal, 'name'),
        icc=paper.influential_citation_count,
        abstract=abstract,
        authors=', '.join(author_names),
        url=paper.url,
        pdf_url=_item_or_default(paper.open_access_pdf, 'url'),
    )
    # Explicit encoding: abstracts and titles routinely contain non-ASCII
    # characters that a locale-dependent default encoding may not accept.
    with open(f'{page}.md', 'w', encoding='utf-8') as p:
        p.write(md)


def _item_or_default(container, key, default=''):
    """Return ``container[key]``, or ``default`` if the container or item is absent."""
    if container is None:
        return default
    try:
        value = container[key]
    except KeyError:
        return default
    return default if value is None else value

In [25]:
# Generate the Markdown page for one paper id.
# Alternative id kept for reference (call currently disabled):
#   649def34f8be52c8b66281af98ae884c09aef38b
make_page('df816eae80d0da4fe2e6d58c70a51ac6e4124bfd')