# Publications markdown generator for academicpages

Takes a set of BibTeX files of publications and converts them for use with [academicpages.github.io](https://academicpages.github.io). This is an interactive Jupyter notebook ([see more info here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)).

The core Python code is also in `pubsFromBibs.py`.
Run either from the `markdown_generator` folder after updating the `publist` dictionary with:
* bib file names
* specific venue keys based on your bib file preferences
* any specific pre-text for specific files
* Collection Name (future feature)

TODO: Make this work with other databases of citations.
TODO: Merge this with the existing TSV parsing solution

In [11]:
from pybtex.database.input import bibtex
import pybtex.database.input.bibtex 
from time import strptime
import string
import html
import os
import re

In [12]:
# Bibliography sources to convert, keyed by a label for each source.
# TODO: incorporate different collection types rather than a catch-all
# "publications" collection; this requires other changes to the template.
publist = {
    "allpubs": {
        # BibTeX file handed to the pybtex parser.
        "file": "citations.bib",
        # Presumably the bib field that names the venue -- TODO confirm
        # against the generator loop that consumes this dictionary.
        "venuekey": "journal",
        # Presumably text placed in front of the venue -- TODO confirm
        # against the generator loop that consumes this dictionary.
        "venue-pretext": "",
        "collection": {
            # Written to the "collection:" line of each page's front matter.
            "name": "publications",
            # Prefix of the "permalink:" line of each page's front matter.
            "permalink": "/publications/"
        }
    }
}

In [13]:
# Characters replaced by html_escape, mapped to the entity that replaces them.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
}


def html_escape(text):
    """Return text with ampersands and both quote characters as entities.

    Characters that have no entry in ``html_escape_table`` are copied
    through unchanged.
    """
    lookup = html_escape_table.get
    return "".join([lookup(char, char) for char in text])

In [14]:
from urllib.parse import quote_plus

# Longest file-name stem kept before ".md" is appended; longer stems are cut.
MAX_STEM_LENGTH = 150
# Attribute suffix appended to generated links so they open in a new tab.
LINK_TARGET = '{:target="_blank"}'
# Bib fields tried, in order, when looking for the venue of an entry.
VENUE_FIELDS = ("journal", "booktitle", "publisher")


def _strip_bibtex_markup(text):
    """Return *text* without the braces and backslashes found in bib values."""
    return text.replace("{", "").replace("}", "").replace("\\", "")


def _publication_date(fields):
    """Return zero-padded ``(year, month, day)`` strings for a bib entry.

    Parts that are missing, unparseable or out of range fall back to
    "1900", "01" and "01" respectively, so the result always forms a
    ``YYYY-MM-DD`` date.
    """
    year, month, day = "1900", "01", "01"

    # "date" is only a fallback for "year" and may hold a full date such as
    # "2020-05-01"; keep just the four-digit year so the date stays valid.
    year_match = re.search(r"\d{4}", str(fields.get("year") or fields.get("date") or ""))
    if year_match:
        year = year_match.group(0)

    raw_month = str(fields.get("month", "")).strip()
    if raw_month.isdecimal():
        if 1 <= int(raw_month) <= 12:
            month = f"{int(raw_month):02d}"
    elif raw_month:
        try:
            # Only the first three letters are parsed, so "Jan" and
            # "January" are both accepted.
            month = f"{strptime(raw_month[:3], '%b').tm_mon:02d}"
        except ValueError:
            pass  # unknown month name: keep the "01" default

    # A non-numeric day used to raise ValueError and abort the whole run.
    raw_day = str(fields.get("day", "")).strip()
    if raw_day.isdecimal() and 1 <= int(raw_day) <= 31:
        day = f"{int(raw_day):02d}"

    return year, month, day


# Convert every entry of every configured bib file into one markdown page
# (YAML front matter plus a short body) inside ../_publications/.
# The output directory is the same for every entry, so resolve it once.
output_dir = os.path.abspath("../_publications/")
os.makedirs(output_dir, exist_ok=True)

for pubsource in publist:
    source = publist[pubsource]
    # NOTE(review): source["venuekey"] and source["venue-pretext"] are not
    # consulted below; the venue comes from VENUE_FIELDS instead.
    parser = bibtex.Parser()
    bibdata = parser.parse_file(source["file"])

    for bib_id in bibdata.entries:
        entry = bibdata.entries[bib_id]
        b = entry.fields

        try:
            pub_year, pub_month, pub_day = _publication_date(b)
            pub_date = f"{pub_year}-{pub_month}-{pub_day}"

            # clean title + slug (a missing title raises KeyError, handled below)
            title_clean = _strip_bibtex_markup(b["title"])
            clean_title = title_clean.replace(" ", "-")
            url_slug = re.sub(r"\[.*\]|[^a-zA-Z0-9_-]", "", clean_title).replace("--", "-")

            # Truncate the stem, not the finished name, so the ".md" suffix
            # can never be cut in half (which used to yield "....m.md").
            md_filename = f"{pub_date}-{url_slug}"[:MAX_STEM_LENGTH] + ".md"
            save_path = os.path.join(output_dir, md_filename)

            # venue: first populated field wins
            venue = "Unknown Venue"
            for venue_field in VENUE_FIELDS:
                if venue_field in b:
                    venue = _strip_bibtex_markup(b[venue_field])
                    break

            # The citation is assembled from raw text and escaped exactly once
            # when written to the front matter; escaping the title and venue
            # here as well turned "&" into "&amp;amp;".
            citation_parts = []
            for author in entry.persons.get("author", []):
                fname = author.first_names[0] if author.first_names else ""
                lname = author.last_names[0] if author.last_names else ""
                citation_parts.append(f" {fname} {lname}, ")
            citation_parts.append(f'"{title_clean}." {venue}, {pub_year}.')
            citation = "".join(citation_parts)

            has_note = "note" in b and len(str(b["note"])) > 5
            has_url = "url" in b and len(str(b["url"])) > 5

            # YAML front matter
            # NOTE(review): the permalink ends in ".md" because it reuses the
            # file name -- confirm this is the intended page URL.
            front_matter = [
                "---",
                f'title: "{html_escape(title_clean)}"',
                "collection: " + source["collection"]["name"],
                "permalink: " + source["collection"]["permalink"] + md_filename,
            ]
            if has_note:
                front_matter.append("excerpt: '" + html_escape(b["note"]) + "'")
            front_matter.append("date: " + pub_date)
            front_matter.append("venue: '" + html_escape(venue) + "'")
            if has_url:
                # Inside a single-quoted YAML scalar a quote is written as ''.
                front_matter.append("paperurl: '" + b["url"].replace("'", "''") + "'")
            front_matter.append("citation: '" + html_escape(citation) + "'")
            front_matter.append("---")
            md = "\n".join(front_matter) + "\n"

            # Markdown body
            if has_note:
                md += html_escape(b["note"]) + "\n"
            if has_url:
                md += f"\n[Access paper here]({b['url']}){LINK_TARGET}\n"
            else:
                # URL-encode the title so characters such as "&", "#" or
                # parentheses cannot break the query string or the link.
                scholar_url = "https://scholar.google.com/scholar?q=" + quote_plus(title_clean)
                md += f"\nUse [Google Scholar]({scholar_url}){LINK_TARGET} for full citation"

            # write file once
            with open(save_path, 'w', encoding="utf-8") as f:
                f.write(md)

            print(f"SUCCESSFULLY PARSED {bib_id}: \"{b['title'][:60]}{'...' if len(b['title'])>60 else ''}\" -> {md_filename}")

        except KeyError as e:
            # An entry without a required field is reported and skipped.
            print(f"WARNING Missing Expected Field {e} from entry {bib_id}")
            continue


SUCCESSFULLY PARSED arshad2024nitrogen: "Nitrogen Fertility Management Strategies of Bioenergy-Type S..." -> 2024-01-01-Nitrogen-Fertility-Management-Strategies-of-Bioenergy-Type-Switchgrass-for-Profitable-Biomass-Production.md
SUCCESSFULLY PARSED yufei2022empirical: "An empirical study of the key factors affecting herders' pur..." -> 2022-01-01-An-empirical-study-of-the-key-factors-affecting-herders-purchasing-decision-on-weather-index-insuranceA-case-study-from-inner-Mongolia-auto.md
SUCCESSFULLY PARSED lin2024impact: "The impact of rural e-commerce participation on farmers’ ent..." -> 2024-01-01-The-impact-of-rural-e-commerce-participation-on-farmers-entrepreneurial-behavior-Evidence-based-on-CFPS-data-in-China.md
SUCCESSFULLY PARSED guo2022farmers: "Farmers’ willingness to pay a high premium for different typ..." -> 2022-01-01-Farmers-willingness-to-pay-a-high-premium-for-different-types-of-agricultural-insurance-Evidence-from-Inner-Mongolia-China.md
SUCCESSFULLY PARSED liu2023unco