Copyright (c) 2026 Joerg Stueckler

This file has been adapted from https://github.com/academicpages/academicpages.github.io, 
originally licensed under MIT License (see LICENSE_AcademicPages in root folder).


# Publications markdown generator for academicpages

Takes a set of BibTeX publication entries and converts them into markdown files for use with [academicpages.github.io](https://academicpages.github.io). This is an interactive Jupyter notebook ([see more info here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)). 

The core python code is also in `pubsFromBibs.py`. 
Run either one from the `markdown_generator` folder after updating the `publist` dictionary with:
* bib file names
* specific venue keys based on your bib file preferences
* any specific pre-text for specific files
* Collection Name (future feature)

TODO: Make this work with other databases of citations.
TODO: Merge this with the existing TSV parsing solution

In [234]:
from pybtex.database.input import bibtex
import pybtex.database.input.bibtex 
from pybtex.database.output.bibtex import Writer
from pybtex.database import BibliographyData
from time import strptime
import string
import html
import os
import re

In [235]:
# TODO: incorporate different collection types rather than a catch-all
# "publications" collection; this requires other changes to the template.
def _bib_source(venuekey, collection_type, bib_file="publications.bib", venue_pretext=""):
    """Build the configuration for one BibTeX entry type.

    venuekey        -- BibTeX field that holds the venue for this entry type
    collection_type -- value stored under collection["type"]
    bib_file        -- bib file the entries of this type are read from
    venue_pretext   -- text placed in front of the venue string
    """
    return {
        "file": bib_file,
        "venuekey": venuekey,
        "venue-pretext": venue_pretext,
        "collection": {
            "name": "publications",
            "type": collection_type,
            "permalink": "/publication/",
        },
    }


# Maps a BibTeX entry type to the settings used when generating its pages.
publist = {
    "inproceedings": _bib_source("booktitle", "conference"),
    "proceedings": _bib_source("booktitle", "conference"),
    "journal": _bib_source("journal", "journal"),
    "article": _bib_source("journal", "journal"),
    "inbook": _bib_source("booktitle", "bookchapter"),
    "techreport": _bib_source("institution", "techreport"),
}

In [236]:
# Characters that are swapped for an HTML entity before a value is written
# into the generated front matter. "<" and ">" are not listed and therefore
# pass through unchanged.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
}


def html_escape(text):
    """Return *text* with every character found in html_escape_table replaced
    by its entity; all other characters are copied as they are."""
    pieces = [
        html_escape_table[ch] if ch in html_escape_table else ch
        for ch in text
    ]
    return "".join(pieces)

In [237]:
# Generate one markdown file (YAML front matter only) per BibTeX entry.
#
# For every entry type configured in `publist`, the matching entries of the
# configured bib file are converted and written to ../_publications/ as
# "<year>-<month>-<day>-<title-slug>.md". Entries lacking a required field
# (year, title, author or the configured venue field) are skipped with a
# warning instead of aborting the run.

# LaTeX umlaut spellings and their replacements; the braced forms come first
# so that their braces are consumed together with the accent command.
_UMLAUT_REPLACEMENTS = (
    ("{\\\"o}", "ö"),
    ("{\\\"u}", "ü"),
    ("{\\\"a}", "ä"),
    ("\\\"o", "ö"),
    ("\\\"u", "ü"),
    ("\\\"a", "ä"),
)

# Optional fields that are copied into a front-matter key of the same name.
_OPTIONAL_LINK_FIELDS = ("preprint_url", "code_url", "data_url", "video_url")


def _month_to_number(raw_month, default="01"):
    """Return a two-digit month string for a BibTeX ``month`` value.

    Purely numeric values are zero-padded ("5" -> "05"). Anything else is
    parsed from its first three characters as an abbreviated month name
    ("January" -> "01"); note that ``%b`` follows the active locale.
    Values that cannot be interpreted yield *default* rather than raising,
    so one malformed entry does not stop the whole run.
    """
    month = str(raw_month).strip()
    if month.isdecimal():
        return "{:02d}".format(int(month))
    try:
        return "{:02d}".format(strptime(month[:3], "%b").tm_mon)
    except ValueError:
        return default


def _strip_bibtex_markup(text):
    """Remove the braces and backslashes BibTeX uses to preserve formatting."""
    return text.replace("{", "").replace("}", "").replace("\\", "")


def _author_name(person):
    """Return the first given name and first last name of *person*.

    Either part may be absent (e.g. an organisation listed as author); the
    missing part is simply left out instead of raising IndexError.
    """
    name = " ".join(person.first_names[:1] + person.last_names[:1])
    for latex, char in _UMLAUT_REPLACEMENTS:
        name = name.replace(latex, char)
    return name


# Several entry types usually share one bib file; parse each file only once.
parsed_bibs = {}
writer = Writer()

for pubsource, source_cfg in publist.items():
    bib_file = source_cfg["file"]
    if bib_file not in parsed_bibs:
        # A new Parser is created for every parse, as the original code did.
        parsed_bibs[bib_file] = bibtex.Parser().parse_file(bib_file)
    bibdata = parsed_bibs[bib_file]
    collection = source_cfg["collection"]

    # loop through the individual references in a given bibtex file
    for bib_id, entry in bibdata.entries.items():

        # only handle the entries of the type currently being processed
        if entry.type != pubsource:
            continue

        b = entry.fields

        try:
            pub_bibtex_string = writer.to_string(BibliographyData(entries={bib_id: entry}))

            # month and day fall back to "01" when the entry does not set them
            pub_year = f'{b["year"]}'
            pub_month = _month_to_number(b["month"]) if "month" in b else "01"
            pub_day = str(b["day"]) if "day" in b else "01"
            pub_date = pub_year + "-" + pub_month + "-" + pub_day

            # strip out {} as needed (some bibtex entries maintain formatting)
            clean_title = _strip_bibtex_markup(b["title"])

            # slug: spaces become dashes, bracketed text and any character
            # outside [a-zA-Z0-9_-] is dropped
            url_slug = re.sub("\\[.*\\]|[^a-zA-Z0-9_-]", "", clean_title.replace(" ", "-"))
            url_slug = url_slug.replace("--", "-")

            md_filename = (pub_date + "-" + url_slug + ".md").replace("--", "-")
            html_filename = (pub_date + "-" + url_slug).replace("--", "-")

            # citation authors - todo - add highlighting for primary author?
            # Each name keeps a leading space so the emitted "authors:" line
            # is identical to what earlier runs of this notebook produced.
            authors = ", ".join(" " + _author_name(person) for person in entry.persons["author"])

            # add venue logic depending on citation type
            venue = source_cfg["venue-pretext"] + _strip_bibtex_markup(b[source_cfg["venuekey"]])

            ## YAML variables
            front_matter = [
                "---",
                "# This file has been autogenerated by PubsFromBib.ipynb",
                'title: "' + html_escape(clean_title) + '"',
                "collection: " + collection["name"],
                "bib_id: '" + html_escape(bib_id) + "'",
                "permalink: " + collection["permalink"] + html_filename,
                "authors: " + html_escape(authors),
                "date: " + pub_date,
                "year: " + pub_year,
                "venue: '" + html_escape(venue) + "'",
            ]

            if "doi" in b:
                front_matter.append("doi: '" + b["doi"] + "'")
                front_matter.append("doi_url: 'https://doi.org/" + b["doi"] + "'")

            # values of five characters or fewer are treated as placeholders
            if "url" in b and len(str(b["url"])) > 5:
                front_matter.append("publisher_url: '" + b["url"] + "'")

            for field in _OPTIONAL_LINK_FIELDS:
                if field in b and len(str(b[field])) > 5:
                    front_matter.append(field + ": '" + b[field] + "'")

            # the raw BibTeX record as a YAML literal block (two-space indent)
            front_matter.append("bib_entry: |")
            front_matter.append("\n".join("  " + line for line in pub_bibtex_string.splitlines()))

            # Quoted like `venue`: an unquoted note containing ": " or " #"
            # would be misread by the YAML parser. html_escape has already
            # replaced any single quote, so the quoting cannot be broken.
            if "note" in b and len(str(b["note"])) > 5:
                front_matter.append("note: '" + html_escape(b["note"]) + "'")

            front_matter.append("---")
            md = "\n".join(front_matter) + "\n"

            md_filename = os.path.basename(md_filename)

            with open("../_publications/" + md_filename, 'w', encoding="utf-8") as f:
                f.write(md)
        # field may not exist for a reference
        except KeyError as e:
            # the title itself may be the missing field, so never index it here
            title = b.get("title", "")
            print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \"', title[:30], "..." * (len(title) > 30), "\"")
            continue


