# Publications markdown generator for academicpages

Takes a BibTeX file of publications and converts each entry into a markdown file for use with [academicpages.github.io](https://academicpages.github.io). This is an interactive Jupyter notebook ([see more info here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)). 

The core python code is also in `pubsFromBibs.py`. 
Run either one from the `markdown_generator` folder after updating the `publist` dictionary with:
* bib file names
* specific venue keys based on your bib file preferences
* any specific pre-text for specific files
* Collection Name (future feature)

TODO: Make this work with other databases of citations.
TODO: Merge this with the existing TSV parsing solution

In [19]:
from pybtex.database.input import bibtex
import pybtex.database.input.bibtex 
from time import strptime
import string
import html
import os
import re

In [20]:
# TODO: incorporate different collection types rather than a catch-all
# "publications" collection; this requires other changes to the template.

# Last name of the author whose name is underlined in generated citations.
highlight_author_lastname = 'Girardi-Schappo'

# Conversion settings: the "file" key is the BibTeX source; every other key is
# a BibTeX entry type mapped to the field used as venue ("venuekey"), a text
# prefix for that venue ("venue-pretext") and the target site collection.
publist = {
    "file": "D:/Dropbox/p/documentos/curriculum_vitae/template03_overleaf_em_uso/meus_artigos.bib",
    **{
        entry_type: {
            "venuekey": venue_field,
            "venue-pretext": "",
            # A fresh dict per entry type, so the settings stay independent.
            "collection": {
                "name": "publications",
                "permalink": "/publication/",
            },
        }
        for entry_type, venue_field in (
            ("mastersthesis", "school"),
            ("phdthesis", "school"),
            ("incollection", "booktitle"),
            ("inproceedings", "booktitle"),
            ("book", "address"),
            ("article", "journal"),
        )
    },
}

In [21]:
# Characters replaced by HTML entities in generated text. Angle brackets are
# not listed here, so they pass through html_escape unchanged.
html_escape_table = {
    '&': '&amp;',
    '"': '&quot;',
    "'": '&apos;',
}


def html_escape(text):
    """Return text with ampersands and quote characters turned into entities.

    Each character is looked up in ``html_escape_table``; characters without
    an entry are copied through as they are.
    """
    pieces = []
    for char in text:
        pieces.append(html_escape_table.get(char, char))
    return "".join(pieces)

def clean_bibstring(text):
    """Return text with every curly brace and backslash removed."""
    for unwanted in ("{", "}", "\\"):
        text = text.replace(unwanted, "")
    return text

In [22]:

# Convert every entry of the configured BibTeX file into one markdown file
# (YAML front matter + optional note/abstract) under ../_publications/.
# The output directory is resolved relative to the current working directory.

# Three-letter English month abbreviations -> two-digit month numbers.
_MONTH_NUMBERS = {
    abbr: f"{number:02d}"
    for number, abbr in enumerate(
        ("jan", "feb", "mar", "apr", "may", "jun",
         "jul", "aug", "sep", "oct", "nov", "dec"),
        start=1,
    )
}


def _normalize_month(raw_month, fallback):
    """Return raw_month as a two-digit month number string.

    Numeric months ("3", "03") are zero-padded; English month names or
    abbreviations ("March", "mar") are matched on their first three letters.
    Anything that cannot be recognised yields ``fallback``.
    """
    token = str(raw_month).strip()
    if token.isdecimal():
        number = int(token)
        return f"{number:02d}" if 1 <= number <= 12 else fallback
    return _MONTH_NUMBERS.get(token[:3].lower(), fallback)


parser = bibtex.Parser()
bibdata = parser.parse_file(publist["file"])
pubtype_default = 'paper'

# Fallback date parts for entries that do not provide a month / day.
pub_month_default = "01"
pub_day           = "01"

#loop through the individual references in a given bibtex file
for bib_id in bibdata.entries:
    entry = bibdata.entries[bib_id]
    b = entry.fields

    try:
        # A missing "year" raises KeyError and the entry is skipped below.
        pub_year = f'{b["year"]}'
        # Normalise the month so the date and the file name are always YYYY-MM-DD.
        pub_month = _normalize_month(b["month"], pub_month_default) if 'month' in b else pub_month_default
        pub_date = pub_year+"-"+pub_month+"-"+pub_day
        pub_type = b['pubtype'] if 'pubtype' in b else pubtype_default

        #strip out {} as needed (some bibtex entries that maintain formatting)
        plain_title = clean_bibstring(b["title"])
        clean_title = plain_title.replace(" ","-")

        #url_slug = re.sub("\\[.*\\]|[^a-zA-Z0-9_-]", "", clean_title)
        url_slug = re.sub("[^a-zA-Z0-9_-]", "", clean_title)
        url_slug = url_slug.replace("--","-")

        md_filename   = (str(pub_date) + "-" + url_slug + ".md").replace("--","-")
        # Only needed by the (currently disabled) permalink line further down.
        html_filename = (str(pub_date) + "-" + url_slug).replace("--","-")

        #Build Citation from text

        #citation authors: "First Last" per author, comma separated, with the
        #highlighted author underlined
        author_names = []
        for author in entry.persons["author"]:
            # Only the first token of the first/last name is used. Either list
            # may be empty (e.g. a single-word author), so guard the lookups.
            first = clean_bibstring(author.first_names[0]) if author.first_names else ""
            last = clean_bibstring(author.last_names[0]) if author.last_names else ""
            author_name = " ".join(part for part in (first, last) if part)
            if highlight_author_lastname == last:
                author_name = '<u>' + author_name + '</u>'
            author_names.append(author_name)
        # Joining puts separators only between authors (no trailing comma).
        citation = ", ".join(author_names)

        # citation year
        citation += ' ('+ pub_year +'):'

        #citation title
        citation = citation + "<i>" + html_escape(plain_title) + ".</i>"

        #add venue logic depending on citation type
        # An entry type without a publist section, or an entry without the
        # configured venue field, raises KeyError here and is skipped below.
        pubsource = entry.type.lower()
        venue = publist[pubsource]["venue-pretext"] + clean_bibstring(b[publist[pubsource]["venuekey"]])

        if pubsource in ['book','incollection','inproceedings']:
            if 'publisher' in b:
                venue = venue + ', ' + b['publisher']

        citation += " <b>" + html_escape(venue)
        if 'volume' in b:
            citation += ' ' + html_escape(b['volume'])
        citation += '</b>'
        if 'pages' in b:
            citation += ': ' + html_escape(b['pages'])
        citation += '.'


        ## YAML variables
        # Quoted values are passed through html_escape, which turns both quote
        # characters into entities so they cannot terminate the YAML string.
        md = "---\ntitle: \""   + html_escape(plain_title) + '"\n'

        md += """collection: """ +  publist[pubsource]["collection"]["name"]

        #md += """\npermalink: """ + publist[pubsource]["collection"]["permalink"]  + html_filename

        # Notes of five characters or fewer are treated as empty.
        note = False
        if "note" in b:
            if len(str(b["note"])) > 5:
                md += "\nexcerpt: '" + html_escape(b["note"]) + "'"
                note = True

        md += "\ndate: " + str(pub_date)
        md += "\nyear: " + pub_year

        md += "\nvenue: '" + html_escape(venue) + "'"

        # Prefer an explicit URL; otherwise fall back to a link built from the DOI.
        paper_url = ''
        url = False
        if "url" in b:
            if len(str(b["url"])) > 5:
                paper_url = str(b["url"])
                url       = True

        if not url:
            if "doi" in b:
                if len(str(b["doi"])) > 5:
                    paper_url = "https://dx.doi.org/" + str(b["doi"])
                    url       = True

        if url and (len(paper_url)>0):
            md += "\npaperurl: '" + paper_url + "'"

        md += "\ncitation: '" + html_escape(citation) + "'"

        md += "\npubtype:  " + pub_type

        md += "\n---"


        ## Markdown description for individual page
        if note:
            md += "\n" + html_escape(b["note"]) + "\n"

        if "abstract" in b:
            md += '\n' + b['abstract']

        #obj_type = 'paper'
        #if bibdata.entries[bib_id].type.lower() in ['book','incollection','inproceedings']:
        #    obj_type = 'book'
        #elif 'thesis' in bibdata.entries[bib_id].type.lower():
        #    obj_type = 'dissertation'
        #if url:
        #    md += "\n[Access " + obj_type + " here](" + paper_url + "){:target=\"_blank\"}\n" 
        #else:
        #    md += "\nUse [Google Scholar](https://scholar.google.com/scholar?q="+html.escape(clean_title.replace("-","+"))+"){:target=\"_blank\"} for full citation"

        md_filename = os.path.basename(md_filename)
        with open("../_publications/" + md_filename, 'w', encoding='utf-8') as f:
            f.write(md)
        print(f'SUCESSFULLY PARSED {bib_id}: \"', b["title"][:60],"..."*(len(b['title'])>60),"\"")
    # field may not exist for a reference
    except KeyError as e:
        # The missing field may be the title itself, so never index it blindly
        # here: a second KeyError would escape the handler and abort the run.
        title = b["title"] if "title" in b else ""
        print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \"', title[:30],"..."*(len(title)>30),"\"")
        continue


SUCESSFULLY PARSED Girardi2010: " Um modelo concreto para o estudo da estabilidade nuclear no  ... "
SUCESSFULLY PARSED Girardi2013Posit: " A random walk approach to the diffusion of positrons in gase ... "
SUCESSFULLY PARSED Girardi2013map: " A brief history of excitable map-based neurons and neural ne ... "
SUCESSFULLY PARSED Girardi2013aval: " Critical avalanches and subsampling in map-based neural netw ... "
SUCESSFULLY PARSED Girardi2016dyn: " Information processing occurs via critical avalanches in a m ... "
SUCESSFULLY PARSED Girardi2016: " {G}riffiths phase and long-range correlations in a biologica ... "
SUCESSFULLY PARSED Girardi2017: " Phase diagrams and dynamics of a computationally efficient m ... "
SUCESSFULLY PARSED Girardi2018: " Measuring neuronal avalanches in disordered systems with abs ... "
SUCESSFULLY PARSED Girardi2019: " Comment on ``{C}onvergence towards asymptotic state in 1-{D} ... "
SUCESSFULLY PARSED Lima2020Grang: " {G}ranger causality in the frequency dom