# Publications markdown generator for academicpages

Takes a set of BibTeX files of publications and converts them for use with [academicpages.github.io](https://academicpages.github.io). This is an interactive Jupyter notebook ([see more info here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)).

The core python code is also in `pubsFromBibs.py`. 
Run either one from the `markdown_generator` folder after updating the `publist` dictionary with:
* bib file names
* specific venue keys based on your bib file preferences
* any specific pre-text for specific files
* Collection Name (future feature)

TODO: Make this work with other databases of citations.
TODO: Merge this with the existing TSV parsing solution.

In [3]:
!pip install pybtex

Collecting pybtex
  Downloading pybtex-0.24.0-py2.py3-none-any.whl (561 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m561.4/561.4 kB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
Collecting latexcodec>=1.0.4
  Downloading latexcodec-3.0.0-py3-none-any.whl (18 kB)
Installing collected packages: latexcodec, pybtex
Successfully installed latexcodec-3.0.0 pybtex-0.24.0


In [44]:
import html
import os
import re
import string
import unicodedata
from time import strptime

import pybtex.database.input.bibtex
from pybtex.database.input import bibtex

In [139]:
#todo: incorporate different collection types rather than a catch all publications, requires other changes to template
def _publication_collection(category):
    """Build the collection settings shared by every source; only the category varies."""
    return {
        "name": "publications",
        "category": category,
        "permalink": "/publication/",
    }


# One entry per BibTeX source file:
#   file          - name of the .bib file to parse
#   venuekey      - BibTeX field that holds the venue for this kind of entry
#   venue-pretext - text placed in front of the venue in the citation
#   collection    - collection name, category and permalink prefix of the generated pages
publist = {
    "proceeding": {
        "file": "proceedings.bib",
        "venuekey": "booktitle",
        "venue-pretext": "In the proceedings of ",
        "collection": _publication_collection("publications"),
    },
    "thesis": {
        "file": "thesis.bib",
        "venuekey": "school",
        "venue-pretext": "",
        "collection": _publication_collection("thesis"),
    },
    "journals": {
        "file": "survey.bib",
        "venuekey": "journal",
        "venue-pretext": "",
        "collection": _publication_collection("survey"),
    },
    "preprints": {
        "file": "preprints.bib",
        "venuekey": "type",
        "venue-pretext": "",
        "collection": _publication_collection("preprints"),
    },
}

In [140]:
# Characters that are replaced by an HTML entity, keyed by the raw character.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
}


def html_escape(text):
    """Return ``text`` with ``&``, ``"`` and ``'`` replaced by their HTML entities.

    Every other character is passed through unchanged.
    """
    pieces = []
    for char in text:
        pieces.append(html_escape_table.get(char, char))
    return "".join(pieces)


import re

# Dictionary of LaTeX-like sequences and their Unicode equivalents.
# NOTE: the keys are regular-expression patterns (they are handed to re.sub),
# which is why every literal backslash is written as an escaped pair.
latex_to_unicode = {
    r'{\\\"o}': 'ö',
    r'{\\\"a}': 'ä',
    r'{\\\'a}': 'á',
    r'{\\\'e}': 'é',
    r'{\\\'i}': 'í',
    r'{\\\'o}': 'ó',
    r'{\\\'u}': 'ú',
    r'{\\\`a}': 'à',
    r'{\\\`e}': 'è',
    r'{\\\`i}': 'ì',
    r'{\\\`o}': 'ò',
    r'{\\\`u}': 'ù',
    r'{\\\^a}': 'â',
    r'{\\\^e}': 'ê',
    r'{\\\^i}': 'î',
    r'{\\\^o}': 'ô',
    r'{\\\^u}': 'û',
    r'{\\\~n}': 'ñ',
    r'{\\\~o}': 'õ'
}

# Unicode combining marks for the LaTeX accent commands handled generically.
_LATEX_ACCENT_MARKS = {
    '"': '\u0308',  # \"  diaeresis
    "'": '\u0301',  # \'  acute
    '`': '\u0300',  # \`  grave
    '^': '\u0302',  # \^  circumflex
    '~': '\u0303',  # \~  tilde
}

# One accent command applied to a single ASCII letter: \"o or \"{o}
_LATEX_ACCENT_CORE = r'''\\(["'`^~])(?:\{([A-Za-z])\}|([A-Za-z]))'''
# The same command wrapped in its own brace group: {\"o} or {\"{o}}
_LATEX_BRACED_ACCENT_RE = re.compile(r'\{' + _LATEX_ACCENT_CORE + r'\}')
_LATEX_BARE_ACCENT_RE = re.compile(_LATEX_ACCENT_CORE)


def _compose_latex_accent(match):
    """Turn one matched accent command into the accented Unicode letter."""
    accent, braced_letter, bare_letter = match.groups()
    letter = braced_letter or bare_letter
    # NFC folds letter + combining mark into the precomposed character when one exists.
    return unicodedata.normalize('NFC', letter + _LATEX_ACCENT_MARKS[accent])


# Function to replace LaTeX-like sequences with Unicode characters
def convert_latex_to_unicode(text):
    """Replace LaTeX accent sequences in ``text`` with Unicode characters.

    The explicit ``latex_to_unicode`` table is applied first. Any remaining
    diaeresis, acute, grave, circumflex or tilde command on a single ASCII
    letter (``{\\"u}``, ``{\\"{u}}``, ``\\"{u}`` or ``\\"u``, either case) is
    then converted generically, so letters missing from the table are handled
    too. Text without such sequences is returned unchanged.
    """
    for latex_char, unicode_char in latex_to_unicode.items():
        text = re.sub(latex_char, unicode_char, text)
    # Braced groups first so the surrounding braces are consumed with the accent.
    text = _LATEX_BRACED_ACCENT_RE.sub(_compose_latex_accent, text)
    return _LATEX_BARE_ACCENT_RE.sub(_compose_latex_accent, text)

In [150]:
# Convert every configured BibTeX file into one markdown page per entry.
# For each entry this builds a date, a URL slug, an author list, a citation
# string and the YAML front matter, then writes the page to ../_publications/.
# Entries that lack a required field are reported and skipped.
for pubsource in publist:
    source_cfg = publist[pubsource]
    parser = bibtex.Parser()
    bibdata = parser.parse_file(source_cfg["file"])

    #loop through the individual references in a given bibtex file
    for bib_id in bibdata.entries:
        #reset default date
        pub_year = "1900"
        pub_month = "01"
        pub_day = "01"

        b = bibdata.entries[bib_id].fields

        try:
            pub_year = f'{b["year"]}'

            if "month" in b.keys():
                month_raw = str(b["month"]).strip()
                if month_raw.isdecimal() and 1 <= int(month_raw) <= 12:
                    # numeric month: zero-pad to two digits
                    pub_month = "{:02d}".format(int(month_raw))
                else:
                    # month name: parse its first three letters as an abbreviated name
                    try:
                        pub_month = "{:02d}".format(strptime(month_raw[:3], '%b').tm_mon)
                    except ValueError:
                        # keep the default month instead of aborting the whole run
                        print(f'WARNING Unrecognised month {month_raw!r} in entry {bib_id}; using {pub_month}')
            if "day" in b.keys():
                pub_day = str(b["day"])

            pub_date = pub_year+"-"+pub_month+"-"+pub_day

            #strip out {} and backslashes as needed (some bibtex entries that maintain formatting)
            plain_title = b["title"].replace("{", "").replace("}","").replace("\\","")
            clean_title = plain_title.replace(" ","-")

            url_slug = re.sub("\\[.*\\]|[^a-zA-Z0-9_-]", "", clean_title)
            url_slug = url_slug.replace("--","-")

            md_filename = (str(pub_date) + "-" + url_slug + ".md").replace("--","-")
            html_filename = (str(pub_date) + "-" + url_slug).replace("--","-")

            #Build Citation from text
            citation = ""
            authors = []
            #citation authors - todo - add highlighting for primary author?
            for author in bibdata.entries[bib_id].persons["author"]:
                if len(author.first_names) > 0:
                    cit = author.first_names[0]+" "+author.last_names[0]
                else:
                    cit = author.last_names[0]
                citation = citation+" "+cit + ", "
                authors.append(cit)

            authors = ', '.join(authors)
            authors = convert_latex_to_unicode(authors)

            #add venue logic depending on citation type
            venue = source_cfg["venue-pretext"]+b[source_cfg["venuekey"]].replace("{", "").replace("}","").replace("\\","")

            # Assemble the citation from unescaped text: it is HTML-escaped exactly
            # once when it is written to the front matter below. Escaping the title
            # and venue here as well would turn "&" into "&amp;amp;".
            citation = convert_latex_to_unicode(citation) + "\"" + plain_title + ".\""
            citation = citation + " " + venue
            citation = citation + ", " + pub_year + "."

            ## YAML variables
            md = "---\ntitle: \""   + html_escape(plain_title) + '"\n'

            md += """authors: """ + authors + '\n'

            md += """collection: """ +  source_cfg["collection"]["name"]

            md += """\ncategory: """ +  source_cfg["collection"]["category"]

            md += """\npermalink: """ + source_cfg["collection"]["permalink"]  + html_filename

            # notes / urls of five characters or fewer are treated as absent
            note = False
            if "note" in b.keys():
                if len(str(b["note"])) > 5:
                    md += "\nexcerpt: '" + html_escape(b["note"]) + "'"
                    note = True

            md += "\ndate: " + str(pub_date)

            md += "\nvenue: '" + html_escape(venue) + "'"

            url = False
            if "url" in b.keys():
                if len(str(b["url"])) > 5:
                    md += "\npaperurl: '" + b["url"] + "'"
                    url = True

            code = False
            if "code" in b.keys():
                if len(str(b["code"])) > 5:
                    md += "\ncodeurl: '" + b["code"] + "'"
                    code = True

            md += "\ncitation: '" + html_escape(citation) + "'"

            md += "\n---"

            ## Markdown description for individual page
            if note:
                md += "\n" + html_escape(b["note"]) + "\n"

            # Optional links in the page body (currently disabled):
            # if url:
            #     md += "\n[Paper](" + b["url"] + "){:target=\"_blank\"}"
            # #else:
            # #    md += "\nUse [Google Scholar](https://scholar.google.com/scholar?q="+html.escape(clean_title.replace("-","+"))+"){:target=\"_blank\"} for full citation"
            # if code:
            #     md += " [Code](" + b["code"] + "){:target=\"_blank\"}\n"

            md_filename = os.path.basename(md_filename)

            with open("../_publications/" + md_filename, 'w', encoding="utf-8") as f:
                f.write(md)
            print(f'SUCESSFULLY PARSED {bib_id}: \"', b["title"][:60],"..."*(len(b['title'])>60),"\"")
        # field may not exist for a reference
        except KeyError as e:
            # the title itself may be the missing field, so never index it blindly here
            title_preview = b["title"] if "title" in b.keys() else ""
            print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \"', title_preview[:30],"..."*(len(title_preview)>30),"\"")
            continue


SUCESSFULLY PARSED gunnarsson2020learning: " Learning a Deformable Registration Pyramid  "
SUCESSFULLY PARSED gunnarsson2022unsupervised: " Unsupervised dynamic modeling of medical image transformatio ... "
SUCESSFULLY PARSED gunnarsson2024online: " Online Learning in Motion Modeling for Intra-interventional  ... "
SUCESSFULLY PARSED gunnarsson2021registration: " On the Registration and Modeling of Sequential Medical Image ... "
SUCESSFULLY PARSED hering2022learn2reg: " Learn2Reg: comprehensive multi-task medical image registrati ... "
SUCESSFULLY PARSED gunnarsson2023diffusion: " {Diffusion-Based 3D Motion Estimation from Sparse 2D Observa ... "
SUCESSFULLY PARSED gunnarsson2024cinemri: " Machine learning-based 3D deformable motion modeling for MRI ... "
SUCESSFULLY PARSED gunnarsson2020registration: " Registration by tracking for sequential 2D MRI  "


In [148]:
# Scratch check: escaped, brace-stripped title of the entry `b` left over from the conversion loop.
html_escape(b["title"].replace("{", "").replace("}","").replace("\\","")) 

'Registration by tracking for sequential 2D MRI'

In [149]:
# Scratch check: show the page text `md` left over from the last entry handled by the conversion loop.
print(md)

---
title: "Registration by tracking for sequential 2D MRI"
authors: Niklas Gunnarsson, Jens Sjölund, Thomas Schön
collection: publications
category: preprints
permalink: /publication/2020-01-01-Registration-by-tracking-for-sequential-2D-MRI
date: 2020-01-01
venue: 'Preprint'
paperurl: 'https://arxiv.org/abs/2003.10819'
citation: ' Niklas Gunnarsson,  Jens Sj{\&quot;o}lund,  Thomas Sch{\&quot;o}n, &quot;Registration by tracking for sequential 2D MRI.&quot; Preprint, 2020.'
---


In [146]:
# Scratch check: joined author string left over from the conversion loop.
authors

'Niklas Gunnarsson, Jens Sjölund, Thomas Schön'

In [101]:
# Scratch check: last single-author string built in the conversion loop (not passed through convert_latex_to_unicode).
cit

'Thomas Sch{\\"o}n'