Add the identifier of your arXiv paper to `arxiv_publication_ids.txt` (one identifier per line) and run `update_arxiv_entries_in_publications_json()`. If you're running the code from this notebook, make sure that all of the functions below have been defined first. Your arXiv identifier is the last component of your paper's URL: `http://arxiv.org/abs/<identifier>`.

In [6]:
import urllib, urllib.request, xmltodict, json

def make_publication_dictionary(publication_info):
    '''
    Reformats arxiv publication information into our JSON format (returned as python dictionary).

    @param publication_info: xml from arxiv parsed into a python dictionary. Must hold exactly one
        publication under publication_info["feed"]["entry"].
    @return publication_dictionary: dictionary in expected JSON format. "publication_venue", "code"
        and "site" are left as empty strings to be filled in by hand; "comments" is an empty string
        when the entry carries no arxiv comment.
    @raise KeyError: if the feed has no entry or the entry lacks one of the required fields.
    '''
    publication_entry = publication_info["feed"]["entry"]

    # The XML-to-dict conversion only yields a list for elements that repeat, so a
    # single-author paper arrives as one dict rather than a one-element list.
    authors = publication_entry["author"]
    if isinstance(authors, dict):
        authors = [authors]

    # The comment is normally a dict (namespace attribute + "#text"), but may be a bare
    # string or None depending on how the element was serialized.
    comment = publication_entry.get("arxiv:comment")
    if isinstance(comment, dict):
        comment = comment.get("#text")
    if comment is None:
        comment = ""

    publication_dictionary = {
        "title": publication_entry["title"],
        "pdf": publication_entry["id"],
        "authors": [author["name"] for author in authors],
        "year": publication_entry["published"][:4],     # timestamp starts with the 4-digit year
        "abstract": publication_entry["summary"],       # note: escape characters are in here
        "published": publication_entry["published"],
        "updated": publication_entry["updated"],
        "publication_venue": "",
        "code": "",
        "site": "",
        "comments": comment
    }
    return publication_dictionary

def make_publication_entry(arxiv_id):
    '''
    Gets publication information from arxiv API and returns as a python dictionary
    in the format we want.

    @param arxiv_id: arxiv id of the publication (the last part of its abs url)
    @return publication_as_dictionary: formatted dictionary of just the values we want
    '''
    # Imported locally so this function does not depend on how urllib was imported at file level.
    from urllib.parse import quote

    # Look the paper up by id (id_list) instead of running a search (search_query):
    # a search can return several entries, which the formatter cannot handle.
    # quote() keeps "/" so old-style ids such as "hep-th/9901001" stay intact.
    url = "http://export.arxiv.org/api/query?id_list=" + quote(arxiv_id.strip())

    # The context manager closes the HTTP response even if reading/decoding fails.
    with urllib.request.urlopen(url) as arxiv_response:
        arxiv_data = arxiv_response.read().decode('utf-8')

    publication_info = xmltodict.parse(arxiv_data)
    publication_as_dictionary = make_publication_dictionary(publication_info)
    return publication_as_dictionary

def update_arxiv_entries_in_publications_json():
    '''
    Goes through all entries in arxiv_publication_ids.txt and adds entries for those that don't already
    have corresponding publications in publications.json. Does not change existing entries.

    Both files are read from / written to the current working directory. publications.json is
    always rewritten (indented by 4 spaces), even when no new entry was added.
    '''
    # One id per line; blank lines and surrounding whitespace are ignored so that an
    # empty id is never sent to the arxiv API.
    with open("arxiv_publication_ids.txt", "r") as arxiv_id_file:
        arxiv_ids = [line.strip() for line in arxiv_id_file if line.strip()]

    # A missing or empty/invalid publications.json is treated as "no publications yet".
    try:
        with open("publications.json", "r") as publications_json_file:
            publication_json = json.load(publications_json_file)
    except (FileNotFoundError, json.decoder.JSONDecodeError):
        publication_json = []

    # Entries without a "pdf" link (or with a null one) can never match an arxiv id;
    # map them to "" instead of failing on them.
    existing_arxiv_urls = [entry.get("pdf") or "" for entry in publication_json]
    already_added_ids = set()

    # Appends new arxiv entries to our object representation of the file
    for arxiv_id in arxiv_ids:
        if arxiv_id in already_added_ids:
            continue  # the same id is listed more than once in the id file
        # An id counts as present when it occurs anywhere inside an existing pdf url.
        if any(arxiv_id in url for url in existing_arxiv_urls):
            continue
        publication_as_dictionary = make_publication_entry(arxiv_id)
        publication_json.append(publication_as_dictionary)
        already_added_ids.add(arxiv_id)

    # Adds new arxiv entries to JSON
    with open('publications.json', "w") as publications_json_file:
        json.dump(publication_json, publications_json_file, indent=4)

In [7]:
# Run the update: reads arxiv_publication_ids.txt and rewrites publications.json.
update_arxiv_entries_in_publications_json()