In [1]:
## some options
inputBibFileName = "../../_bibliography/papers.bib"
outputJSONFileName = "collab_net.json"
#
authorInformationFile = (
    ""  #'authorinfoRosalindFranklin.csv' # optional co-author information
)
deleteEgoNode = False
##

import csv  # for loading comma seperated values
import json  # for writing the json
import re

# import necessary libraries
from pybtex.database.input import bibtex  # for reading the bib files


# some auxiliary functions
# you migth have to add further repalcement rules
def latex2unicode(latexString):
    """takes the name of an author as string and return the string
    with latex character replaced as normal string for the HTML"""
    latexString = latexString.replace('{\\"u}', "ü")
    latexString = latexString.replace("{\\'o}", "ó")
    latexString = latexString.replace("{\\'a}", "á")
    latexString = latexString.replace("{\\~a}", "ã")

    return latexString


# some preperation to read the bibtex file
parser = bibtex.Parser()
bib_data = parser.parse_file(inputBibFileName)


listOfAuthors = []  # empty list of authors

In [2]:
# clean data
for paperKeys in bib_data.entries.keys():
    # convert author names to strings and remove * for co-first
    thors = [latex2unicode(str(author)) for author in bib_data.entries[paperKeys].persons["author"] ]
    thors = [author.replace("*", "") for author in thors]
    bib_data.entries[paperKeys].persons["author"] = thors
    # remove latex formatting from titles "{}"
    title = bib_data.entries[paperKeys].fields["title"]
    title = title.replace("{", "")
    title = title.replace("}", "")
    bib_data.entries[paperKeys].fields["title"] = title
    
print(bib_data.entries[paperKeys].persons["author"])

['Duncan, R.', 'Lucas, M.']


In [3]:
listOfAuthors = []  # empty list of authors

# go throuh all entries
for paperKeys in bib_data.entries.keys():
    # get the authors of this paper

    # authors = bib_data.entries[paperKeys].persons['author'].split(" and ")
    # save them to the list of authors
    for author in bib_data.entries[paperKeys].persons["author"]:
        listOfAuthors.append(str(author))

listOfAuthors

# remove * for co-first 
#listOfAuthors = [author.replace("*", "") for author in listOfAuthors]

['Skardal, P. S.',
 'Battiston, F.',
 'Lucas, M.',
 'Mizuhara, M. S.',
 'Petri, G.',
 'Zhang, Y.',
 'Battiston, F.',
 'Bick, C.',
 'Lucas, M.',
 'Millán, A. P.',
 'Skardal, P. S.',
 'Zhang, Y.',
 'Lucas, M.',
 'Gallo, L.',
 'Ghavasieh, A.',
 'Battiston, F.',
 'De Domenico, M.',
 'Lucas, M.',
 'Francois, D.',
 'Mombaerts, L.',
 'Donato, C.',
 'Skupin, A.',
 'Proverbio, D.',
 'Lucas, M.',
 'Aime, N.',
 'Callara, A.',
 'Fontanelli, L.',
 'Sebastiani, L.',
 'Santarcangelo, L.',
 'Petri, G.',
 "Moriam\\'e, M.",
 'Lucas, M.',
 'Carletti, T.',
 'Robiglio, T.',
 'Neri, M.',
 'Coppes, D.',
 'Agostinelli, C.',
 'Battiston, F.',
 'Lucas, M.',
 'Petri, G.',
 'Nurisso, M.',
 'Morandini, M.',
 'Lucas, M.',
 'Vaccarino, F.',
 'Gili, T.',
 'Petri, G.',
 'Santoro, A.',
 'Battiston, F.',
 'Lucas, M.',
 'Petri, G.',
 'Amico, E.',
 'Zhang, Y.',
 'Skardal, P. S.',
 'Battiston, F.',
 'Petri, G.',
 'Lucas, M.',
 'Brondetta, A.',
 'Bizyaeva, A.',
 'Lucas, M.',
 'Petri, G.',
 'Musslick, S.',
 'Leitão, A.',
 'L

In [14]:
sorted(listOfAuthors)

['Agostinelli, C.',
 'Aime, N.',
 'Amico, E.',
 'Arnaudon, A.',
 'Barrat, A.',
 'Battiston, F.',
 'Bick, C.',
 'Bizyaeva, A.',
 'Brondetta, A.',
 'Bronstein, M.',
 'Callara, A.',
 'Carletti, T.',
 'Cencetti, G.',
 'Coppes, D.',
 'De Domenico, M.',
 'Donato, C.',
 'Duncan, R.',
 'Expert, P.',
 'Fanelli, D.',
 'Fontanelli, L.',
 'Francois, D.',
 'Gallo, L.',
 'Gero, S.',
 'Ghavasieh, A.',
 'Gili, T.',
 'Gruber, D.',
 'Habermann, B. H.',
 'Hersh, T. A.',
 'Iacopini, I.',
 'Landry, N. W.',
 'Latora, V.',
 'Leitão, A.',
 'Lucas, M.',
 'Millán, A. P.',
 'Mizuhara, M. S.',
 'Mombaerts, L.',
 'Morandini, M.',
 "Moriam\\'e, M.",
 'Morris, A.',
 'Musslick, S.',
 'Neri, M.',
 'Newman, J.',
 'Nurisso, M.',
 'Patania, A.',
 'Peach, R. L.',
 'Petit, J.',
 'Petri, G.',
 'Poetto, S.',
 'Proverbio, D.',
 'Robiglio, T.',
 'Santarcangelo, L.',
 'Santoro, A.',
 'Schwarze, A.',
 'Sebastiani, L.',
 'Skardal, P. S.',
 'Skupin, A.',
 'Stefanovska, A.',
 'Tichit, L.',
 'Torres, L.',
 'Townsend-Teague, A.',
 'V

In [4]:


if deleteEgoNode == True:
    # we assume that the author with the most entries is the ego node and delete it
    egoNode = max(listOfAuthors, key=listOfAuthors.count)  # returns the ego node
    listOfAuthors = list(set(listOfAuthors))  # gets unique list of authors
    listOfAuthors.pop(
        listOfAuthors.index(egoNode)
    )  # deletes it from the list of authors
    print("Removing the ego node: %s " % egoNode)
else:
    listOfAuthors = list(set(listOfAuthors))  # gets unique list of authors

nAuthors = len(listOfAuthors)  # number of author nodes
nPapers = len(bib_data.entries.keys())  # number of paper nodes


# read the additional author information from the csv
authorLinks_dict = {}  # create an empty dictionary
authorImage_dict = {}  # create an empty dictionary

try:
    authorInfo_reader = csv.DictReader(open(authorInformationFile))
    for row in authorInfo_reader:
        authorLinks_dict[row["name"]] = row["url"]
        authorImage_dict[row["name"]] = row["image"]
except FileNotFoundError:
    print("no optional co-author information available")

no optional co-author information available


In [5]:
# create a dictionary reflecting the graph (there are more pythonic ways
# possible to creat this, e.g., with zip, but this is easiest)

node_list = []
# create author nodes
for i in range(nAuthors):
    node_dict = {}  # create an empty dictionary for this node
    node_dict["id"] = "A" + str(i)
    node_dict["group"] = 0
    # invert the name such that the given name is before the last name
    try:  # we need this try to deal with single author papers
        authorSplit = listOfAuthors[i].split(",")
        nameThisAuthor = authorSplit[1][1::] + " " + authorSplit[0]
    except:
        authorSplit = listOfAuthors[i]
        nameThisAuthor = authorSplit[1][1::] + " " + authorSplit[0]

    nameThisAuthorUnicode = latex2unicode(nameThisAuthor)
    print(nameThisAuthorUnicode)
    node_dict["name"] = nameThisAuthorUnicode
    node_list.append(node_dict)
    # try to set the url for this author but default is google it
    node_dict["url"] = "https://www.google.com/search?q=" + nameThisAuthor
    try:
        if authorLinks_dict[nameThisAuthor] is None:
            raise KeyError("no information for this author")
        else:
            node_dict["url"] = authorLinks_dict[nameThisAuthor]
    except KeyError:
        node_dict["url"] = "https://www.google.com/search?q=" + nameThisAuthor

        # try to set a image for this author
    try:
        node_dict["image"] = authorImage_dict[nameThisAuthor]
    except KeyError:  # if no image jsut leave blank
        node_dict["image"] = []

A. Callara
M. S. Mizuhara
P. S. Skardal
J. Newman
M. Lucas
M. Nurisso
P. Expert
M. Bronstein
R. Duncan
T. Carletti
M. Neri
A. Schwarze
A. Skupin
A. Patania
D. Francois
N. Aime
C. Donato
A. Bizyaeva
Y. Zhang
J.-G. Young
T. A. Hersh
A. Townsend-Teague
S. Poetto
T. Gili
A. Morris
D. Coppes
A. Stefanovska
T. Robiglio
L. Tichit
A. P. Millán
M. Moriam\'e
M. Morandini
G. Cencetti
I. Iacopini
G. Petri
F. Vaccarino
L. Torres
A. Barrat
S. Musslick
M. De Domenico
A. Brondetta
B. H. Habermann
A. Leitão
C. Bick
A. Arnaudon
N. W. Landry
S. Gero
C. Agostinelli
R. L. Peach
L. Sebastiani
V. Latora
D. Proverbio
J. Petit
A. Ghavasieh
E. Amico
L. Santarcangelo
L. Mombaerts
D. Fanelli
D. Gruber
F. Battiston
L. Gallo
L. Fontanelli
A. Santoro


In [6]:
# create the links between the nodes
link_list = []
i=0
for paperKeys in bib_data.entries.keys(): # go over every paper
    # create the paper node
    node_dict = {} # create an empty dictionary for this node
    node_dict["id"] = "P" + str(i)
    node_dict["group"] = 1
    thisPaperName = bib_data.entries[paperKeys].fields['title']
    # remove curly bracket in paper name, remove it
    if thisPaperName[0]=='{':
        thisPaperName=thisPaperName[1:-1]
    node_dict["name"] =  thisPaperName
    node_list.append(node_dict)

    # set image for this paper
    try:
        node_dict["image"] = bib_data.entries[paperKeys].fields['image']
    except KeyError: # if no image just leave blank
        node_dict["image"] = []


    # find the authors for this paper
    authorsThisPaper = [str(i).split('*')[0].strip() for i in bib_data.entries[paperKeys].persons['author']]

    # if the paper has a url, add it
    try:
        node_dict["url"] = bib_data.entries[paperKeys].fields['url']
    except KeyError: # otherwise refer to google
        node_dict["url"] = "https://www.google.com/search?q=" + bib_data.entries[paperKeys].fields['title']


    for authors in authorsThisPaper:
        link_dict = {} # empty dictionary for this edge
        link_dict["source"] = "P" + str(i) # attached to this paper
        try:
            link_dict["target"] = "A" + str(listOfAuthors.index(authors)) # and attached to co-author
            link_list.append(link_dict)    # save it into the list
        except ValueError:
            pass
            #print("Author %s not in list, probably the ego node." %authors )
    i=i+1

# write into dictionary
graph_dict = {"nodes" : node_list, "links" : link_list}


# opening the file to write
if outputJSONFileName:
    # Writing JSON data
    with open(outputJSONFileName, 'w') as f:
        json.dump(graph_dict, f, indent=4)


In [7]:
for i, node in enumerate(node_list):
    if node["name"] == "M. Lucas":
        idx = node["id"]
        ii = i
        print(node)
        break
    else: 
        ii=None
        
        
        

{'id': 'A4', 'group': 0, 'name': 'M. Lucas', 'url': 'https://www.google.com/search?q=M. Lucas', 'image': []}


In [8]:
# remove node representing myself
node_list.pop(ii)

{'id': 'A4',
 'group': 0,
 'name': 'M. Lucas',
 'url': 'https://www.google.com/search?q=M. Lucas',
 'image': []}

In [9]:
# remove links including me
link_list = [i for i in link_list if idx not in i.values()]

In [10]:
# opening the file to write
if outputJSONFileName:
    # Writing JSON data
    with open(outputJSONFileName, 'w') as f:
        json.dump(graph_dict, f, indent=4)

In [11]:
link_list

[{'source': 'P0', 'target': 'A2'},
 {'source': 'P0', 'target': 'A59'},
 {'source': 'P0', 'target': 'A1'},
 {'source': 'P0', 'target': 'A34'},
 {'source': 'P0', 'target': 'A18'},
 {'source': 'P1', 'target': 'A59'},
 {'source': 'P1', 'target': 'A43'},
 {'source': 'P1', 'target': 'A29'},
 {'source': 'P1', 'target': 'A2'},
 {'source': 'P1', 'target': 'A18'},
 {'source': 'P2', 'target': 'A60'},
 {'source': 'P2', 'target': 'A53'},
 {'source': 'P2', 'target': 'A59'},
 {'source': 'P2', 'target': 'A39'},
 {'source': 'P3', 'target': 'A14'},
 {'source': 'P3', 'target': 'A56'},
 {'source': 'P3', 'target': 'A16'},
 {'source': 'P3', 'target': 'A12'},
 {'source': 'P3', 'target': 'A51'},
 {'source': 'P4', 'target': 'A15'},
 {'source': 'P4', 'target': 'A0'},
 {'source': 'P4', 'target': 'A61'},
 {'source': 'P4', 'target': 'A49'},
 {'source': 'P4', 'target': 'A55'},
 {'source': 'P4', 'target': 'A34'},
 {'source': 'P5', 'target': 'A30'},
 {'source': 'P5', 'target': 'A9'},
 {'source': 'P6', 'target': 'A27'