Fetch information about a specific person identified by his/her ORCID ID, and handle API errors.

[Download Notebook](https://github.com/researchgraph/augment-api-beta/blob/main/docs/notebooks/affiliations.ipynb)

In [489]:
import sys
sys.path.append('../')

# !{sys.executable} -m pip install folium

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import ast
import altair as alt
import networkx as nx
import nx_altair as nxa
from wordcloud import WordCloud, STOPWORDS 
import pandas as pd
from datetime import datetime, date
import requests
import json
import folium

import os
from os.path import join, dirname
from dotenv import load_dotenv
load_dotenv();

### ORCID ID not found

In [490]:
# ORCID ID not found
API_KEY = os.environ.get("API_KEY")
ORCID = "0000-0003-XXXX-XXXX"

url = f'https://f130.azure-api.net/v1/orcid/{ORCID}?subscription-key={API_KEY}'
r = requests.get(url)

# print a short confirmation on completion
print('Augment API query complete ', r.status_code)

if r.status_code == 400:
    print(r.json()["message"])

Augment API query complete  400
FAILED: Invalid ORCID 0000-0003-XXXX-XXXX


### Missing API_KEY

In [491]:
# Missing API_KEY
API_KEY = ''
ORCID = "0000-0002-0715-6126"

url = f'https://f130.azure-api.net/v1/orcid/{ORCID}?subscription-key={API_KEY}'
r = requests.get(url)

# print a short confirmation on completion
print('Augment API query complete ', r.status_code)

if r.status_code == 401:
    print(f'Authentication error.')

Augment API query complete  401
Authentication error.


### ORCID ID does exist

In [492]:
# ORCID ID does exist
API_KEY = os.environ.get("API_KEY")
ORCID = "0000-0003-1419-2405"

url = f'https://f130.azure-api.net/v1/orcid/{ORCID}?subscription-key={API_KEY}'
r = requests.get(url)

# print a short confirmation on completion
print('Augment API query complete ', r.status_code)

if r.status_code == 200 and r.json()[0]["nodes"]["researchers"]:    
    researchers = r.json()[0]["nodes"]["researchers"]
    
    researcher = None
    for i in range(len(researchers)):
        if researchers[i]["orcid"] == ORCID:
            researcher = researchers[i]

print()
print(f'ORCID: {researcher["orcid"]}')
print(f'First name: {researcher["first_name"]}')
print(f'Last name: {researcher["last_name"]}')
print()
print(f'The researcher {researcher["full_name"]} is connected to {r.json()[0]["stats"]}.')

Augment API query complete  200

ORCID: 0000-0003-1419-2405
First name: Martin
Last name: Fenner

The researcher Martin Fenner is connected to {'datasets': 0, 'grants': 0, 'organisations': 178, 'publications': 299, 'researchers': 129}.


### List of co-authors
Only includes co-authors with ORCID IDs.

In [493]:
rf = pd.DataFrame(r.json()[0]["nodes"]["researchers"], columns=['first_name', 'last_name', 'full_name', 'orcid'])
dfStyler = rf.style.set_properties(**{'text-align': 'left'})
dfStyler.set_table_styles([dict(selector='th', props=[('text-align', 'left')])])

Unnamed: 0,first_name,last_name,full_name,orcid
0,Matthew,Cannon,Matthew Cannon,0000-0002-1496-8392
1,Patricia,Cruse,Patricia Cruse,0000-0002-9300-5278
2,Felix,Sedlmayer,Felix Sedlmayer,0000-0002-1181-0178
3,Richard,Hallett,Richard Hallett,0000-0002-8599-0773
4,John,Chodacki,John Chodacki,0000-0002-7378-2408
5,owen,cheng,owen cheng,0000-0001-7343-9784
6,Christoph,Oing,Christoph Oing,0000-0001-5578-3418
7,Tom,Demeranville,Tom Demeranville,0000-0003-0902-4386
8,Claudia,Amaya,Claudia Amaya,0000-0002-1290-9735
9,Gudmundur,Thorisson,Gudmundur Thorisson,0000-0001-5635-1860


### List of co-author affiliations

In [494]:
# Strip wikidata ID from key
def force_wikidata(n):
    n['key'] = n['key'].split('/')[-1]
    return n

json = map(force_wikidata, r.json()[0]["nodes"]["organisations"])
of = pd.DataFrame(json, columns=['name', 'country', 'key', 'ror', 'lat', 'lon'])
of = of.rename(columns={'key': 'wikidata'})
dfStyler = of.style.set_properties(**{'text-align': 'left'})
dfStyler.set_table_styles([dict(selector='th', props=[('text-align', 'left')])])

data = []
try:
    for index, row in of.iterrows():
        url = 'https://api.ror.org/organizations?query=' + row['wikidata']
        r2 = requests.get(url)

        # print an error message if status code != 200
        if r2.status_code != 200:
            print('ROR API query returned an error', r2.status_code)
        
        name = row['name']
        country = row['country']
        wikidata = row['wikidata']
        
        if r2.json()['number_of_results'] == 0:
            # we need to work on better aligning with ROR. Main issue seems to be wikidata identifiers for departments which ROR does not support
            print('No ROR record found for wikidata ' + row['name'] + ' ' + row['wikidata'])
        else:
            ror = r2.json()['items'][0]['id'][8:]
            lat = r2.json()['items'][0]['addresses'][0]['lat']
            lon = r2.json()['items'][0]['addresses'][0]['lng']
        data.append([name, country, wikidata, ror, lat, lon])
except IndexError as error:
    print('An IndexError has occured for index: ' + index)
    
of2 = pd.DataFrame(data, columns=['name', 'country', 'wikidata', 'ror', 'lat', 'lon'])   
dfStyler = of2.style.set_properties(**{'text-align': 'left'})
dfStyler.set_table_styles([dict(selector='th', props=[('text-align', 'left')])])

No ROR record found for wikidata Manchester Metropolitan University Business School Q6747455
No ROR record found for wikidata Klinikum der Johann Wolfgang Goethe-Universität Frankfurt Q101268807
No ROR record found for wikidata Stanford University School of Medicine Q4115969
No ROR record found for wikidata Göttingen State and University Library Q564783
No ROR record found for wikidata Freie Universität Berlin Fachbereich Geowissenschaften Q101259164
No ROR record found for wikidata Swansea University Medical School Q7653715
No ROR record found for wikidata Nature Research Q180419
No ROR record found for wikidata Universität Hamburg Medizinische Fakultät Q101248254
No ROR record found for wikidata Centro de Ciencias Humanas y Sociales Q5761607
No ROR record found for wikidata University of Leicester School of Biological Sciences Q86021179
No ROR record found for wikidata Akureyri Junior College Q4701979
No ROR record found for wikidata Leuphana Universität Lüneburg Fakultät Kulturwisse

Unnamed: 0,name,country,wikidata,ror,lat,lon
0,Manchester Metropolitan University Business School,GB,Q6747455,ror.org/00hx57361,40.344234,-74.651469
1,Klinikum der Johann Wolfgang Goethe-Universität Frankfurt,DE,Q101268807,ror.org/00hx57361,40.344234,-74.651469
2,Hannover Medical School,DE,Q911561,ror.org/00f2yqf98,52.3839,9.8047
3,Stanford University School of Medicine,US,Q4115969,ror.org/00f2yqf98,52.3839,9.8047
4,University of Illinois at Urbana–Champaign,US,Q457281,ror.org/047426m28,40.102182,-88.227194
5,Columbia University,US,Q49088,ror.org/00hj8s172,40.8076,-73.96239
6,University Health Network,CA,Q7894718,ror.org/042xt5161,43.658585,-79.387288
7,Open University of Catalonia,ES,Q3042433,ror.org/01f5wp925,41.406306,2.194659
8,deCODE genetics,IS,Q493712,ror.org/04dzdm737,64.135671,-21.94616
9,Göttingen State and University Library,DE,Q564783,ror.org/04dzdm737,64.135671,-21.94616


In [495]:
# map affiliations on a world map, center around home institution (Monash University, for now done manually)
m = folium.Map(tiles='cartodbpositron', location=[of2.loc[[43]].lat, of2.loc[[43]].lon], zoom_start=4)

#Adding markers to the map
for index, row in of2.iterrows():
    folium.CircleMarker(location=[row['lat'], row['lon']],popup=row['name'], fill=True,
    color="darkgreen", radius=2).add_to(m)
m

In [496]:
# Generate a graph from the co-authors and their affiliations
# use XKCD colors: https://xkcd.com/color/rgb/
G = nx.Graph()

for index, row in rf.iterrows():
    G.add_node(row['orcid'], name=row['full_name'], color='xkcd:turquoise')

for index, row in of2.iterrows():
    G.add_node(row['wikidata'], name=row['name'], color='xkcd:vivid purple')

# Convert from and to for researcher relationships into ORCID IDs (to map the node labels)
def force_pid(n):
    n['from'] = n['from'].split('/')[-1]
    n['to'] = n['to'].split('/')[-1]
    return n

json = map(force_pid, r.json()[0]['relationships']['researcher-researcher'])
ef = pd.DataFrame(json, columns=['from', 'to'])

json = map(force_pid, r.json()[0]['relationships']['researcher-organisation'])
eo = pd.DataFrame(json, columns=['from', 'to'])

G.add_edges_from(ef.to_numpy())
G.add_edges_from(eo.to_numpy())
    
# Compute positions for viz.
pos = nx.spring_layout(G)

options = {
    "font_size": 18,
    "node_size": 50,
    "edge_color": "gray",
    "linewidths": 0.1,
    "width": 1
}

# Show information about the graph
print(nx.info(G))
print("Network density:", nx.density(G))

# Draw the graph using Altair
viz = nxa.draw_networkx(G, pos=pos, node_tooltip='name', node_color='color', **options).properties(width=800, height=800)

nx.write_gexf(G, "affiliations.gexf")

# Show it as an interactive plot!
viz.interactive()

Graph with 307 nodes and 381 edges
Network density: 0.00811138787762662
