Fetch information about a specific researcher, identified by their ORCID iD, and handle the errors the API can return.

[Download Notebook](https://github.com/researchgraph/augment-api-beta/blob/main/docs/notebooks/affiliations.ipynb)

In [1]:
import sys
sys.path.append('../')

# !{sys.executable} -m pip install folium

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import ast
import altair as alt
import networkx as nx
import nx_altair as nxa
from wordcloud import WordCloud, STOPWORDS 
import pandas as pd
from datetime import datetime, date
import requests
import json
import folium

import os
from os.path import join, dirname
from dotenv import load_dotenv
load_dotenv();

### ORCID ID not found

In [2]:
# ORCID ID not found
# Query the Augment API with a placeholder identifier to show the 400 response.
API_KEY = os.environ.get("API_KEY")
ORCID = "0000-0003-XXXX-XXXX"

url = 'https://augmentapi.researchgraph.com/v1/orcid/{}?subscription-key={}'.format(ORCID, API_KEY)
r = requests.get(url)

# Report the HTTP status once the request has returned
status_code = r.status_code
print('Augment API query complete ', status_code)

# On a 400 the message is read from the "error" field of the first list element
if status_code == 400:
    error_message = r.json()[0]["error"]
    print(error_message)

Augment API query complete  400
We have failed to identify this ORCID (0000-0003-XXXX-XXXX). If it is a new identifier, it might take a few days to appear on our server.


### Missing API_KEY

In [3]:
# Missing API_KEY
# Send the request with an empty subscription key to show the 401 response.
API_KEY = ''
ORCID = "0000-0002-0715-6126"

url = 'https://augmentapi.researchgraph.com/v1/orcid/{}?subscription-key={}'.format(ORCID, API_KEY)
r = requests.get(url)

# Report the HTTP status once the request has returned
status_code = r.status_code
print('Augment API query complete ', status_code)

# 401 is the status this cell expects for a rejected subscription key
if status_code == 401:
    print('Authentication error.')

Augment API query complete  401
Authentication error.


### ORCID ID does exist

In [4]:
# ORCID ID does exist
# Fetch the record for a known ORCID iD and print a short summary of the researcher.
API_KEY = os.environ.get("API_KEY")
ORCID = "0000-0002-0068-716X"

url = f'https://augmentapi.researchgraph.com/v1/orcid/{ORCID}?subscription-key={API_KEY}'
r = requests.get(url)

# print a short confirmation on completion
print('Augment API query complete ', r.status_code)

# Parse the body once and look for the entry whose ORCID matches the one queried.
# `researcher` stays None when the request failed or no entry matches, so the
# summary below is never printed from an undefined or empty record.
researcher = None
if r.status_code == 200:
    payload = r.json()[0]
    researchers = payload["nodes"]["researchers"]
    researcher = next(
        (person for person in researchers if person.get("orcid") == ORCID),
        None,
    )

if researcher is None:
    print(f'No researcher record for ORCID {ORCID} could be read from the response.')
else:
    print()
    print(f'ORCID: {researcher["orcid"]}')
    print(f'First name: {researcher["first_name"]}')
    print(f'Last name: {researcher["last_name"]}')
    print()
    print(f'The researcher {researcher["full_name"]} is connected to {payload["stats"]}.')

Augment API query complete  200

ORCID: 0000-0002-0068-716X
First name: Cameron
Last name: Neylon

The researcher Cameron Neylon is connected to {'datasets': 18, 'grants': 9, 'organisations': 245, 'publications': 148, 'researchers': 152}.


### List of co-authors
Only includes co-authors with ORCID IDs.

In [5]:
# Tabulate the researchers returned by the API, keeping only the name and ORCID columns
coauthor_columns = ['first_name', 'last_name', 'full_name', 'orcid']
rf = pd.DataFrame(r.json()[0]["nodes"]["researchers"], columns=coauthor_columns)

# Left-align both the cells and the header row of the rendered table
dfStyler = rf.style.set_properties(**{'text-align': 'left'})
dfStyler.set_table_styles([{'selector': 'th', 'props': [('text-align', 'left')]}])

Unnamed: 0,first_name,last_name,full_name,orcid
0,Neil,Saunders,Neil Saunders,0000-0003-2139-6107
1,Antony,Williams,Antony Williams,0000-0002-2668-4821
2,Marie-Claire,BELLISSENT-FUNEL,Marie-Claire BELLISSENT-FUNEL,0000-0002-6539-3605
3,David,Barrett,David Barrett,0000-0001-6900-9474
4,Volker,Urban,Volker Urban,0000-0002-7962-3408
5,Samuel,Furse,Samuel Furse,0000-0003-4267-2051
6,Damien,Jacques,Damien Jacques,0000-0002-9069-4143
7,Mithu,Lucraft,Mithu Lucraft,0000-0003-0355-6576
8,Susana,Teixeira,Susana Teixeira,0000-0002-6603-7936
9,Clifford,Tatum,Clifford Tatum,0000-0002-2212-3197


### List of co-author affiliations

In [6]:
# Reduce the 'key' of an organisation node to its last path segment
def force_wikidata(n):
    """Replace ``n['key']`` with the text after its final '/' and return ``n``.

    The dictionary is modified in place; a key without any '/' is left unchanged.
    """
    _, _, last_segment = n['key'].rpartition('/')
    n['key'] = last_segment
    return n

# Build the organisation table from the Augment API response.
# The intermediate is not called `json`, so the imported `json` module is not shadowed.
organisation_nodes = map(force_wikidata, r.json()[0]["nodes"]["organisations"])
of = pd.DataFrame(organisation_nodes, columns=['name', 'country', 'key', 'ror', 'lat', 'lon'])
of = of.rename(columns={'key': 'wikidata'})

# Query ROR once per organisation, using the wikidata ID as the search term,
# and collect name, country, wikidata ID, ROR ID and coordinates for each match.
data = []
for index, row in of.iterrows():
    # `params` lets requests URL-encode the query value
    r2 = requests.get('https://api.ror.org/organizations', params={'query': row['wikidata']})

    # print an error message if status code != 200 and skip the row:
    # an error body is not expected to carry 'number_of_results'
    if r2.status_code != 200:
        print('ROR API query returned an error', r2.status_code)
        continue

    # parse the response once instead of once per field
    result = r2.json()

    if result['number_of_results'] == 0:
        # we need to work on better aligning with ROR. Main issue seems to be wikidata identifiers for departments which ROR does not support
        print('No ROR record found for wikidata ' + row['name'] + ' ' + row['wikidata'])
        continue

    # Only the first hit is used.
    # NOTE(review): the 'addresses'/'lat'/'lng' fields are what this cell has always read;
    # confirm they match the ROR API version currently served.
    item = result['items'][0]
    addresses = item.get('addresses') or []
    if not addresses:
        print('No address found in ROR record for ' + row['name'] + ' ' + row['wikidata'])
        continue

    data.append([
        row['name'],
        row['country'],
        row['wikidata'],
        item['id'][8:],  # drop the first 8 characters (the 'https://' prefix, judging by the displayed 'ror.org/...' values)
        addresses[0]['lat'],
        addresses[0]['lng'],
    ])

of2 = pd.DataFrame(data, columns=['name', 'country', 'wikidata', 'ror', 'lat', 'lon'])
dfStyler = of2.style.set_properties(**{'text-align': 'left'})
dfStyler.set_table_styles([dict(selector='th', props=[('text-align', 'left')])])

No ROR record found for wikidata Waterford Kamhlaba Q1550406
No ROR record found for wikidata Paris Descartes University Q1155944
No ROR record found for wikidata Harvard Medical School Q49121
No ROR record found for wikidata University of Toronto Faculty of Arts and Science Q7896481
No ROR record found for wikidata Kew Gardens Q188617
No ROR record found for wikidata Leibniz Institute of Freshwater Ecology and Inland Fisheries Q1813732
No ROR record found for wikidata McGill University Faculty of Agriculture and Environment Q101009534
No ROR record found for wikidata New York University in London Q99285271
No ROR record found for wikidata Dryad Q5309616
No ROR record found for wikidata Harvard College Q49123
No ROR record found for wikidata Göttingen State and University Library Q564783
No ROR record found for wikidata Bloomsbury Publishing Q568642
No ROR record found for wikidata Swansea University Medical School Q7653715
No ROR record found for wikidata Simon Fraser University - Van

Unnamed: 0,name,country,wikidata,ror,lat,lon
0,Curtin University,AU,Q1145497,ror.org/02n415q13,-31.95224,115.8614
1,Ankara University,TR,Q1060621,ror.org/01wntqw50,39.9367,32.8303
2,Western Sydney University,AU,Q1141452,ror.org/03t52dk35,-33.59956,150.75142
3,Moscow State University,RU,Q13164,ror.org/010pmpe69,55.703935,37.52867
4,Nanyang Technological University,SG,Q721064,ror.org/02e7b5302,1.344722,103.681389
5,"University of California, Los Angeles",US,Q174710,ror.org/046rm7j60,34.05223,-118.24368
6,University of Strasbourg,FR,Q157575,ror.org/00pg6eq24,48.58392,7.74553
7,Research Centre Jülich,DE,Q697111,ror.org/02nv7yv05,50.905,6.411944
8,University of Notre Dame Australia,AU,Q1887921,ror.org/02stey378,-32.05632,115.74557
9,The Roslin Institute,GB,Q1633976,ror.org/01920rj20,55.865899,-3.199085


In [7]:
# map affiliations on a world map, centred on the home institution
# The home institution is looked up by name rather than by a hard-coded row label,
# which breaks as soon as the API returns a different number or order of rows.
home_institution = 'Curtin University'
home_rows = of2.loc[of2['name'] == home_institution].dropna(subset=['lat', 'lon'])

# Rows without coordinates cannot be placed on the map
located = of2.dropna(subset=['lat', 'lon'])

if not home_rows.empty:
    map_center = [float(home_rows.iloc[0]['lat']), float(home_rows.iloc[0]['lon'])]
elif not located.empty:
    # fall back to the centroid of all affiliations when the home institution is absent
    map_center = [float(located['lat'].mean()), float(located['lon'].mean())]
else:
    map_center = [0.0, 0.0]

m = folium.Map(tiles='cartodbpositron', location=map_center, zoom_start=3)

#Adding markers to the map
for index, row in located.iterrows():
    folium.CircleMarker(location=[row['lat'], row['lon']], popup=row['name'], fill=True,
                        color="#8248C6", radius=2).add_to(m)
m

In [8]:
# Generate a graph from the co-authors and their affiliations
G = nx.Graph()

# Researchers are keyed by ORCID and drawn in green
G.add_nodes_from(
    (orcid, {'name': full_name, 'node_color': '#54C48C', 'type': 'researcher'})
    for orcid, full_name in zip(rf['orcid'], rf['full_name'])
)

# Organisations are keyed by wikidata ID and drawn in purple
G.add_nodes_from(
    (wikidata, {'name': org_name, 'node_color': '#8248C6', 'type': 'organisation'})
    for wikidata, org_name in zip(of2['wikidata'], of2['name'])
)

# Reduce the 'from' and 'to' of a relationship to their last path segment,
# so they can be matched against the node labels of the graph
def force_pid(n):
    """Replace ``n['from']`` and ``n['to']`` with the text after their final '/'.

    The dictionary is modified in place and returned.
    """
    for endpoint in ('from', 'to'):
        n[endpoint] = n[endpoint].rpartition('/')[2]
    return n

# Build the edge tables from the relationships in the Augment API response.
# The intermediates are not called `json`, so the imported `json` module is not shadowed.
relationships = r.json()[0]['relationships']

researcher_links = map(force_pid, relationships['researcher-researcher'])
ef = pd.DataFrame(researcher_links, columns=['from', 'to'])

organisation_links = map(force_pid, relationships['researcher-organisation'])
eo = pd.DataFrame(organisation_links, columns=['from', 'to'])

G.add_edges_from(ef.to_numpy())
G.add_edges_from(eo.to_numpy())

# Compute positions for viz.
pos = nx.spring_layout(G)

options = {
    "font_size": 12,
    "node_size": 50,
    "edge_color": "lightgray",
    "linewidths": 0.1,
    "width": 1
}

# Show information about the graph
# nx.info() was removed in NetworkX 3.0, so the summary line is built directly
print(f"{type(G).__name__} with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")
print("Network density:", nx.density(G))

# export graph to a gephi file
# NOTE(review): the doubled "s" in the file name looks like a typo, but the download
# link at the end of the notebook points at this exact name, so it is kept.
nx.write_gexf(G, "affiliationss.gexf")

# Draw the graph using altair
viz = nxa.draw_networkx(G, pos=pos, node_tooltip='name', node_color='node_color', **options).properties(width=800, height=800)
viz.interactive()

Graph with 397 nodes and 507 edges
Network density: 0.0064498893214258455


[Download Gephi file](https://github.com/researchgraph/augment-api-beta/blob/main/docs/notebooks/affiliationss.gexf)