Fetch information about a specific person, identified by their ORCID iD, and handle API errors.

In [1]:
import sys
sys.path.append('../')

# !{sys.executable} -m pip install matplotlib

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import ast
import altair as alt
import networkx as nx
import nx_altair as nxa
from wordcloud import WordCloud, STOPWORDS 
import pandas as pd
from datetime import datetime, date
import requests
import json

from geopy.geocoders import Nominatim
import folium
from folium.plugins import MarkerCluster
from pycountry_convert import country_alpha2_to_continent_code, country_name_to_country_alpha2
from wikidata.client import Client

import os
from os.path import join, dirname
from dotenv import load_dotenv
load_dotenv();

### ORCID ID not found

In [2]:
# ORCID ID not found
# Query the Augment API with a syntactically invalid ORCID to show the
# error the service reports (HTTP 400 with a "message" field).
API_KEY = os.environ.get("API_KEY")
ORCID = "0000-0003-XXXX-XXXX"

url = f'https://f130.azure-api.net/v1/orcid/{ORCID}?subscription-key={API_KEY}'
# requests never times out by default; bound the wait so an unreachable
# API cannot hang the kernel during Restart & Run All.
r = requests.get(url, timeout=30)

# print a short confirmation on completion
print('Augment API query complete ', r.status_code)

# A 400 response carries a human-readable reason in its JSON body.
if r.status_code == 400:
    print(r.json()["message"])

Augment API query complete  400
FAILED: Invalid ORCID 0000-0003-XXXX-XXXX


### Missing API_KEY

In [3]:
# Missing API_KEY
# Deliberately send an empty subscription key to show the authentication
# failure path (HTTP 401).
API_KEY = ''
ORCID = "0000-0002-0715-6126"

url = f'https://f130.azure-api.net/v1/orcid/{ORCID}?subscription-key={API_KEY}'
# requests never times out by default; bound the wait so an unreachable
# API cannot hang the kernel during Restart & Run All.
r = requests.get(url, timeout=30)

# print a short confirmation on completion
print('Augment API query complete ', r.status_code)

if r.status_code == 401:
    print('Authentication error.')

Augment API query complete  401
Authentication error.


### ORCID ID does exist

In [4]:
# ORCID ID does exist
# Query the Augment API for a known ORCID and print the matching researcher
# together with the connection statistics returned by the service.
API_KEY = os.environ.get("API_KEY")
ORCID = "0000-0002-0068-716X"

url = f'https://f130.azure-api.net/v1/orcid/{ORCID}?subscription-key={API_KEY}'
# requests never times out by default; bound the wait so an unreachable
# API cannot hang the kernel during Restart & Run All.
r = requests.get(url, timeout=30)

# print a short confirmation on completion
print('Augment API query complete ', r.status_code)

# Parse the body once. Anything other than a non-empty 200 response leaves
# the payload empty so the lookup below simply finds nothing.
body = r.json() if r.status_code == 200 else []
payload = body[0] if body else {}
researchers = payload.get("nodes", {}).get("researchers") or []

# Pick the entry whose ORCID matches the one requested (the last match wins,
# as in a plain scan over the list without an early exit).
matches = [person for person in researchers if person["orcid"] == ORCID]
researcher = matches[-1] if matches else None

print()
if researcher is None:
    # Always bind `researcher` and report the miss instead of failing with a
    # NameError/TypeError when the request failed or the ORCID is absent.
    print(f'No researcher with ORCID {ORCID} found (HTTP status {r.status_code}).')
else:
    print(f'ORCID: {researcher["orcid"]}')
    print(f'First name: {researcher["first_name"]}')
    print(f'Last name: {researcher["last_name"]}')
    print()
    print(f'The researcher {researcher["full_name"]} is connected to {payload["stats"]}.')

Augment API query complete  200

ORCID: 0000-0002-0068-716X
First name: Cameron
Last name: Neylon

The researcher Cameron Neylon is connected to {'datasets': 0, 'grants': 0, 'organisations': 207, 'publications': 157, 'researchers': 126}.


### List of co-authors
Only includes co-authors with ORCID IDs.

In [5]:
# Tabulate the researcher nodes of the last API response, keeping only the
# name and ORCID columns.
researcher_nodes = r.json()[0]["nodes"]["researchers"]
rf = pd.DataFrame(
    researcher_nodes,
    columns=['first_name', 'last_name', 'full_name', 'orcid'],
)

# Left-align the body cells and the header cells of the rendered table.
left_aligned_header = {'selector': 'th', 'props': [('text-align', 'left')]}
dfStyler = rf.style.set_properties(**{'text-align': 'left'})
dfStyler.set_table_styles([left_aligned_header])

Unnamed: 0,first_name,last_name,full_name,orcid
0,Jeremy,Frey,Jeremy Frey,0000-0003-0842-4302
1,Antony,Williams,Antony Williams,0000-0002-2668-4821
2,Neil,Saunders,Neil Saunders,0000-0003-2139-6107
3,David,Gray,David Gray,0000-0001-7621-3473
4,Martin Paul,Eve,Martin Paul Eve,0000-0002-5589-8511
5,Lisa,Matthias,Lisa Matthias,0000-0002-2612-2132
6,Leighton,Coates,Leighton Coates,0000-0003-2342-049X
7,Catriona,MacCallum,Catriona MacCallum,0000-0001-9623-2225
8,Tony,Ross-Hellauer,Tony Ross-Hellauer,0000-0003-4470-7027
9,Paul,Langan,Paul Langan,0000-0002-0247-3122


### List of co-author affiliations

In [6]:
def force_wikidata(n):
    """Shorten ``n['key']`` to the text after its last '/' and return ``n``.

    The mapping is modified in place; the returned object is the same one
    that was passed in. A key without any '/' is left as it is.
    """
    _, _, identifier = n['key'].rpartition('/')
    n['key'] = identifier
    return n

# Tabulate the organisation nodes of the last API response, with each 'key'
# shortened by force_wikidata and exposed as the 'wikidata' column.
# The records get their own name: binding them to `json` would shadow the
# json module imported at the top of the notebook.
organisation_records = [
    force_wikidata(node) for node in r.json()[0]["nodes"]["organisations"]
]

of = (
    pd.DataFrame(organisation_records, columns=['name', 'country', 'key'])
    .rename(columns={'key': 'wikidata'})
)

# Left-align the body cells and the header cells of the rendered table.
dfStyler = of.style.set_properties(**{'text-align': 'left'})
dfStyler.set_table_styles([dict(selector='th', props=[('text-align', 'left')])])

Unnamed: 0,name,country,wikidata
0,Leipzig University,DE,Q154804
1,Rutherford Appleton Laboratory,GB,Q45820
2,Robert Bosch GmbH,DE,Q234021
3,University of Cologne,DE,Q54096
4,Pontifical Catholic University of Peru,PE,Q200601
5,University of Southampton,GB,Q76473
6,University College London,GB,Q193196
7,Imperial College London,GB,Q189022
8,University of Glasgow,GB,Q192775
9,University of Birmingham,GB,Q223429


In [7]:
# Build the graph of co-authors and their affiliations.
G = nx.Graph()

# One green node per researcher row, keyed by ORCID.
for orcid, full_name in zip(rf['orcid'], rf['full_name']):
    G.add_node(orcid, name=full_name, color='green')

# One blue node per organisation row, keyed by the 'wikidata' column.
for wikidata_id, organisation_name in zip(of['wikidata'], of['name']):
    G.add_node(wikidata_id, name=organisation_name, color='blue')

def force_pid(n):
    """Shorten ``n['from']`` and ``n['to']`` to the text after their last '/'.

    The mapping is modified in place and returned, so both ends of a
    relationship carry the bare identifier used as node label.
    """
    for endpoint in ('from', 'to'):
        n[endpoint] = n[endpoint].rsplit('/', 1)[-1]
    return n

# Edge lists for researcher-researcher and researcher-organisation links.
# force_pid shortens both endpoints to the identifier after the last '/'.
# The response is parsed once, and the records are not bound to `json`, which
# would shadow the json module imported at the top of the notebook.
relationships = r.json()[0]['relationships']

ef = pd.DataFrame(
    [force_pid(rel) for rel in relationships['researcher-researcher']],
    columns=['from', 'to'],
)
eo = pd.DataFrame(
    [force_pid(rel) for rel in relationships['researcher-organisation']],
    columns=['from', 'to'],
)

G.add_edges_from(ef.to_numpy())
G.add_edges_from(eo.to_numpy())

# Compute positions for viz. spring_layout starts from random positions, so
# fix the seed to get the same picture on every Restart & Run All.
LAYOUT_SEED = 42
pos = nx.spring_layout(G, seed=LAYOUT_SEED)

options = {
    "font_size": 12,
    "node_size": 50,
    "edge_color": "gray",
    "linewidths": 0.1,
    "width": 1
}

# Show information about the graph. nx.info() was removed in NetworkX 3.0,
# so build the same one-line summary from the graph's own counters.
print(f"Graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")
print("Network density:", nx.density(G))

# Draw the graph using Altair
viz = nxa.draw_networkx(G, pos=pos, node_tooltip='name', node_color='color', **options).properties(width=800, height=800)

# Keep a GEXF copy of the graph next to the notebook.
nx.write_gexf(G, "affiliations.gexf")

# Show it as an interactive plot!
viz.interactive()

Graph with 333 nodes and 417 edges
Network density: 0.007543688266579833


In [16]:
# Count how many organisations fall in each country.
country_counts = of.groupby('country')['country'].count()
cf = country_counts.reset_index(name='count')
print(cf)

# Fetch a Wikidata entity.
# NOTE(review): `entity` is not used in the visible cells — confirm a later
# cell needs this lookup.
client = Client()
entity = client.get('Q127990', load=True)

# Print the continent code of every country that appears in the counts.
for country_code in cf['country']:
    print(country_alpha2_to_continent_code(country_code))
    
def get_continent(col, lookup=None):
    """Return the continent code for ``col['country']``, or 'Unknown'.

    Parameters
    ----------
    col : mapping-like (e.g. a dict or DataFrame row)
        Must expose the country code under the key 'country'.
    lookup : callable, optional
        Function mapping a country code to a continent code. Defaults to
        ``country_alpha2_to_continent_code``; it is assumed to raise KeyError
        for codes it does not recognise (confirm against pycountry_convert).

    Returns
    -------
    The continent code returned by ``lookup``, or the string 'Unknown' when
    the 'country' key is missing or the code cannot be resolved.
    """
    if lookup is None:
        lookup = country_alpha2_to_continent_code
    try:
        cn_continent = lookup(col['country'])
    except (KeyError, TypeError):
        # Only a failed lookup (unknown, missing or unhashable code) maps to
        # 'Unknown'. Any other error is a real bug and must surface, which a
        # bare `except:` would have hidden.
        cn_continent = 'Unknown'
    return cn_continent



   country  count
0       AR      1
1       AU     17
2       BE      5
3       BR      3
4       CA      9
5       CH      4
6       CN      1
7       DE     21
8       DK      2
9       FR      9
10      GB     46
11      ID      1
12      IE      2
13      IT      1
14      JP      1
15      KR      1
16      NL      9
17      NO      1
18      NZ      3
19      PE      4
20      PT      1
21      RU      1
22      SE      2
23      SG      1
24      TR      2
25      US     56
26      UY      1
27      ZA      2
SA
OC
EU
SA
NA
EU
AS
EU
EU
EU
EU
AS
EU
EU
AS
AS
EU
EU
OC
SA
EU
EU
EU
AS
AS
NA
SA
AF
