# SEEK read API

Example script using the REST API to load the available publications from LiSyM.

We use the official SEEK instance
https://seek.lisym.org/  
running SEEK v1.8.1 (2019-04-10)

adapted from Wolfgang Müller, HITS

In [1]:
SEEK_URL = "https://seek.lisym.org/"

# Read publications from SEEK instance

In [15]:
import requests
import datetime

def get_json(url):
    print(url)
    r = requests.get(url)
    json = r.json()
    return json['data']

def getAllPublications():
    """ Get JSON for all publications."""
    url = SEEK_URL + 'publications.json'
    return get_json(url)

def getPublication(pid):
    url = SEEK_URL + 'publications/{}.json'.format(pid)
    return get_json(url)

def parsePublication(data):
    # print(data)
    attributes = data['attributes']
    return {
      'id': data['id'],
      'pmid': attributes['pubmed_id'],
      'doi': attributes['doi'],
      'link': attributes['link_to_pub'],  
      'title': attributes['title'],
      'authors': attributes['authors'],
      'abstract': attributes['abstract'],
      'citation': attributes['citation'],
      'journal': attributes['journal'],
      'published_date': attributes['published_date'],
      'projects': data['relationships']['projects']
    }

allPublicationsData = getAllPublications()

publications = []
for item in allPublicationsData:
    print(item)
    id = item["id"]
    fullPublicationData = getPublication(id)
    
    publications.append(parsePublication(fullPublicationData))
print(len(allPublicationsData))
# now "publications" contains a list of hashes with publication information

https://seek.lisym.org/publications.json
{'id': '1', 'type': 'publications', 'attributes': {'title': 'Specifications of Standards in Systems and Synthetic Biology: Status and Developments in 2016'}, 'links': {'self': '/publications/1'}}
https://seek.lisym.org/publications/1.json
{'id': '2', 'type': 'publications', 'attributes': {'title': 'FAIRDOMHub: a repository and collaboration environment for sharing systems biology research'}, 'links': {'self': '/publications/2'}}
https://seek.lisym.org/publications/2.json
{'id': '3', 'type': 'publications', 'attributes': {'title': 'The Human Physiome: how standards, software and innovative service infrastructures are providing the building blocks to make it achievable'}, 'links': {'self': '/publications/3'}}
https://seek.lisym.org/publications/3.json
{'id': '4', 'type': 'publications', 'attributes': {'title': 'SABIO-RK, von Daten in der Publikation zur Suchlösung für\xa0Spezialisten'}, 'links': {'self': '/publications/4'}}
https://seek.lisym.org/

NameError: name 'allPublicationData' is not defined

In [16]:
print(publications[0])

{'id': '1', 'pmid': 28187405, 'doi': None, 'link': 'https://www.ncbi.nlm.nih.gov/pubmed/28187405', 'title': 'Specifications of Standards in Systems and Synthetic Biology: Status and Developments in 2016', 'authors': ['F. Schreiber', 'G. D. Bader', 'P. Gleeson', 'Martin Golebiewski', 'M. Hucka', 'N. Le Novere', 'C. Myers', 'D. Nickerson', 'B. Sommer', 'D. Walthemath'], 'abstract': 'Standards are essential to the advancement of science and technology. In systems and synthetic biology, numerous standards and associated tools have been developed over the last 16 years. This special issue of the Journal of Integrative Bioinformatics aims to support the exchange, distribution and archiving of these standards, as well as to provide centralised and easily citable access to them.', 'citation': 'J Integr Bioinform. 2016 Dec 18;13(3):289. doi: 10.2390/biecoll-jib-2016-289.', 'journal': 'J Integr Bioinform', 'published_date': '2017-02-12', 'projects': {'data': [{'id': '2', 'type': 'projects'}]}}


In [17]:
import pandas as pd
df_pub = pd.DataFrame(publications)
# df_pub = df_pub[["title", "journal", "published_date", "citation", "authors", "projects"]]

# save as excel file
writer = pd.ExcelWriter('./results/publications.xlsx', engine='xlsxwriter')
df_pub.to_excel(writer, sheet_name='Publications', index=False)
writer.save()

df_pub.head(10)

Unnamed: 0,id,pmid,doi,link,title,authors,abstract,citation,journal,published_date,projects
0,1,28187405.0,,https://www.ncbi.nlm.nih.gov/pubmed/28187405,Specifications of Standards in Systems and Syn...,"[F. Schreiber, G. D. Bader, P. Gleeson, Martin...",Standards are essential to the advancement of ...,J Integr Bioinform. 2016 Dec 18;13(3):289. doi...,J Integr Bioinform,2017-02-12,"{'data': [{'id': '2', 'type': 'projects'}]}"
1,2,27899646.0,,https://www.ncbi.nlm.nih.gov/pubmed/27899646,FAIRDOMHub: a repository and collaboration env...,"[K. Wolstencroft, Olga Krebs, J. L. Snoep, N. ...",The FAIRDOMHub is a repository for publishing ...,Nucleic Acids Res. 2017 Jan 4;45(D1):D404-D407...,Nucleic Acids Res,2016-12-03,"{'data': [{'id': '2', 'type': 'projects'}]}"
2,3,27051515.0,,https://www.ncbi.nlm.nih.gov/pubmed/27051515,"The Human Physiome: how standards, software an...","[D. Nickerson, K. Atalag, B. de Bono, J. Geige...",Reconstructing and understanding the Human Phy...,Interface Focus. 2016 Apr 6;6(2):20150103. doi...,Interface Focus,2016-04-07,"{'data': [{'id': '2', 'type': 'projects'}]}"
3,4,,10.1007/s13222-016-0243-4,https://www.ncbi.nlm.nih.gov/pubmed/,"SABIO-RK, von Daten in der Publikation zur Suc...","[Wolfgang Müller, Meik Bittkowski, Martin Gole...",,Datenbank Spektrum 17(1) : 21,Datenbank Spektrum,2017-03-01,"{'data': [{'id': '2', 'type': 'projects'}]}"
4,5,27587694.0,,https://www.ncbi.nlm.nih.gov/pubmed/27587694,L1 regularization facilitates detection of cel...,"[B. Steiert, Jens Timmer, C. Kreutz]",MOTIVATION: A major goal of drug development i...,Bioinformatics. 2016 Sep 1;32(17):i718-i726. d...,Bioinformatics,2016-09-03,"{'data': [{'id': '5', 'type': 'projects'}]}"
5,6,27494133.0,,https://www.ncbi.nlm.nih.gov/pubmed/27494133,Identification of Cell Type-Specific Differenc...,"[R. Merkle, B. Steiert, F. Salopiata, S. Depne...","Lung cancer, with its most prevalent form non-...",PLoS Comput Biol. 2016 Aug 5;12(8):e1005049. d...,PLoS Comput Biol,2016-08-06,"{'data': [{'id': '5', 'type': 'projects'}]}"
6,7,27588423.0,,https://www.ncbi.nlm.nih.gov/pubmed/27588423,Driving the Model to Its Limit: Profile Likeli...,"[T. Maiwald, H. Hass, B. Steiert, J. Vanlier, ...","In systems biology, one of the major tasks is ...",PLoS One. 2016 Sep 2;11(9):e0162366. doi: 10.1...,PLoS One,2016-09-03,"{'data': [{'id': '5', 'type': 'projects'}]}"
7,11,,10.1016/j.jhep.2015.11.018,https://www.ncbi.nlm.nih.gov/pubmed/,Model-guided identification of a therapeutic s...,"[Ahmed Ghallab, Géraldine Cellière, Sebastian ...","BACKGROUND & AIMS: \r\n\r\nRecently, spatial-t...",Journal of Hepatology 64(4) : 860,Journal of Hepatology,2016-04-01,"{'data': [{'id': '7', 'type': 'projects'}]}"
8,12,,10.1007/s00204-016-1906-5,https://www.ncbi.nlm.nih.gov/pubmed/,In vivo imaging of systemic transport and elim...,"[Raymond Reif, Ahmed Ghallab, Lynette Beattie,...",\r\n\r\nWe describe a two-photon microscopy-ba...,Arch Toxicol 91(3) : 1335,Arch Toxicol,2017-03-01,"{'data': [{'id': '7', 'type': 'projects'}]}"
9,13,,10.1002/hep.28965,https://www.ncbi.nlm.nih.gov/pubmed/,The ascending pathophysiology of cholestatic l...,"[Peter Jansen, Ahmed Ghallab, Nachiket Vartak,...",In this review we develop the argument that ch...,Hepatology 65(2) : 722,Hepatology,2017-02-01,"{'data': [{'id': '7', 'type': 'projects'}]}"


## Create a publication "report"

In [5]:
import json
project_cache = dict()

def get_project(pid):
    """ Get project information, with caching. """
    if not pid in project_cache:
        url = SEEK_URL + 'projects/{}.json'.format(pid)
        project_cache[pid] = get_json(url)

    return project_cache[pid]

def createProjectReport(projectData):
    for project in projectData['data']:
        pid =  project['id']
        pdata = get_project(pid)
        title = pdata['attributes']['title'];
        print(json.dumps(pdata, indent=4, sort_keys=True))
    
def create_report(pub_json):
    print()
    print('------------------------------------------------------------------------------')
    for key in ['title', 'authors', 'citation', 'published_data', 'journal']:
        print(pub_json.get(key))
    
    # project information for publication
    projects = pub_json['projects']
    createProjectReport(projects)


In [6]:
# now iterate over all publications
# and create a small report for each of them
for p in publications[0:3]:
    create_report(p)


------------------------------------------------------------------------------
Specifications of Standards in Systems and Synthetic Biology: Status and Developments in 2016
['F. Schreiber', 'G. D. Bader', 'P. Gleeson', 'Martin Golebiewski', 'M. Hucka', 'N. Le Novere', 'C. Myers', 'D. Nickerson', 'B. Sommer', 'D. Walthemath']
J Integr Bioinform. 2016 Dec 18;13(3):289. doi: 10.2390/biecoll-jib-2016-289.
None
J Integr Bioinform
https://seek.lisym.org/projects/2.json
{
    "attributes": {
        "avatar": null,
        "default_license": "notspecified",
        "default_policy": {
            "access": "no_access",
            "permissions": [
                {
                    "access": "download",
                    "resource": {
                        "id": "2",
                        "type": "projects"
                    }
                },
                {
                    "access": "view",
                    "resource": {
                        "id": "13",
           