# Caffeine pharmacokinetics data in SEEK
Upload of file assets to SEEK via the read/write API. The experimental data sets were digitized from the literature; the respective files are now made available via LiSyM-SEEK.

<img src="./data_extraction.png" width="500"/>

Proof-of-principle by **Matthias König, Hadas Leonov & Wolfgang Müller** 

In [1]:
# important ids (seekbeta.lisym.org)

# base_url = 'https://seekbeta.lisym.org/'

base_url = 'https://seek.lisym.org/'
# The token is the first line of the local token file:
# "user:password" encoded in base64.
# The context manager closes the file handle again after reading.
with open("./token") as token_file:
    API_TOKEN = token_file.readline().strip()
# Placeholder text -> replacement value, substituted into the JSON payloads.
REPLACEMENTS = {
    'KOENIG_ID': "17",
    'PROJECT_ID': "10",  # MM-PLF
    'INVESTIGATION_ID': "20",
    'HOMO_SAPIENS': "950657990",
}


#base_url = 'http://localhost:3000/'
#API_TOKEN = open("./token_localhost").readline().strip() #"user:password" encoded in base64
#REPLACEMENTS = {
#    'KOENIG_ID': "1",
#    'PROJECT_ID': "2",  # MM-PLF
#    'INVESTIGATION_ID': "1",
#    'HOMO_SAPIENS': "950657990",
#}

In [2]:
import requests
import json
import string
from pprint import pprint

# Headers for JSON API requests, authenticated with the base64 token.
headers = {
    "Authorization": "Basic %s" % API_TOKEN,
    "Content-type": "application/vnd.api+json",
    "Accept": "application/vnd.api+json",
}
# File uploads use the same headers minus the JSON content type.
file_upload_headers = {
    name: value for name, value in headers.items() if name != 'Content-type'
}

In [3]:
def print_response(r, info=False):
    """Print a short summary of an HTTP response and return its JSON body.

    Args:
        r: Response-like object exposing ``status_code``, ``ok`` and ``text``
            (for example the return value of ``requests.post``).
        info: If true, additionally pretty-print the parsed body.

    Returns:
        The parsed JSON body if the response is OK and the body is valid
        JSON, otherwise ``None``.
    """
    print(r.status_code)
    if not r.ok:
        print("Error: ", r.text)
        return None

    try:
        obj = json.loads(r.text)
    except ValueError:
        # OK status but the body is not JSON (e.g. it is empty); report it
        # instead of failing with a decode error.
        print("Response body is not valid JSON: ", r.text)
        return None

    # Not every successful body has a `data.id` entry; only the lookup
    # failures are caught so that unrelated errors are not hidden.
    try:
        seek_id = obj['data']['id']
    except (KeyError, TypeError, IndexError):
        seek_id = '-'
    print("Returned successfully posted object: <{}>".format(seek_id))
    if info:
        pprint(obj)
    return obj

In [4]:
def init_json_data(file, replacements=REPLACEMENTS):
    """Load a JSON template and substitute placeholders in its serialization.

    Args:
        file: Path of the JSON template file.
        replacements: Mapping of placeholder text to replacement value.
            Defaults to the module-level ``REPLACEMENTS`` dict object, so
            entries added to that dict later are used as well.

    Returns:
        The JSON document as a string with every placeholder replaced.
    """
    # Close the template file deterministically instead of leaking the handle.
    with open(file) as template:
        json_format = json.load(template)
    json_data = json.dumps(json_format)
    for key, value in replacements.items():
        # The template was parsed as JSON above, so a placeholder can only sit
        # inside a JSON string. Inserting the value JSON-escaped (without the
        # surrounding quotes) keeps the document valid when the value contains
        # quotes or backslashes; str() also admits non-string ids.
        escaped = json.dumps(str(value))[1:-1]
        json_data = json_data.replace(key, escaped)
    return json_data

### Post caffeine study
The investigation and project already exist and can be referenced via their respective ids. Now we create a new study for the caffeine data.

In [5]:
# Fill the study template with the configured ids.
study_file = "./pkdb/json/caffeine_study.json"
url = base_url + "studies"
json_data = init_json_data(study_file, replacements=REPLACEMENTS)

# Show the payload that is about to be sent, framed by separator lines.
print('-' * 80)
pprint(json.loads(json_data))
print('-' * 80)

# POST the study; `study` is whatever print_response returns for the response.
r = requests.post(url, data=json_data, headers=headers)
study = print_response(r)

--------------------------------------------------------------------------------
{'data': {'attributes': {'description': 'Pharmacokinetics data set for '
                                        'caffeine',
                         'experimentalists': '',
                         'other_creators': '',
                         'person_responsible_id': '17',
                         'policy': {'access': 'view',
                                    'permissions': [{'access': 'manage',
                                                     'resource': {'id': '10',
                                                                  'type': 'projects'}}]},
                         'title': 'PKDB Caffeine Study'},
          'relationships': {'creators': {'data': [{'id': '17',
                                                   'type': 'people'}]},
                            'investigation': {'data': {'id': '20',
                                                       'type': 'investigations'}},
    

In [6]:
# Register the id of the posted study under the STUDY_ID placeholder
# (needed for the assays).
REPLACEMENTS.update(STUDY_ID=study['data']['id'])

### Experimental assays & Data files
Every digitized publication is handled as a separate experimental assay.

The corresponding data files (CSV data) are attached to the assay as data files.

In [7]:
from os import listdir
from os.path import isfile, join
# Regular files in the data directory (sub-directories are skipped).
data_dir = './pkdb/files/data'
files = [
    entry for entry in listdir(data_dir)
    if isfile(join(data_dir, entry))
]
# Author ids are the CSV file names without their '.csv' suffix.
author_ids = {
    name[:-len('.csv')] for name in files if name.endswith('.csv')
}
pprint(author_ids)

{'Akinyinka2000',
 'Amchin1999',
 'Blanchard1983a',
 'Haller2002',
 'Healy1991',
 'Hetzler1990',
 'Jeppesen1996',
 'Kakuda2014',
 'Kaplan1997',
 'Magnusson2008',
 'Oh2012',
 'Perera2011',
 'Spigset1999a',
 'Tanaka2014'}


In [8]:
# Create one assay per author id and keep the parsed responses keyed by
# author id.
exp_assays = {}
for author_id in author_ids:
    # Per-author placeholder values for the assay template.
    REPLACEMENTS.update(
        AUTHOR_ID=author_id,
        AUTHOR_DESCRIPTION='{} Description'.format(author_id),
    )

    assay_file = "./pkdb/json/caffeine_exp_assay.json"
    url = base_url + "assays"
    json_data = init_json_data(assay_file)
    r = requests.post(url, data=json_data, headers=headers)
    assay = print_response(r)
    exp_assays[author_id] = assay

200
Returned successfully posted object: <28>
200
Returned successfully posted object: <29>
200
Returned successfully posted object: <30>
200
Returned successfully posted object: <31>
200
Returned successfully posted object: <32>
200
Returned successfully posted object: <33>
200
Returned successfully posted object: <34>
200
Returned successfully posted object: <35>
200
Returned successfully posted object: <36>
200
Returned successfully posted object: <37>
200
Returned successfully posted object: <38>
200
Returned successfully posted object: <39>
200
Returned successfully posted object: <40>
200
Returned successfully posted object: <41>


In [12]:
# -------------
# CSV
# -------------
# For every author id: create the data file metadata (POST), then upload the
# CSV content to the content blob link from the response (PUT).
csv_datafiles = {}
for author_id in author_ids:
    print('*** ' + author_id + ' ***')
    # get the correct assay
    assay = exp_assays[author_id]

    REPLACEMENTS['ASSAY_ID'] = assay['data']['id']
    REPLACEMENTS['AUTHOR_ID'] = author_id
    REPLACEMENTS['AUTHOR_DESCRIPTION'] = '{} Description'.format(author_id)
    REPLACEMENTS['FILENAME'] = '{}.csv'.format(author_id)
    REPLACEMENTS['CONTENT_TYPE'] = 'text/csv'
    REPLACEMENTS['TITLE'] = "CSV ({})".format(author_id)

    # first, create the metadata (POST)
    df_file = "./pkdb/json/caffeine_data_file.json"
    url = base_url + "data_files"
    json_data = init_json_data(df_file)
    r = requests.post(url, headers=headers, data=json_data)
    datafile = print_response(r)
    csv_datafiles[author_id] = datafile

    # second, add the file (PUT)
    filepath = './pkdb/files/data/{}.csv'.format(author_id)
    url = datafile['data']['attributes']['content_blobs'][0]['link']  # content_blobs array size = 1
    # bugfix for incorrect prefix
    # url = url.replace('http://seek', 'https://seekbeta')
    print(url)
    # Stream the file and close the handle as soon as the upload is done.
    with open(filepath, 'rb') as content:
        r = requests.put(url, headers=file_upload_headers, data=content)
    print("Response Status:", r)
    print('-'*80)

*** Amchin1999 ***
200
Returned successfully posted object: <157>
http://seek.lisym.org/data_files/157/content_blobs/367
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Tanaka2014 ***
200
Returned successfully posted object: <158>
http://seek.lisym.org/data_files/158/content_blobs/368
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Perera2011 ***
200
Returned successfully posted object: <159>
http://seek.lisym.org/data_files/159/content_blobs/369
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Healy1991 ***
200
Returned successfully posted object: <160>
http://seek.lisym.org/data_files/160/content_blobs/370
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Hetzler1990 ***
200
Returned successfully posted obje

In [14]:
# -------------
# PNG
# -------------
# For every author id: create the data file metadata (POST), then upload the
# PNG content to the content blob link from the response (PUT).
png_datafiles = {}
for author_id in author_ids:
    print('*** ' + author_id + ' ***')
    # get the correct assay
    assay = exp_assays[author_id]

    REPLACEMENTS['ASSAY_ID'] = assay['data']['id']
    REPLACEMENTS['AUTHOR_ID'] = author_id
    REPLACEMENTS['AUTHOR_DESCRIPTION'] = '{} Description'.format(author_id)
    REPLACEMENTS['FILENAME'] = '{}.png'.format(author_id)
    REPLACEMENTS['CONTENT_TYPE'] = 'image/png'
    REPLACEMENTS['TITLE'] = "PNG ({})".format(author_id)

    # first, create the metadata (POST)
    df_file = "./pkdb/json/caffeine_data_file.json"
    url = base_url + "data_files"
    json_data = init_json_data(df_file)
    r = requests.post(url, headers=headers, data=json_data)
    datafile = print_response(r)
    png_datafiles[author_id] = datafile

    # second, add the file (PUT)
    filepath = './pkdb/files/data/{}.png'.format(author_id)
    url = datafile['data']['attributes']['content_blobs'][0]['link']  # content_blobs array size = 1
    # bugfix for incorrect prefix
    # url = url.replace('http://seek', 'https://seekbeta')
    print(url)
    # Stream the file and close the handle as soon as the upload is done.
    with open(filepath, 'rb') as content:
        r = requests.put(url, headers=file_upload_headers, data=content)
    print("Response Status:", r)
    print('-'*80)

*** Amchin1999 ***
200
Returned successfully posted object: <171>
http://seek.lisym.org/data_files/171/content_blobs/381
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Tanaka2014 ***
200
Returned successfully posted object: <172>
http://seek.lisym.org/data_files/172/content_blobs/382
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Perera2011 ***
200
Returned successfully posted object: <173>
http://seek.lisym.org/data_files/173/content_blobs/383
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Healy1991 ***
200
Returned successfully posted object: <174>
http://seek.lisym.org/data_files/174/content_blobs/384
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Hetzler1990 ***
200
Returned successfully posted obje

In [None]:
# make sure cleanup is not triggered automatically:
# this cell always raises, so the cleanup code below has to be run by hand
raise NotImplementedError

# Cleanup SEEK

In [None]:
# delete datafiles (csv)
# (disabled: the loop is kept inside a string literal and does not run)
'''
for datafile in csv_datafiles.values():
    datafile_id = datafile['data']['id']
    url = base_url + "data_files/{}.json".format(datafile_id)
    print(url)
    r = requests.delete(url, headers=headers)
    print_response(r)
'''
# delete datafiles (png)
for datafile in png_datafiles.values():
    datafile_id = datafile['data']['id']
    # Build the URL first so the printed URL is the one being deleted.
    url = base_url + "data_files/{}.json".format(datafile_id)
    print(url)
    r = requests.delete(url, headers=headers)
    # print_response needs the response object (status code, ok flag, text),
    # not just the body text.
    print_response(r)

In [13]:
# Delete the data files with ids 129..156 (the range end is exclusive).
# The loop variable is not called `id`, so the builtin stays usable.
for datafile_id in range(129, 157):
    url = base_url + "data_files/{}.json".format(datafile_id)
    r = requests.delete(url, headers=headers)
    print_response(r)

200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Returned successfully posted object: <->
200
Return

In [None]:
# Remove every assay recorded in `exp_assays`.
for assay in exp_assays.values():
    assay_id = assay['data']['id']
    url = base_url + "assays/{}.json".format(assay_id)
    print(url)

    r = requests.delete(url, headers=headers)
    print_response(r)

In [None]:
# r = requests.delete(base_url + "assays/13.json", headers=headers)
# print_response(r)  

In [None]:
# Delete a study by id.
# NOTE(review): the id is hard-coded to 12; the id of the study posted by this
# notebook would be study['data']['id'] - confirm which one is intended.
study_id = 12
url = base_url + "studies/{}.json".format(study_id)
print(url)

r = requests.delete(url, headers=headers)
print_response(r)