# Caffeine pharmacokinetics data in SEEK
Upload of file assets to SEEK via the read/write API. The experimental data sets were digitized from the literature, and the resulting files are made available via LiSyM-SEEK.

<img src="./data_extraction.png" width="500"/>

Proof-of-principle by **Matthias König, Hadas Leonov & Wolfgang Müller** 

In [9]:
# Connection settings and important ids for the SEEK instance selected by base_url.
# NOTE(review): this comment previously named seekbeta.lisym.org while base_url
# points at seek.lisym.org — confirm the ids below belong to the active instance.

# base_url = 'https://seekbeta.lisym.org/'

base_url = 'https://seek.lisym.org/'

# The first line of the token file holds "user:password" encoded in base64.
# Reading it inside a `with` block closes the file handle right away instead
# of leaving it open for the lifetime of the kernel.
with open("./token") as token_file:
    API_TOKEN = token_file.readline().strip()

# Placeholder text -> value; substituted into the JSON templates by init_json_data.
REPLACEMENTS = {
    'KOENIG_ID': "17",
    'PROJECT_ID': "10",  # MM-PLF
    'INVESTIGATION_ID': "20",
    'HOMO_SAPIENS': "950657990",
}


#base_url = 'http://localhost:3000/'
#API_TOKEN = open("./token_localhost").readline().strip() #"user:password" encoded in base64
#REPLACEMENTS = {
#    'KOENIG_ID': "1",
#    'PROJECT_ID': "2",  # MM-PLF
#    'INVESTIGATION_ID': "1",
#    'HOMO_SAPIENS': "950657990",
#}

In [10]:
import time
import requests
import json
import string
from pprint import pprint

# Headers sent with every JSON request: basic authentication plus the
# JSON API media type for both the request body and the accepted response.
headers = {
    "Authorization": "Basic %s" % API_TOKEN,
    "Content-type": "application/vnd.api+json",
    "Accept": "application/vnd.api+json",
}

# File uploads use the same headers, except that the body is sent as a raw
# byte stream.
# file_upload_headers.pop('Content-type');
file_upload_headers = dict(headers)
file_upload_headers.update({'Content-type': "application/octet-stream"})

In [14]:
def print_response(r, info=False):
    """Print a short summary of an HTTP response and return its parsed body.

    Args:
        r: Response-like object providing ``status_code``, ``ok`` and ``text``
            (e.g. a ``requests.Response``).
        info: If True, additionally pretty-print the complete parsed body.

    Returns:
        The parsed JSON body if the response is ok and its body is valid JSON,
        otherwise None.
    """
    print(r.status_code)

    if not r.ok:
        print("Error: ", r.text)
        return None

    try:
        obj = json.loads(r.text)
    except ValueError:
        # json.JSONDecodeError is a ValueError. A successful response can have
        # an empty or non-JSON body; report that instead of raising.
        print("Returned successfully, but the body is not JSON: ", r.text)
        return None

    # Not every successful answer has the {'data': {'id': ...}} shape (e.g. a
    # plain status object), so only the lookup errors are treated as "no id".
    try:
        seek_id = obj['data']['id']
    except (KeyError, TypeError, IndexError):
        seek_id = '-'
    print("Returned successfully posted object: <{}>".format(seek_id))
    if info:
        pprint(obj)
    return obj

In [12]:
def init_json_data(file, replacements=REPLACEMENTS):
    """Load a JSON template file and substitute placeholder text in it.

    Args:
        file: Path of the JSON template file.
        replacements: Mapping of placeholder text to replacement value. The
            default is the module-level REPLACEMENTS dict object itself, so
            entries added to that dict later are picked up by later calls.

    Returns:
        The JSON document serialized as a string, with every occurrence of
        each placeholder replaced by its value.
    """
    # Read the template inside a `with` block so the file handle is closed.
    with open(file) as template_file:
        json_format = json.load(template_file)

    # Parsing and re-serializing rejects malformed templates early.
    json_data = json.dumps(json_format)
    for key, value in replacements.items():
        # str() lets non-string values (e.g. integer ids) be substituted too.
        # NOTE: values are inserted verbatim, so a value containing a double
        # quote or backslash would produce invalid JSON.
        json_data = json_data.replace(key, str(value))
    return json_data

### Post caffeine study
The investigation and project already exist and can be referenced via their respective ids. Now we create a new study for the caffeine data.

In [13]:
# Build the study payload from its JSON template (placeholders are filled from
# REPLACEMENTS) and POST it to the studies endpoint.
study_file = "./pkdb/json/caffeine_study.json"
url = base_url + "studies"
json_data = init_json_data(study_file, replacements=REPLACEMENTS)
# pprint(json_data)
# Show the payload that is about to be sent, framed by two separator lines.
print('-'*80)
pprint(json.loads(json_data))
print('-'*80)
r = requests.post(url, headers=headers, data=json_data)
# `study` is the parsed response body, or None if the request failed.
study = print_response(r)

--------------------------------------------------------------------------------
{'data': {'attributes': {'description': 'Pharmacokinetics data set for '
                                        'caffeine',
                         'experimentalists': '',
                         'other_creators': '',
                         'person_responsible_id': '17',
                         'policy': {'access': 'view',
                                    'permissions': [{'access': 'manage',
                                                     'resource': {'id': '10',
                                                                  'type': 'projects'}}]},
                         'title': 'PKDB Caffeine Study'},
          'relationships': {'creators': {'data': [{'id': '17',
                                                   'type': 'people'}]},
                            'investigation': {'data': {'id': '20',
                                                       'type': 'investigations'}},
    

In [15]:
# store study id for assays
# It is added to REPLACEMENTS under 'STUDY_ID', so later init_json_data calls
# that use the default replacements substitute it as well. If the study POST
# failed, `study` is None and this line raises a TypeError.
REPLACEMENTS['STUDY_ID'] = study['data']['id']

### Experimental assays & Data files
Every digitized publication is handled as a separate experimental assay.

The corresponding data files (CSV data) are attached to the assay as data files.

In [16]:
from os import listdir
from os.path import isfile, join
# Directory that is scanned for the data files.
data_dir = './pkdb/files/data'

# Keep regular files only; sub-directories are skipped.
files = list(filter(lambda entry: isfile(join(data_dir, entry)), listdir(data_dir)))

# One id per CSV file: the file name without its 4-character '.csv' suffix.
author_ids = {csv_name[:-4] for csv_name in files if csv_name.endswith('.csv')}
pprint(author_ids)

{'Akinyinka2000',
 'Amchin1999',
 'Blanchard1983a',
 'Haller2002',
 'Healy1991',
 'Hetzler1990',
 'Jeppesen1996',
 'Kakuda2014',
 'Kaplan1997',
 'Magnusson2008',
 'Oh2012',
 'Perera2011',
 'Spigset1999a',
 'Tanaka2014'}


In [17]:
# create all
# One assay is POSTed per author id; the parsed responses are kept in
# exp_assays, keyed by author id, for the data file uploads.
exp_assays = {}
for author_id in author_ids:
    print('*** ' + author_id + ' ***')
    # Per-assay placeholders. init_json_data is called without an explicit
    # mapping below, so it reads them from the shared REPLACEMENTS dict.
    REPLACEMENTS['AUTHOR_ID'] = author_id 
    REPLACEMENTS['AUTHOR_DESCRIPTION'] = '{} Description'.format(author_id)

    assay_file = "./pkdb/json/caffeine_exp_assay.json"
    url = base_url + "assays"
    json_data = init_json_data(assay_file)
    r = requests.post(url, headers=headers, data=json_data)
    # print_response returns None on failure, so a failed POST stores None.
    assay = print_response(r)
    exp_assays[author_id] = assay

*** Amchin1999 ***
200
Returned successfully posted object: <142>
*** Akinyinka2000 ***
200
Returned successfully posted object: <143>
*** Blanchard1983a ***
200
Returned successfully posted object: <144>
*** Tanaka2014 ***
200
Returned successfully posted object: <145>
*** Kakuda2014 ***
200
Returned successfully posted object: <146>
*** Magnusson2008 ***
200
Returned successfully posted object: <147>
*** Haller2002 ***
200
Returned successfully posted object: <148>
*** Hetzler1990 ***
200
Returned successfully posted object: <149>
*** Healy1991 ***
200
Returned successfully posted object: <150>
*** Perera2011 ***
200
Returned successfully posted object: <151>
*** Spigset1999a ***
200
Returned successfully posted object: <152>
*** Jeppesen1996 ***
200
Returned successfully posted object: <153>
*** Kaplan1997 ***
200
Returned successfully posted object: <154>
*** Oh2012 ***
200
Returned successfully posted object: <155>


In [18]:
# -------------
# CSV
# -------------
# For every author id: create the data file metadata (POST), then upload the
# CSV content to the content blob link returned for it (PUT).
csv_datafiles = {}
for author_id in author_ids:
    print('*** ' + author_id + ' ***')
    # get the correct assay
    assay = exp_assays[author_id]

    # Placeholders for the data file template (read by init_json_data from the
    # shared REPLACEMENTS dict).
    REPLACEMENTS['ASSAY_ID'] = assay['data']['id']
    REPLACEMENTS['AUTHOR_ID'] = author_id
    REPLACEMENTS['AUTHOR_DESCRIPTION'] = '{} Description'.format(author_id)
    REPLACEMENTS['FILENAME'] = '{}.csv'.format(author_id)
    REPLACEMENTS['CONTENT_TYPE'] = 'text/csv'
    REPLACEMENTS['TITLE'] = "CSV ({})".format(author_id)

    # first, create the metadata (POST)
    df_file = "./pkdb/json/caffeine_data_file.json"
    url = base_url + "data_files"
    json_data = init_json_data(df_file)
    r = requests.post(url, headers=headers, data=json_data)
    datafile = print_response(r)
    csv_datafiles[author_id] = datafile

    # second, add the file (PUT)
    filepath = './pkdb/files/data/{}.csv'.format(author_id)
    url = datafile['data']['attributes']['content_blobs'][0]['link']  # content_blobs array size = 1
    # bugfix for incorrect prefix
    # url = url.replace('http://seek', 'https://seekbeta')
    print(url)
    # Upload inside a `with` block so each file handle is closed after its
    # request instead of staying open until the kernel exits.
    with open(filepath, 'rb') as upload_file:
        r = requests.put(url, headers=file_upload_headers, data=upload_file)
    print("Response Status:", r)
    print('-'*80)

*** Amchin1999 ***
200
Returned successfully posted object: <331>
http://seek.lisym.org/data_files/331/content_blobs/557
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Akinyinka2000 ***
200
Returned successfully posted object: <332>
http://seek.lisym.org/data_files/332/content_blobs/558
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Blanchard1983a ***
200
Returned successfully posted object: <333>
http://seek.lisym.org/data_files/333/content_blobs/559
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Tanaka2014 ***
200
Returned successfully posted object: <334>
http://seek.lisym.org/data_files/334/content_blobs/560
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Kakuda2014 ***
200
Returned successfully post

In [19]:
# -------------
# PNG
# -------------
# For every author id: create the data file metadata (POST), then upload the
# PNG content to the content blob link returned for it (PUT).
png_datafiles = {}
for author_id in author_ids:
    print('*** ' + author_id + ' ***')
    # get the correct assay
    assay = exp_assays[author_id]

    # Placeholders for the data file template (read by init_json_data from the
    # shared REPLACEMENTS dict).
    REPLACEMENTS['ASSAY_ID'] = assay['data']['id']
    REPLACEMENTS['AUTHOR_ID'] = author_id
    REPLACEMENTS['AUTHOR_DESCRIPTION'] = '{} Description'.format(author_id)
    REPLACEMENTS['FILENAME'] = '{}.png'.format(author_id)
    REPLACEMENTS['CONTENT_TYPE'] = 'image/png'
    REPLACEMENTS['TITLE'] = "PNG ({})".format(author_id)

    # first, create the metadata (POST)
    df_file = "./pkdb/json/caffeine_data_file.json"
    url = base_url + "data_files"
    json_data = init_json_data(df_file)
    r = requests.post(url, headers=headers, data=json_data)
    datafile = print_response(r)
    png_datafiles[author_id] = datafile

    # second, add the file (PUT)
    filepath = './pkdb/files/data/{}.png'.format(author_id)
    url = datafile['data']['attributes']['content_blobs'][0]['link']  # content_blobs array size = 1
    # bugfix for incorrect prefix
    # url = url.replace('http://seek', 'https://seekbeta')
    print(url)
    # Upload inside a `with` block so each file handle is closed after its
    # request instead of staying open until the kernel exits.
    with open(filepath, 'rb') as upload_file:
        r = requests.put(url, headers=file_upload_headers, data=upload_file)
    print("Response Status:", r)
    print('-'*80)
    time.sleep(1)  # sleep another second for the database

*** Amchin1999 ***
200
Returned successfully posted object: <345>
http://seek.lisym.org/data_files/345/content_blobs/571
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Akinyinka2000 ***
200
Returned successfully posted object: <346>
http://seek.lisym.org/data_files/346/content_blobs/572
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Blanchard1983a ***
200
Returned successfully posted object: <347>
http://seek.lisym.org/data_files/347/content_blobs/573
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Tanaka2014 ***
200
Returned successfully posted object: <348>
http://seek.lisym.org/data_files/348/content_blobs/574
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Kakuda2014 ***
200
Returned successfully post

In [None]:
# make sure cleanup is not triggered automatically
# The exception deliberately stops a top-to-bottom run at this cell, before the
# cleanup cells that follow; run those cells by hand when the uploaded objects
# should be removed.
raise NotImplementedError

# Cleanup SEEK

In [None]:
# delete datafiles (csv)
# Left disabled, as before (it used to be wrapped in a string literal).
# Uncomment to delete the uploaded CSV data files as well.
# for datafile in csv_datafiles.values():
#     datafile_id = datafile['data']['id']
#     url = base_url + "data_files/{}.json".format(datafile_id)
#     print(url)
#     r = requests.delete(url, headers=headers)
#     print_response(r)

# delete datafiles (png)
for datafile in png_datafiles.values():
    datafile_id = datafile['data']['id']
    # Build the URL before printing it, so the printed URL is the one that is
    # deleted and not a stale value from an earlier cell or iteration.
    url = base_url + "data_files/{}.json".format(datafile_id)
    print(url)
    r = requests.delete(url, headers=headers)
    # print_response reads r.status_code / r.ok / r.text, so it needs the
    # response object itself rather than its text.
    print_response(r)

In [6]:
# Delete data files by hard-coded id.
# NOTE(review): presumably left-overs from an earlier upload run — confirm the
# ids before re-running, since this issues DELETE requests against base_url.
# 204-218
# 243-257
# range() excludes its end value, so this loop covers ids 204..217.
# The loop variable is not called `id`, to avoid shadowing the builtin.
for data_file_id in range(204, 218):
    url = base_url + "data_files/{}.json".format(data_file_id)
    print(url)
    r = requests.delete(url, headers=headers)
    print_response(r)
    print()

https://seek.lisym.org/data_files/204.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/205.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/206.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/207.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/208.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/209.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/210.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/211.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/212.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/213.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/214.json
200
Returned successfully posted object: <->

https://seek.lisym.or

In [None]:
# delete experimental assays
# Issues one DELETE per assay stored in exp_assays. Entries that are None
# (failed POST) would raise a TypeError on the id lookup.
for assay in exp_assays.values():
    assay_id = assay['data']['id']
    url = base_url + "assays/{}.json".format(assay_id)
    print(url)
    r = requests.delete(url, headers=headers)
    print_response(r)  

In [7]:
# Delete assays by hard-coded id.
# NOTE(review): presumably left-overs from an earlier upload run — confirm the
# ids before re-running, since this issues DELETE requests against base_url.
# 84-98
# range() excludes its end value, so this loop covers ids 84..97.
# The loop variable is not called `id`, to avoid shadowing the builtin.
for assay_id in range(84, 98):
    url = base_url + "assays/{}.json".format(assay_id)
    print(url)
    r = requests.delete(url, headers=headers)
    print_response(r)
    print()

https://seek.lisym.org/assays/84.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/85.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/86.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/87.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/88.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/89.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/90.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/91.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/92.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/93.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/94.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/95.json
200
Returned successfully posted objec

In [8]:
# delete study
# The id is hard-coded here; the commented line below derives it from the
# `study` response of the current session instead.
# study_id = study['data']['id']
study_id = 17
url = base_url + "studies/{}.json".format(study_id)
print(url)
r = requests.delete(url, headers=headers)
# Last expression of the cell: the parsed response returned by print_response
# is also shown as the cell output.
print_response(r)

https://seek.lisym.org/studies/17.json
200
Returned successfully posted object: <->


{'status': 'ok'}