# Caffeine pharmacokinetics data in SEEK
Upload of file assets to seek via the write/read API. Experimental data sets have been digitized from literature. The respective files are now made available via LiSyM-SEEK.

<img src="./data_extraction.png" width="500"/>

Proof-of-principle by **Matthias König, Hadas Leonov & Wolfgang Müller** 

In [17]:
# important ids (seekbeta.lisym.org)
REPLACEMENTS = {
    'KOENIG_ID': "17",
    'PROJECT_ID': "10",  # MM-PLF
    'INVESTIGATION_ID': "17",
    'HOMO_SAPIENS': "950657990",
}

# localhost
REPLACEMENTS = {
    'KOENIG_ID': "1",
    'PROJECT_ID': "1",  # MM-PLF
    'INVESTIGATION_ID': "11",
    'HOMO_SAPIENS': "950657990",
}

In [22]:
import requests
import json
import string
from pprint import pprint
base_url = 'http://localhost:3000/'
# base_url = 'https://seekbeta.lisym.org/'

API_TOKEN = open("./token").readline().strip() #"user:password" encoded in base64
headers = {"Authorization": "Basic %s" %API_TOKEN,
           "Content-type": "application/vnd.api+json",
           "Accept": "application/vnd.api+json"}
file_upload_headers = headers.copy()
file_upload_headers.pop('Content-type');

In [23]:
def print_response(response_text, info=False):
    if (r.ok):
        obj = json.loads(response_text)
        try: 
            seek_id = obj['data']['id']
        except:
            seek_id = '-'
        print("Returned successfully posted object: <{}>".format(seek_id))
        if info:
            pprint(obj)
        return obj
    else:
        print("Error: ", r.text)
    return None

In [24]:
def init_json_data(file, replacements=REPLACEMENTS):
    json_format = json.load(open(file))
    json_data = json.dumps(json_format)
    for key, value in replacements.items():
        json_data = str.replace(json_data, key, value)
    return json_data

### Post caffeine study
The investigation and project already exist and can be referenced via their respective ids. Now we create a new study for the caffeine data.

In [25]:
study_file = "./pkdb/json/caffeine_study.json"
url = base_url + "studies"
json_data = init_json_data(study_file, replacements=REPLACEMENTS)
# pprint(json_data)
print('-'*80)
pprint(json.loads(json_data))
print('-'*80)
r = requests.post(url, headers=headers, data=json_data)
study = print_response(r.text)

--------------------------------------------------------------------------------
{'data': {'attributes': {'description': 'Pharmacokinetics data set for '
                                        'caffeine',
                         'experimentalists': '',
                         'other_creators': '',
                         'person_responsible_id': '1',
                         'policy': {'access': 'view',
                                    'permissions': [{'access': 'download',
                                                     'resource_id': '1',
                                                     'resource_type': 'projects'}]},
                         'title': 'PKDB Caffeine Study'},
          'relationships': {'creators': {'data': [{'id': '1',
                                                   'type': 'people'}]},
                            'investigation': {'data': {'id': '11',
                                                       'type': 'investigations'}},
              

In [43]:
# store study id for assays
REPLACEMENTS['STUDY_ID'] = study['data']['id']

### Experimental assays & Data files
Every digitized publication is handled as a separate experimental assay.

The corresponding data files (CSV data) are associated as data file to the assay.

In [93]:
from os import listdir
from os.path import isfile, join
data_dir = './pkdb/files/data'
files = [f for f in listdir(data_dir) if isfile(join(data_dir, f))]
author_ids = set([fname[:-4] for fname in files if fname.endswith('.csv')])
pprint(author_ids)

{'Akinyinka2000',
 'Amchin1999',
 'Blanchard1983a',
 'Haller2002',
 'Healy1991',
 'Hetzler1990',
 'Jeppesen1996',
 'Kakuda2014',
 'Kaplan1997',
 'Magnusson2008',
 'Oh2012',
 'Perera2011',
 'Spigset1999a',
 'Tanaka2014'}


In [94]:
# create all
exp_assays = {}
for author_id in author_ids:
    REPLACEMENTS['AUTHOR_ID'] = author_id 
    REPLACEMENTS['AUTHOR_DESCRIPTION'] = '{} Description'.format(author_id)

    assay_file = "./pkdb/json/caffeine_exp_assay.json"
    url = base_url + "assays"
    json_data = init_json_data(assay_file)
    r = requests.post(url, headers=headers, data=json_data)
    assay = print_response(r.text)
    exp_assays[author_id] = assay

Returned successfully posted object: <118>
Returned successfully posted object: <119>
Returned successfully posted object: <120>
Returned successfully posted object: <121>
Returned successfully posted object: <122>
Returned successfully posted object: <123>
Returned successfully posted object: <124>
Returned successfully posted object: <125>
Returned successfully posted object: <126>
Returned successfully posted object: <127>
Returned successfully posted object: <128>
Returned successfully posted object: <129>
Returned successfully posted object: <130>
Returned successfully posted object: <131>


In [112]:
# -------------
# CSV
# -------------
csv_datafiles = {}
for author_id in author_ids:
    # get the correct assay
    assay = exp_assays[author_id]

    REPLACEMENTS['ASSAY_ID'] = assay['data']['id']
    REPLACEMENTS['AUTHOR_ID'] = author_id 
    REPLACEMENTS['AUTHOR_DESCRIPTION'] = '{} Description'.format(author_id)
    REPLACEMENTS['FILENAME'] = '{}.csv'.format(author_id)
    REPLACEMENTS['CONTENT_TYPE'] = 'text/csv'
    REPLACEMENTS['TITLE'] = "CSV ({})".format(author_id)

    # create the metadata
    REPLACEMENTS['CONTENT_TYPE'] = "text/csv"
    df_file = "./pkdb/json/caffeine_data_file.json"
    url = base_url + "data_files"
    json_data = init_json_data(df_file)
    r = requests.post(url, headers=headers, data=json_data)
    datafile = print_response(r.text)
    csv_datafiles[author_id] = datafile

    # second, add the file (PUT)
    # THIS IS NOT WORKING, GETTING 404s
    
    filepath = './pkdb/files/data/{}.csv'.format(author_id)
    url = datafile['data']['attributes']['content_blobs'][0]['link']  # content_blobs array size = 1
    # bugfix for incorrect prefix
    url = url.replace('http://seek', 'https://seekbeta')
    print(url)
    r = requests.put(url, headers=file_upload_headers, data=open(filepath, 'rb'))
    print("Response Status:", r)
    print('-'*80)

Returned successfully posted object: <551>
https://seekbeta.lisym.org/data_files/551/content_blobs/985
Response Status: <Response [200]>
--------------------------------------------------------------------------------
Returned successfully posted object: <552>
https://seekbeta.lisym.org/data_files/552/content_blobs/986
Response Status: <Response [200]>
--------------------------------------------------------------------------------
Returned successfully posted object: <553>
https://seekbeta.lisym.org/data_files/553/content_blobs/987
Response Status: <Response [200]>
--------------------------------------------------------------------------------
Returned successfully posted object: <554>
https://seekbeta.lisym.org/data_files/554/content_blobs/988
Response Status: <Response [200]>
--------------------------------------------------------------------------------
Returned successfully posted object: <555>
https://seekbeta.lisym.org/data_files/555/content_blobs/989
Response Status: <Respons

In [115]:
# -------------
# PNG
# -------------
png_datafiles = {}
for author_id in author_ids:
    # get the correct assay
    assay = exp_assays[author_id]

    REPLACEMENTS['ASSAY_ID'] = assay['data']['id']
    REPLACEMENTS['AUTHOR_ID'] = author_id 
    REPLACEMENTS['AUTHOR_DESCRIPTION'] = '{} Description'.format(author_id)
    REPLACEMENTS['FILENAME'] = '{}.png'.format(author_id)
    REPLACEMENTS['CONTENT_TYPE'] = 'image/png'
    REPLACEMENTS['TITLE'] = "PNG ({})".format(author_id)

    # create the metadata
    REPLACEMENTS['CONTENT_TYPE'] = "image/png"
    df_file = "./pkdb/json/caffeine_data_file.json"
    url = base_url + "data_files"
    json_data = init_json_data(df_file)
    r = requests.post(url, headers=headers, data=json_data)
    datafile = print_response(r.text)
    png_datafiles[author_id] = datafile

    # second, add the file (PUT)
    # THIS IS NOT WORKING, GETTING 404s
    filepath = './pkdb/files/data/{}.png'.format(author_id)
    url = datafile['data']['attributes']['content_blobs'][0]['link']  # content_blobs array size = 1
    # bugfix for incorrect prefix
    url = url.replace('http://seek', 'https://seekbeta')
    print(url)
    r = requests.put(url, headers=file_upload_headers, data=open(filepath, 'rb'))
    print("Response Status:", r)
    print('-'*80)

Returned successfully posted object: <579>
https://seekbeta.lisym.org/data_files/579/content_blobs/1013
Response Status: <Response [200]>
--------------------------------------------------------------------------------
Returned successfully posted object: <580>
https://seekbeta.lisym.org/data_files/580/content_blobs/1014
Response Status: <Response [200]>
--------------------------------------------------------------------------------
Returned successfully posted object: <581>
https://seekbeta.lisym.org/data_files/581/content_blobs/1015
Response Status: <Response [200]>
--------------------------------------------------------------------------------
Returned successfully posted object: <582>
https://seekbeta.lisym.org/data_files/582/content_blobs/1016
Response Status: <Response [200]>
--------------------------------------------------------------------------------
Returned successfully posted object: <583>
https://seekbeta.lisym.org/data_files/583/content_blobs/1017
Response Status: <Re

# Cleanup SEEK

In [114]:
# delete datafiles (csv)
'''
for datafile in csv_datafiles.values():
    datafile_id = datafile['data']['id']
    print(url)
    url = base_url + "data_files/{}.json".format(datafile_id)
    r = requests.delete(url, headers=headers)
    print_response(r.text)
'''
# delete datafiles (png)
for datafile in png_datafiles.values():
    datafile_id = datafile['data']['id']
    print(url)
    url = base_url + "data_files/{}.json".format(datafile_id)
    r = requests.delete(url, headers=headers)
    print_response(r.text)

https://seekbeta.lisym.org/data_files/578/content_blobs/1012
Returned successfully posted object: <->
https://seekbeta.lisym.org/data_files/565.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/data_files/566.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/data_files/567.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/data_files/568.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/data_files/569.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/data_files/570.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/data_files/571.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/data_files/572.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/data_files/573.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/data_files/574.json
Returned successfully posted object: <->
https://seekbeta.l

In [100]:
for id in range(418, 432):
    url = base_url + "data_files/{}.json".format(id)
    r = requests.delete(url, headers=headers)
    print_response(r.text)

Returned successfully posted object: <->
Returned successfully posted object: <->
Returned successfully posted object: <->
Returned successfully posted object: <->
Returned successfully posted object: <->
Returned successfully posted object: <->
Returned successfully posted object: <->
Returned successfully posted object: <->
Returned successfully posted object: <->
Returned successfully posted object: <->
Returned successfully posted object: <->
Returned successfully posted object: <->
Returned successfully posted object: <->
Returned successfully posted object: <->


In [92]:
# delete experimental assays
for assay in exp_assays.values():
    assay_id = assay['data']['id']
    url = base_url + "assays/{}.json".format(assay_id)
    print(url)
    r = requests.delete(url, headers=headers)
    print_response(r.text)  

https://seekbeta.lisym.org/assays/104.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/assays/105.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/assays/106.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/assays/107.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/assays/108.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/assays/109.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/assays/110.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/assays/111.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/assays/112.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/assays/113.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/assays/114.json
Returned successfully posted object: <->
https://seekbeta.lisym.org/assays/115.json
Returned successfully posted obje

In [None]:
# delete study
study_id = study['data']['id']
url = base_url + "studies/{}.json".format(study_id)
print(url)
r = requests.delete(url, headers=headers)
print_response(r.text)