# Caffeine pharmacokinetics data in SEEK
Upload of file assets to SEEK via the read/write API. The experimental data sets were digitized from the literature, and the resulting files are made available via LiSyM-SEEK.

<img src="./data_extraction.png" width="500"/>

Proof-of-principle by **Matthias König, Hadas Leonov & Wolfgang Müller** 

## API token
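An API token is required to connect to the database. The token is the Base64-encoded `user:password` string that is sent in the HTTP `Authorization: Basic` header.
Make sure the token file is not tracked in the repository. Note that the active configuration below reads the token from `./token_fairdom`.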

```
echo -n 'user:password' | base64 > token
```

In [2]:
# Disabled configuration for the LiSyM SEEK instance. It is kept as a bare
# string literal (never executed) so it can be swapped with the FAIRDOMHub
# configuration below.
'''
base_url = 'https://seek.lisym.org/'

API_TOKEN = open("./token").readline().strip()
REPLACEMENTS = {
    'KOENIG_ID': "17",
    'PROJECT_ID': "10",  # MM-PLF
    'INVESTIGATION_ID': "20",
    'HOMO_SAPIENS': "950657990",
}
'''
# Active configuration: FAIRDOMHub. The trailing slash matters because
# endpoint names are appended to base_url by plain string concatenation.
base_url = 'https://fairdomhub.org/'

# Read the token from the first line of the token file. The with-block closes
# the file handle instead of leaving it open for the rest of the session.
with open("./token_fairdom") as token_file:
    API_TOKEN = token_file.readline().strip()

# Placeholder -> value substitutions applied to the JSON templates by
# init_json_data(); further entries are added by later cells.
REPLACEMENTS = {
    'KOENIG_ID': "678",
    'PROJECT_ID': "46",  # MM-PLF
    'INVESTIGATION_ID': "96",
    'HOMO_SAPIENS': "950657990",
}

In [3]:
import time
import requests
import json
import string
from pprint import pprint

# Headers sent with every metadata request: HTTP Basic authentication plus
# the JSON:API media type for both the request body and the response.
headers = {
    "Authorization": "Basic %s" % API_TOKEN,
    "Content-type": "application/vnd.api+json",
    "Accept": "application/vnd.api+json",
}

# File uploads reuse the same headers but send the payload as raw bytes.
file_upload_headers = dict(headers)
file_upload_headers.update({"Content-type": "application/octet-stream"})

In [4]:
def print_response(r, info=False):
    """Print a short summary of an HTTP response and return its JSON body.

    Args:
        r: Response-like object exposing ``status_code``, ``ok`` and ``text``
            (for example a ``requests.Response``).
        info: If true, additionally pretty-print the decoded JSON body.

    Returns:
        The decoded JSON body if the request succeeded and the body is valid
        JSON; ``None`` if the request failed or the body could not be decoded.
    """
    print(r.status_code)

    if not r.ok:
        print("Error: ", r.text)
        return None

    try:
        obj = json.loads(r.text)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass. A successful request
        # with an empty or non-JSON body is reported instead of raising.
        print("Error: response body is not valid JSON: ", r.text)
        return None

    # Not every successful response carries data.id (e.g. a status-only
    # body); fall back to '-' for those instead of swallowing all exceptions.
    try:
        seek_id = obj['data']['id']
    except (KeyError, TypeError, IndexError):
        seek_id = '-'
    print("Returned successfully posted object: <{}>".format(seek_id))
    if info:
        pprint(obj)
    return obj

In [5]:
def init_json_data(file, replacements=REPLACEMENTS):
    """Load a JSON template and fill in its placeholders.

    The template is parsed and re-serialized, then every key of
    ``replacements`` is substituted by its value as plain text in the
    serialized string. Values are inserted verbatim (no JSON escaping), so
    they must not contain characters that would break the JSON syntax.

    Args:
        file: Path of the JSON template file.
        replacements: Mapping of placeholder text to replacement value.
            Values are converted with ``str()`` before substitution.

    Returns:
        The serialized JSON document with all placeholders substituted.
    """
    # The with-block closes the template file once it has been parsed.
    with open(file) as template:
        json_format = json.load(template)
    json_data = json.dumps(json_format)
    for key, value in replacements.items():
        json_data = json_data.replace(key, str(value))
    return json_data

### Post caffeine study
The investigation and project already exist and can be referenced via their respective ids. Now we create a new study for the caffeine data.

In [6]:
# Template describing the study; its placeholders are filled from REPLACEMENTS.
study_file = "./pkdb/json/caffeine_study.json"
url = base_url + "studies"
json_data = init_json_data(study_file, replacements=REPLACEMENTS)
# pprint(json_data)
# Show the payload as a parsed structure between two separator lines.
print('-'*80)
pprint(json.loads(json_data))
print('-'*80)
# Create the study on the server. print_response() returns the decoded
# response body, or None if the request failed.
r = requests.post(url, headers=headers, data=json_data)
study = print_response(r)

--------------------------------------------------------------------------------
{'data': {'attributes': {'description': 'Pharmacokinetics data set for '
                                        'caffeine',
                         'experimentalists': '',
                         'other_creators': '',
                         'person_responsible_id': '678',
                         'policy': {'access': 'view',
                                    'permissions': [{'access': 'manage',
                                                     'resource': {'id': '46',
                                                                  'type': 'projects'}}]},
                         'title': 'PKDB Caffeine Study'},
          'relationships': {'creators': {'data': [{'id': '678',
                                                   'type': 'people'}]},
                            'investigation': {'data': {'id': '96',
                                                       'type': 'investigations'}},
  

In [14]:
# Register the id of the study created above under the STUDY_ID placeholder,
# so that subsequent init_json_data() calls substitute it.
# NOTE(review): presumably the assay template references STUDY_ID - the
# template file is not shown here, confirm.
REPLACEMENTS['STUDY_ID'] = study['data']['id']

### Experimental assays & Data files
Every digitized publication is handled as a separate experimental assay.

The corresponding data files (the CSV data and the PNG figure) are attached to the assay as data files.

In [15]:
from os import listdir
from os.path import isfile, join
# Directory holding the digitized data files.
data_dir = './pkdb/files/data'
# Keep regular files only; sub-directories are ignored.
files = [entry for entry in listdir(data_dir) if isfile(join(data_dir, entry))]
# The publication key is the CSV file name without its 4-character suffix.
author_ids = {fname[:-4] for fname in files if fname.endswith('.csv')}
pprint(author_ids)

{'Akinyinka2000',
 'Amchin1999',
 'Blanchard1983a',
 'Haller2002',
 'Healy1991',
 'Hetzler1990',
 'Jeppesen1996',
 'Kakuda2014',
 'Kaplan1997',
 'Magnusson2008',
 'Oh2012',
 'Perera2011',
 'Spigset1999a',
 'Tanaka2014'}


In [16]:
# Post one experimental assay per publication key and keep the server's
# response (or None on failure) for the data file cells below.
exp_assays = {}
for author_id in author_ids:
    print('*** ' + author_id + ' ***')
    # Per-publication placeholder values for the assay template.
    REPLACEMENTS.update({
        'AUTHOR_ID': author_id,
        'AUTHOR_DESCRIPTION': '{} Description'.format(author_id),
    })

    assay_file = "./pkdb/json/caffeine_exp_assay.json"
    url = base_url + "assays"
    json_data = init_json_data(assay_file)
    r = requests.post(url, headers=headers, data=json_data)
    assay = exp_assays[author_id] = print_response(r)

*** Hetzler1990 ***
200
Returned successfully posted object: <187>
*** Spigset1999a ***
200
Returned successfully posted object: <188>
*** Akinyinka2000 ***
200
Returned successfully posted object: <189>
*** Perera2011 ***
200
Returned successfully posted object: <190>
*** Amchin1999 ***
200
Returned successfully posted object: <191>
*** Oh2012 ***
200
Returned successfully posted object: <192>
*** Kakuda2014 ***
200
Returned successfully posted object: <193>
*** Haller2002 ***
200
Returned successfully posted object: <194>
*** Healy1991 ***
200
Returned successfully posted object: <195>
*** Blanchard1983a ***
200
Returned successfully posted object: <196>
*** Tanaka2014 ***
200
Returned successfully posted object: <197>
*** Kaplan1997 ***
200
Returned successfully posted object: <198>
*** Magnusson2008 ***
200
Returned successfully posted object: <199>
*** Jeppesen1996 ***
200
Returned successfully posted object: <200>


In [17]:
# -------------
# CSV
# -------------
# For every publication: create the data file metadata (POST), then upload
# the CSV content to the content blob link returned by the server (PUT).
csv_datafiles = {}
for author_id in author_ids:
    print('*** ' + author_id + ' ***')
    # get the correct assay
    assay = exp_assays[author_id]

    # placeholder values for the data file template
    REPLACEMENTS['ASSAY_ID'] = assay['data']['id']
    REPLACEMENTS['AUTHOR_ID'] = author_id
    REPLACEMENTS['AUTHOR_DESCRIPTION'] = '{} Description'.format(author_id)
    REPLACEMENTS['FILENAME'] = '{}.csv'.format(author_id)
    REPLACEMENTS['CONTENT_TYPE'] = 'text/csv'
    REPLACEMENTS['TITLE'] = "CSV ({})".format(author_id)

    # first, create the metadata (POST)
    df_file = "./pkdb/json/caffeine_data_file.json"
    url = base_url + "data_files"
    json_data = init_json_data(df_file)
    r = requests.post(url, headers=headers, data=json_data)
    datafile = print_response(r)
    csv_datafiles[author_id] = datafile

    # second, add the file (PUT)
    filepath = './pkdb/files/data/{}.csv'.format(author_id)
    url = datafile['data']['attributes']['content_blobs'][0]['link']  # content_blobs array size = 1
    # bugfix for incorrect prefix
    # url = url.replace('http://seek', 'https://seekbeta')
    print(url)
    # The with-block closes the file once the upload request has returned.
    with open(filepath, 'rb') as upload:
        r = requests.put(url, headers=file_upload_headers, data=upload)
    print("Response Status:", r)
    print('-'*80)

*** Hetzler1990 ***
200
Returned successfully posted object: <408>
https://seek.lisym.org/data_files/408/content_blobs/722
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Spigset1999a ***
200
Returned successfully posted object: <409>
https://seek.lisym.org/data_files/409/content_blobs/723
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Akinyinka2000 ***
200
Returned successfully posted object: <410>
https://seek.lisym.org/data_files/410/content_blobs/724
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Perera2011 ***
200
Returned successfully posted object: <411>
https://seek.lisym.org/data_files/411/content_blobs/725
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Amchin1999 ***
200
Returned successfully p

In [18]:
# -------------
# PNG
# -------------
# For every publication: create the data file metadata (POST), then upload
# the PNG content to the content blob link returned by the server (PUT).
png_datafiles = {}
for author_id in author_ids:
    print('*** ' + author_id + ' ***')
    # get the correct assay
    assay = exp_assays[author_id]

    # placeholder values for the data file template
    REPLACEMENTS['ASSAY_ID'] = assay['data']['id']
    REPLACEMENTS['AUTHOR_ID'] = author_id
    REPLACEMENTS['AUTHOR_DESCRIPTION'] = '{} Description'.format(author_id)
    REPLACEMENTS['FILENAME'] = '{}.png'.format(author_id)
    REPLACEMENTS['CONTENT_TYPE'] = 'image/png'
    REPLACEMENTS['TITLE'] = "PNG ({})".format(author_id)

    # first, create the metadata (POST)
    df_file = "./pkdb/json/caffeine_data_file.json"
    url = base_url + "data_files"
    json_data = init_json_data(df_file)
    r = requests.post(url, headers=headers, data=json_data)
    datafile = print_response(r)
    png_datafiles[author_id] = datafile

    # second, add the file (PUT)
    filepath = './pkdb/files/data/{}.png'.format(author_id)
    url = datafile['data']['attributes']['content_blobs'][0]['link']  # content_blobs array size = 1
    # bugfix for incorrect prefix
    # url = url.replace('http://seek', 'https://seekbeta')
    print(url)
    # The with-block closes the file once the upload request has returned.
    with open(filepath, 'rb') as upload:
        r = requests.put(url, headers=file_upload_headers, data=upload)
    print("Response Status:", r)
    print('-'*80)
    time.sleep(1)  # sleep another second for the database

*** Hetzler1990 ***
200
Returned successfully posted object: <422>
https://seek.lisym.org/data_files/422/content_blobs/736
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Spigset1999a ***
200
Returned successfully posted object: <423>
https://seek.lisym.org/data_files/423/content_blobs/737
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Akinyinka2000 ***
200
Returned successfully posted object: <424>
https://seek.lisym.org/data_files/424/content_blobs/738
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Perera2011 ***
200
Returned successfully posted object: <425>
https://seek.lisym.org/data_files/425/content_blobs/739
Response Status: <Response [200]>
--------------------------------------------------------------------------------
*** Amchin1999 ***
200
Returned successfully p

In [None]:
# make sure cleanup is not triggered automatically:
# raising here stops execution at this cell, so the deletion cells below
# only run when they are executed by hand.
raise NotImplementedError

# Cleanup SEEK

In [None]:
# delete datafiles (csv)
# Disabled: kept as a bare string literal so it never runs; remove the quotes
# to delete the CSV data files as well.
'''
for datafile in csv_datafiles.values():
    datafile_id = datafile['data']['id']
    url = base_url + "data_files/{}.json".format(datafile_id)
    print(url)
    r = requests.delete(url, headers=headers)
    print_response(r)
'''
# delete datafiles (png)
for datafile in png_datafiles.values():
    datafile_id = datafile['data']['id']
    # Build the URL before printing it, so the URL shown is the one deleted.
    url = base_url + "data_files/{}.json".format(datafile_id)
    print(url)
    r = requests.delete(url, headers=headers)
    # print_response() needs the response object (status_code/ok/text),
    # not the body text.
    print_response(r)

In [6]:
# Delete data files with ids 331-358 (the end of the range is exclusive).
# The loop variable is not called `id`, which would shadow the builtin for
# the rest of the session.
for data_file_id in range(331, 359):
    url = base_url + "data_files/{}.json".format(data_file_id)
    print(url)
    r = requests.delete(url, headers=headers)
    print_response(r)
    print()

https://seek.lisym.org/data_files/331.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/332.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/333.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/334.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/335.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/336.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/337.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/338.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/339.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/340.json
200
Returned successfully posted object: <->

https://seek.lisym.org/data_files/341.json
200
Returned successfully posted object: <->

https://seek.lisym.or

In [None]:
# delete experimental assays
# Issues one DELETE request per assay stored in exp_assays and prints the
# URL and the outcome of each request.
for assay in exp_assays.values():
    assay_id = assay['data']['id']
    url = base_url + "assays/{}.json".format(assay_id)
    print(url)
    r = requests.delete(url, headers=headers)
    print_response(r)  

In [7]:
# Delete assays with ids 142-155 (the end of the range is exclusive).
# The loop variable is not called `id`, which would shadow the builtin for
# the rest of the session.
for old_assay_id in range(142, 156):
    url = base_url + "assays/{}.json".format(old_assay_id)
    print(url)
    r = requests.delete(url, headers=headers)
    print_response(r)
    print()

https://seek.lisym.org/assays/142.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/143.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/144.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/145.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/146.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/147.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/148.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/149.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/150.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/151.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/152.json
200
Returned successfully posted object: <->

https://seek.lisym.org/assays/153.json
200
Returned successfully 

In [8]:
# delete study
# The id is hard-coded here; the commented line below takes it from the
# `study` response of the current session instead.
# study_id = study['data']['id']
study_id = 24
url = base_url + "studies/{}.json".format(study_id)
print(url)
r = requests.delete(url, headers=headers)
print_response(r)

https://seek.lisym.org/studies/24.json
200
Returned successfully posted object: <->


{'status': 'ok'}