# Bulk Import of Data via Files into CDD

In [1]:
import json
import os
import time

import pandas as pd
import requests
import yaml

In [2]:
# Read the API token settings from the local YAML config file.
# yaml.safe_load (not yaml.load) is used to parse it.
with open('config.yaml') as config_file:
    mytoken = yaml.safe_load(config_file)

In [3]:
# Convert the original Excel workbook to a CSV file written next to it.
#
# xlsx_path keeps its trailing slash: the upload step builds the CSV path as
# `xlsx_path + file_csv_name`.
xlsx_path = '/Users/songluli/Documents/CDD_API/KRAS_ADME/'
file_xlsx_name = 'ADME-SLC-LogP-220211-1.xlsx'
file_xlsx = pd.read_excel(os.path.join(xlsx_path, file_xlsx_name))

# Swap the extension for '.csv'. splitext handles any extension length,
# whereas slicing off the last five characters only works for '.xlsx'.
file_csv_name = os.path.splitext(file_xlsx_name)[0] + '.csv'

# index=False: do not write the DataFrame row index as an extra column.
file_xlsx.to_csv(os.path.join(xlsx_path, file_csv_name), index=False, header=True)

In [4]:
# Save the mapping template and token before importing with the API.
# mapping template: ADME-SLC-LM_SL, ADME-SLC-LogP_SL, ADME-SLC-Solubility_SL

# Upload a Data File and Assign Mapping Parameters
headers = mytoken['headers']
url = 'https://app.collaborativedrug.com/api/v1/vaults/4686/slurps'
data = {
    'project': 'KRAS',
    'mapping_template': 'ADME-SLC-LogP_SL',
    'runs': {'run_date': '2022-02-12', 'place': 'Pharmaron', 'person': 'Songlu Li', 'conditions': ''},
    'autoreject': 'true',
}

# Open the CSV inside a context manager so the file handle is closed as soon
# as the upload request has completed.
with open(xlsx_path + file_csv_name, 'rb') as csv_file:
    files = {'file': csv_file}
    # The import parameters are sent as a JSON string in the 'json' form field.
    response_post = requests.post(url, headers=headers, data={'json': json.dumps(data)}, files=files)

# Stop here with the HTTP error if the upload was refused, instead of failing
# below with a KeyError on the missing 'id'.
response_post.raise_for_status()

# Check Import Status
# Fetch the record of the import just created, addressed by the id returned from the POST.
response_request = requests.request("GET", url + '/' + str(response_post.json()['id']), headers=headers)

In [None]:
# Poll the import every 30 s. Stop when it is committed, when it ends in a
# failure state, or after 10 minutes.

timeout = 600  # [seconds]
check_period = 30  # [seconds]
start_time = time.time()

# The import id and its status URL do not change between polls; build them once.
slurp_id = response_post.json()['id']
status_url = "https://app.collaborativedrug.com/api/v1/vaults/4686/slurps/" + str(slurp_id)

# States after which the import will never reach 'committed'.
# NOTE(review): state names recalled from the CDD import API docs - confirm the list.
failed_states = ('canceled', 'rejected', 'invalid')

while True:
    response_request = requests.request("GET", status_url, headers=headers)
    # Surface HTTP errors directly rather than as a KeyError on 'state'.
    response_request.raise_for_status()
    status = response_request.json()  # parse the body once per poll
    state = status['state']
    elapsed_time = time.time() - start_time

    if state == 'committed':
        print(f"total_records: {status['total_records']},"
              f"records_committed: {status['records_committed']}",
              f'running time: {elapsed_time}')
        break

    if state in failed_states:
        # Without this branch a failed import would be polled until the
        # timeout and then reported as a long-running upload.
        print(f"{slurp_id} Upload ended in state '{state}' and was not committed, chat with DS team")
        break

    if elapsed_time >= timeout:
        print(str(slurp_id) + ' Upload failed due to long running time, chat with DS team')
        break

    time.sleep(check_period)