In [1]:
import requests
import json
import pandas as pd
import seaborn as sns
import numpy as np

In [3]:
outputs_url = "http://0.0.0.0:8000/api/v1/outputs_elastic/?substance=caffeine&pktype=clearance&final=true"
units_factor = {'ml/min/kg':60 ,'ml/min':np.NaN, 'ml/h/kg':1}
units_mapping = {'ml/min/kg':'ml/h/kg','ml/min':None, 'ml/h/kg':'ml/h/kg'}                

In [4]:
def flatten_json(y):
    out = {}

    def flatten(x, name=''):
        if type(x) is dict:
            for a in x:
                flatten(x[a], name + a + '_')
        elif type(x) is list:
            i = 0
            for a in x:
                flatten(a, name + str(i) + '_')
                i += 1
        else:
            out[name[:-1]] = x

    flatten(y)
    return out

def norm_value(df_row):
    if df_row.value:
        value = df_row.value
    elif df_row.mean:
        value = df_row.mean
        
    factor = units_factor[df_row.unit]
    return value * factor
    

In [5]:
response = requests.get(outputs_url)

In [6]:
num_pages = response.json()["last_page"]

In [7]:
data = []
for page in range(2,num_pages +1):
    url = outputs_url + f"&page={page}"
    response = requests.get(url)
    data += response.json()["data"]["data"]

In [8]:
flatten_data = [flatten_json(d) for d in data]


In [15]:
df_outputs = pd.DataFrame(flatten_data)

In [16]:
individual_ids = set(df_outputs["individual_pk"].dropna())
individual_ids_joined = "__".join([str(int(id)) for id in individual_ids])


df_outputs["individual_pk"] = df_outputs["individual_pk"].fillna(0).astype(int)

In [17]:
individual_ids_joined


'55__56__57__58__59__60__61__62__63__64__65__66__67__68__69__70__71__72__73__74__161__162__163__164__165__166__167__168__169__170__171__172__173__174__175__176__177__178__179__180__181__182__183__184__185__186__187__188__189__190__191__192__208__209__210__211__212__213__214__215__216__217__218__223__224__225__226__227__228__229__230__231__232__233__234__235__236__237__238__239__240__241__242__243__244__245__246__247__248__249__250__251__252__253__254__255__256__257__258__259__260__261__262__263__264__265__266__308__309__310__311__312__313__314__315__316__317__318__319__320__321__322__323__324__325__326__327__328__329__330__331__332__333__334__335__336__337__338__339__340__341__342__343__344__345__346__347__348__349__350__351__352__353__354__355__356__357__358__359__360__361__362__363__364__365__366__367__368__369__370__371__372__373__374__375__376__377__378__379__380__381__382__383__384__385__386__406__407__408__409__410__411__412__413__414__415__416__417__418__419__420__421__422__423_

In [23]:
response  = requests.get(f"http://0.0.0.0:8000/api/v1/individuals_elastic/?ids={individual_ids_joined}")
num_pages = response.json()["last_page"]
indivdiuals_url  = f"http://0.0.0.0:8000/api/v1/individuals_elastic/?ids={individual_ids_joined}"
data = []
for page in range(2,num_pages +1):
    url = indivdiuals_url + f"&page={page}"
    response = requests.get(url)
    data += response.json()["data"]["data"]

In [26]:
flatten_data = [flatten_json(d) for d in data]
df_individuals = pd.DataFrame(flatten_data)

In [29]:
complete = pd.merge(df_individuals,df_outputs, left_on="pk", right_on="individual_pk")

In [30]:
complete

Unnamed: 0,characteristica_all_final_0_category,characteristica_all_final_0_choice,characteristica_all_final_0_count,characteristica_all_final_0_ctype,characteristica_all_final_0_cv,characteristica_all_final_0_max,characteristica_all_final_0_mean,characteristica_all_final_0_median,characteristica_all_final_0_min,characteristica_all_final_0_pk,...,pk_y,pktype,sd,se,substance_name,time,time_unit,tissue,unit,value
0,age,,12,group,,39.00,,,24.00,273,...,527,clearance,,,caffeine,,,plasma,ml/h/kg,196.00
1,age,,12,group,,39.00,,,24.00,273,...,528,clearance,,,caffeine,,,plasma,ml/h/kg,203.00
2,age,,12,group,,39.00,,,24.00,273,...,529,clearance,,,caffeine,,,plasma,ml/h/kg,188.00
3,age,,12,group,,39.00,,,24.00,273,...,530,clearance,,,caffeine,,,plasma,ml/h/kg,223.00
4,age,,12,group,,39.00,,,24.00,273,...,531,clearance,,,caffeine,,,plasma,ml/h/kg,225.00
5,age,,12,group,,39.00,,,24.00,273,...,532,clearance,,,caffeine,,,plasma,ml/h/kg,257.00
6,age,,12,group,,39.00,,,24.00,273,...,533,clearance,,,caffeine,,,plasma,ml/h/kg,63.00
7,age,,12,group,,39.00,,,24.00,273,...,534,clearance,,,caffeine,,,plasma,ml/h/kg,67.00
8,age,,12,group,,39.00,,,24.00,273,...,535,clearance,,,caffeine,,,plasma,ml/h/kg,84.00
9,age,,12,group,,39.00,,,24.00,273,...,536,clearance,,,caffeine,,,plasma,ml/h/kg,105.00


In [93]:
def get_bodyweight(x):
    row_categories = list(x[x == "weight"].index)
    fields = ["value","mean","median","choice"]
    for field in fields:
        row_choices = [category[:-8]+field for category in row_categories]
        for bodyweight in list(x[row_choices]):
            try:
                if float(bodyweight) > 0:
                    return float(bodyweight)
            except:
                pass
    
    


def get_dosing(x):
    row_categories = list(x[x == "dosing"].index)
    row_choices = [category[:-8]+"value" for category in row_categories]
    
    # x[row_choices]
    return list(x[row_choices].dropna())    
    

In [94]:
complete.apply(get_bodyweight, axis=1)

0      63.0
1      63.0
2      63.0
3      63.0
4      63.0
5      63.0
6      80.0
7      80.0
8      80.0
9      80.0
10     80.0
11     80.0
12     80.0
13     80.0
14     80.0
15     80.0
16     80.0
17     80.0
18     65.0
19     65.0
20     65.0
21     65.0
22     65.0
23     65.0
24     65.0
25     65.0
26     65.0
27     65.0
28     65.0
29     65.0
       ... 
430     NaN
431     NaN
432     NaN
433     NaN
434     NaN
435     NaN
436    88.0
437    88.0
438    60.0
439    60.0
440    85.0
441    85.0
442    85.0
443    85.0
444    63.0
445    63.0
446    58.0
447    58.0
448    62.0
449    62.0
450    91.0
451    91.0
452    69.0
453    69.0
454    61.0
455    61.0
456    75.0
457    75.0
458    46.0
459    46.0
Length: 460, dtype: float64

In [92]:
complete["study_name"]

0          Balogh1992
1          Balogh1992
2          Balogh1992
3          Balogh1992
4          Balogh1992
5          Balogh1992
6          Balogh1992
7          Balogh1992
8          Balogh1992
9          Balogh1992
10         Balogh1992
11         Balogh1992
12         Balogh1992
13         Balogh1992
14         Balogh1992
15         Balogh1992
16         Balogh1992
17         Balogh1992
18         Balogh1992
19         Balogh1992
20         Balogh1992
21         Balogh1992
22         Balogh1992
23         Balogh1992
24         Balogh1992
25         Balogh1992
26         Balogh1992
27         Balogh1992
28         Balogh1992
29         Balogh1992
            ...      
430       Parsons1978
431       Parsons1978
432       Parsons1978
433       Parsons1978
434       Parsons1978
435       Parsons1978
436    ZylberKatz1984
437    ZylberKatz1984
438    ZylberKatz1984
439    ZylberKatz1984
440    ZylberKatz1984
441    ZylberKatz1984
442    ZylberKatz1984
443    ZylberKatz1984
444    Zyl