In [158]:
%load_ext autoreload
%autoreload 2
from IPython.display import display
from pkdb_analysis import PKFilter, PKData
from pkdb_analysis.data import PKDataFrame
import copy
import pandas as pd


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [159]:
# uncomment if you want to load from db.
#
#data = PKData.from_db()
#data.to_hdf5("test")

# Example queries

## 1. Query study

In [160]:
test_data = PKData.from_hdf5("test")

def f_idx_PKDB99999(d):
    return (d.study_sid == "PKDB99999")



In [161]:
print(test_data)

------------------------------
PKData (139657175407584)
------------------------------
groups            683  ( 5314)
individuals      3127  (27755)
interventions     765  (  765)
outputs         24192  (34331)
timecourses      1815  ( 2529)
------------------------------


In [163]:
t1 = test_data.intervention_pk_filter(f_idx_PKDB99999,concise=False)
t2 = test_data.intervention_pk_filter(f_idx_PKDB99999)



In [166]:
print("'concise' updates the remaining DataFrames. See the difference between t1 and t2.")
print(t1)
print(t2)

'concise' updates the remaining DataFrames. See the difference between t1 and t2.
------------------------------
PKData (139657060112760)
------------------------------
groups            683  ( 5314)
individuals      3127  (27755)
interventions       3  (    3)
outputs         24192  (34331)
timecourses      1815  ( 2529)
------------------------------
------------------------------
PKData (139657060778784)
------------------------------
groups              1  (    6)
individuals         6  (   42)
interventions       3  (    3)
outputs           105  (  105)
timecourses         2  (    2)
------------------------------


## Query groups and individuals
### 2.1 Get data for groups with characteristica/keywords X
healthy=True, smoking=N, disease=None,
individual queries and combinations.


In [167]:

def is_healthy(d): 
    return ((d["measurement_type"]=="healthy") & (d["choice"]=="Y"))
         
def not_disease(d):
    return  ~(d["measurement_type"]=="disease")

def smoker_n(d):
    return ((d["measurement_type"]=="smoking") & (d["choice"]=="N"))
        
def not_smoker_y(d):
    return ~((d["measurement_type"]=="smoking") & (d["choice"]=="N")) 
    

In [168]:
test_data = PKData.from_hdf5("test")




In [169]:
# idx can be a single function, or a list of functions. A list of functions are applied successively and is equivalent to "AND logic". "OR logic" can be directly applied on the index.
healthy_smoker = test_data.subject_pk_filter([is_healthy,not_disease,smoker_n,not_smoker_y])



In [170]:
print(healthy_smoker)

------------------------------
PKData (139657084373088)
------------------------------
groups            467  ( 3990)
individuals      1131  (11668)
interventions     637  (  637)
outputs         15604  (22721)
timecourses      1251  ( 1789)
------------------------------


## 3 Query interventions
### 3.1 Get outputs/timecourses for intervention with substance
intervention with measurement_type "dosing" and substance "caffeine"

In [156]:
def dosing_and_caffeine(d):
    return ((d["measurement_type"]=="dosing") & (d["substance"]=="caffeine"))

### 3.2 Get outputs/timecourses where multiple interventions were given

In [189]:
test_data = PKData.from_hdf5("test")



In [190]:
caffeine_data = test_data.intervention_pk_filter(dosing_and_caffeine)



In [188]:
print(caffeine_data)

------------------------------
PKData (139656996049864)
------------------------------
studies            29 
groups             57  (  388)
individuals       223  ( 1562)
interventions      65  (   65)
outputs          3207  ( 3241)
timecourses       219  (  220)
------------------------------


## 4 Query outputs/timecourses
### 4.1 query by measurement_type
query all auc_inf

In [None]:

def is_auc_inf(d):
    return (d["measurement_type"]=="dosing")
    

## 5 Other Query others
### 5.1 Complex
get clearance of codeine for all subjects, that have been phenotyped. 


In [195]:
def is_cyp2d6_phenotyped(d):
    cyp2d6_phenotype_substances = ['spar/(2hspar+5hspar)', 'deb/4hdeb', 'dtf/dmt']
    return (d["measurement_type"].isin(["metabolic phenotype", "metabolic ratio"])) & (d["substance"].isin(cyp2d6_phenotype_substances))

def codeine_clearance(d):
    return ((d["measurement_type"]=="clearance") & (d["substance"]=="codeine"))                                                         

In [196]:
test_data = PKData.from_hdf5("test")



In [197]:
phenotyped_data = test_data.output_pk_filter(is_phenotyping)



In [198]:
test_data.groups = phenotyped_data.groups
test_data.individuals = phenotyped_data.individuals
test_data = test_data.output_pk_filter(codeine_clearance)



In [199]:
print(test_data)

------------------------------
PKData (139657002707824)
------------------------------
studies            18 
groups             28  (  278)
individuals        36  (  296)
interventions      20  (   20)
outputs            75  (   75)
timecourses       176  (  176)
------------------------------
