In [1]:
%load_ext autoreload
%autoreload 2
from IPython.display import display
from pkdb_analysis import PKFilter, PKData
from pkdb_analysis.data import PKDataFrame
import copy
import pandas as pd


In [2]:
# uncomment if you want to load from db.
#
#data = PKData.from_db()
#data.to_hdf5("test")

# Example queries

## 1. Query study

In [3]:
# Load a PKData object via PKData.from_hdf5 using the identifier "all"
# (presumably the name/path of an HDF5 export — confirm).
test_data = PKData.from_hdf5("all")

def f_idx_PKDB99999(d):
    """Index function selecting the rows that belong to study "PKDB99999".

    :param d: table exposing a ``study_sid`` attribute/column.
    :return: element-wise comparison of ``d.study_sid`` with the study sid.
    """
    wanted_sid = "PKDB99999"
    return d.study_sid == wanted_sid



In [4]:
# Print the per-table summary of the loaded data set (see the output below).
print(test_data)

------------------------------
PKData (140307593920464)
------------------------------
studies           237 
groups            683  ( 5314)
individuals      3127  (27755)
interventions     765  (  765)
outputs         24192  (34331)
timecourses      1815  ( 2529)
------------------------------


In [5]:
# t1: filter the interventions by study with concise=False; in the printed summary
#     only the interventions count changes, the other tables keep their sizes.
# t2: same filter with the default `concise` setting; in the printed summary all
#     tables are reduced to the entries of the selected study.
t1 = test_data.intervention_pk_filter(f_idx_PKDB99999,concise=False)
t2 = test_data.intervention_pk_filter(f_idx_PKDB99999)



In [6]:
# Print both summaries one after the other: t1 (concise=False) first, then t2 (default).
print("'concise' updates the remaining DataFrames. See the difference between t1 and t2.")
print(t1)
print(t2)

'concise' updates the remaining DataFrames. See the difference between t1 and t2.
------------------------------
PKData (140307593944592)
------------------------------
studies           237 
groups            683  ( 5314)
individuals      3127  (27755)
interventions       3  (    3)
outputs         24192  (34331)
timecourses      1815  ( 2529)
------------------------------
------------------------------
PKData (140307593944200)
------------------------------
studies             1 
groups              1  (    6)
individuals         6  (   42)
interventions       3  (    3)
outputs           105  (  105)
timecourses         2  (    2)
------------------------------


## 2 Query groups and individuals
### 2.1 Get data for groups with characteristica/keywords X
Examples: healthy=True, smoking=N, disease=None,
as individual queries and in combination.


In [7]:

def is_healthy(d):
    """Mask for characteristica with measurement_type "healthy" and choice "Y".

    :param d: table with the columns ``measurement_type`` and ``choice``.
    :return: element-wise AND of both comparisons.
    """
    healthy_rows = d["measurement_type"] == "healthy"
    answered_yes = d["choice"] == "Y"
    return healthy_rows & answered_yes

def disease(d):
    """Mask for characteristica whose measurement_type is "disease".

    :param d: table with a ``measurement_type`` column.
    :return: element-wise comparison result.
    """
    measurement_type = d["measurement_type"]
    return measurement_type == "disease"

def smoking(d):
    """Mask for characteristica whose measurement_type is "smoking".

    :param d: table with a ``measurement_type`` column.
    :return: element-wise comparison result.
    """
    measurement_type = d["measurement_type"]
    return measurement_type == "smoking"

def smoker_n(d):
    """Mask for smoking characteristica answered with choice "N".

    :param d: table with the columns ``measurement_type`` and ``choice``.
    :return: ``smoking(d)`` combined (element-wise AND) with ``choice == "N"``.
    """
    smoking_rows = smoking(d)
    answered_no = d["choice"] == "N"
    return smoking_rows & answered_no

def smoker_y(d):
    """Mask for smoking characteristica answered with choice "Y".

    :param d: table with the columns ``measurement_type`` and ``choice``.
    :return: ``smoking(d)`` combined (element-wise AND) with ``choice == "Y"``.
    """
    smoking_rows = smoking(d)
    answered_yes = d["choice"] == "Y"
    return smoking_rows & answered_yes
        


    

In [8]:
# (Re)load the data set stored as "test" so this section does not depend on earlier cells.
# NOTE(review): section 1 loads "all" while this and the later sections load "test";
# "test" is only written by the commented-out `data.to_hdf5("test")` cell — confirm it exists.
test_data = PKData.from_hdf5("test")



In [9]:
# idx can be a single function or a list of functions. A list of functions is applied successively, which is equivalent to "AND logic". "OR logic" can be applied directly on the index.
healthy_smoker_n_data = test_data.subject_pk_filter([is_healthy, smoker_n]).subject_pk_exclude([smoker_y, disease])




In [10]:
# Print the summary of the data remaining after the filter/exclude chain.
print(healthy_smoker_n_data)

------------------------------
PKData (140307626690992)
------------------------------
studies            75 
groups            104  (  994)
individuals       558  ( 5259)
interventions     220  (  220)
outputs          6066  ( 9432)
timecourses       378  (  602)
------------------------------


## 3 Query interventions
### 3.1 Get outputs/timecourses for intervention with substance
intervention with measurement_type "dosing" and substance "caffeine"

In [11]:
def dosing_and_caffeine(d):
    """Mask for interventions with measurement_type "dosing" and substance "caffeine".

    :param d: table with the columns ``measurement_type`` and ``substance``.
    :return: element-wise AND of both comparisons.
    """
    is_dosing = d["measurement_type"] == "dosing"
    is_caffeine = d["substance"] == "caffeine"
    return is_dosing & is_caffeine

### 3.2 Get outputs/timecourses where multiple interventions were given

In [12]:
# Reload the "test" data set so the intervention query below does not depend on earlier cells.
test_data = PKData.from_hdf5("test")



In [13]:
# Filter on the interventions with the caffeine-dosing index function
# (default `concise` setting; the printed summary shows all tables reduced).
caffeine_data = test_data.intervention_pk_filter(dosing_and_caffeine)



In [14]:
# Print the summary of the data linked to caffeine dosing interventions.
print(caffeine_data)

------------------------------
PKData (140306141018040)
------------------------------
studies            63 
groups            102  (  952)
individuals       726  ( 6435)
interventions      80  (   80)
outputs          5821  ( 5821)
timecourses       330  (  330)
------------------------------


## 4 Query outputs/timecourses
### 4.1 query by measurement_type
Query all entries with measurement_type "auc_inf".

In [15]:

def is_auc_inf(d):
    """Mask for entries whose measurement_type is "auc_inf".

    :param d: table with a ``measurement_type`` column.
    :return: element-wise comparison result.
    """
    measurement_type = d["measurement_type"]
    return measurement_type == "auc_inf"
    

## 5 Other queries
### 5.1 Complex
Get the clearance of codeine for all subjects that have been phenotyped for CYP2D6.


In [16]:
def is_cyp2d6_phenotyped(d):
    """Mask for outputs that represent a CYP2D6 phenotyping measurement.

    A row matches when its measurement_type is "metabolic phenotype" or
    "metabolic ratio" and its substance is one of the listed CYP2D6
    phenotyping substances.

    :param d: table with the columns ``measurement_type`` and ``substance``.
    :return: element-wise AND of both membership tests.
    """
    phenotyping_types = ["metabolic phenotype", "metabolic ratio"]
    cyp2d6_substances = ['spar/(2hspar+5hspar)', 'deb/4hdeb', 'dtf/dmt']
    has_phenotyping_type = d["measurement_type"].isin(phenotyping_types)
    has_cyp2d6_substance = d["substance"].isin(cyp2d6_substances)
    return has_phenotyping_type & has_cyp2d6_substance

def codeine_clearance(d):
    """Mask for outputs with measurement_type "clearance" and substance "codeine".

    :param d: table with the columns ``measurement_type`` and ``substance``.
    :return: element-wise AND of both comparisons.
    """
    is_clearance = d["measurement_type"] == "clearance"
    is_codeine = d["substance"] == "codeine"
    return is_clearance & is_codeine

In [17]:
# Reload the "test" data set; the cells below overwrite parts of test_data in place.
test_data = PKData.from_hdf5("test")



In [18]:
# Step 1: filter on the outputs that are CYP2D6 phenotyping measurements; the
# groups/individuals tables of the result are reused in the next cell.
phenotyped_data = test_data.output_pk_filter(is_cyp2d6_phenotyped)



In [19]:
# Step 2: replace the groups and individuals tables of the full data set with those
# of the phenotyped subset, then keep only the codeine clearance outputs.
# NOTE(review): this cell mutates test_data in place and then rebinds the name, so it is
# not idempotent — re-run the load cell above before re-running this one.
test_data.groups = phenotyped_data.groups
test_data.individuals = phenotyped_data.individuals
# keep_timecourses=False: the printed summary of the result shows 0 timecourses.
test_data = test_data.output_pk_filter(codeine_clearance, keep_timecourses = False)




In [20]:
# Print the summary of the codeine clearance data for CYP2D6-phenotyped subjects.
print(test_data)

------------------------------
PKData (140306192883896)
------------------------------
studies             4 
groups              6  (   60)
individuals        14  (   98)
interventions       4  (    4)
outputs            21  (   21)
timecourses         0  (    0)
------------------------------


## 6  Pitfalls 

In [21]:
# Reload the "test" data set for the pitfall examples below.
test_data = PKData.from_hdf5("test")
# Wrong
def is_healthy_smoker(d):
    """Pitfall: this will yield zero subjects.

    Both conditions are AND-ed on the same row, and no single characteristica
    satisfies measurement_type == 'healthy' and measurement_type == 'smoking'
    at the same time.
    """
    healthy_yes = (d["measurement_type"] == "healthy") & (d["choice"] == "Y")
    smoking_yes = (d["measurement_type"] == "smoking") & (d["choice"] == "Y")
    return healthy_yes & smoking_yes
         
# Correct
def is_healthy_smoker(d):
    """Return one mask per condition instead of AND-ing them on a single row.

    The list holds the mask for (healthy, "Y") followed by the mask for
    (smoking, "Y").
    NOTE(review): how the filter methods consume a list of masks is not
    visible in this notebook — confirm against the library.
    """
    healthy_yes = (d["measurement_type"] == "healthy") & (d["choice"] == "Y")
    smoking_yes = (d["measurement_type"] == "smoking") & (d["choice"] == "Y")
    return [healthy_yes, smoking_yes]

   
# Wrong
def not_smoker_y(d):
    """Pitfall: be careful, this might not do what you expect.

    The negated mask keeps every characteristica that is not (smoking, "Y").
    Excluding a specific characteristica this way will not eliminate any
    subject unless it is the subject's only characteristica.
    """
    is_smoking_entry = d["measurement_type"] == "smoking"
    answered_yes = d["choice"] == "Y"
    return ~(is_smoking_entry & answered_yes)
# Applies the negated mask as a subject filter (the "wrong" way described in not_smoker_y).
not_smoker_y_data = test_data.subject_pk_filter(not_smoker_y)

# Correct
# exclude smokers
def smoker_y(d):
    """Mask for characteristica with measurement_type "smoking" and choice "Y"."""
    is_smoking_entry = d["measurement_type"] == "smoking"
    answered_yes = d["choice"] == "Y"
    return is_smoking_entry & answered_yes
# Pass the positive smoker mask to subject_pk_exclude (the "correct" way per this section).
healthy_data = test_data.subject_pk_exclude(smoker_y)



# Wrong
def not_disease(d):
    """Pitfall: be careful, this might not do what you expect.

    The negated mask keeps every characteristica that is not a disease entry.
    Excluding a specific characteristica this way will not eliminate any
    subject unless it is the subject's only characteristica.
    """
    is_disease_entry = d["measurement_type"] == "disease"
    return ~is_disease_entry
# Applies the negated mask as a subject filter (the "wrong" way described in not_disease).
healthy_data = test_data.subject_pk_filter(not_disease)

# Correct
# exclude the disease
def disease(d):
    """Mask for characteristica whose measurement_type is "disease"."""
    measurement_type = d["measurement_type"]
    return measurement_type == "disease"
# Pass the positive disease mask to subject_pk_exclude (the "correct" way per this section).
healthy_data = test_data.subject_pk_exclude(disease)


