In [8]:
import numpy as np
import pandas as pd
from raimitigations.cohort.cohort_definition import CohortDefinition

# Toy dataset used by every cohort example below: one row per character,
# with a single missing height (last row) to demonstrate NaN handling.
df = pd.DataFrame(
    [
        ("elf", 1.6, 85, 90),
        ("orc", 1.95, 59, 43),
        ("halfling", 1.40, 19, 29),
        ("human", 1.75, 89, 99),
        ("halfling", 1.53, 91, 85),
        ("orc", 2.10, 79, 73),
        ("elf", 1.85, 45, 58),
        ("orc", 1.79, 82, 94),
        ("human", 1.65, 47, 37),
        ("orc", np.nan, 87, 51),
    ],
    columns=["race", "height(m)", "past_score", "score"],
)
df

Unnamed: 0,race,height(m),past_score,score
0,elf,1.6,85,90
1,orc,1.95,59,43
2,halfling,1.4,19,29
3,human,1.75,89,99
4,halfling,1.53,91,85
5,orc,2.1,79,73
6,elf,1.85,45,58
7,orc,1.79,82,94
8,human,1.65,47,37
9,orc,,87,51


In [9]:
# Cohort: (race == 'elf' OR race == 'orc') AND height(m) >= 1.8.
# The inner list groups the two race checks into a single sub-expression
# that is then combined with the height check.
conditions = [
    [["race", "==", "elf"], "or", ["race", "==", "orc"]],
    "and",
    ["height(m)", ">=", 1.8],
]

# Build the cohort from the condition list and pull the matching rows of df.
cht_def = CohortDefinition(conditions)
subset = cht_def.get_cohort_subset(df)
subset

Unnamed: 0,race,height(m),past_score,score
1,orc,1.95,59,43
5,orc,2.1,79,73
6,elf,1.85,45,58


In [10]:
# Cohort written with a list of accepted values on the right-hand side of
# '==': the displayed result keeps rows whose race is 'elf' or 'orc' and
# whose height(m) is at least 1.8.
conditions = [
    [["race", "==", ["elf", "orc"]]],
    "and",
    ["height(m)", ">=", 1.8],
]

# Build the cohort from the condition list and pull the matching rows of df.
cht_def = CohortDefinition(conditions)
subset = cht_def.get_cohort_subset(df)
subset

Unnamed: 0,race,height(m),past_score,score
1,orc,1.95,59,43
5,orc,2.1,79,73
6,elf,1.85,45,58


In [11]:
# Select rows with a missing height by comparing the column against np.nan;
# the displayed result is the single row whose height(m) is empty.
conditions = [["height(m)", "==", np.nan]]
cht_def = CohortDefinition(conditions)
subset = cht_def.get_cohort_subset(df)
subset

Unnamed: 0,race,height(m),past_score,score
9,orc,,87,51


In [12]:
# Cohort: height(m) within the 1.1-1.7 interval (via the 'range' operator)
# AND race different from 'halfling'.
conditions = [
    [
        ["height(m)", "range", [1.1, 1.7]],
        "and",
        ["race", "!=", "halfling"],
    ]
]

# Build the cohort from the condition list and pull the matching rows of df.
cht_def = CohortDefinition(conditions)
subset = cht_def.get_cohort_subset(df)
subset

Unnamed: 0,race,height(m),past_score,score
0,elf,1.6,85,90
8,human,1.65,47,37


In [13]:
# Three conditions chained with 'and' at the same nesting level:
# 1.5 < height(m) < 1.99, restricted to rows with score <= 70.
conditions = [
    ["height(m)", ">", 1.5],
    "and",
    ["height(m)", "<", 1.99],
    "and",
    ["score", "<=", 70],
]

# Build the cohort from the condition list and pull the matching rows of df.
cht_def = CohortDefinition(conditions)
subset = cht_def.get_cohort_subset(df)
subset

Unnamed: 0,race,height(m),past_score,score
1,orc,1.95,59,43
6,elf,1.85,45,58
8,human,1.65,47,37


In [14]:
# The value here is the name of another column: the displayed result holds
# the rows where score is less than or equal to past_score, i.e. the two
# columns are compared row by row.
conditions = [["score", "<=", "past_score"]]

# Build the cohort from the condition list and pull the matching rows of df.
cht_def = CohortDefinition(conditions)
subset = cht_def.get_cohort_subset(df)
subset

Unnamed: 0,race,height(m),past_score,score
1,orc,1.95,59,43
4,halfling,1.53,91,85
5,orc,2.1,79,73
8,human,1.65,47,37
9,orc,,87,51


In [15]:
# Persist the most recently built cohort definition (score <= past_score)
# to a JSON file in the current working directory.
cht_def.save("single_cohort.json")

In [16]:
# Rebuild a cohort by passing the saved JSON file's path to the constructor
# instead of a condition list.
new_cht = CohortDefinition("single_cohort.json")
# The reloaded cohort selects the same rows as the definition that was saved.
subset = new_cht.get_cohort_subset(df)
subset

Unnamed: 0,race,height(m),past_score,score
1,orc,1.95,59,43
4,halfling,1.53,91,85
5,orc,2.1,79,73
8,human,1.65,47,37
9,orc,,87,51
