In [1]:
import pandas as pd
from raimitigations.cohort.cohort_definition import CohortDefinition

# Toy dataset shared by every example in this notebook: one record per character.
columns = ["race", "height(m)", "past_score", "score"]
rows = [
    ("elf",      1.60, 85, 90),
    ("orc",      1.95, 59, 43),
    ("halfling", 1.40, 19, 29),
    ("human",    1.75, 89, 99),
    ("halfling", 1.53, 91, 85),
    ("orc",      2.10, 79, 73),
    ("elf",      1.85, 45, 58),
    ("orc",      1.79, 82, 94),
    ("human",    1.65, 47, 37),
]
df = pd.DataFrame(rows, columns=columns)
df

Unnamed: 0,race,height(m),past_score,score
0,elf,1.6,85,90
1,orc,1.95,59,43
2,halfling,1.4,19,29
3,human,1.75,89,99
4,halfling,1.53,91,85
5,orc,2.1,79,73
6,elf,1.85,45,58
7,orc,1.79,82,94
8,human,1.65,47,37


In [2]:
# Cohort: (race is elf OR race is orc) AND height(m) >= 1.8.
# A nested list plays the role of a parenthesised sub-expression, and the
# strings 'and' / 'or' connect neighbouring conditions.
elf_or_orc = [['race', '==', 'elf'], 'or', ['race', '==', 'orc']]
at_least_1_80m = ['height(m)', '>=', 1.8]
conditions = [elf_or_orc, 'and', at_least_1_80m]

cht_def = CohortDefinition(conditions)
subset = cht_def.get_cohort_subset(df)
subset

Unnamed: 0,race,height(m),past_score,score
1,orc,1.95,59,43
5,orc,2.1,79,73
6,elf,1.85,45,58


In [3]:
# Cohort: height(m) inside the [min, max] range [1.1, 1.7] AND race is not halfling.
height_in_range = ['height(m)', 'range', [1.1, 1.7]]
not_halfling = ['race', '!=', 'halfling']
# The outer list holds a single nested group, exactly as a one-element expression.
conditions = [[height_in_range, 'and', not_halfling]]

cht_def = CohortDefinition(conditions)
subset = cht_def.get_cohort_subset(df)
subset

Unnamed: 0,race,height(m),past_score,score
0,elf,1.6,85,90
8,human,1.65,47,37


In [4]:
# Cohort: 1.5 < height(m) < 1.99 AND score <= 70, written as a flat chain of
# three conditions joined by 'and' (no nesting needed).
taller_than_1_50m = ['height(m)', '>', 1.5]
shorter_than_1_99m = ['height(m)', '<', 1.99]
score_at_most_70 = ['score', '<=', 70]
conditions = [taller_than_1_50m, 'and', shorter_than_1_99m, 'and', score_at_most_70]

cht_def = CohortDefinition(conditions)
subset = cht_def.get_cohort_subset(df)
subset

Unnamed: 0,race,height(m),past_score,score
1,orc,1.95,59,43
6,elf,1.85,45,58
8,human,1.65,47,37


In [5]:
# Plain pandas filter: keep the rows whose score is strictly below past_score.
# Backticks quote the column names inside the query expression.
score_dropped_query = "`score` < `past_score`"
subset = df.query(score_dropped_query)
subset

Unnamed: 0,race,height(m),past_score,score
1,orc,1.95,59,43
4,halfling,1.53,91,85
5,orc,2.1,79,73
8,human,1.65,47,37


In [6]:
# Cohort comparing one column against another: score <= past_score.
# NOTE(review): the preceding df.query cell uses a strict '<' while this
# condition uses '<='. Both select the same rows here only because no row has
# score == past_score -- confirm which operator is intended.
score_not_above_past = ['score', '<=', 'past_score']
conditions = [score_not_above_past]

cht_def = CohortDefinition(conditions)
subset = cht_def.get_cohort_subset(df)
subset

Unnamed: 0,race,height(m),past_score,score
1,orc,1.95,59,43
4,halfling,1.53,91,85
5,orc,2.1,79,73
8,human,1.65,47,37


In [7]:
# Save the most recently built cohort definition (the score vs. past_score
# condition) to "single_cohort.json" so it can be reloaded in the next cell.
cht_def.save("single_cohort.json")

In [8]:
# Rebuild the cohort from the saved JSON file and filter with the *loaded*
# definition (new_cht). Using the in-memory cht_def here would not exercise
# the save/load round trip at all.
new_cht = CohortDefinition("single_cohort.json")
subset = new_cht.get_cohort_subset(df)
subset

Unnamed: 0,race,height(m),past_score,score
1,orc,1.95,59,43
4,halfling,1.53,91,85
5,orc,2.1,79,73
8,human,1.65,47,37


In [9]:
# Malformed connector structures. Building a cohort from each one is expected
# to raise; the message is printed so the notebook keeps running and the
# reader can see every error in order.
invalid_connector_cases = [
    # unsupported connector ('xor' instead of 'and' / 'or')
    [ ['race', '==', 'elf'], 'xor', ['race', '==', 'orc'] ],
    # two conditions in a row with no connector between them
    [ [ ['race', '==', 'elf'], 'or', ['race', '==', 'orc'] ], ['height(m)', '>=', 1.8] ],
    # a number where a connector is expected
    [ ['race', '==', 'elf'], 10, ['race', '==', 'orc'] ],
    # trailing connector with no condition after it
    [ [ ['race', '==', 'elf'], 'or', ['race', '==', 'orc'] ], 'and' ],
]

for conditions in invalid_connector_cases:
    try:
        cht_def = CohortDefinition(conditions)
    except Exception as e:
        print(e)

ERROR: expected one of the following connectors: ['and', 'or']. Instead, found the connector: 'xor'
ERROR: expected one of the following connectors: ['and', 'or']. Instead, found another condition: '['height(m)', '>=', 1.8]'
ERROR: invalid value found in cohort condition: 10
ERROR: expected a complementary condition associated to the connector 'and'. Partial query found: ((`race` == 'elf') or (`race` == 'orc')) and .


In [10]:
# Malformed single conditions. Each one is passed to CohortDefinition in turn;
# any exception raised is caught and its message printed, in the same order
# as the cases are listed.
invalid_condition_cases = [
    # four values instead of [COLUMN, OPERATOR, VALUE]
    [ ['height(m)', 'range', [1.1, 1.7], 10] ],
    # unknown operator 'r'
    [ ['height(m)', 'r', [1.1, 1.7]] ],
    # empty list as the range value
    [ ['height(m)', 'range', []] ],
    # list value used with a non-list operator
    [ ['height(m)', '>', [1.1, 1.7]] ],
    # range list with three elements instead of [min, max]
    [ ['height(m)', 'range', [1.1, 1.7, 1]] ],
    # scalar value used with the 'range' operator
    [ ['height(m)', 'range', 1.1] ],
    # ordering operator applied to a string value
    [ ['race', '<=', 'elf'] ],
    # dict used as the range value
    [ ['height(m)', 'range', {}] ],
]

for conditions in invalid_condition_cases:
    try:
        cht_def = CohortDefinition(conditions)
    except Exception as e:
        print(e)

ERROR: each condition must be comprised of exactly three values: [COLUMN, OPERATOR, VALUE], where COLUMN is either string representing the column name or an integer representing the column index, OPERATOR is one of the following operators: ['>', '>=', '<', '<=', '==', 'range', '!='], and VALUE representing the value associated to the operator and the specified column.
ERROR: invalid operator found. Expected one of the following operators: ['>', '>=', '<', '<=', '==', 'range', '!=']. Instead, found r.
ERROR: invalid list passed as the value for a condition. Expected a list with at least one value, but got an empty list. Condition: height(m) range [].
ERROR: a list value is not allowed for the operator >.
ERROR: the value associated to the RANGE operator 'range' must be a list with only two numbers: the minimum and maximum values of the range, respectively. Instead, got the following list with 3 elements: [1.1, 1.7, 1].
ERROR: invalid operator 'range' associated to value '1.1'. The allow