In [1]:
import pandas as pd

from ordinor.execution_context.rule_based import AtomicRule, Rule

In [2]:
# An AtomicRule built with no arguments; the saved output renders it as the
# null rule (⊥). It is reused by later cells as a comparison operand.
ar_null = AtomicRule()
print(str(ar_null))

⊥ (null)


In [3]:
# Two categorical atomic rules on the same attribute and dimension; the value
# set of ar2 is a strict subset of the value set of ar1.
ar1 = AtomicRule(  # ar1 is looser
    attr='customer_type',
    attr_type='categorical',
    attr_vals={'gold', 'silver'},
    attr_dim='CT',
)
ar2 = AtomicRule(  # ar2 is stricter
    attr='customer_type',
    attr_type='categorical',
    attr_vals={'gold'},
    attr_dim='CT',
)

# String form of each rule, one per line.
print(ar1, ar2, sep='\n')

# Equality: a rule against itself, then against the other rule.
print(ar1 == ar1, ar1 == ar2, sep='\n')

# The four ordering operators, printed in the order >, >=, <, <=.
print(ar1 > ar2, ar1 >= ar2, ar1 < ar2, ar1 <= ar2, sep='\n')

`customer_type` ∈ {'gold', 'silver'}
`customer_type` ∈ {'gold'}
True
False
False
False
True
True


In [4]:
# Two numeric atomic rules on the same attribute whose intervals differ only
# in whether the right endpoint (15000) is included.
ar3 = AtomicRule(  # ar3 is looser
    attr='loan_amount',
    attr_type='numeric',
    attr_vals=pd.Interval(5000, 15000, closed='both'),
    attr_dim='CT',
)
ar4 = AtomicRule(  # ar4 is stricter
    attr='loan_amount',
    attr_type='numeric',
    attr_vals=pd.Interval(5000, 15000, closed='left'),
    attr_dim='CT',
)

# String form of each rule, one per line.
print(ar3, ar4, sep='\n')

# Comparisons against the null rule and between the two interval rules.
print(
    ar_null < ar3,
    ar3 >= ar4,
    ar3 <= ar_null,
    ar3 != ar_null,
    sep='\n',
)

# NOTE(review): the comparison below mixes rules on different attributes and
# was left disabled by the author -- presumably unsupported; confirm or delete.
#print(ar1 < ar3)

`loan_amount` ∈ [5000, 15000]
`loan_amount` ∈ [5000, 15000)
True
True
False
True


In [5]:
# Two boolean atomic rules on the same attribute with opposite values.
ar5 = AtomicRule(
    attr='is_vip',
    attr_type='boolean',
    attr_vals=True,
    attr_dim='CT',
)
ar6 = AtomicRule(
    attr='is_vip',
    attr_type='boolean',
    attr_vals=False,
    attr_dim='CT',
)

# String form of each rule, one per line.
print(ar5, ar6, sep='\n')

# Comparisons against the null rule, then inequality of the two boolean rules.
# NOTE(review): `ar5 >= ar6` was left disabled by the author between the first
# and second comparison -- presumably unsupported for boolean rules; confirm.
#print(ar5 >= ar6)
print(
    ar_null < ar5,
    ar5 <= ar_null,
    ar5 != ar_null,
    ar5 != ar6,
    sep='\n',
)

# Unary minus on a boolean rule; per the saved output it yields the rule with
# the opposite value, so negating either rule equals the other.
print(-ar5, -ar5 == ar6, ar5 == -ar6, sep='\n')

`is_vip` == True
`is_vip` == False
True
False
True
True
`is_vip` == False
True
True


In [6]:
# A Rule built from five atomic rules, two of which are the null rule. The
# saved output shows only the three non-null rules in the conjunction.
rule = Rule(ars=[ar_null, ar_null, ar1, ar2, ar3])
print(str(rule))

# to_types() unpacks into three rules; the names follow the 'CT' / 'AT' / 'TT'
# dimension tags used in `attr_dim` (exact meaning of each tag: TODO confirm).
rule_ct, rule_at, rule_tt = rule.to_types()
print(rule_ct, rule_at, rule_tt, sep='\n')

# Attribute names referenced by the rule, followed by its length.
print(rule.get_attrs(), len(rule), sep='\n')

(`customer_type` ∈ {'gold', 'silver'}) ∧ (`customer_type` ∈ {'gold'}) ∧ (`loan_amount` ∈ [5000, 15000])
(`customer_type` ∈ {'gold', 'silver'}) ∧ (`customer_type` ∈ {'gold'}) ∧ (`loan_amount` ∈ [5000, 15000])
⊥ (null)
⊥ (null)
{'customer_type', 'loan_amount'}
3


In [7]:
# NOTE(review): this rebinds `ar4`, which an earlier cell defined as a numeric
# rule on `loan_amount`, and the new categorical rule is not used in this cell.
ar4 = AtomicRule(
    attr='customer_type',
    attr_type='categorical',
    attr_vals={'normal'},
    attr_dim='CT',
)

# A rule consisting of a single atomic rule.
rule = Rule(ars=[ar1])
print(str(rule))

(`customer_type` ∈ {'gold', 'silver'})


In [8]:
from ordinor.io import read_disco_csv
# Load the event log from the processed CSV file.
el = read_disco_csv('data/processed/wabo.csv')

# Derive an integer `day` column by dropping the first four characters of each
# 'tt:day' value and parsing the remainder as an integer. The vectorized `.str`
# accessor replaces a per-row Python lambda; int64 matches the dtype pandas
# infers from a column of Python ints.
el['day'] = el['tt:day'].str[4:].astype('int64')

print(el)

Importing from CSV file data/processed/wabo.csv
Scanned 8577 events from "data/processed/wabo.csv".
--------------------------------------------------------------------------------
Number of events:		8577
Number of cases:		1434
--------------------------------------------------------------------------------
      Unnamed: 0 case:concept:name org:resource  \
0              0        case-10011   Resource21   
1              1        case-10011   Resource10   
2              2        case-10011   Resource21   
3              3        case-10011   Resource21   
4              4        case-10017   Resource30   
...          ...               ...          ...   
8572        8572         case-9997   Resource06   
8573        8573         case-9997   Resource06   
8574        8574         case-9997   Resource06   
8575        8575         case-9997   Resource06   
8576        8576         case-9997   Resource06   

                       time:timestamp r:org:group  r:group  \
0    2011-10-11 

In [9]:
import numpy as np

# Atomic rule selecting two activity labels.
ar1 = AtomicRule(
    attr='concept:name',
    attr_type='categorical',
    attr_vals={'Confirmation of receipt', 'T02 Check confirmation of receipt'},
    attr_dim='AT',
)

# NOTE(review): ar2 is defined here but is not part of either rule below.
ar2 = AtomicRule(
    attr='ct:channel',
    attr_type='categorical',
    attr_vals={'Desk'},
    attr_dim='CT',
)

# Rule with the activity condition only: show it, then the index of the
# events in `el` it selects.
rule = Rule(ars=[ar1])
print(rule, rule.apply(el, index_only=True), sep='\n')

# Add a condition on the derived `day` column. pd.Interval is closed on the
# right by default, so this is the interval (4, inf].
ar3 = AtomicRule(
    attr='day',
    attr_type='numeric',
    attr_vals=pd.Interval(4, np.inf),
    attr_dim='TT',
)

# Conjunction of both conditions: show it, then the index it selects.
rule = Rule(ars=[ar1, ar3])
print(rule, rule.apply(el, index_only=True), sep='\n')

(`concept:name` ∈ {'Confirmation of receipt', 'T02 Check confirmation of receipt'})
Int64Index([   0,    1,    3,    4,    6,    8,   11,   13,   14,   19,
            ...
            8547, 8548, 8553, 8554, 8559, 8560, 8565, 8566, 8571, 8572],
           dtype='int64', length=2802)
(`concept:name` ∈ {'Confirmation of receipt', 'T02 Check confirmation of receipt'}) ∧ (`day` ∈ (4, inf])
Int64Index([   0,    1,    3,    4,    6,    8,   11,   13,   14,   19,
            ...
            8547, 8548, 8553, 8554, 8559, 8560, 8565, 8566, 8571, 8572],
           dtype='int64', length=2422)


In [10]:
from ordinor.execution_context.rule_based import NumericRuleGenerator

# Restrict the log to events whose `day` is 1 or 2.
is_day_1_or_2 = el['day'].isin({1, 2})
log = el[is_day_1_or_2]

# Show the distinct `day` values left in the sub-log.
print(set(log['day'].tolist()))

# Generate numeric rules for `day` by histogram binning.
# NOTE(review): bins='fd' presumably selects the Freedman-Diaconis estimator,
# as in numpy.histogram -- confirm against NumericRuleGenerator.
rules = NumericRuleGenerator.HistogramSplit(
    'day',
    'TT',
    log,
    bins='fd',
)

rules

{1, 2}


[(`day` ∈ [1.0, 1.2)),
 (`day` ∈ [1.2, 1.4)),
 (`day` ∈ [1.4, 1.6)),
 (`day` ∈ [1.6, 1.8)),
 (`day` ∈ [1.8, 2.0])]

In [21]:
from ordinor.execution_context.rule_based import CategoricalRuleGenerator

# Materialize the generated partitions of `concept:name`; each item is indexed
# below as a pair of rules.
rules = list(
    CategoricalRuleGenerator.RandomTwoSubsetPartition(
        'concept:name', 'AT', el, n_sample=6
    )
)

# For every pair, print how many events the two rules select in total.
# NOTE(review): the saved output under this cell shows index dumps rather than
# integers, so it appears to come from a different version of the cell; re-run.
for rule in rules:
    n_first = len(rule[0].apply(el, index_only=True))
    n_second = len(rule[1].apply(el, index_only=True))
    print(n_first + n_second)

Int64Index([   0,    2,    4,    7,    9,   10,   12,   13,   18,   19,
            ...
            8547, 8552, 8553, 8558, 8559, 8564, 8565, 8570, 8571, 8576],
           dtype='int64', length=2993)
Int64Index([   1,    3,    5,    6,    8,   11,   14,   15,   16,   17,
            ...
            8562, 8563, 8566, 8567, 8568, 8569, 8572, 8573, 8574, 8575],
           dtype='int64', length=5584)
Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,   10,
            ...
            8562, 8563, 8565, 8566, 8568, 8569, 8571, 8572, 8574, 8575],
           dtype='int64', length=5829)
Int64Index([   9,   15,   18,   21,   24,   27,   30,   31,   33,   37,
            ...
            8549, 8552, 8555, 8558, 8561, 8564, 8567, 8570, 8573, 8576],
           dtype='int64', length=2748)
Int64Index([   0,    2,    4,    5,    7,   10,   12,   13,   15,   16,
            ...
            8562, 8563, 8565, 8567, 8568, 8569, 8571, 8573, 8574, 8575],
           dtype='int64', length=5649)


In [None]:
# One-hot encode 'ct:channel' into boolean columns named
# 'ct:channel_@_<value>'. get_dummies replaces the source column, so a second
# run of the unguarded call would raise KeyError; the guard makes re-running
# this cell a no-op once the log is already encoded.
if 'ct:channel' in el.columns:
    el = pd.get_dummies(el, columns=['ct:channel'], prefix_sep='_@_', dtype=bool)
el

In [None]:
from ordinor.execution_context.rule_based import CategoricalRuleGenerator

# Rules for one of the one-hot encoded channel columns, generated as a
# boolean partition.
bool_rules = CategoricalRuleGenerator.BooleanPartition(
    'ct:channel_@_Internet',
    'CT',
    el,
)
print(bool_rules)

# The same column treated as categorical: materialize every generated
# partition and keep the first one.
cat_partitions = list(
    CategoricalRuleGenerator.RandomTwoSubsetPartition(
        'ct:channel_@_Internet',
        'CT',
        el,
    )
)
cat_rules = cat_partitions[0]
print(cat_rules)

In [None]:
el = pd.concat([el] * 100)
print(len(el))

def apply_rules(rules):
    for rule in rules:
        rule.apply(el, index_only=True)
    
%timeit apply_rules(cat_rules)
%timeit apply_rules(bool_rules)