In [1]:
import numpy as np
import pandas as pd

from ordinor.execution_context.rule_based import AtomicRule, Rule
from ordinor.execution_context.rule_based import CategoricalRuleGenerator, NumericRuleGenerator
from ordinor.io import read_disco_csv

In [2]:
# Build an atomic rule with no arguments; its printed form below is the
# null rule, rendered as "⊥ (null)".
ar_null = AtomicRule()

print(ar_null)

⊥ (null)


In [3]:
# Two categorical atomic rules over the same attribute and dimension.
# ar1 admits a superset of the values ar2 admits, so ar1 is the looser rule
# and ar2 the stricter one.
ar1 = AtomicRule(
    attr='customer_type',
    attr_type='categorical',
    attr_vals={'gold', 'silver'},
    attr_dim='CT',
)
ar2 = AtomicRule(
    attr='customer_type',
    attr_type='categorical',
    attr_vals={'gold'},
    attr_dim='CT',
)

print(ar1, ar2, sep='\n')

# Equality: per the recorded output, a rule equals itself but not the
# stricter variant.
print(ar1 == ar1, ar1 == ar2, sep='\n')

# Ordering of the looser rule against the stricter one, in the order
# >, >=, <, <= (the recorded output shows the looser rule comparing as smaller).
print(ar1 > ar2, ar1 >= ar2, ar1 < ar2, ar1 <= ar2, sep='\n')

`customer_type` ∈ {'silver', 'gold'}
`customer_type` ∈ {'gold'}
True
False
False
False
True
True


In [4]:
# Two numeric atomic rules on `loan_amount` that differ only in whether the
# upper bound 15000 is included in the interval.
ar3 = AtomicRule(
    attr='loan_amount',
    attr_type='numeric',
    attr_vals=pd.Interval(5000, 15000, closed='both'),  # looser: [5000, 15000]
    attr_dim='CT',
)
ar4 = AtomicRule(
    attr='loan_amount',
    attr_type='numeric',
    attr_vals=pd.Interval(5000, 15000, closed='left'),  # stricter: [5000, 15000)
    attr_dim='CT',
)

print(ar3, ar4, sep='\n')

# Comparisons involving the null rule and the two interval rules.
# NOTE(review): for the categorical pair the looser rule compares as smaller
# (`ar1 < ar2` prints True), yet here the looser `ar3 >= ar4` prints True --
# confirm the intended ordering for numeric rules.
print(
    ar_null < ar3,
    ar3 >= ar4,
    ar3 <= ar_null,
    ar3 != ar_null,
    sep='\n',
)

# Left disabled in the original: compares rules on different attributes --
# presumably unsupported; TODO confirm.
#print(ar1 < ar3)

`loan_amount` ∈ [5000, 15000]
`loan_amount` ∈ [5000, 15000)
True
True
False
True


In [5]:
# Compose a rule from five atomic rules, two of which are the null rule.
# Per the recorded output, the printed conjunction omits the null rules and
# len(rule) reports 3.
rule = Rule(
    ars=[
        ar_null,
        ar_null,
        ar1,
        ar2,
        ar3,
    ]
)

print(rule)

# to_types() unpacks into three rules; judging by the names used here they are
# the CT / AT / TT parts (all atomic rules above use attr_dim='CT', and only
# the first part prints as non-null) -- TODO confirm against the library.
rule_ct, rule_at, rule_tt = rule.to_types()

print(rule_ct, rule_at, rule_tt, sep='\n')

# Attribute names referenced by the rule, then its length.
print(rule.get_attrs())

print(len(rule))

(`customer_type` ∈ {'silver', 'gold'}) ∧ (`customer_type` ∈ {'gold'}) ∧ (`loan_amount` ∈ [5000, 15000])
(`customer_type` ∈ {'silver', 'gold'}) ∧ (`customer_type` ∈ {'gold'}) ∧ (`loan_amount` ∈ [5000, 15000])
⊥ (null)
⊥ (null)
{'loan_amount', 'customer_type'}
3


In [6]:
# NOTE(review): `ar4` previously named the numeric `loan_amount` rule; it is
# rebound here to a categorical rule and is not used anywhere in this cell.
ar4 = AtomicRule(
    attr='customer_type',
    attr_type='categorical',
    attr_vals={'normal'},
    attr_dim='CT',
)

# A rule consisting of the single atomic rule ar1.
rule = Rule(ars=[ar1])

print(rule)

(`customer_type` ∈ {'silver', 'gold'})


In [7]:
# Load the event log from the processed WABO CSV file.
el = read_disco_csv('data/processed/wabo.csv')

# Derive an integer `day` column from `tt:day` by dropping the first four
# characters of each value and parsing the remainder as an int
# (assumes every `tt:day` value is a string with a 4-character prefix -- TODO confirm).
el['day'] = el['tt:day'].apply(lambda day_label: int(day_label[4:]))

print(el)

Importing from CSV file data/processed/wabo.csv
Scanned 6641 events from "data/processed/wabo.csv".
--------------------------------------------------------------------------------
Number of events:		6641
Number of cases:		1348
--------------------------------------------------------------------------------
      Unnamed: 0 case:concept:name org:resource  \
0              0        case-10011   Resource21   
1              1        case-10011   Resource10   
2              2        case-10011   Resource21   
3              3        case-10011   Resource21   
4              5        case-10017   Resource30   
...          ...               ...          ...   
6636        8572         case-9997   Resource06   
6637        8573         case-9997   Resource06   
6638        8574         case-9997   Resource06   
6639        8575         case-9997   Resource06   
6640        8576         case-9997   Resource06   

                       time:timestamp r:org:group  r:group  \
0    2011-10-11 

In [8]:
# Atomic rule selecting events whose activity label is one of two values.
ar1 = AtomicRule(
    attr='concept:name',
    attr_type='categorical',
    attr_vals={'Confirmation of receipt', 'T02 Check confirmation of receipt'},
    attr_dim='AT',
)

# NOTE(review): ar2 is defined but not part of any rule built in this cell.
ar2 = AtomicRule(
    attr='ct:channel',
    attr_type='categorical',
    attr_vals={'Desk'},
    attr_dim='CT',
)

# Apply the single-condition rule to the event log; with index_only=True the
# result printed is an index of the matching rows.
rule = Rule(ars=[ar1])

print(rule)

print(rule.apply(el, index_only=True))

# Numeric rule on the derived `day` column. pd.Interval defaults to
# closed='right'; it is spelled out so the exclusion of the lower bound 4 is
# visible in the code (the rule prints as `(4, inf]`).
ar3 = AtomicRule(
    attr='day',
    attr_type='numeric',
    attr_vals=pd.Interval(4, np.inf, closed='right'),
    attr_dim='TT',
)

# Conjunction of both conditions; the recorded output shows fewer matching
# rows than for ar1 alone.
rule = Rule(ars=[ar1, ar3])

print(rule)

print(rule.apply(el, index_only=True))

(`concept:name` ∈ {'T02 Check confirmation of receipt', 'Confirmation of receipt'})
Int64Index([   0,    1,    3,    5,    7,   10,   12,   17,   22,   31,
            ...
            6581, 6590, 6595, 6604, 6608, 6612, 6617, 6622, 6627, 6636],
           dtype='int64', length=1293)
(`concept:name` ∈ {'T02 Check confirmation of receipt', 'Confirmation of receipt'}) ∧ (`day` ∈ (4, inf])
Int64Index([   0,    1,    3,    5,    7,   10,   12,   17,   22,   31,
            ...
            6576, 6581, 6590, 6595, 6608, 6612, 6617, 6622, 6627, 6636],
           dtype='int64', length=1112)


In [9]:
# Restrict the event log to events whose `day` is 1 or 2.
log = el[el['day'].isin({1, 2})]

print(set(log['day']))

# Generate atomic rules that split the numeric `day` attribute into
# histogram bins over the filtered log. bins='fd' is presumably forwarded to
# a numpy-style histogram bin estimator -- TODO confirm against the library.
rules = NumericRuleGenerator.HistogramSplit('day', 'TT', log, bins='fd')

rules

{1, 2}


[(`day` ∈ [1.0, 1.25)),
 (`day` ∈ [1.25, 1.5)),
 (`day` ∈ [1.5, 1.75)),
 (`day` ∈ [1.75, 2.0])]

In [10]:
# Collect 5 sampled partitions of the `ct:channel` values into two subsets;
# each partition is a pair of atomic rules (see the displayed result).
# NOTE(review): no random seed is set in this notebook, so the sampled
# partitions presumably differ between runs -- seed the RNG that
# RandomTwoSubsetPartition draws from once that is confirmed.
rules = list(
    CategoricalRuleGenerator.RandomTwoSubsetPartition(
        'ct:channel', 'CT', el, n_sample=5
    )
)

rules

[[(`ct:channel` ∈ {'Post', 'Internet', 'Desk'}),
  (`ct:channel` ∈ {'Intern', 'e-mail'})],
 [(`ct:channel` ∈ {'Internet'}),
  (`ct:channel` ∈ {'e-mail', 'Intern', 'Desk', 'Post'})],
 [(`ct:channel` ∈ {'e-mail', 'Intern', 'Internet', 'Desk'}),
  (`ct:channel` ∈ {'Post'})],
 [(`ct:channel` ∈ {'Intern', 'Internet', 'Post'}),
  (`ct:channel` ∈ {'e-mail', 'Desk'})],
 [(`ct:channel` ∈ {'e-mail', 'Intern', 'Internet', 'Post'}),
  (`ct:channel` ∈ {'Desk'})]]