In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from rules.rules import Rules
from rules.convert_rule_string_to_rule_dict import ConvertRuleStringToRuleDict

import pandas as pd
import numpy as np
import random
import cProfile, pstats

In [3]:
rule_logic_list = [
    "(X['account_number_avg_order_total_per_account_number_1day']>1)",
    "(X['sim_sc_ml']>0.5)",
    "(X['is_billing_shipping_city_same']==True)"
]

In [149]:
field_datatypes = {
    'account_number_avg_order_total_per_account_number_1day': 'DOUBLE',
    'sim_sc_ml': 'FLOAT',
    'is_billing_shipping_city_same': 'BOOLEAN'
}
cassandra_field_names = {
    'account_number_avg_order_total_per_account_number_1day': 'account_number.avg_order_total_per_account_number_1day',
    'sim_sc_ml': 'sim_sc_ml',
    'is_billing_shipping_city_same': 'is_billing_shipping_city_same'
}

In [4]:
rule_strings = {}
rule_scores = {}
for i in range(0, 1000000):
    rule_strings[f'Rule{i}'] = rule_logic_list[random.randint(0, 2)]
    rule_scores[f'Rule{i}'] = random.randint(-100, 0)

# Rule strings to dicts

## Old

In [7]:
filename = 'Rules_rule_strings_to_dicts_old.dat'

In [8]:
rules = Rules(rule_strings=rule_strings)
cProfile.run('rules.as_rule_dicts()', sort='cumtime', filename=f'{filename}.dat')

In [9]:
p = pstats.Stats(f'{filename}.dat')
p.sort_stats('cumtime').print_stats()

Wed Jan 13 15:38:35 2021    Rules_rule_strings_to_dicts_old.dat.dat

         17000006 function calls in 16.742 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   16.742   16.742 {built-in method builtins.exec}
        1    0.000    0.000   16.742   16.742 <string>:1(<module>)
        1    0.000    0.000   16.742   16.742 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:62(as_rule_dicts)
        1    1.927    1.927   16.742   16.742 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:196(_rule_strings_to_rule_dicts)
  1000000    1.943    0.000   11.282    0.000 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_rule_string_to_rule_dict.py:116(convert)
  1000000    0.813    0.000    8.605    0.000 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_rule_string_to_rule_dict.py:189(_create_condition_dict)
  100000

<pstats.Stats at 0x1038d1a20>

## New

In [88]:
filename = 'Rules_rule_strings_to_dicts_new.dat'

In [89]:
rules = Rules(rule_strings=rule_strings)
cProfile.run('rules.as_rule_dicts()', sort='cumtime', filename=f'{filename}.dat')

In [90]:
p = pstats.Stats(f'{filename}.dat')
p.sort_stats('cumtime').print_stats()

Thu Jan 14 17:08:54 2021    Rules_rule_strings_to_dicts_new.dat.dat

         14000008 function calls in 11.666 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   11.666   11.666 {built-in method builtins.exec}
        1    0.000    0.000   11.666   11.666 <string>:1(<module>)
        1    0.000    0.000   11.666   11.666 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:62(as_rule_dicts)
        1    0.000    0.000   11.666   11.666 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:185(_rule_strings_to_rule_dicts)
        1    0.677    0.677   11.666   11.666 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_rule_strings_to_rule_dicts.py:122(convert)
  1000000    1.128    0.000   10.989    0.000 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_rule_strings_to_rule_dicts.py:136(_convert_rule)
  1000000    

<pstats.Stats at 0x156802e48>

# Rule dicts to strings

In [97]:
rules = Rules(rule_strings=rule_strings)
rule_dicts = rules.as_rule_dicts()

## Old

In [106]:
filename = 'Rules_rule_dicts_to_strings_old.dat'

In [107]:
rules = Rules(rule_dicts=rule_dicts)
cProfile.run('rules.as_rule_strings(as_numpy=False)', sort='cumtime', filename=f'{filename}.dat')

In [108]:
p = pstats.Stats(f'{filename}.dat')
p.sort_stats('cumtime').print_stats()

Thu Jan 14 17:23:22 2021    Rules_rule_dicts_to_strings_old.dat.dat

         92000018 function calls (77000018 primitive calls) in 35.154 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   35.154   35.154 {built-in method builtins.exec}
        1    0.000    0.000   35.154   35.154 <string>:1(<module>)
        1    0.267    0.267   35.154   35.154 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:78(as_rule_strings)
        1    0.000    0.000   34.887   34.887 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:177(_rule_dicts_to_rule_strings)
        1    0.144    0.144   28.801   28.801 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_rule_dicts_to_rule_strings.py:14(__init__)
13000001/1   11.852    0.000   28.656   28.656 //anaconda3/lib/python3.7/copy.py:132(deepcopy)
2000001/1    7.569    0.000   28.656   28.656 //anaconda

<pstats.Stats at 0x1568149e8>

## New (deepcopy removed where not needed, replaced with json.dumps where needed)

In [240]:
filename = 'Rules_rule_dicts_to_strings_new.dat'

In [241]:
rules = Rules(rule_dicts=rule_dicts)
cProfile.run('rules.as_rule_strings(as_numpy=False)', sort='cumtime', filename=f'{filename}.dat')

In [242]:
p = pstats.Stats(f'{filename}.dat')
p.sort_stats('cumtime').print_stats()

Thu Jan 14 18:56:44 2021    Rules_rule_dicts_to_strings_new.dat.dat

         12000008 function calls in 7.233 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.001    0.001    7.233    7.233 {built-in method builtins.exec}
        1    0.001    0.001    7.232    7.232 <string>:1(<module>)
        1    0.000    0.000    7.231    7.231 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:74(as_rule_strings)
        1    0.002    0.002    7.231    7.231 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:173(_rule_dicts_to_rule_strings)
        1    0.735    0.735    7.226    7.226 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_rule_dicts_to_rule_strings.py:130(convert)
  1000000    0.555    0.000    6.491    0.000 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_rule_dicts_to_rule_strings.py:188(_convert_rule)
  1000000   

<pstats.Stats at 0x31311cac8>

# ConvertRuleDictsToSystemDicts

## Old

In [252]:
filename = 'Rules_rule_dicts_to_system_dicts_old.dat'

In [253]:
rules = Rules(rule_dicts=rule_dicts)
cProfile.run('rules.as_system_dicts(field_datatypes=field_datatypes, cassandra_field_names=cassandra_field_names)', sort='cumtime', filename=f'{filename}.dat')

In [254]:
p = pstats.Stats(f'{filename}.dat')
p.sort_stats('cumtime').print_stats()

Thu Jan 14 19:06:13 2021    Rules_rule_dicts_to_system_dicts_old.dat.dat

         8000024 function calls in 67.072 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   67.072   67.072 {built-in method builtins.exec}
        1    0.000    0.000   67.072   67.072 <string>:1(<module>)
        1    0.028    0.028   67.072   67.072 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:129(as_system_dicts)
        1    0.000    0.000   67.044   67.044 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:149(_rule_dicts_to_system_dicts)
        1    0.019    0.019   61.576   61.576 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_rule_dicts_to_system_dicts.py:15(__init__)
        1    0.001    0.001   57.617   57.617 //anaconda3/lib/python3.7/json/__init__.py:299(loads)
        1    0.000    0.000   57.616   57.616 //anaconda3/lib/python3.7/j

<pstats.Stats at 0x2eb944f98>

## New (with cPickle not json)

In [255]:
filename = 'Rules_rule_dicts_to_system_dicts_new.dat'

In [256]:
rules = Rules(rule_dicts=rule_dicts)
cProfile.run('rules.as_system_dicts(field_datatypes=field_datatypes, cassandra_field_names=cassandra_field_names)', sort='cumtime', filename=f'{filename}.dat')

In [257]:
p = pstats.Stats(f'{filename}.dat')
p.sort_stats('cumtime').print_stats()

Thu Jan 14 19:07:06 2021    Rules_rule_dicts_to_system_dicts_new.dat.dat

         8000010 function calls in 9.917 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.001    0.001    9.917    9.917 {built-in method builtins.exec}
        1    0.000    0.000    9.916    9.916 <string>:1(<module>)
        1    0.032    0.032    9.916    9.916 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:129(as_system_dicts)
        1    0.000    0.000    9.884    9.884 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:149(_rule_dicts_to_system_dicts)
        1    0.014    0.014    5.199    5.199 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_rule_dicts_to_system_dicts.py:15(__init__)
        1    0.635    0.635    4.685    4.685 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_rule_dicts_to_system_dicts.py:46(convert)
  1000000    0

<pstats.Stats at 0x32adb30f0>

# ConvertSystemDictsToRuleDicts

## Old

In [265]:
filename = 'Rules_system_dicts_to_rule_dicts_old.dat'

In [266]:
rules = Rules(rule_dicts=rule_dicts)
system_dicts = rules.as_system_dicts(field_datatypes=field_datatypes, cassandra_field_names=cassandra_field_names)

In [267]:
rules = Rules(system_dicts=system_dicts)
cProfile.run('rules.as_rule_dicts()', sort='cumtime', filename=f'{filename}.dat')

In [268]:
p = pstats.Stats(f'{filename}.dat')
p.sort_stats('cumtime').print_stats()

Thu Jan 14 19:15:15 2021    Rules_system_dicts_to_rule_dicts_old.dat.dat

         23000024 function calls in 72.016 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.001    0.001   72.016   72.016 {built-in method builtins.exec}
        1    0.000    0.000   72.015   72.015 <string>:1(<module>)
        1    0.026    0.026   72.015   72.015 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:58(as_rule_dicts)
        1    0.001    0.001   71.989   71.989 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:161(_system_dicts_to_rule_dicts)
        1    0.023    0.023   61.509   61.509 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_system_dicts_to_rule_dicts.py:16(__init__)
        1    0.021    0.021   56.025   56.025 //anaconda3/lib/python3.7/json/__init__.py:183(dumps)
        1    0.001    0.001   56.003   56.003 //anaconda3/lib/python3.7/jso

<pstats.Stats at 0x410eb5e10>

## New (with cPickle not json)

In [262]:
filename = 'Rules_system_dicts_to_rule_dicts_new.dat'

In [263]:
rules = Rules(system_dicts=system_dicts)
cProfile.run('rules.as_rule_dicts()', sort='cumtime', filename=f'{filename}.dat')

In [264]:
p = pstats.Stats(f'{filename}.dat')
p.sort_stats('cumtime').print_stats()

Thu Jan 14 19:12:23 2021    Rules_system_dicts_to_rule_dicts_new.dat.dat

         23000010 function calls in 14.706 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   14.706   14.706 {built-in method builtins.exec}
        1    0.000    0.000   14.706   14.706 <string>:1(<module>)
        1    0.033    0.033   14.706   14.706 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:58(as_rule_dicts)
        1    0.000    0.000   14.673   14.673 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:161(_system_dicts_to_rule_dicts)
        1    0.694    0.694    9.986    9.986 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_system_dicts_to_rule_dicts.py:61(convert)
  1000000    0.639    0.000    9.292    0.000 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_system_dicts_to_rule_dicts.py:80(_convert_rule)
  1000000 

<pstats.Stats at 0x40c011a20>

# ConvertRuleDictsToRuleLambdas

## Old

In [269]:
filename = 'Rules_rule_dicts_to_rule_lambdas_old.dat'

In [270]:
rules = Rules(rule_dicts=rule_dicts)
cProfile.run('rules.as_rule_lambdas(as_numpy=False, with_kwargs=True)', sort='cumtime', filename=f'{filename}.dat')

In [271]:
p = pstats.Stats(f'{filename}.dat')
p.sort_stats('cumtime').print_stats()

Thu Jan 14 19:17:43 2021    Rules_rule_dicts_to_rule_lambdas_old.dat.dat

         16000010 function calls in 16.651 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.001    0.001   16.651   16.651 {built-in method builtins.exec}
        1    0.000    0.000   16.650   16.650 <string>:1(<module>)
        1    0.000    0.000   16.650   16.650 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:98(as_rule_lambdas)
        1    0.001    0.001   16.650   16.650 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/rules.py:197(_rule_dicts_to_rule_lambdas)
        1    0.001    0.001   16.649   16.649 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_rule_dicts_to_rule_lambdas.py:30(convert)
        1    4.725    4.725   16.647   16.647 /Users/jlaidler/Documents/tigress/tigress/argo/argo/rules/rules/convert_rule_dicts_to_rule_strings.py:152(_convert_to_lambda)
  

<pstats.Stats at 0x300ab78d0>

# General testing

In [272]:
from rules.convert_rule_strings_to_rule_dicts import ConvertRuleStringsToRuleDicts
from rules.convert_rule_dicts_to_rule_strings import ConvertRuleDictsToRuleStrings
from rules.convert_rule_dicts_to_system_dicts import ConvertRuleDictsToSystemDicts
from rules.convert_system_dicts_to_rule_dicts import ConvertSystemDictsToRuleDicts
from rules.convert_rule_dicts_to_rule_lambdas import ConvertRuleDictsToRuleLambdas

In [273]:
c = ConvertRuleStringsToRuleDicts(rule_strings=rule_strings)

In [274]:
rule_dicts = c.convert()

In [275]:
rule_strings['Rule1']

"(X['is_billing_shipping_city_same']==True)"

In [276]:
rule_dicts['Rule1']

{'condition': 'AND',
 'rules': [{'field': 'is_billing_shipping_city_same',
   'operator': 'equal',
   'value': True}]}

In [277]:
c = ConvertRuleDictsToRuleStrings(rule_dicts=rule_dicts)

In [278]:
rs = c.convert(as_numpy=False)

In [279]:
rs['Rule1']

"(X['is_billing_shipping_city_same']==True)"

In [280]:
rule_dicts['Rule1']

{'condition': 'AND',
 'rules': [{'field': 'is_billing_shipping_city_same',
   'operator': 'equal',
   'value': True}]}

In [281]:
c = ConvertRuleDictsToSystemDicts(rule_dicts=rule_dicts, field_datatypes=field_datatypes, cassandra_field_names=cassandra_field_names)

In [282]:
sd = c.convert()

In [283]:
sd['Rule1']

{'condition': 'AND',
 'rules': [{'field': 'is_billing_shipping_city_same',
   'operator': 'equal',
   'value': '1',
   'id': 'is_billing_shipping_city_same',
   'type': 'boolean',
   'input': 'radio'}]}

In [284]:
rule_dicts['Rule1']

{'condition': 'AND',
 'rules': [{'field': 'is_billing_shipping_city_same',
   'operator': 'equal',
   'value': True}]}

In [285]:
c = ConvertSystemDictsToRuleDicts(system_dicts=sd)

In [286]:
rd = c.convert()

In [287]:
rd['Rule1']

{'condition': 'AND',
 'rules': [{'field': 'is_billing_shipping_city_same',
   'operator': 'equal',
   'value': True}]}

In [288]:
sd['Rule1']

{'condition': 'AND',
 'rules': [{'field': 'is_billing_shipping_city_same',
   'operator': 'equal',
   'value': '1',
   'id': 'is_billing_shipping_city_same',
   'type': 'boolean',
   'input': 'radio'}]}

In [289]:
rules = Rules(rule_strings=rule_strings)

In [290]:
rs = rules.as_rule_strings(as_numpy=False)

In [291]:
rd = rules.as_rule_dicts()

In [292]:
rd['Rule1']

{'condition': 'AND',
 'rules': [{'field': 'is_billing_shipping_city_same',
   'operator': 'equal',
   'value': True}]}

In [293]:
sd = rules.as_system_dicts(field_datatypes=field_datatypes, cassandra_field_names=cassandra_field_names)

In [294]:
rd['Rule1']

{'condition': 'AND',
 'rules': [{'field': 'is_billing_shipping_city_same',
   'operator': 'equal',
   'value': True}]}

In [295]:
rl = rules.as_rule_lambdas(as_numpy=False, with_kwargs=True)

In [296]:
rs['Rule1']

"(X['is_billing_shipping_city_same']==True)"

In [297]:
rd['Rule1']

{'condition': 'AND',
 'rules': [{'field': 'is_billing_shipping_city_same',
   'operator': 'equal',
   'value': True}]}

In [298]:
sd['Rule1']

{'condition': 'AND',
 'rules': [{'field': 'is_billing_shipping_city_same',
   'operator': 'equal',
   'value': '1',
   'id': 'is_billing_shipping_city_same',
   'type': 'boolean',
   'input': 'radio'}]}

In [299]:
rl['Rule1'](**rules.lambda_kwargs['Rule1'])

"(X['is_billing_shipping_city_same']==True)"

In [114]:
import json
_rule_dicts = json.loads(json.dumps(rule_dicts))

In [122]:
%timeit _rule_dicts = json.loads(json.dumps(rule_dicts))

4.95 s ± 64.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [124]:
import copy

In [125]:
%timeit _rule_dicts = copy.deepcopy(rule_dicts)

13.2 s ± 857 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [127]:
import _pickle as cPickle

In [130]:
%timeit _rule_dicts = cPickle.loads(cPickle.dumps(rule_dicts, -1))

3.42 s ± 283 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [129]:
_rule_dicts == rule_dicts

True

In [119]:
_rule_dicts['Rule0']['condition']='OR'

In [120]:
_rule_dicts['Rule0']

{'condition': 'OR',
 'rules': [{'field': 'account_number_avg_order_total_per_account_number_1day',
   'operator': 'greater',
   'value': 1.0}]}

In [121]:
rule_dicts['Rule0']

{'condition': 'AND',
 'rules': [{'field': 'account_number_avg_order_total_per_account_number_1day',
   'operator': 'greater',
   'value': 1.0}]}

In [111]:
import cPickle

ModuleNotFoundError: No module named 'cPickle'

In [110]:
g = cPickle.loads(cPickle.dumps(a, -1))

NameError: name 'cPickle' is not defined

# General testing

In [97]:
rules = Rules(rule_strings=rule_strings)

In [94]:
rule_dicts = rules.as_rule_dicts()

In [95]:
r2 = Rules(rule_dicts=rule_dicts)

In [101]:
_ = r2.as_rule_dicts()

In [102]:
r3 = Rules(rule_strings=rule_strings)

In [104]:
rule_strings

{'Rule0': "(X['account_number_avg_order_total_per_account_number_1day']>1)",
 'Rule1': "(X['is_billing_shipping_city_same']==True)",
 'Rule2': "(X['sim_sc_ml']>0.5)",
 'Rule3': "(X['is_billing_shipping_city_same']==True)",
 'Rule4': "(X['sim_sc_ml']>0.5)",
 'Rule5': "(X['account_number_avg_order_total_per_account_number_1day']>1)",
 'Rule6': "(X['sim_sc_ml']>0.5)",
 'Rule7': "(X['sim_sc_ml']>0.5)",
 'Rule8': "(X['account_number_avg_order_total_per_account_number_1day']>1)",
 'Rule9': "(X['sim_sc_ml']>0.5)",
 'Rule10': "(X['is_billing_shipping_city_same']==True)",
 'Rule11': "(X['sim_sc_ml']>0.5)",
 'Rule12': "(X['is_billing_shipping_city_same']==True)",
 'Rule13': "(X['account_number_avg_order_total_per_account_number_1day']>1)",
 'Rule14': "(X['sim_sc_ml']>0.5)",
 'Rule15': "(X['is_billing_shipping_city_same']==True)",
 'Rule16': "(X['account_number_avg_order_total_per_account_number_1day']>1)",
 'Rule17': "(X['is_billing_shipping_city_same']==True)",
 'Rule18': "(X['sim_sc_ml']>0.5)"

In [103]:
r3.as_rule_strings(as_numpy=True)

{'Rule0': "(X['account_number_avg_order_total_per_account_number_1day'].to_numpy(na_value=np.nan)>1.0)",
 'Rule1': "(X['is_billing_shipping_city_same'].to_numpy(na_value=np.nan)==True)",
 'Rule2': "(X['sim_sc_ml'].to_numpy(na_value=np.nan)>0.5)",
 'Rule3': "(X['is_billing_shipping_city_same'].to_numpy(na_value=np.nan)==True)",
 'Rule4': "(X['sim_sc_ml'].to_numpy(na_value=np.nan)>0.5)",
 'Rule5': "(X['account_number_avg_order_total_per_account_number_1day'].to_numpy(na_value=np.nan)>1.0)",
 'Rule6': "(X['sim_sc_ml'].to_numpy(na_value=np.nan)>0.5)",
 'Rule7': "(X['sim_sc_ml'].to_numpy(na_value=np.nan)>0.5)",
 'Rule8': "(X['account_number_avg_order_total_per_account_number_1day'].to_numpy(na_value=np.nan)>1.0)",
 'Rule9': "(X['sim_sc_ml'].to_numpy(na_value=np.nan)>0.5)",
 'Rule10': "(X['is_billing_shipping_city_same'].to_numpy(na_value=np.nan)==True)",
 'Rule11': "(X['sim_sc_ml'].to_numpy(na_value=np.nan)>0.5)",
 'Rule12': "(X['is_billing_shipping_city_same'].to_numpy(na_value=np.nan)==Tr

# Unit Testing

In [16]:
rule_dicts = {'Rule1': {'condition': 'AND',
                            'rules': [{'condition': 'OR',
                                       'rules': [{'field': 'payer_id_sum_approved_txn_amt_per_paypalid_1day',
                                                  'operator': 'greater_or_equal',
                                                  'value': 60.0},
                                                 {'field': 'payer_id_sum_approved_txn_amt_per_paypalid_7day',
                                                  'operator': 'greater',
                                                  'value': 120.0},
                                                 {'field': 'payer_id_sum_approved_txn_amt_per_paypalid_30day',
                                                  'operator': 'less_or_equal',
                                                  'value': 500.0}]},
                                      {'field': 'num_items', 'operator': 'equal', 'value': 1.0}]},
                  'Rule2': {'condition': 'AND',
                            'rules': [{'field': 'ml_cc_v0', 'operator': 'less', 'value': 0.315},
                                      {'condition': 'OR',
                                       'rules': [{'field': 'method_clean',
                                                  'operator': 'equal',
                                                  'value': 'checkout'},
                                                 {'field': 'method_clean',
                                                  'operator': 'begins_with', 'value': 'checkout'},
                                                 {'field': 'method_clean',
                                                  'operator': 'ends_with', 'value': 'checkout'},
                                                 {'field': 'method_clean',
                                                  'operator': 'contains', 'value': 'checkout'},
                                                 {'field': 'ip_address',
                                                  'operator': 'is_not_null', 'value': None},
                                                 {'field': 'ip_isp', 'operator': 'is_not_empty', 'value': None}]}]},
                  'Rule3': {'condition': 'AND',
                            'rules': [{'field': 'method_clean',
                                       'operator': 'not_begins_with',
                                       'value': 'checkout'},
                                      {'field': 'method_clean',
                                       'operator': 'not_ends_with', 'value': 'checkout'},
                                      {'field': 'method_clean',
                                       'operator': 'not_contains', 'value': 'checkout'},
                                      {'condition': 'OR',
                                       'rules': [{'field': 'ip_address', 'operator': 'is_null', 'value': None},
                                                 {'field': 'ip_isp', 'operator': 'is_empty', 'value': None}]}]},
                  'Rule4': {'condition': 'AND',
                            'rules': [{'field': 'forwarder_address', 'operator': 'equal', 'value': True},
                                      {'field': 'is_shipping_billing_address_same',
                                       'operator': 'equal',
                                       'value': False}]},
                  'Rule5': {'condition': 'AND',
                            'rules': [{'field': 'ad_price_type',
                                       'operator': 'not_in',
                                       'value': ['FREE', 'NEGOTIATION']},
                                      {'field': 'ad_price_type', 'operator': 'in', 'value': ['FOO', 'BAR']}]},
                  'Rule6': {'condition': 'AND',
                            'rules': [{'field': 'ip_country_iso_code',
                                       'operator': 'equal_field',
                                       'value': 'billing_country'},
                                      {'field': 'country_id',
                                       'operator': 'not_equal_field',
                                       'value': 'ip_country_iso_code'}]}}

In [84]:
# r = Rules(rule_dicts={'Rule1': rule_dicts['Rule1']})
r = Rules(rule_dicts=rule_dicts)
rule_lambdas = r.as_rule_lambdas(as_numpy=False, with_kwargs=True)

In [85]:
rule_lambdas

{'Rule1': <function rules.convert_rule_dicts_to_rule_strings.ConvertRuleDictsToRuleStrings._convert_to_lambda.<locals>._make_lambda.<locals>.<lambda>(**kwargs)>,
 'Rule2': <function rules.convert_rule_dicts_to_rule_strings.ConvertRuleDictsToRuleStrings._convert_to_lambda.<locals>._make_lambda.<locals>.<lambda>(**kwargs)>,
 'Rule3': <function rules.convert_rule_dicts_to_rule_strings.ConvertRuleDictsToRuleStrings._convert_to_lambda.<locals>._make_lambda.<locals>.<lambda>(**kwargs)>,
 'Rule4': <function rules.convert_rule_dicts_to_rule_strings.ConvertRuleDictsToRuleStrings._convert_to_lambda.<locals>._make_lambda.<locals>.<lambda>(**kwargs)>,
 'Rule5': <function rules.convert_rule_dicts_to_rule_strings.ConvertRuleDictsToRuleStrings._convert_to_lambda.<locals>._make_lambda.<locals>.<lambda>(**kwargs)>,
 'Rule6': <function rules.convert_rule_dicts_to_rule_strings.ConvertRuleDictsToRuleStrings._convert_to_lambda.<locals>._make_lambda.<locals>.<lambda>(**kwargs)>}

In [86]:
r.lambda_kwargs

{'Rule1': {'payer_id_sum_approved_txn_amt_per_paypalid_1day': 60.0,
  'payer_id_sum_approved_txn_amt_per_paypalid_7day': 120.0,
  'payer_id_sum_approved_txn_amt_per_paypalid_30day': 500.0,
  'num_items': 1.0},
 'Rule2': {'ml_cc_v0': 0.315,
  'method_clean': 'checkout',
  'method_clean%0': 'checkout',
  'method_clean%1': 'checkout',
  'method_clean%2': 'checkout'},
 'Rule3': {'method_clean': 'checkout',
  'method_clean%3': 'checkout',
  'method_clean%4': 'checkout'},
 'Rule4': {'forwarder_address': True,
  'is_shipping_billing_address_same': False},
 'Rule5': {'ad_price_type': ['FREE', 'NEGOTIATION'],
  'ad_price_type%5': ['FOO', 'BAR']},
 'Rule6': {'ip_country_iso_code': 'billing_country',
  'country_id': 'ip_country_iso_code'}}

In [87]:
for rule_name, rule_lambda in rule_lambdas.items():
    rule_string = rule_lambda(**r.lambda_kwargs[rule_name])
    print(rule_string)

((X['payer_id_sum_approved_txn_amt_per_paypalid_1day']>=60.0)|(X['payer_id_sum_approved_txn_amt_per_paypalid_7day']>120.0)|(X['payer_id_sum_approved_txn_amt_per_paypalid_30day']<=500.0))&(X['num_items']==1.0)
(X['ml_cc_v0']<0.315)&((X['method_clean']=='checkout')|(X['method_clean'].str.startswith('checkout', na=False))|(X['method_clean'].str.endswith('checkout', na=False))|(X['method_clean'].str.contains('checkout', na=False))|(~X['ip_address'].isna())|(X['ip_isp'].fillna('')!=''))
(~X['method_clean'].str.startswith('checkout', na=False))&(~X['method_clean'].str.endswith('checkout', na=False))&(~X['method_clean'].str.contains('checkout', na=False))&((X['ip_address'].isna())|(X['ip_isp'].fillna('')==''))
(X['forwarder_address']==True)&(X['is_shipping_billing_address_same']==False)
(~X['ad_price_type'].isin(['FREE', 'NEGOTIATION']))&(X['ad_price_type'].isin(['FOO', 'BAR']))
(X['ip_country_iso_code']==X['billing_country'])&(X['country_id']!=X['ip_country_iso_code'])


In [69]:
rule_lambdas['Rule6'](**r.lambda_kwargs['Rule6'])

"(X['ip_country_iso_code']==X['billing_country'])&(X['country_id']!=X['ip_country_iso_code'])"

In [70]:
from rules.convert_rule_dicts_to_rule_lambdas import ConvertRuleDictsToRuleLambdas

In [71]:
tolambdas = ConvertRuleDictsToRuleLambdas(rule_dicts=rule_dicts)

In [72]:
tolambdas.convert(as_numpy=False, with_kwargs=True)

((X['payer_id_sum_approved_txn_amt_per_paypalid_1day']>={payer_id_sum_approved_txn_amt_per_paypalid_1day})|(X['payer_id_sum_approved_txn_amt_per_paypalid_7day']>{payer_id_sum_approved_txn_amt_per_paypalid_7day})|(X['payer_id_sum_approved_txn_amt_per_paypalid_30day']<={payer_id_sum_approved_txn_amt_per_paypalid_30day}))&(X['num_items']=={num_items})
(X['ml_cc_v0']<{ml_cc_v0})&((X['method_clean']=='{method_clean}')|(X['method_clean'].str.startswith('{method_clean%0}', na=False))|(X['method_clean'].str.endswith('{method_clean%1}', na=False))|(X['method_clean'].str.contains('{method_clean%2}', na=False))|(~X['ip_address'].isna())|(X['ip_isp'].fillna('')!=''))
(~X['method_clean'].str.startswith('{method_clean}', na=False))&(~X['method_clean'].str.endswith('{method_clean%3}', na=False))&(~X['method_clean'].str.contains('{method_clean%4}', na=False))&((X['ip_address'].isna())|(X['ip_isp'].fillna('')==''))
(X['forwarder_address']=={forwarder_address})&(X['is_shipping_billing_address_same']=={i

{'Rule1': <function rules.convert_rule_dicts_to_rule_strings.ConvertRuleDictsToRuleStrings._convert_to_lambda.<locals>.<lambda>(**kwargs)>,
 'Rule2': <function rules.convert_rule_dicts_to_rule_strings.ConvertRuleDictsToRuleStrings._convert_to_lambda.<locals>.<lambda>(**kwargs)>,
 'Rule3': <function rules.convert_rule_dicts_to_rule_strings.ConvertRuleDictsToRuleStrings._convert_to_lambda.<locals>.<lambda>(**kwargs)>,
 'Rule4': <function rules.convert_rule_dicts_to_rule_strings.ConvertRuleDictsToRuleStrings._convert_to_lambda.<locals>.<lambda>(**kwargs)>,
 'Rule5': <function rules.convert_rule_dicts_to_rule_strings.ConvertRuleDictsToRuleStrings._convert_to_lambda.<locals>.<lambda>(**kwargs)>,
 'Rule6': <function rules.convert_rule_dicts_to_rule_strings.ConvertRuleDictsToRuleStrings._convert_to_lambda.<locals>.<lambda>(**kwargs)>}

In [81]:
tolambdas.rule_lambdas['Rule3'](**tolambdas.lambda_kwargs['Rule6'])

"(X['ip_country_iso_code']==X['billing_country'])&(X['country_id']!=X['ip_country_iso_code'])"

In [40]:
r.lambda_kwargs['Rule1']

{'payer_id_sum_approved_txn_amt_per_paypalid_1day': 60.0,
 'payer_id_sum_approved_txn_amt_per_paypalid_7day': 120.0,
 'payer_id_sum_approved_txn_amt_per_paypalid_30day': 500.0,
 'num_items': 1.0}

In [74]:
for rule_name, rule_lambda in rule_lambdas.items():
    rule_string = rule_lambda(**r.lambda_kwargs[rule_name])
#     assert rule_string == rule_strings_pandas[rule_name]
    print(rule_string)

KeyError: 'ip_country_iso_code'

In [34]:
r.lambda_kwargs

{'Rule1': {'payer_id_sum_approved_txn_amt_per_paypalid_1day': 60.0,
  'payer_id_sum_approved_txn_amt_per_paypalid_7day': 120.0,
  'payer_id_sum_approved_txn_amt_per_paypalid_30day': 500.0,
  'num_items': 1.0},
 'Rule2': {'payer_id_sum_approved_txn_amt_per_paypalid_1day': 60.0,
  'payer_id_sum_approved_txn_amt_per_paypalid_7day': 120.0,
  'payer_id_sum_approved_txn_amt_per_paypalid_30day': 500.0,
  'num_items': 1.0,
  'ml_cc_v0': 0.315,
  'method_clean': 'checkout',
  'method_clean%0': 'checkout',
  'method_clean%1': 'checkout',
  'method_clean%2': 'checkout'},
 'Rule3': {'payer_id_sum_approved_txn_amt_per_paypalid_1day': 60.0,
  'payer_id_sum_approved_txn_amt_per_paypalid_7day': 120.0,
  'payer_id_sum_approved_txn_amt_per_paypalid_30day': 500.0,
  'num_items': 1.0,
  'ml_cc_v0': 0.315,
  'method_clean': 'checkout',
  'method_clean%0': 'checkout',
  'method_clean%1': 'checkout',
  'method_clean%2': 'checkout',
  'method_clean%3': 'checkout',
  'method_clean%4': 'checkout',
  'method_cle