In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (here, the local `rule` package under development) are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [9]:
from rule.rules import Rules
import numpy as np
import pandas as pd

In [55]:
# Synthetic 100-row fixture frame with one column per operator family
# exercised by the rules below:
#   A    - uniform floats in [0, 100)             (numeric comparisons)
#   B    - alternating 'foo'/'bar'                (string prefix checks)
#   C    - floats with every 4th value missing    (null checks)
#   D    - strings with every 4th value missing   (membership checks)
#   E, F - floats equal on 3 of every 4 rows      (field-vs-field comparison)
#   G    - mix of 'foo', empty string and NaN     (empty checks)
np.random.seed(0)  # fixed seed so column A is reproducible across runs

n_cycles = 25  # every 4-element pattern is repeated 25 times -> 100 rows
columns = {
    'A': np.random.uniform(0, 100, 4 * n_cycles),
    'B': ['foo', 'bar'] * (2 * n_cycles),
    'C': [1.0, 2.0, 3.0, np.nan] * n_cycles,
    'D': ['foo', 'bar', np.nan, 'no'] * n_cycles,
    'E': [1.0, 2.0, 3.0, 4.0] * n_cycles,
    'F': [1.0, 2.0, 3.0, 5.0] * n_cycles,
    'G': ['foo', '', np.nan, ''] * n_cycles,
}
X = pd.DataFrame(columns)

In [60]:
# Two mirrored rule definitions in nested-dict form. Each rule is an AND of
# one OR-group plus three leaf conditions; Rule2 uses the negated/opposite
# operator of Rule1 for every leaf (same fields, same values).
rule_dicts = {
    'Rule1': {
        'condition': 'AND',
        'rules': [
            {
                'condition': 'OR',
                'rules': [
                    {'field': 'A', 'operator': 'greater_or_equal', 'value': 60.0},
                    {'field': 'B', 'operator': 'begins_with', 'value': 'foo'},
                    {'field': 'C', 'operator': 'is_null', 'value': None},
                ],
            },
            {'field': 'D', 'operator': 'in', 'value': ['foo', 'bar']},
            {'field': 'E', 'operator': 'equal_field', 'value': 'F'},
            {'field': 'G', 'operator': 'is_empty', 'value': None},
        ],
    },
    'Rule2': {
        'condition': 'AND',
        'rules': [
            {
                'condition': 'OR',
                'rules': [
                    {'field': 'A', 'operator': 'less_or_equal', 'value': 60.0},
                    {'field': 'B', 'operator': 'not_begins_with', 'value': 'foo'},
                    {'field': 'C', 'operator': 'is_not_null', 'value': None},
                ],
            },
            {'field': 'D', 'operator': 'not_in', 'value': ['foo', 'bar']},
            {'field': 'E', 'operator': 'not_equal_field', 'value': 'F'},
            {'field': 'G', 'operator': 'is_not_empty', 'value': None},
        ],
    },
}

In [61]:
# Wrap the dict-form rules and render them as evaluable expression strings.
# With as_numpy=True the output below compares columns via
# `.to_numpy(na_value=np.nan)` rather than comparing the pandas Series directly.
r = Rules(rule_dicts=rule_dicts)
r.as_string(as_numpy=True)

{'Rule1': "((X['A'].to_numpy(na_value=np.nan)>=60.0)|(X['B'].str.startswith('foo', na=False))|(pd.isna(X['C'].to_numpy(na_value=np.nan))))&(X['D'].isin(['foo', 'bar']))&(X['E'].to_numpy(na_value=np.nan)==X['F'].to_numpy(na_value=np.nan))&(X['G'].fillna('')=='')",
 'Rule2': "((X['A'].to_numpy(na_value=np.nan)<=60.0)|(~X['B'].str.startswith('foo', na=False))|(~pd.isna(X['C'].to_numpy(na_value=np.nan))))&(~X['D'].isin(['foo', 'bar']))&(X['E'].to_numpy(na_value=np.nan)!=X['F'].to_numpy(na_value=np.nan))&(X['G'].fillna('')!='')"}

In [58]:
# Evaluate each generated rule string and print how many rows it flags.
# NOTE: eval() resolves `X`, `np` and `pd` from the notebook namespace, so the
# fixture frame and both imports must already be defined. The strings come
# from `r.rule_strings` (generated above), not from external input.
for rule_name, rule in r.rule_strings.items():
    print(eval(rule).sum())

11
0


In [65]:
# Ask for the lambda form of each rule. The cells below iterate this as a
# mapping of rule name -> callable and call each one with keyword arguments
# taken from `r.lambda_kwargs`.
rule_lambdas = r.as_lambda(as_numpy=True, with_kwargs=True)

In [69]:
# Accumulator for the strings produced by calling each rule lambda
# (filled in by the next cell).
rule_strings = {}

In [71]:
# Call every rule lambda with its stored keyword arguments and record the
# resulting string under the rule's name.
rule_strings.update(
    (name, build_rule(**r.lambda_kwargs[name]))
    for name, build_rule in rule_lambdas.items()
)

In [72]:
# Display the lambda-rendered strings; the output below is identical to the
# as_string(as_numpy=True) output shown earlier.
rule_strings

{'Rule1': "((X['A'].to_numpy(na_value=np.nan)>=60.0)|(X['B'].str.startswith('foo', na=False))|(pd.isna(X['C'].to_numpy(na_value=np.nan))))&(X['D'].isin(['foo', 'bar']))&(X['E'].to_numpy(na_value=np.nan)==X['F'].to_numpy(na_value=np.nan))&(X['G'].fillna('')=='')",
 'Rule2': "((X['A'].to_numpy(na_value=np.nan)<=60.0)|(~X['B'].str.startswith('foo', na=False))|(~pd.isna(X['C'].to_numpy(na_value=np.nan))))&(~X['D'].isin(['foo', 'bar']))&(X['E'].to_numpy(na_value=np.nan)!=X['F'].to_numpy(na_value=np.nan))&(X['G'].fillna('')!='')"}

In [68]:
# Per-rule keyword arguments, keyed by field name. In the output below the
# string values are stored already quoted ("'foo'", "'F'").
r.lambda_kwargs

{'Rule1': {'A': 60.0, 'B': "'foo'", 'D': ['foo', 'bar'], 'E': "'F'"},
 'Rule2': {'A': 60.0, 'B': "'foo'", 'D': ['foo', 'bar'], 'E': "'F'"}}

In [74]:
from rule.convert_rule_dict_to_string import ConvertRuleDictToString

In [91]:
# Drive the lower-level converter directly on Rule1 to inspect the
# per-condition templates it produces.
# NOTE: this rebinds `r`, which previously held the `Rules` instance.
rule_dict = rule_dicts['Rule1']
rules_list = rule_dict['rules']
r = ConvertRuleDictToString(rule_dict=rule_dict)
# as_lambda=True yields one template string per top-level condition, with `{}`
# placeholders where the values go (see output below).
r._convert_rule_dict_conditions(rules_list=rules_list, as_numpy=False, as_lambda=True, with_kwargs=False)

["((X['A']>={})|(X['B'].str.startswith({}, na=False))|(X['C'].isna()))",
 "(X['D'].isin({}))",
 "(X['E']==X[{}])",
 "(X['G'].fillna('')=='')"]

In [88]:
# Keyword arguments collected by the single-rule converter: a flat
# field -> value dict (not nested per rule, unlike `Rules.lambda_kwargs`).
r.lambda_kwargs

{'A': 60.0, 'B': "'foo'", 'D': ['foo', 'bar'], 'E': "'F'"}

In [81]:
from itertools import product

In [84]:
# Scratch check of itertools.product: the Cartesian product of a boolean flag
# and two labels, in row-major order.
list(product([False, True], ['a', 'b']))

[(False, 'a'), (False, 'b'), (True, 'a'), (True, 'b')]

# ConvertRuleStringToDict

In [13]:
# String-form rules used as parser inputs:
#   Rule1_pd - Rule1 written with plain pandas comparisons
#   Rule1_np - Rule1 written with the `.to_numpy(na_value=np.nan)` form
#   Rule2    - edge case whose string *value* contains parentheses, ')('
# Long strings are split at their top-level conditions using implicit string
# concatenation; the resulting values are single unbroken strings.
rule_strings = {
    'Rule1_pd': (
        "((X['A']>=60.0)|(X['B'].str.startswith('foo', na=False))|(X['C'].isna()))"
        "&(X['D'].isin(['foo', 'bar']))"
        "&(X['E']==X['F'])"
        "&(X['G'].fillna('')=='')"
    ),
    'Rule1_np': (
        "((X['A'].to_numpy(na_value=np.nan)>=60.0)"
        "|(X['B'].str.startswith('foo', na=False))"
        "|(pd.isna(X['C'].to_numpy(na_value=np.nan))))"
        "&(X['D'].isin(['foo', 'bar']))"
        "&(X['E'].to_numpy(na_value=np.nan)==X['F'].to_numpy(na_value=np.nan))"
        "&(X['G'].fillna('')=='')"
    ),
    'Rule2': "(X['A'].str.startswith(')('))",
}

In [10]:
from rule.convert_rule_string_to_dict import ConvertRuleStringToDict

In [11]:
# Parse the edge-case rule whose string value itself contains parentheses.
# The key must be 'Rule2': `rule_strings` has no 'Rule2_pd' entry, so the
# previous lookup raised KeyError on a fresh Restart & Run All. The recorded
# output of the next cell (begins_with, value ')(') corresponds to 'Rule2'.
r = ConvertRuleStringToDict(rule_strings['Rule2'])

In [12]:
# Convert the string back to dict form. In the output below the single
# condition is wrapped in a top-level 'AND' group.
r.convert()

{'condition': 'AND',
 'rules': [{'field': 'A', 'operator': 'begins_with', 'value': ')('}]}

In [21]:
# Probe the private helper on the pandas-form Rule1 string. Judging by the
# output, each key is the index just after a top-level '(' and each value is
# the index of its matching ')', so rule_string[key:value] is the text of one
# top-level condition. (Inferred from the output; confirm against the source.)
r._find_top_level_parentheses_idx(rule_string=rule_strings['Rule1_pd'])

{1: 72, 75: 102, 105: 119, 122: 143}

In [22]:
# Expected top-level parenthesis index maps for each rule string; each
# start -> end pair delimits the text of one top-level condition.
# NOTE: a later cell rebinds `expected_results` to lists of condition strings.
expected_results = dict(
    Rule1_pd={1: 72, 75: 102, 105: 119, 122: 143},
    Rule1_np={1: 126, 129: 156, 159: 225, 228: 249},
    Rule2={1: 28},
)

In [25]:
# Slice the edge-case rule string using its expected index map. The output
# shows the ')(' inside the quoted value is kept intact, not treated as a
# condition boundary.
r._return_conditions_string_list(
            parentheses_pair_idxs=expected_results['Rule2'],
            rule_string=rule_strings['Rule2'])

["X['A'].str.startswith(')(')"]

In [27]:
# List the operators that join the top-level conditions of the pandas-form
# Rule1 string; with four conditions, the output is three '&' connectors.
r._find_connecting_conditions(parentheses_pair_idxs=expected_results['Rule1_pd'],
                            rule_string=rule_strings['Rule1_pd'])

['&', '&', '&']

In [28]:
# Expected top-level condition strings (outer parentheses stripped) for each
# rule string. This rebinds `expected_results`, which previously held the
# parenthesis index maps. Long OR-groups are split across lines with implicit
# string concatenation; each list element is still a single string.
expected_results = {
    'Rule1_pd': [
        (
            "(X['A']>=60.0)"
            "|(X['B'].str.startswith('foo', na=False))"
            "|(X['C'].isna())"
        ),
        "X['D'].isin(['foo', 'bar'])",
        "X['E']==X['F']",
        "X['G'].fillna('')==''",
    ],
    'Rule1_np': [
        (
            "(X['A'].to_numpy(na_value=np.nan)>=60.0)"
            "|(X['B'].str.startswith('foo', na=False))"
            "|(pd.isna(X['C'].to_numpy(na_value=np.nan)))"
        ),
        "X['D'].isin(['foo', 'bar'])",
        "X['E'].to_numpy(na_value=np.nan)==X['F'].to_numpy(na_value=np.nan)",
        "X['G'].fillna('')==''",
    ],
    'Rule2': ["X['A'].str.startswith(')(')"],
}

In [31]:
# Empty group skeleton passed to the string-condition converter in the next
# cell: no connecting condition yet and no child rules.
parent_dict = {'condition': None, 'rules': []}

In [32]:
# Convert Rule1's top-level condition strings into dict entries under
# `parent_dict`. In the output, 'rules' is populated but the top-level
# 'condition' is still None, so this helper does not appear to set it.
# NOTE(review): whether `parent_dict` is mutated in place or copied is not
# visible here - confirm before re-running this cell with the same dict.
r._convert_rule_string_conditions(expected_results['Rule1_pd'], parent_dict=parent_dict)

{'condition': None,
 'rules': [{'condition': 'OR',
   'rules': [{'field': 'A', 'operator': 'greater_or_equal', 'value': 60.0},
    {'field': 'B', 'operator': 'begins_with', 'value': 'foo'},
    {'field': 'C', 'operator': 'is_null', 'value': None}]},
  {'field': 'D', 'operator': 'in', 'value': ['foo', 'bar']},
  {'field': 'E', 'operator': 'equal_field', 'value': 'F'},
  {'field': 'G', 'operator': 'is_empty', 'value': None}]}

In [33]:
# Reference copy of the Rule1 dict (same content as rule_dicts['Rule1']),
# echoed for visual comparison with the converter output above. Note the
# top-level 'condition' here is 'AND', whereas the output above has None.
{'condition': 'AND',
                  'rules': [{'condition': 'OR',
                             'rules': [{'field': 'A',
                                        'operator': 'greater_or_equal',
                                        'value': 60.0},
                                       {'field': 'B',
                                        'operator': 'begins_with',
                                        'value': 'foo'},
                                       {'field': 'C',
                                        'operator': 'is_null',
                                        'value': None}]},
                            {'field': 'D',
                             'operator': 'in', 'value': ['foo', 'bar']},
                            {'field': 'E',
                             'operator': 'equal_field',
                             'value': 'F'},
                            {'field': 'G',
                             'operator': 'is_empty',
                             'value': None}
                            ]
                  }

{'condition': 'AND',
 'rules': [{'condition': 'OR',
   'rules': [{'field': 'A', 'operator': 'greater_or_equal', 'value': 60.0},
    {'field': 'B', 'operator': 'begins_with', 'value': 'foo'},
    {'field': 'C', 'operator': 'is_null', 'value': None}]},
  {'field': 'D', 'operator': 'in', 'value': ['foo', 'bar']},
  {'field': 'E', 'operator': 'equal_field', 'value': 'F'},
  {'field': 'G', 'operator': 'is_empty', 'value': None}]}