In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from rule_generation.rule_generator_opt import RuleGeneratorOpt
from rule_generation.rule_generator_dt import RuleGeneratorDT
from sklearn.metrics import precision_score

import pandas as pd
import numpy as np
import json

In [3]:
# Toy dataset: a four-row pattern repeated 250 times (1000 rows). Each repetition
# contains one missing value in 'A' and one missing value in 'country'.
X = pd.DataFrame({
    'A': np.array([-1, 1, np.nan, 2] * 250),
    'country': ['GB', 'US', 'FR', np.nan] * 250,
})
# Target is 1 where 'A' is negative or missing, 0 otherwise.
y = ((X['A'] < 0) | X['A'].isna()).astype(int)

In [4]:
# Keep an un-imputed copy (NaNs intact); the cleaned rules are later applied to X_orig.
X_orig = X.copy()

In [5]:
# Impute missing 'A' values with 0 - the same value recorded for 'A' in imputed_values.
X['A'] = X['A'].fillna(0)

In [6]:
# Impute missing 'country' values with the placeholder category 'missing'
# (the same value recorded for 'country' in imputed_values).
X['country'] = X['country'].fillna('missing')

In [7]:
X.head()

Unnamed: 0,A,country
0,-1.0,GB
1,1.0,US
2,0.0,FR
3,2.0,missing
4,-1.0,GB


In [8]:
# One-hot encode the categorical column; later cells reference the resulting
# columns as country_GB, country_US, country_FR and country_missing.
X = pd.get_dummies(X)

In [9]:
# Value used to fill nulls in each original (pre-encoding) column.
imputed_values = dict(A=0, country='missing')

# Numeric conditions

In [167]:
# The four numeric rules as (feature, operator, value) triples - the form that is
# unpacked into add_null_condition further down.
rule_comps = {
    'nan_low': ('A', '>=', 0),
    'nan_middle': ('A', '>=', -1),
    'nan_high': ('A', '<=', 0),
    'no_nan': ('A', '>=', 2),
}

# The same four rules in string form, as applied to the imputed data.
rule_strings = {
    'nan_low': "X['A']>= 0",
    'nan_middle': "X['A'] >= -1",
    'nan_high': "X['A'] <= 0",
    'no_nan': "X['A'] >= 2",
}

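The rules above cover where the imputed value (0) can sit relative to the range selected by a rule:

- Imputed value is the minimum value in the range (`nan_low`: `A >= 0`)
- Imputed value is the maximum value in the range (`nan_high`: `A <= 0`)
- Imputed value is somewhere in the middle (`nan_middle`: `A >= -1`)
- Imputed value is outside the range altogether (`no_nan`: `A >= 2`)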

In [168]:
def add_null_condition(feature, operator, value, null_value, X=None):
    """
    Rewrite a numeric condition that was built on imputed data so that it can be
    applied to the un-imputed data (where nulls are still NaN).

    Parameters
    ----------
    feature : str
        Column the condition refers to.
    operator : str
        Comparison operator; one of '>=', '<=', '>', '<', '==', '!='.
    value : int or float
        Right-hand side of the condition.
    null_value : int or float
        Value that was used to impute nulls in `feature`.
    X : pd.DataFrame, optional
        Imputed dataset the condition was built on. When omitted, the
        notebook-level variable `X` is used (the original behaviour).

    Returns
    -------
    str
        The rewritten condition:
        * the plain condition, when the imputed value does not satisfy it;
        * an `.isna()` check only, when the condition selects exactly the
          imputed rows of `X`;
        * otherwise the condition OR-ed with an `.isna()` check. When the
          threshold equals the imputed value (for '>=' / '<='), the threshold is
          moved to the next value observed in `X`, so the imputed value itself
          no longer appears in the condition.

    Raises
    ------
    ValueError
        If `operator` is not one of the supported comparison operators.
    NameError
        If `X` is not given and no notebook-level `X` exists.
    """
    # Local import under an alias: the name `operator` is taken by the parameter.
    import operator as operator_module

    comparators = {
        '>=': operator_module.ge,
        '<=': operator_module.le,
        '>': operator_module.gt,
        '<': operator_module.lt,
        '==': operator_module.eq,
        '!=': operator_module.ne,
    }
    operator_key = operator.strip()
    if operator_key not in comparators:
        raise ValueError(
            f"Unsupported operator {operator!r}; expected one of {sorted(comparators)}"
        )
    compare = comparators[operator_key]

    plain_condition = f"(X['{feature}']{operator}{value})"
    is_null_condition = f"(X['{feature}'].isna())"

    # The imputed value does not satisfy the condition, so nulls are never
    # flagged and the condition carries over unchanged. No data is needed here.
    if not compare(null_value, value):
        return plain_condition

    if X is None:
        # Backward compatibility: earlier versions read the notebook-level `X`.
        try:
            X = globals()['X']
        except KeyError:
            raise NameError("name 'X' is not defined; pass the dataset via `X=`") from None

    rule_mask = compare(X[feature], value)
    null_mask = X[feature] == null_value

    # The condition flags the imputed rows and nothing else.
    if (rule_mask == null_mask).all():
        return is_null_condition

    if null_value == value and operator_key in ('>=', '<='):
        # The threshold IS the imputed value: move it to the next distinct value
        # among the rows the condition selects (second smallest for '>=',
        # second largest for '<=').
        flagged_values = X.loc[rule_mask, feature].drop_duplicates()
        if operator_key == '>=':
            threshold = flagged_values.nsmallest(2).iloc[-1]
        else:
            threshold = flagged_values.nlargest(2).iloc[-1]
        return f"(X['{feature}']{operator_key}{threshold})|{is_null_condition}"

    return f"{plain_condition}|{is_null_condition}"

In [169]:
# add_null_condition('A', '<=', 2, 0)

In [170]:
# Convert every numeric rule with add_null_condition, passing along the value
# that was used to impute the rule's feature.
cleaned_rules = {
    rule_name: add_null_condition(feature, operator, value, imputed_values[feature])
    for rule_name, (feature, operator, value) in rule_comps.items()
}

In [171]:
cleaned_rules

{'nan_low': "(X['A']>=1.0)|(X['A'].isna())",
 'nan_middle': "(X['A']>=-1)|(X['A'].isna())",
 'nan_high': "(X['A']<=-1.0)|(X['A'].isna())",
 'no_nan': "(X['A']>=2)"}

In [172]:
from rule_application.argo_rule_applier import ArgoRuleApplier

In [173]:
apply_cleaned = ArgoRuleApplier(rule_strings = cleaned_rules)

In [174]:
# Apply the cleaned rules to the un-imputed 'A' column. X_orig still holds the real
# NaNs, so there is no need to reconstruct them by mapping every 0 back to NaN -
# that would hard-code the imputed value and also blank out any genuine zeros.
X_rules_cleaned = apply_cleaned.apply(X_orig[['A']])

In [175]:
apply_orig = ArgoRuleApplier(rule_strings = rule_strings)

In [176]:
X_rules_orig = apply_orig.apply(X)

In [177]:
# Per-rule fraction of rows on which the original rule (imputed data) and the cleaned
# rule (data with nulls) agree; 1.0 means the two flag exactly the same rows.
(X_rules_orig == X_rules_cleaned).mean()

nan_low       1.0
nan_middle    1.0
nan_high      1.0
no_nan        1.0
dtype: float64

# Categorical conditions

In [146]:
# One-hot encoded column name -> category it encodes.
ohe_cols_cats = {
    f'country_{category}': category
    for category in ('GB', 'US', 'FR', 'missing')
}
# One-hot column for the 'missing' placeholder category.
null_ohe_cols = ['country_missing']

In [221]:
# Categorical rules as (one-hot column, operator, value) triples.
rule_comps = {
    'is_GB': ('country_GB', '==', 'True'),
    'is_not_FR': ('country_FR', '==', 'False'),
    'is_missing': ('country_missing', '==', 'True'),
    'is_not_missing': ('country_missing', '==', 'False'),
}

# String form of the same rules, rendered from the triples so the two cannot drift apart.
rule_strings = {
    rule_name: f"(X['{column}']{comparison}{flag})"
    for rule_name, (column, comparison, flag) in rule_comps.items()
}

In [190]:
def convert_ohe_condition(feature, operator, value, category, is_null_col):
    """
    Convert a condition on a one-hot encoded column into the equivalent condition
    on the original categorical column.

    Parameters
    ----------
    feature : str
        One-hot encoded column name, i.e. '<original feature>_<category>'.
    operator : str
        '==' or '!='.
    value : str or bool
        'True' / 'False' (the booleans True / False are accepted as well).
    category : str
        Category encoded by `feature`.
    is_null_col : bool
        True when `category` is the placeholder that nulls were imputed with, in
        which case the result is expressed with `.isna()`.

    Returns
    -------
    str
        Condition on the original column: an equality / inequality against
        `category`, or an `.isna()` / `~.isna()` check for the null column.

    Raises
    ------
    ValueError
        If `operator` is not '==' / '!=' or `value` is not True / False.
    """
    suffix = f"_{category}"
    if feature.endswith(suffix):
        # Strip the category from the END only, so feature names that happen to
        # contain '_<category>' earlier on are not truncated.
        original_feature = feature[:-len(suffix)]
    else:
        # Column does not follow '<feature>_<category>': keep the legacy split.
        original_feature = feature.split(suffix)[0]

    truth_values = {'True': True, 'False': False}
    value_key = str(value)
    if operator not in ('==', '!=') or value_key not in truth_values:
        raise ValueError(
            f"Cannot convert condition {feature!r} {operator!r} {value!r}: "
            "expected operator '==' or '!=' and value True or False"
        )

    # True when the condition selects rows that belong to `category`
    # ('== True' or '!= False'); False when it selects every other row.
    selects_category = truth_values[value_key] == (operator == '==')

    if is_null_col:
        if selects_category:
            return f"(X['{original_feature}'].isna())"
        return f"(~X['{original_feature}'].isna())"
    comparison = '==' if selects_category else '!='
    return f"(X['{original_feature}']{comparison}'{category}')"

In [194]:
# Convert every one-hot encoded rule back to a condition on the original column.
cleaned_conditions = {}
for rule_name, args in rule_comps.items():
    feature = args[0]
    category = ohe_cols_cats[feature]
    suffix = f"_{category}"
    # Strip the category from the end of the column name only; fall back to the
    # legacy split when the column does not follow '<feature>_<category>'.
    if feature.endswith(suffix):
        original_feature = feature[:-len(suffix)]
    else:
        original_feature = feature.split(suffix)[0]
    # The column is the "null" column when its category is the value nulls were
    # imputed with. `.get` keeps features that were never imputed from raising KeyError.
    is_null_col = imputed_values.get(original_feature) == category
    cleaned_conditions[rule_name] = convert_ohe_condition(*args, category, is_null_col)

In [195]:
cleaned_conditions

{'is_GB': "(X['country']=='GB')",
 'is_not_FR': "(X['country']!='FR')",
 'is_missing': "(X['country'].isna())",
 'is_not_missing': "(~X['country'].isna())"}

# Test class

In [152]:
# Value used to fill nulls in each original column.
imputed_values = {'A': 0, 'country': 'missing'}

# One-hot encoded column name -> category it encodes.
ohe_categories = {
    f'country_{category}': category
    for category in ('GB', 'US', 'FR', 'missing')
}

# Every rule as a (feature, operator, value) triple: four numeric, four categorical.
rule_comps = {
    'nan_low': ('A', '>=', 0),
    'nan_middle': ('A', '>=', -1),
    'nan_high': ('A', '<=', 0),
    'no_nan': ('A', '>=', 2),
    'is_GB': ('country_GB', '==', 'True'),
    'is_not_FR': ('country_FR', '==', 'False'),
    'is_missing': ('country_missing', '==', 'True'),
    'is_not_missing': ('country_missing', '==', 'False'),
}

# String form of the same rules, rendered from the triples so the two stay in sync.
rule_strings = {
    rule_name: f"(X['{column}']{comparison}{threshold})"
    for rule_name, (column, comparison, threshold) in rule_comps.items()
}

In [157]:
from itertools import combinations

In [161]:
# AND together every unordered pair of rules to get two-condition test rules.
comb_rules = {
    f'combined_rule_{pair_idx}': f'{left}&{right}'
    for pair_idx, (left, right) in enumerate(combinations(rule_strings.values(), 2))
}

In [162]:
comb_rules

{'combined_rule_0': "(X['A']>=0)&(X['A']>=-1)",
 'combined_rule_1': "(X['A']>=0)&(X['A']<=0)",
 'combined_rule_2': "(X['A']>=0)&(X['A']>=2)",
 'combined_rule_3': "(X['A']>=0)&(X['country_GB']==True)",
 'combined_rule_4': "(X['A']>=0)&(X['country_FR']==False)",
 'combined_rule_5': "(X['A']>=0)&(X['country_missing']==True)",
 'combined_rule_6': "(X['A']>=0)&(X['country_missing']==False)",
 'combined_rule_7': "(X['A']>=-1)&(X['A']<=0)",
 'combined_rule_8': "(X['A']>=-1)&(X['A']>=2)",
 'combined_rule_9': "(X['A']>=-1)&(X['country_GB']==True)",
 'combined_rule_10': "(X['A']>=-1)&(X['country_FR']==False)",
 'combined_rule_11': "(X['A']>=-1)&(X['country_missing']==True)",
 'combined_rule_12': "(X['A']>=-1)&(X['country_missing']==False)",
 'combined_rule_13': "(X['A']<=0)&(X['A']>=2)",
 'combined_rule_14': "(X['A']<=0)&(X['country_GB']==True)",
 'combined_rule_15': "(X['A']<=0)&(X['country_FR']==False)",
 'combined_rule_16': "(X['A']<=0)&(X['country_missing']==True)",
 'combined_rule_17': "(X[

In [11]:
from rule_generation.condition_cleaner import ConditionCleaner
import argo_utils.argo_utils as argo_utils

In [12]:
# cc = ConditionCleaner(imputed_values=imputed_values, ohe_categories=ohe_categories)

In [21]:
# Names of the one-hot encoded columns; rules on any other column are treated as numeric.
ohe_cols = list(ohe_categories.keys())
cleaned_results = {}
for rule_name, rule_comp in rule_comps.items():
    feature, operator, value = rule_comp
    if feature in ohe_cols:
        converted = argo_utils.convert_ohe_rule(
            feature, operator, value,
            ohe_categories=ohe_categories,
            imputed_values=imputed_values,
        )
    else:
        converted = argo_utils.add_null_condition_to_imputed_numeric_rule(
            feature, operator, value,
            imputed_values=imputed_values,
            X=X,
        )
    cleaned_results[rule_name] = converted

In [22]:
cleaned_results

{'nan_low': "(X['A']>=1.0)|(X['A'].isna())",
 'nan_middle': "(X['A']>=-1)|(X['A'].isna())",
 'nan_high': "(X['A']<=-1.0)|(X['A'].isna())",
 'no_nan': "(X['A']>=2)",
 'is_GB': "(X['country']=='GB')",
 'is_not_FR': "(X['country']!='FR')",
 'is_missing': "(X['country'].isna())",
 'is_not_missing': "(~X['country'].isna())"}

In [23]:
from rule_application.argo_rule_applier import ArgoRuleApplier

In [24]:
apply_cleaned = ArgoRuleApplier(rule_strings=cleaned_results)

In [25]:
X_rules_cleaned = apply_cleaned.apply(X=X_orig)

In [26]:
apply_orig = ArgoRuleApplier(rule_strings=rule_strings)

In [27]:
X_rules_orig = apply_orig.apply(X)

In [28]:
(X_rules_cleaned == X_rules_orig).mean()

nan_low           1.0
nan_middle        1.0
nan_high          1.0
no_nan            1.0
is_GB             1.0
is_not_FR         1.0
is_missing        1.0
is_not_missing    1.0
dtype: float64

In [31]:
# NOTE(review): this call raises the TypeError shown in the output. Presumably the helper
# subscripts `imputed_values`, which is None here - confirm whether None should be supported.
argo_utils.convert_ohe_condition_to_general(feature='country_GB', operator='==', value='', ohe_categories={'country_GB': 'GB'}, imputed_values=None)

TypeError: 'NoneType' object is not subscriptable

# Try with rules produced by the rule generator

In [91]:
import random

In [92]:
# Toy dataset with nulls in two numeric columns ('A', 'B') and one categorical column ('C');
# a four-row pattern repeated 250 times (1000 rows).
X_orig = pd.DataFrame({
    'A': np.array([-1, 1, np.nan, 2] * 250),
    'B': np.array([0, np.nan, 2, 4] * 250),
    'C': ['GB', 'US', 'FR', np.nan] * 250,
})
# Target is 1 where 'B' is at least 2; missing 'B' compares False and so maps to 0.
y = (X_orig['B'] >= 2).astype(int)

In [93]:
# Impute nulls per column. The same mapping is repeated as `imputed_values` further down
# and handed to the converter, so the two must be kept in sync.
X_processed = X_orig.fillna({'A': 0, 'B':-1, 'C': 'missing'})

In [94]:
X_processed = pd.get_dummies(X_processed)

In [95]:
# Rule generator using precision as the optimisation function; it is fitted on the
# processed (imputed and one-hot encoded) data below.
rgo_wo_imputed = RuleGeneratorOpt(
    opt_func=precision_score,
    n_total_conditions=4,
    num_rules_keep=50,
    remove_corr_rules=False,
)

In [96]:
rgo_wo_imputed.fit(X_processed, y)

  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,RGO_Rule4,RGO_Rule10,RGO_Rule1,RGO_Rule30,RGO_Rule34,RGO_Rule7,RGO_Rule9,RGO_Rule0,RGO_Rule2,RGO_Rule3
0,0,0,0,0,0,0,1,0,1,1
1,0,0,0,0,0,1,0,1,1,1
2,1,0,1,1,1,1,1,1,1,1
3,0,1,1,1,1,1,1,1,1,1
4,0,0,0,0,0,0,1,0,1,1
...,...,...,...,...,...,...,...,...,...,...
995,0,1,1,1,1,1,1,1,1,1
996,0,0,0,0,0,0,1,0,1,1
997,0,0,0,0,0,1,0,1,1,1
998,1,0,1,1,1,1,1,1,1,1


In [97]:
rgo_wo_imputed.rule_descriptions

Unnamed: 0_level_0,Logic,Precision,Recall,nConditions,PercDataFlagged,OptMetric
Rule,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
RGO_Rule4,(X['C_FR']==True),1.0,0.5,1,0.25,1.0
RGO_Rule10,(X['C_missing']==True),1.0,0.5,1,0.25,1.0
RGO_Rule1,(X['B']>=2),1.0,1.0,1,0.5,1.0
RGO_Rule30,(X['C_GB']==False)&(X['C_US']==False),1.0,1.0,2,0.5,1.0
RGO_Rule34,(X['A']>=0)&(X['C_US']==False),1.0,1.0,2,0.5,1.0
RGO_Rule7,(X['C_GB']==False),0.666667,1.0,1,0.75,0.666667
RGO_Rule9,(X['C_US']==False),0.666667,1.0,1,0.75,0.666667
RGO_Rule0,(X['A']>=0),0.666667,1.0,1,0.75,0.666667
RGO_Rule2,(X['A']<=2),0.5,1.0,1,1.0,0.5
RGO_Rule3,(X['B']<=4),0.5,1.0,1,1.0,0.5


In [100]:
# Value used to fill nulls in each original column (matches the fillna call that built X_processed).
imputed_values = {'A': 0, 'B': -1, 'C': 'missing'}
# One-hot encoded column name -> category it encodes.
ohe_categories = {
    f'C_{category}': category
    for category in ('US', 'GB', 'FR', 'missing')
}
# Rule strings held by the fitted rule generator.
rule_strings = rgo_wo_imputed.rules.rule_strings

In [116]:
from rules.convert_processed_conditions_to_general import ConvertProcessedConditionsToGeneral

In [134]:
c = ConvertProcessedConditionsToGeneral(imputed_values=imputed_values, ohe_categories=ohe_categories)

In [135]:
# c._recurse_create_condition_replacement_dict(rule_string="(X['C_GB']==False)&(X['C_US']==False)", X=X_processed)

In [136]:
# c.condition_replacement_dict

In [137]:
c.convert(rule_strings=rule_strings, X=X_processed)

{'RGO_Rule4': "(X['C']=='FR')",
 'RGO_Rule10': "(X['C'].isna())",
 'RGO_Rule1': "(X['B']>=2)",
 'RGO_Rule30': "(X['C']!='GB')&(X['C']!='US')",
 'RGO_Rule34': "((X['A']>=0)|(X['A'].isna()))&(X['C']!='US')",
 'RGO_Rule7': "(X['C']!='GB')",
 'RGO_Rule9': "(X['C']!='US')",
 'RGO_Rule0': "((X['A']>=0)|(X['A'].isna()))",
 'RGO_Rule2': "((X['A']<=2)|(X['A'].isna()))",
 'RGO_Rule3': "((X['B']<=4)|(X['B'].isna()))"}

## Compare application of the processed and general rules

In [139]:
apply_processed = ArgoRuleApplier(rule_strings = rgo_wo_imputed.rules.rule_strings)

In [141]:
X_rules_processed = apply_processed.apply(X_processed, y)

In [144]:
apply_general = ArgoRuleApplier(rule_strings = c.rules.rule_strings)

In [145]:
X_rules_general = apply_general.apply(X_orig, y)

In [147]:
(X_rules_processed == X_rules_general).mean()

RGO_Rule4     1.0
RGO_Rule10    1.0
RGO_Rule1     1.0
RGO_Rule30    1.0
RGO_Rule34    1.0
RGO_Rule7     1.0
RGO_Rule9     1.0
RGO_Rule0     1.0
RGO_Rule2     1.0
RGO_Rule3     1.0
dtype: float64

# Unit testing

In [186]:
# Raw data with nulls in both the numeric and the categorical column.
X_orig = pd.DataFrame({
    'A': np.array([-1, 1, np.nan, 2] * 250),
    'country': ['GB', 'US', 'FR', np.nan] * 250,
})
# Value used to fill nulls in each original column.
imputed_values = {'A': 0, 'country': 'missing'}
# One-hot encoded column name -> category it encodes.
ohe_categories = {
    'country_GB': 'GB',
    'country_US': 'US',
    'country_FR': 'FR',
    'country_missing': 'missing',
}
# Processed data: nulls imputed, then the categorical column one-hot encoded.
X_processed = pd.get_dummies(X_orig.fillna(imputed_values))

# Eight single-condition rules written against the processed data...
rule_strings = {
    'nan_low': "(X['A']>=0)",
    'nan_middle': "(X['A']>=-1)",
    'nan_high': "(X['A']<=0)",
    'no_nan': "(X['A']>=2)",
    'is_GB': "(X['country_GB']==True)",
    'is_not_FR': "(X['country_FR']==False)",
    'is_missing': "(X['country_missing']==True)",
    'is_not_missing': "(X['country_missing']==False)",
}
# ...followed by every unordered pair of them AND-ed together
# (combined_rule_0 .. combined_rule_27, in pair order (0,1), (0,2), ..., (6,7)).
_single_conditions = list(rule_strings.values())
rule_strings.update({
    f'combined_rule_{pair_idx}': f'{first}&{second}'
    for pair_idx, (first, second) in enumerate(
        (_single_conditions[i], _single_conditions[j])
        for i in range(len(_single_conditions))
        for j in range(i + 1, len(_single_conditions))
    )
})
c = ConvertProcessedConditionsToGeneral(imputed_values, ohe_categories)

In [172]:
c.convert(rule_strings, X_processed)

{'nan_low': "((X['A']>=0)|(X['A'].isna()))",
 'nan_middle': "((X['A']>=-1)|(X['A'].isna()))",
 'nan_high': "((X['A']<=0)|(X['A'].isna()))",
 'no_nan': "(X['A']>=2)",
 'is_GB': "(X['country']=='GB')",
 'is_not_FR': "(X['country']!='FR')",
 'is_missing': "(X['country'].isna())",
 'is_not_missing': "(~X['country'].isna())",
 'combined_rule_0': "((X['A']>=0)|(X['A'].isna()))&((X['A']>=-1)|(X['A'].isna()))",
 'combined_rule_1': "((X['A']>=0)|(X['A'].isna()))&((X['A']<=0)|(X['A'].isna()))",
 'combined_rule_2': "((X['A']>=0)|(X['A'].isna()))&(X['A']>=2)",
 'combined_rule_3': "((X['A']>=0)|(X['A'].isna()))&(X['country']=='GB')",
 'combined_rule_4': "((X['A']>=0)|(X['A'].isna()))&(X['country']!='FR')",
 'combined_rule_5': "((X['A']>=0)|(X['A'].isna()))&(X['country'].isna())",
 'combined_rule_6': "((X['A']>=0)|(X['A'].isna()))&(~X['country'].isna())",
 'combined_rule_7': "((X['A']>=-1)|(X['A'].isna()))&((X['A']<=0)|(X['A'].isna()))",
 'combined_rule_8': "((X['A']>=-1)|(X['A'].isna()))&(X['A']>=2

In [182]:
# Expected output of the conversion, spelled out in full (one rule per line) so that the
# check does not depend on the logic being tested.
expected_general_rule_strings = {
    'nan_low': "((X['A']>=0)|(X['A'].isna()))",
    'nan_middle': "((X['A']>=-1)|(X['A'].isna()))",
    'nan_high': "((X['A']<=0)|(X['A'].isna()))",
    'no_nan': "(X['A']>=2)",
    'is_GB': "(X['country']=='GB')",
    'is_not_FR': "(X['country']!='FR')",
    'is_missing': "(X['country'].isna())",
    'is_not_missing': "(~X['country'].isna())",
    'combined_rule_0': "((X['A']>=0)|(X['A'].isna()))&((X['A']>=-1)|(X['A'].isna()))",
    'combined_rule_1': "((X['A']>=0)|(X['A'].isna()))&((X['A']<=0)|(X['A'].isna()))",
    'combined_rule_2': "((X['A']>=0)|(X['A'].isna()))&(X['A']>=2)",
    'combined_rule_3': "((X['A']>=0)|(X['A'].isna()))&(X['country']=='GB')",
    'combined_rule_4': "((X['A']>=0)|(X['A'].isna()))&(X['country']!='FR')",
    'combined_rule_5': "((X['A']>=0)|(X['A'].isna()))&(X['country'].isna())",
    'combined_rule_6': "((X['A']>=0)|(X['A'].isna()))&(~X['country'].isna())",
    'combined_rule_7': "((X['A']>=-1)|(X['A'].isna()))&((X['A']<=0)|(X['A'].isna()))",
    'combined_rule_8': "((X['A']>=-1)|(X['A'].isna()))&(X['A']>=2)",
    'combined_rule_9': "((X['A']>=-1)|(X['A'].isna()))&(X['country']=='GB')",
    'combined_rule_10': "((X['A']>=-1)|(X['A'].isna()))&(X['country']!='FR')",
    'combined_rule_11': "((X['A']>=-1)|(X['A'].isna()))&(X['country'].isna())",
    'combined_rule_12': "((X['A']>=-1)|(X['A'].isna()))&(~X['country'].isna())",
    'combined_rule_13': "((X['A']<=0)|(X['A'].isna()))&(X['A']>=2)",
    'combined_rule_14': "((X['A']<=0)|(X['A'].isna()))&(X['country']=='GB')",
    'combined_rule_15': "((X['A']<=0)|(X['A'].isna()))&(X['country']!='FR')",
    'combined_rule_16': "((X['A']<=0)|(X['A'].isna()))&(X['country'].isna())",
    'combined_rule_17': "((X['A']<=0)|(X['A'].isna()))&(~X['country'].isna())",
    'combined_rule_18': "(X['A']>=2)&(X['country']=='GB')",
    'combined_rule_19': "(X['A']>=2)&(X['country']!='FR')",
    'combined_rule_20': "(X['A']>=2)&(X['country'].isna())",
    'combined_rule_21': "(X['A']>=2)&(~X['country'].isna())",
    'combined_rule_22': "(X['country']=='GB')&(X['country']!='FR')",
    'combined_rule_23': "(X['country']=='GB')&(X['country'].isna())",
    'combined_rule_24': "(X['country']=='GB')&(~X['country'].isna())",
    'combined_rule_25': "(X['country']!='FR')&(X['country'].isna())",
    'combined_rule_26': "(X['country']!='FR')&(~X['country'].isna())",
    'combined_rule_27': "(X['country'].isna())&(~X['country'].isna())",
}

In [183]:
expected_general_rule_strings == c.rules.rule_strings

True

In [189]:
# Reset the replacement mapping, then process a single combined rule so that the mapping
# displayed afterwards contains only that rule's conditions.
# NOTE(review): calls a private method directly - presumably it populates
# `condition_replacement_dict` as a side effect; confirm against the class.
c.condition_replacement_dict = {}
c._recurse_create_condition_replacement_dict(rule_string=rule_strings['combined_rule_5'], X=X_processed)

In [190]:
c.condition_replacement_dict

{"(X['A']>=0)": "((X['A']>=0)|(X['A'].isna()))",
 "(X['country_missing']==True)": "(X['country'].isna())"}

In [173]:
from rule_application.argo_rule_applier import ArgoRuleApplier

In [175]:
apply_processed = ArgoRuleApplier(rule_strings=rule_strings)
X_rules_processed = apply_processed.apply(X_processed)

In [177]:
apply_general = ArgoRuleApplier(rule_strings=c.rules.rule_strings)
X_rules_general = apply_general.apply(X_orig)

In [180]:
(X_rules_processed == X_rules_general).mean().mean()

1.0

In [191]:
test = {'A':1, 'B':2}

In [193]:
# NOTE(review): scratch check - indexing a dict with 'A', 'B' looks up the tuple key ('A', 'B'),
# hence the KeyError in the output. Consider removing this cell from the final notebook.
test['A', 'B']

KeyError: ('A', 'B')