diff --git a/skrules/__init__.py b/skrules/__init__.py index eeca603..7789da2 100644 --- a/skrules/__init__.py +++ b/skrules/__init__.py @@ -1,4 +1,4 @@ from .skope_rules import SkopeRules -from .rule import Rule, replace_feature_name +from .rule import Rule __all__ = ['SkopeRules', 'Rule'] diff --git a/skrules/rule.py b/skrules/rule.py index 7c11979..8e4413a 100644 --- a/skrules/rule.py +++ b/skrules/rule.py @@ -1,13 +1,3 @@ -import re - -def replace_feature_name(rule, replace_dict): - def replace(match): - return replace_dict[match.group(0)] - - rule = re.sub('|'.join(r'\b%s\b' % re.escape(s) for s in replace_dict), - replace, rule) - return rule - class Rule: """ An object modelizing a logical rule and add factorization methods. It is used to simplify rules and deduplicate them. @@ -66,4 +56,3 @@ def __repr__(self): [feature, symbol, str(self.agg_dict[(feature, symbol)])]) for feature, symbol in sorted(self.agg_dict.keys()) ]) - diff --git a/skrules/skope_rules.py b/skrules/skope_rules.py index aa11045..11e8e0b 100644 --- a/skrules/skope_rules.py +++ b/skrules/skope_rules.py @@ -12,10 +12,10 @@ from sklearn.externals import six from sklearn.tree import _tree -from .rule import Rule, replace_feature_name +from .rule import Rule INTEGER_TYPES = (numbers.Integral, np.integer) -BASE_FEATURE_NAME = "__C__" + class SkopeRules(BaseEstimator): """ An easy-interpretable classifier optimizing simple logical rules. @@ -249,17 +249,11 @@ def fit(self, X, y, sample_weight=None): self.estimators_samples_ = [] self.estimators_features_ = [] - # default columns names : - feature_names_ = [BASE_FEATURE_NAME + x for x in - np.arange(X.shape[1]).astype(str)] - if self.feature_names is not None: - self.feature_dict_ = {BASE_FEATURE_NAME + str(i): feat - for i, feat in enumerate(self.feature_names)} - else: - self.feature_dict_ = {BASE_FEATURE_NAME + str(i): feat - for i, feat in enumerate(feature_names_)} + # default columns names of the form ['c0', 'c1', ...]: + feature_names_ = (self.feature_names if self.feature_names is not None + else ['c' + x for x in + np.arange(X.shape[1]).astype(str)]) self.feature_names_ = feature_names_ - clfs = [] regs = [] @@ -362,10 +356,6 @@ def fit(self, X, y, sample_weight=None): for rule in [Rule(r, args=args) for r, args in rules_]] - - - - # keep only rules verifying precision_min and recall_min: for rule, score in rules_: if score[0] >= self.precision_min and score[1] >= self.recall_min: @@ -387,14 +377,7 @@ def fit(self, X, y, sample_weight=None): # Deduplicate the rule using semantic tree if self.max_depth_duplication is not None: self.rules_ = self.deduplicate(self.rules_) - self.rules_ = sorted(self.rules_, key=lambda x: - self.f1_score(x)) - self.rules_without_feature_names_ = self.rules_ - - # Replace generic feature names by real feature names - self.rules_ = [(replace_feature_name(rule, self.feature_dict_), perf) - for rule, perf in self.rules_] - return self def predict(self, X): @@ -449,7 +432,7 @@ def decision_function(self, X): % (X.shape[1], self.n_features_)) df = pandas.DataFrame(X, columns=self.feature_names_) - selected_rules = self.rules_without_feature_names_ + selected_rules = self.rules_ scores = np.zeros(X.shape[0]) for (r, w) in selected_rules: diff --git a/skrules/tests/test_rule.py b/skrules/tests/test_rule.py index ab1048a..9b8e28a 100644 --- a/skrules/tests/test_rule.py +++ b/skrules/tests/test_rule.py @@ -1,6 +1,6 @@ from sklearn.utils.testing import assert_equal, assert_not_equal -from skrules import Rule, replace_feature_name +from skrules import Rule def test_rule(): @@ -53,13 +53,3 @@ def test_equals_rule(): rule3 = "a < 3.0 and a == a" assert_equal(rule3, str(Rule(rule3))) - - -def test_replace_feature_name(): - rule = "__C__0 <= 3 and __C__1 > 4" - real_rule = "$b <= 3 and c(4) > 4" - replace_dict = { - "__C__0": "$b", - "__C__1": "c(4)" - } - assert_equal(replace_feature_name(rule, replace_dict=replace_dict), real_rule)