# Generating rules with RuleKit

Currently, decision-rules does not generate rules itself. You can, however, generate rules using RuleKit and convert the resulting model into a decision-rules rule set.

We will start by importing the RuleKit package.

In [1]:
import pandas as pd
from rulekit import RuleKit
from rulekit.classification import RuleClassifier
from rulekit.params import Measures

# Initialise RuleKit up front, before the classifier is created further below.
# NOTE(review): presumably this starts RuleKit's backend — confirm in the RuleKit docs.
RuleKit.init()

We will use the following zoo dataset:

In [2]:
# Load the zoo dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("resources/zoo.csv")
# Rich display of the frame; the "..." row in the output shows it is rendered truncated.
display(df)

Unnamed: 0,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,class
0,True,False,False,True,False,False,True,True,True,True,False,False,4.0,False,False,True,mammal
1,True,False,False,True,False,False,False,True,True,True,False,False,4.0,True,False,True,mammal
2,False,False,True,False,False,True,True,True,True,False,False,True,0.0,True,False,False,fish
3,True,False,False,True,False,False,True,True,True,True,False,False,4.0,False,False,True,mammal
4,True,False,False,True,False,False,True,True,True,True,False,False,4.0,True,False,True,mammal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,True,False,False,True,False,False,False,True,True,True,False,False,2.0,True,False,True,mammal
97,True,False,True,False,True,False,False,False,False,True,True,False,6.0,False,False,False,insect
98,True,False,False,True,False,False,True,True,True,True,False,False,4.0,True,False,True,mammal
99,False,False,True,False,False,False,False,False,False,True,False,False,0.0,False,False,False,invertebrate


The `class` column will be our target `y` and the other columns will be predictors `X`.

In [3]:
# The label is the "class" column; every remaining column is a predictor.
y = df["class"]
X = df.drop(columns=["class"])

Now we will generate the rules using RuleKit. We just need to create a `RuleClassifier` object and train it using the `fit` function.

In [4]:
# Create the classifier without passing any arguments (default configuration).
rulekit_model = RuleClassifier()
# Train on the full dataset. As the last expression of the cell, the call's
# return value is echoed by the notebook (the classifier repr in the output).
rulekit_model.fit(X, y)

<rulekit.classification.RuleClassifier at 0x7f5dcda44490>

Let's see the generated rules.

In [5]:

# Print every rule of the trained RuleKit model, one rule per line.
for induced_rule in rulekit_model.model.rules:
    print(induced_rule)

IF aquatic = <0.50, inf) AND legs = <3, inf) AND toothed = <0.50, inf) AND hair = (-inf, 0.50) THEN class = {amphibian}
IF feathers = <0.50, inf) THEN class = {bird}
IF fins = <0.50, inf) AND eggs = <0.50, inf) THEN class = {fish}
IF legs = <5.50, inf) AND aquatic = (-inf, 0.50) AND eggs = <0.50, inf) THEN class = {insect}
IF backbone = (-inf, 0.50) AND airborne = (-inf, 0.50) THEN class = {invertebrate}
IF milk = <0.50, inf) THEN class = {mammal}
IF toothed = <0.50, inf) AND fins = (-inf, 0.50) AND legs = (-inf, 2) THEN class = {reptile}
IF hair = (-inf, 0.50) AND toothed = <0.50, inf) AND aquatic = (-inf, 0.50) THEN class = {reptile}
IF hair = (-inf, 0.50) AND feathers = (-inf, 0.50) AND aquatic = (-inf, 0.50) AND backbone = <0.50, inf) THEN class = {reptile}


The `RuleKitRuleSetFactory` class from `ruleset_factories` converts the RuleKit model into a decision-rules rule set.

In [6]:
# NOTE(review): `_factories` is underscore-prefixed, which conventionally marks a
# private module — check whether the package offers a public import path.
from ruleset_factories._factories.classification import RuleKitRuleSetFactory
factory = RuleKitRuleSetFactory()
# Build the decision-rules rule set from the trained RuleKit model and the training data.
decision_rules_ruleset = factory.make(rulekit_model, X, y)

Let's check if the rules in `decision_rules_ruleset` are the same as in `rulekit_model`.

In [7]:
# Print the converted rules, one per line, for comparison with the RuleKit listing.
for converted_rule in decision_rules_ruleset.rules:
    print(converted_rule)

IF aquatic >= 0.50 AND legs >= 3.00 AND toothed >= 0.50 AND hair < 0.50 THEN class = amphibian (p=4, n=0, P=4, N=97)
IF feathers >= 0.50 THEN class = bird (p=20, n=0, P=20, N=81)
IF fins >= 0.50 AND eggs >= 0.50 THEN class = fish (p=13, n=0, P=13, N=88)
IF legs >= 5.50 AND aquatic < 0.50 AND eggs >= 0.50 THEN class = insect (p=8, n=0, P=8, N=93)
IF backbone < 0.50 AND airborne < 0.50 THEN class = invertebrate (p=10, n=2, P=10, N=91)
IF milk >= 0.50 THEN class = mammal (p=41, n=0, P=41, N=60)
IF toothed >= 0.50 AND fins < 0.50 AND legs < 2.00 THEN class = reptile (p=3, n=0, P=5, N=96)
IF hair < 0.50 AND toothed >= 0.50 AND aquatic < 0.50 THEN class = reptile (p=3, n=0, P=5, N=96)
IF hair < 0.50 AND feathers < 0.50 AND aquatic < 0.50 AND backbone >= 0.50 THEN class = reptile (p=4, n=0, P=5, N=96)


We can now predict values using `decision_rules_ruleset` and calculate various statistics describing the rules.

In [8]:
# Predict on the same data the rules were generated from (there is no held-out split here).
y_pred = decision_rules_ruleset.predict(X)
display(y_pred)

array(['mammal', 'mammal', 'fish', 'mammal', 'mammal', 'mammal', 'mammal',
       'fish', 'fish', 'mammal', 'mammal', 'bird', 'fish', 'invertebrate',
       'invertebrate', 'invertebrate', 'bird', 'mammal', 'fish', 'mammal',
       'bird', 'bird', 'mammal', 'bird', 'insect', 'amphibian',
       'amphibian', 'mammal', 'mammal', 'mammal', 'insect', 'mammal',
       'mammal', 'bird', 'fish', 'mammal', 'mammal', 'bird', 'fish',
       'insect', 'insect', 'bird', 'insect', 'bird', 'mammal', 'mammal',
       'invertebrate', 'mammal', 'mammal', 'mammal', 'mammal', 'insect',
       'amphibian', 'invertebrate', 'mammal', 'mammal', 'bird', 'bird',
       'bird', 'bird', 'fish', 'fish', 'reptile', 'mammal', 'mammal',
       'mammal', 'mammal', 'mammal', 'mammal', 'mammal', 'mammal', 'bird',
       'invertebrate', 'fish', 'mammal', 'mammal', 'reptile',
       'invertebrate', 'bird', 'bird', 'reptile', 'invertebrate', 'fish',
       'bird', 'mammal', 'invertebrate', 'fish', 'bird', 'insect',
      

The `calculate_for_classification` function computes the usual classification metrics, such as accuracy or F1.

In [9]:
from decision_rules.classification.prediction_indicators import calculate_for_classification
# Compare the true labels `y` with the predictions computed in the previous cell.
# NOTE(review): argument order is assumed to be (true, predicted) — confirm against the API.
metrics = calculate_for_classification(y, y_pred)
display(metrics)

{'type_of_problem': 'classification',
 'general': {'Balanced_accuracy': 1.0,
  'Accuracy': 1.0,
  'Cohen_kappa': 1.0,
  'F1_micro': 1.0,
  'F1_macro': 1.0,
  'F1_weighted': 1.0,
  'G_mean_micro': 1.0,
  'G_mean_macro': 1.0,
  'G_mean_weighted': 1.0,
  'Recall_micro': 1.0,
  'Recall_macro': 1.0,
  'Recall_weighted': 1.0,
  'Specificity': 1.0,
  'Confusion_matrix': {'classes': ['amphibian',
    'bird',
    'fish',
    'insect',
    'invertebrate',
    'mammal',
    'reptile'],
   'amphibian': [4, 0, 0, 0, 0, 0, 0],
   'bird': [0, 20, 0, 0, 0, 0, 0],
   'fish': [0, 0, 13, 0, 0, 0, 0],
   'insect': [0, 0, 0, 8, 0, 0, 0],
   'invertebrate': [0, 0, 0, 0, 10, 0, 0],
   'mammal': [0, 0, 0, 0, 0, 41, 0],
   'reptile': [0, 0, 0, 0, 0, 0, 5]}},
 'for_classes': {'amphibian': {'TP': 4,
   'FP': 0,
   'TN': 97,
   'FN': 0,
   'Recall': 1.0,
   'Specificity': 1.0,
   'F1_score': 1.0,
   'G_mean': 1.0,
   'MCC': 1.0,
   'PPV': 1.0,
   'NPV': 1.0,
   'LR_plus': 0,
   'LR_minus': 0.0,
   'Odd_ratio': 0,

`calculate_ruleset_stats` shows some general information about the rule set.

In [10]:
# Aggregate statistics for the whole rule set (rule count, average precision, ...).
general_stats = decision_rules_ruleset.calculate_ruleset_stats()
print(general_stats)

{'rules_count': 9, 'avg_conditions_count': 2.56, 'avg_precision': 0.98, 'avg_coverage': 0.89, 'total_conditions_count': 23}


You can compute metrics describing each rule using `calculate_rules_metrics`.

In [11]:
# Per-rule quality metrics, keyed by rule id (the rule's UUID, as seen in the output).
# Distinct names are used for the mapping and for the loop variable: the original
# reused `metrics` for both, so the loop overwrote the dict it was iterating over
# and left `metrics` bound to the last rule's values once the cell finished.
rules_metrics = decision_rules_ruleset.calculate_rules_metrics(X, y)
for rule_id, rule_metrics in rules_metrics.items():
    print('Rule', rule_id)
    print(rule_metrics)

Rule 3357850a-4701-41ee-92f5-17d524564033
{'p': 4, 'n': 0, 'P': 4, 'N': 97, 'p_unique': 4, 'n_unique': 4, 'support': 4, 'conditions_count': 4, 'precision': 1.0, 'coverage': 1.0, 'C2': 1.0, 'RSS': 1.0, 'correlation': 1.0, 'lift': 25.25, 'p_value': 2.4492245142881635e-07, 'TP': 4, 'FP': 0, 'TN': 97, 'FN': 0, 'sensitivity': 1.0, 'specificity': 1.0, 'negative_predictive_value': 1.0, 'odds_ratio': inf, 'relative_risk': inf, 'lr+': inf, 'lr-': 0.0}
Rule 0d28724c-56c4-47bb-847e-96fc60e191d9
{'p': 20, 'n': 0, 'P': 20, 'N': 81, 'p_unique': 20, 'n_unique': 20, 'support': 20, 'conditions_count': 1, 'precision': 1.0, 'coverage': 1.0, 'C2': 1.0, 'RSS': 1.0, 'correlation': 1.0, 'lift': 5.05, 'p_value': 1.4962781353003363e-21, 'TP': 20, 'FP': 0, 'TN': 81, 'FN': 0, 'sensitivity': 1.0, 'specificity': 1.0, 'negative_predictive_value': 1.0, 'odds_ratio': inf, 'relative_risk': inf, 'lr+': inf, 'lr-': 0.0}
Rule 9804e9ab-07d5-4954-8a32-22cb0033196a
{'p': 13, 'n': 0, 'P': 13, 'N': 88, 'p_unique': 13, 'n_uniq

The `calculate_condition_importances` method computes the importance of each condition in the rule set. Similarly, `calculate_attribute_importances` calculates the importance of each attribute.

In [12]:
from decision_rules.measures import c2
# Condition importances are computed with respect to the chosen measure (here: c2).
condition_importances = decision_rules_ruleset.calculate_condition_importances(X, y, measure=c2)
print('Condition importances:')
display(condition_importances)
# Attribute importances are derived from the condition importances computed above.
attribute_importances = decision_rules_ruleset.calculate_attribute_importances(condition_importances)
print('Attribute importances:')
display(attribute_importances)

Condition importances:


{'amphibian': [{'condition': 'legs >= 3.00',
   'attributes': ['legs'],
   'importance': 0.21835455831817263},
  {'condition': 'toothed >= 0.50',
   'attributes': ['toothed'],
   'importance': 0.15137644827521457},
  {'condition': 'aquatic >= 0.50',
   'attributes': ['aquatic'],
   'importance': 0.0706758304696449},
  {'condition': 'hair < 0.50',
   'attributes': ['hair'],
   'importance': 0.05970494134376112}],
 'bird': [{'condition': 'feathers >= 0.50',
   'attributes': ['feathers'],
   'importance': 1.0}],
 'fish': [{'condition': 'fins >= 0.50',
   'attributes': ['fins'],
   'importance': 0.812392368349497},
  {'condition': 'eggs >= 0.50',
   'attributes': ['eggs'],
   'importance': 0.187607631650503}],
 'insect': [{'condition': 'legs >= 5.50',
   'attributes': ['legs'],
   'importance': 0.47480739916779957},
  {'condition': 'aquatic < 0.50',
   'attributes': ['aquatic'],
   'importance': 0.11461012106173396},
  {'condition': 'eggs >= 0.50',
   'attributes': ['eggs'],
   'importance

Attribute importances:


{'amphibian': {'legs': 0.21835455831817263,
  'toothed': 0.15137644827521457,
  'aquatic': 0.0706758304696449,
  'hair': 0.05970494134376112},
 'bird': {'feathers': 1.0},
 'fish': {'fins': 0.812392368349497, 'eggs': 0.187607631650503},
 'insect': {'legs': 0.47480739916779957,
  'aquatic': 0.11461012106173396,
  'eggs': 0.060634901349317274},
 'invertebrate': {'backbone': 0.6437411794554653,
  'airborne': 0.17127713556284985},
 'mammal': {'milk': 1.0},
 'reptile': {'hair': 0.49337088161384896,
  'aquatic': 0.4172718233231141,
  'toothed': 0.4041948772993855,
  'legs': 0.2763037988581467,
  'fins': 0.23977086814257867,
  'feathers': 0.18732960390946504,
  'backbone': 0.16063629518072292}}

We can serialize the decision-rules rule set to a Python dict, which can later be stored in JSON format. We do this using `JSONSerializer.serialize`.

In [13]:
from decision_rules.serialization.utils import JSONSerializer
from decision_rules.classification.ruleset import ClassificationRuleSet

In [14]:
# Despite its name, `ruleset_json` is a Python dict (see the printed repr below),
# not a JSON string; it is written to a JSON file in the next cell.
ruleset_json = JSONSerializer.serialize(decision_rules_ruleset)
print(ruleset_json)

{'meta': {'attributes': ['hair', 'feathers', 'eggs', 'milk', 'airborne', 'aquatic', 'predator', 'toothed', 'backbone', 'breathes', 'venomous', 'fins', 'legs', 'tail', 'domestic', 'catsize'], 'decision_attribute': 'class', 'decision_attribute_distribution': {'amphibian': 4, 'bird': 20, 'fish': 13, 'insect': 8, 'invertebrate': 10, 'mammal': 41, 'reptile': 5}}, 'rules': [{'uuid': '3357850a-4701-41ee-92f5-17d524564033', 'string': 'IF aquatic >= 0.50 AND legs >= 3.00 AND toothed >= 0.50 AND hair < 0.50 THEN class = amphibian', 'premise': {'type': 'compound', 'operator': 'CONJUNCTION', 'subconditions': [{'type': 'elementary_numerical', 'attributes': [5], 'negated': False, 'left': 0.5, 'right': None, 'left_closed': True, 'right_closed': False}, {'type': 'elementary_numerical', 'attributes': [12], 'negated': False, 'left': 3.0, 'right': None, 'left_closed': True, 'right_closed': False}, {'type': 'elementary_numerical', 'attributes': [7], 'negated': False, 'left': 0.5, 'right': None, 'left_clos

In [15]:
import json
import os

# Create the target directory first so the cell does not fail with
# FileNotFoundError on a fresh checkout where `output/` does not exist yet.
os.makedirs('output', exist_ok=True)
with open('output/zoo.json', 'w') as f:
    json.dump(ruleset_json, f)

The serialized ruleset can be reloaded using `JSONSerializer.deserialize`.

In [16]:
# Read the stored JSON back into a dict, then rebuild the rule set from it.
with open('output/zoo.json') as json_file:
    deserialized_ruleset_json = json.load(json_file)

deserialized_ruleset = JSONSerializer.deserialize(
    deserialized_ruleset_json,
    target_class=ClassificationRuleSet,
)

Let's check if the rules are the same.

In [17]:
# Print the rules restored from the JSON file, one per line.
for restored_rule in deserialized_ruleset.rules:
    print(restored_rule)

IF aquatic >= 0.50 AND legs >= 3.00 AND toothed >= 0.50 AND hair < 0.50 THEN class = amphibian (p=4, n=0, P=4, N=97)
IF feathers >= 0.50 THEN class = bird (p=20, n=0, P=20, N=81)
IF fins >= 0.50 AND eggs >= 0.50 THEN class = fish (p=13, n=0, P=13, N=88)
IF legs >= 5.50 AND aquatic < 0.50 AND eggs >= 0.50 THEN class = insect (p=8, n=0, P=8, N=93)
IF backbone < 0.50 AND airborne < 0.50 THEN class = invertebrate (p=10, n=2, P=10, N=91)
IF milk >= 0.50 THEN class = mammal (p=41, n=0, P=41, N=60)
IF toothed >= 0.50 AND fins < 0.50 AND legs < 2.00 THEN class = reptile (p=3, n=0, P=5, N=96)
IF hair < 0.50 AND toothed >= 0.50 AND aquatic < 0.50 THEN class = reptile (p=3, n=0, P=5, N=96)
IF hair < 0.50 AND feathers < 0.50 AND aquatic < 0.50 AND backbone >= 0.50 THEN class = reptile (p=4, n=0, P=5, N=96)


Before using some of the functions of the deserialized ruleset, it may be necessary to call the `update` function. After that the object will be ready for prediction.

In [18]:
# Refresh the deserialized rule set on the data, using the c2 measure imported
# in an earlier cell, so that it is ready for prediction.
deserialized_ruleset.update(X, y, c2)
# Note: this rebinds `y_pred` from the earlier prediction cell.
y_pred = deserialized_ruleset.predict(X)
display(y_pred)

array(['mammal', 'mammal', 'fish', 'mammal', 'mammal', 'mammal', 'mammal',
       'fish', 'fish', 'mammal', 'mammal', 'bird', 'fish', 'invertebrate',
       'invertebrate', 'invertebrate', 'bird', 'mammal', 'fish', 'mammal',
       'bird', 'bird', 'mammal', 'bird', 'insect', 'amphibian',
       'amphibian', 'mammal', 'mammal', 'mammal', 'insect', 'mammal',
       'mammal', 'bird', 'fish', 'mammal', 'mammal', 'bird', 'fish',
       'insect', 'insect', 'bird', 'insect', 'bird', 'mammal', 'mammal',
       'invertebrate', 'mammal', 'mammal', 'mammal', 'mammal', 'insect',
       'amphibian', 'invertebrate', 'mammal', 'mammal', 'bird', 'bird',
       'bird', 'bird', 'fish', 'fish', 'reptile', 'mammal', 'mammal',
       'mammal', 'mammal', 'mammal', 'mammal', 'mammal', 'mammal', 'bird',
       'invertebrate', 'fish', 'mammal', 'mammal', 'reptile',
       'invertebrate', 'bird', 'bird', 'reptile', 'invertebrate', 'fish',
       'bird', 'mammal', 'invertebrate', 'fish', 'bird', 'insect',
      