# Basic use of single-target IDS (Interpretable Decision Sets)

In [20]:
%load_ext autoreload
%autoreload 2

import os

from typing import List
import pandas as pd

from mdrsl.project_info import project_dir
from mdrsl.toy_data.titanic import prepare_data_titanic
from mdrsl.data_structures.rules.pretty_printing import ids_car_to_pretty_string

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Loading the Titanic toy dataset

In [21]:
# Directory handed to the Titanic loader. The path components are passed
# separately so that os.path.join inserts the platform's own separator
# instead of relying on a hard-coded '/' inside one component.
data_dir = os.path.join(project_dir, 'data', 'external')

# prepare_data_titanic yields a training frame, a test frame and a dataset name.
# NOTE(review): `prop=0.25` presumably controls the train/test split
# proportion — confirm against prepare_data_titanic.
df_train: pd.DataFrame
df_test: pd.DataFrame
df_train, df_test, dataset_name = prepare_data_titanic(data_dir, prop=0.25)

# Preview the first rows of the training split.
df_train.head()

Unnamed: 0,Passenger_Cat,Age_Cat,Gender,Survived
0,3rd_class,adult,male,0
1,crew,adult,male,0
2,crew,adult,male,0
3,2nd_class,adult,male,0
4,crew,adult,male,0


In [22]:
# Count the distinct values in each column of the training split.
df_train.nunique()

Passenger_Cat    4
Age_Cat          2
Gender           2
Survived         2
dtype: int64

## Mining single-target class association rules

In [23]:
# NOTE: requires PyFIM
from pyarc.data_structures.car import ClassAssocationRule
from mdrsl.rule_generation.association_rule_mining.fim_impl.mine_st_rules_with_fim import mine_CARs

# Mine class association rules from the training split.
# No support threshold is passed in: the miner prints the support and
# confidence settings it actually ran with in its log output.
# NOTE(review): `rule_cutoff=50` looks like the target number of rules —
# confirm against mine_CARs.
cars: List[ClassAssocationRule] = mine_CARs(df_train, rule_cutoff=50, sample=False)

STARTING top_rules
--- iteration 1 ---
Running apriori with setting: confidence=0.5, support=0.05, min_length=2, max_length=3, MAX_RULE_LEN=4
Rule count: 52, Iteration: 1
	Target rule count satisfied: 50
--- end iteration 1 ---
-----------------------
FINISHED top_rules after 1 iterations


In [24]:
# Print every mined rule, one per line, in its readable string form.
mined_rule: ClassAssocationRule
for mined_rule in cars:
    pretty_rule = ids_car_to_pretty_string(mined_rule)
    print(pretty_rule)


Passenger_Cat=2nd_class, Age_Cat=child -> Survived=1
Passenger_Cat=1st_class, Age_Cat=child -> Survived=1
Passenger_Cat=1st_class, Gender=female -> Survived=1
Passenger_Cat=2nd_class, Gender=female -> Survived=1
Passenger_Cat=2nd_class, Gender=male -> Survived=0
Gender=male, Passenger_Cat=3rd_class -> Survived=0
Passenger_Cat=crew, Gender=female -> Survived=1
Age_Cat=adult, Gender=male -> Survived=0
Gender=male -> Survived=0
Passenger_Cat=crew, Gender=male -> Survived=0
Passenger_Cat=crew -> Survived=0
Age_Cat=adult, Passenger_Cat=crew -> Survived=0
Age_Cat=adult, Passenger_Cat=3rd_class -> Survived=0
Age_Cat=adult, Gender=female -> Survived=1
Passenger_Cat=3rd_class -> Survived=0
Gender=female -> Survived=1
Age_Cat=adult -> Survived=0
Passenger_Cat=1st_class -> Survived=1
Passenger_Cat=1st_class, Age_Cat=adult -> Survived=1
Passenger_Cat=1st_class, Gender=male -> Survived=0
Passenger_Cat=2nd_class, Age_Cat=adult -> Survived=0
Age_Cat=child, Passenger_Cat=3rd_class -> Survived=0
Age_Ca

## Fitting a single-target IDS model

In [25]:
from pyarc.qcba.data_structures import QuantitativeDataFrame
# Wrap both splits in pyarc's QuantitativeDataFrame; these wrapped frames are
# what the IDS fitting and scoring calls further down receive.
quant_dataframe_train = QuantitativeDataFrame(df_train)
quant_dataframe_test = QuantitativeDataFrame(df_test)

In [26]:
from mdrsl.rule_models.ids.model_fitting.ids_without_value_reuse import IDS
from submodmax.value_reuse.randomized_double_greedy_search import RandomizedDoubleGreedySearch

# Class-level setting on the search implementation, adjusted before the model is built.
# NOTE(review): this class is imported from `submodmax.value_reuse`, while IDS
# comes from `ids_without_value_reuse` — confirm this is the class the fit
# below actually uses.
RandomizedDoubleGreedySearch.N_TRIES = 1

# Fitting options, kept as variables so they are easy to tweak.
# NOTE(review): "RDGS" presumably selects the randomized double greedy search;
# no random seed is set in this notebook, so the selected rules may vary
# between runs — confirm.
algorithm = "RDGS"
debug = False

ids = IDS()
ids.fit(
    quant_dataframe_train,
    class_association_rules=cars,
    algorithm=algorithm,
    debug=debug,
)

cover cache prepared
overlap cache prepared
solution set size 4




<mdrsl.rule_models.ids.model_fitting.ids_without_value_reuse.IDS at 0x7f42772528b0>

In [27]:
from mdrsl.rule_models.ids.ids_classifier import IDSClassifier
from pyids.data_structures.ids_rule import IDSRule

# The classifier is read from the `clf` attribute of the IDS object fitted
# earlier; keep a handle on it and on its rule list for the following cells.
classifier: IDSClassifier = ids.clf
classifier_rules: List[IDSRule]  = classifier.rules
# Print the classifier's textual summary.
print(classifier)

IDS classifier (4 rules)
	Default value strategy: majority_class_in_all
	Default value: 0



In [28]:
# Print the rules held by the fitted classifier; each IDSRule exposes its
# underlying class association rule through `.car`.
selected_rule: IDSRule
for selected_rule in classifier_rules:
    underlying_car = selected_rule.car
    print(ids_car_to_pretty_string(underlying_car))

Gender=male -> Survived=0
Passenger_Cat=3rd_class -> Survived=0
Age_Cat=adult, Gender=female -> Survived=1
Age_Cat=adult, Gender=male -> Survived=1


In [29]:
# Score the fitted model on the held-out test split and report the result.
acc = ids.score(quant_dataframe_test)
print('accuracy:', acc)

accuracy: 0.775


In [30]:
from pyids.data_structures.ids_ruleset import IDSRuleSet
from mdrsl.rule_models.ids.model_evaluation.ids_interpretability_metrics import IDSInterpretabilityStatistics, IDSInterpretabilityStatisticsCalculator

# Bundle the classifier's rules into a rule set, then compute the
# interpretability statistics for that rule set on the test split.
selected_ruleset = IDSRuleSet(classifier_rules)
stats: IDSInterpretabilityStatistics = (
    IDSInterpretabilityStatisticsCalculator.calculate_ruleset_statistics(
        selected_ruleset,
        quant_dataframe_test,
    )
)
print(stats)

Rule length stats: count=4, sum=6, min=1, average=1.5, max=2
Fraction bodily overlap: 0.21174242424242423
Fraction uncovered examples: 0.013636363636363669
Fraction predicted classes: 1.0

