# Compare `l3wrapper` and `AL3.jar`

## Setup

In [1]:
import random
from os.path import join
from typing import Tuple, List

import arff

from src import DEFAULT_DIR
from src.dataset import Dataset


def load_arff(f) -> Dataset:
    """Parse the ARFF content readable from `f` and wrap it in a `Dataset`.

    The parsed document's 'data' and 'attributes' entries are handed to the
    `Dataset` constructor, in that order.
    """
    parsed = arff.load(f)
    return Dataset(parsed['data'], parsed['attributes'])

def import_dataset_arff(f, explain_indices: List[int],
                        random_explain_dataset: bool) -> Tuple[Dataset, Dataset, List[str]]:
    """Load an ARFF file and split it into a training and an explain dataset.

    Args:
        f: Open file object, forwarded to `load_arff`.
        explain_indices: Row positions to hold out of the training dataset.
            Replaced by a random selection when `random_explain_dataset` is true.
        random_explain_dataset: When true, the held-out rows are drawn at
            random after seeding the global `random` generator with 1:
            100 rows, or 20% of the rows if the dataset has fewer than 200.

    Returns:
        A tuple `(train_dataset, explain_dataset, explain_ids)` where
        `explain_ids` are the held-out row positions rendered as strings.

    Raises:
        ValueError: If `explain_indices` contains a duplicate or a value that
            is not a row position of the dataset.
    """
    dataset = load_arff(f)

    dataset_len = len(dataset)
    all_indices = list(range(dataset_len))

    if random_explain_dataset:
        # Fixed seed so the same rows are held out on every run.
        random.seed(1)
        # Cap on the explain dataset size; small datasets give up 20% instead.
        MAX_SAMPLE_COUNT = 100
        if dataset_len < (2 * MAX_SAMPLE_COUNT):
            samples = int(0.2 * dataset_len)
        else:
            samples = MAX_SAMPLE_COUNT

        # Randomly pick the instances to remove from the training dataset and
        # use in the explain dataset.
        explain_indices = list(random.sample(all_indices, samples))

    # A set makes the exclusion linear instead of one list scan per held-out row.
    held_out = set(explain_indices)
    if len(held_out) != len(explain_indices) or not held_out.issubset(all_indices):
        raise ValueError("explain_indices must be unique row positions of the dataset")
    train_indices = [i for i in all_indices if i not in held_out]

    train_dataset = Dataset.from_indices(train_indices, dataset)
    explain_dataset = Dataset.from_indices(explain_indices, dataset)

    return train_dataset, explain_dataset, [str(i) for i in explain_indices]

/home/andrea/Documents/Politecnico/tirocinio/src/../


In [13]:
def l3wrapper(instance_ix, train, clf):
    """Collect the level-1 rules of `clf` that cover a training instance and predict its class.

    Args:
        instance_ix: Position of the instance inside `train`.
        train: Dataset supporting `train[ix]` and `inverse_transform_instance`;
            the decoded instance must allow label lookup (`inst[name]`),
            `.index.get_loc(name)` and `.iloc[-1]`.
        clf: Fitted classifier exposing `lvl1_rules_`, `_class_dict`,
            `_item_id_to_item` and `_column_id_to_name`.

    Returns:
        A list of `(attribute_positions, support)` tuples, in the order the
        rules appear in `clf.lvl1_rules_`. `attribute_positions` holds the
        sorted 1-based positions, within the decoded instance, of the
        attributes used by the rule body.
    """
    decoded_instance = train.inverse_transform_instance(train[instance_ix])
    instance_index = decoded_instance.index

    rules = []
    for rule in clf.lvl1_rules_:
        # Decode the rule once; every check below reuses these values.
        rule_class = clf._class_dict[rule.class_id]
        items = sorted(clf._item_id_to_item[item_id] for item_id in rule.item_ids)
        body = [(clf._column_id_to_name[col_id], value) for col_id, value in items]
        support = rule.support

        # Skip rules whose body uses a value different from the instance's.
        if any(decoded_instance[attr] != value for attr, value in body):
            continue

        # Keep the rule only if it predicts the instance's own class, which is
        # read from the last entry of the decoded instance.
        if rule_class == decoded_instance.iloc[-1]:
            positions = sorted(instance_index.get_loc(attr) + 1 for attr, _ in body)
            rules.append((positions, support))

    return rules

## Monks

In [3]:
# Load the monks dataset and hold out a random explain subset
# (no explicit explain indices, random selection enabled).
monks_path = join(DEFAULT_DIR, "datasets/monks.arff")
with open(monks_path) as monks_f:
    monks_split = import_dataset_arff(monks_f, [], True)
monks_train, monks_explain, monks_explain_indices = monks_split

In [4]:
from tabulate import tabulate

In [5]:
from l3wrapper.l3wrapper import L3Classifier

Vediamo che nessuno dei classificatori genera regole con un supporto inferiore a 7.5, nemmeno quello con `min_sup=0.01`, cioè con supporto minimo 1.

In [47]:
table = []

# Fit one classifier per minimum-support threshold, in increasing order.
fitted = []
for min_sup in (0.01, 0.05, 0.075):
    clf = L3Classifier(min_sup=min_sup, min_conf=0.50)
    clf.fit(monks_train.X_decoded(),
            monks_train.Y_decoded(),
            column_names=monks_train.X_decoded().columns.to_list())
    fitted.append(clf)
clf1, clf2, clf3 = fitted

# For every training instance, gather the matching rules of each classifier
# and report any rule whose support falls below 7.5.
for i in range(len(monks_train)):
    row = []
    for label, clf in zip('abc', fitted):
        matching = l3wrapper(i, monks_train, clf)
        for _, sup in matching:
            if sup < 7.5:
                # Only the first classifier's report includes the support value.
                if label == 'a':
                    print('a', i, sup)
                else:
                    print(label, i)
        row.append(matching)
    table.append(row)

# NOTE(review): `HTML` is not imported in any visible cell; presumably
# `from IPython.display import HTML` ran in a cell that is no longer here — confirm.
display(HTML(tabulate(table, tablefmt='html', headers=[0.01, 0.05,  0.075], showindex=True)))

Unnamed: 0,0.01,0.05,0.075
0,"[([1, 2], 51)]","[([1, 2], 51)]","[([1, 2], 51)]"
1,"[([1, 2], 51)]","[([1, 2], 51)]","[([1, 2], 51)]"
2,"[([1, 2], 51)]","[([1, 2], 51)]","[([1, 2], 51)]"
3,"[([1, 2], 51)]","[([1, 2], 51)]","[([1, 2], 51)]"
4,"[([1, 2], 51)]","[([1, 2], 51)]","[([1, 2], 51)]"
5,"[([1, 2], 51)]","[([1, 2], 51)]","[([1, 2], 51)]"
6,"[([1, 2], 51)]","[([1, 2], 51)]","[([1, 2], 51)]"
7,"[([5], 111), ([1, 2], 51)]","[([5], 111), ([1, 2], 51)]","[([5], 111), ([1, 2], 51)]"
8,"[([1, 2, 5], 11)]","[([1, 2, 5], 11)]","[([1, 2], 42)]"
9,"[([1, 2, 5], 16)]","[([1, 2, 5], 16)]","[([1, 2, 5], 16), ([1, 2], 42)]"


In [49]:
# Load the extended monks dataset and hold out a random explain subset
# (no explicit explain indices, random selection enabled).
monks_extended_path = join(DEFAULT_DIR, "datasets/monks_extended.arff")
with open(monks_extended_path) as monks_extended_f:
    monks_extended_split = import_dataset_arff(monks_extended_f, [], True)
monks_extended_train, monks_extended_explain, monks_extended_explain_indices = monks_extended_split

In [50]:
table = []

# Fit one classifier per minimum-support threshold, in increasing order.
fitted = []
for min_sup in (0.01, 0.05, 0.075):
    clf = L3Classifier(min_sup=min_sup, min_conf=0.50)
    clf.fit(monks_extended_train.X_decoded(),
            monks_extended_train.Y_decoded(),
            column_names=monks_extended_train.X_decoded().columns.to_list())
    fitted.append(clf)
clf1, clf2, clf3 = fitted

# Iterate over the extended training set itself. The loop previously used
# len(monks_train), so its bound came from a different dataset than the one
# being indexed.
for i in range(len(monks_extended_train)):
    row = []
    for label, clf in zip('abc', fitted):
        matching = l3wrapper(i, monks_extended_train, clf)
        for _, sup in matching:
            # Report any rule whose support falls below 7.5.
            if sup < 7.5:
                # Only the first classifier's report includes the support value.
                if label == 'a':
                    print('a', i, sup)
                else:
                    print(label, i)
        row.append(matching)
    table.append(row)

# NOTE(review): `HTML` is not imported in any visible cell; presumably
# `from IPython.display import HTML` ran in a cell that is no longer here — confirm.
display(HTML(tabulate(table, tablefmt='html', headers=[0.01, 0.05,  0.075], showindex=True)))

Unnamed: 0,0.01,0.05,0.075
0,"[([1, 2, 5], 791)]","[([1, 2, 5], 791)]","[([1, 2, 5], 791)]"
1,"[([1, 2, 5], 817)]","[([1, 2, 5], 817)]","[([1, 2, 5], 817)]"
2,"[([1, 2, 5], 909)]","[([1, 2, 5], 909)]","[([1, 2, 5], 909)]"
3,"[([1, 2, 5], 895)]","[([1, 2, 5], 895)]","[([1, 2, 5], 895)]"
4,"[([1, 2, 5], 799)]","[([1, 2, 5], 799)]","[([1, 2, 5], 799)]"
5,"[([1, 2, 5], 817)]","[([1, 2, 5], 817)]","[([1, 2, 5], 817)]"
6,"[([1, 2, 5], 799)]","[([1, 2, 5], 799)]","[([1, 2, 5], 799)]"
7,"[([1, 2, 5], 862)]","[([1, 2, 5], 862)]","[([1, 2, 5], 862)]"
8,"[([1, 2, 5], 834)]","[([1, 2, 5], 834)]","[([1, 2, 5], 834)]"
9,"[([1, 2, 5], 834)]","[([1, 2, 5], 834)]","[([1, 2, 5], 834)]"
