In [15]:
import wittgenstein as rule
import torch
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_text
from datasets.dataset import transform_dataset, kfold_dataset
from R2Ntab import train as train_r2ntab, R2Ntab

In [16]:
# Read datasets
name = 'adult'
X, Y, X_headers, Y_headers = transform_dataset(name, method='onehot-compare', negations=False, labels='binary')
datasets = kfold_dataset(X, Y, shuffle=1)
X_train, X_test, Y_train, Y_test = datasets[0]
train_set = torch.utils.data.TensorDataset(torch.Tensor(X_train.to_numpy()), torch.Tensor(Y_train))
test_set = torch.utils.data.TensorDataset(torch.Tensor(X_test.to_numpy()), torch.Tensor(Y_test))

In [17]:
X_train = pd.DataFrame(X_train)
X_train = X_train.sort_index(axis=1)
X_test = pd.DataFrame(X_test)
X_test = X_test.sort_index(axis=1)

In [23]:
rule_learners = ['r2ntab', 'ripper', 'cart', 'c4.5']
def run_learner(rule_learner):
    if rule_learner == 'r2ntab':
        model = R2Ntab(train_set[:][0].size(1), 50, 1)
        train_r2ntab(model, train_set, test_set=test_set, device='cpu', lr_rules=1e-2, lr_cancel=1e-2, 
                epochs=1000, batch_size=400, and_lam=1e-2, or_lam=1e-5, cancel_lam=1e-4, num_alter=500)
        acc = (model.predict(np.array(X_test)) == Y_test).mean()
        sparsity = sum(map(len, model.get_rules(X_headers)))
    elif rule_learner == 'ripper':
        model = rule.RIPPER()
        model.fit(X_train, Y_train)
        acc = model.score(X_test, Y_test)
        sparsity = sum(len(rule) for rule in model.ruleset_)
    elif rule_learner == 'cart':
        model = DecisionTreeClassifier()
        model.fit(X_train, Y_train)
        acc = model.score(X_test, Y_test)
        sparsity = export_text(model, feature_names=X_train.columns.tolist()).count('(')
    elif rule_learner == 'c4.5':
        model = DecisionTreeClassifier(criterion='entropy')
        model.fit(X_train, Y_train)
        acc = model.score(X_test, Y_test)
        sparsity = export_text(model, feature_names=X_train.columns.tolist()).count('(')
        
    return acc, sparsity

In [21]:
runs = 3
accuracies = {}
sparsities = {}
for learner in rule_learners:
    accuracies[learner] = []
    sparsities[learner] = []
    
for run in range(runs):
    print(f'run {run+1}') 
    for learner in rule_learners:
        acc, sparsity = run_learner(learner)
        
        accuracies[learner].append(acc)
        sparsities[learner].append(sparsity)

run 1


Epoch: 100%|██████████| 1000/1000 [04:27<00:00,  3.73it/s, rules cancelled=74, l


run 2


Epoch: 100%|██████████| 1000/1000 [04:21<00:00,  3.82it/s, rules cancelled=88, l


run 3


Epoch: 100%|██████████| 1000/1000 [04:18<00:00,  3.87it/s, rules cancelled=75, l


In [22]:
print(accuracies)
print(sparsities)
for learner in rule_learners:
    print(learner, np.mean(accuracies[learner]), np.std(accuracies[learner]), np.mean(sparsities[learner]), np.std(sparsities[learner]))

{'r2ntab': [None, None, None], 'ripper': [0.8284435604177026, 0.827780540361346, 0.8247969501077408], 'cart': [0.7860102768108735, 0.7881650919940328, 0.7858445217967843], 'c4.5': [0.7894911321067463, 0.7868390518813194, 0.7825294215150008]}
{'r2ntab': [55, 46, 92], 'ripper': [236, 218, 226], 'cart': [1254, 1256, 1254], 'c4.5': [1088, 1094, 1090]}


TypeError: unsupported operand type(s) for +: 'NoneType' and 'NoneType'