In [1]:
import wittgenstein as rule
import torch
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_text
from datasets.dataset import transform_dataset, kfold_dataset
from R2Ntab import train as train_r2ntab, R2Ntab

In [2]:
# Read datasets
name = 'adult'
X, Y, X_headers, Y_headers = transform_dataset(name, method='onehot-compare', negations=False, labels='binary')
datasets = kfold_dataset(X, Y, shuffle=1)
X_train, X_test, Y_train, Y_test = datasets[0]
train_set = torch.utils.data.TensorDataset(torch.Tensor(X_train.to_numpy()), torch.Tensor(Y_train))
test_set = torch.utils.data.TensorDataset(torch.Tensor(X_test.to_numpy()), torch.Tensor(Y_test))

In [3]:
X_train = pd.DataFrame(X_train)
X_train = X_train.sort_index(axis=1)
X_test = pd.DataFrame(X_test)
X_test = X_test.sort_index(axis=1)

In [4]:
rule_learners = ['r2ntab', 'ripper', 'cart', 'c4.5']
def run_learner(rule_learner):
    if rule_learner == 'r2ntab':
        model = R2Ntab(train_set[:][0].size(1), 50, 1)
        train_r2ntab(model, train_set, test_set=test_set, device='cpu', lr_rules=1e-2, lr_cancel=1e-2, 
                epochs=1000, batch_size=400, and_lam=1e-2, or_lam=1e-5, cancel_lam=1e-4, num_alter=500)
        acc = (model.predict(np.array(X_test)) == Y_test).mean()
        sparsity = sum(map(len, model.get_rules(X_headers)))
    elif rule_learner == 'ripper':
        model = rule.RIPPER()
        model.fit(X_train, Y_train)
        acc = model.score(X_test, Y_test)
        sparsity = sum(len(rule) for rule in model.ruleset_)
    elif rule_learner == 'cart':
        model = DecisionTreeClassifier()
        model.fit(X_train, Y_train)
        acc = model.score(X_test, Y_test)
        sparsity = export_text(model, feature_names=X_train.columns.tolist()).count('(')
    elif rule_learner == 'c4.5':
        model = DecisionTreeClassifier(criterion='entropy')
        model.fit(X_train, Y_train)
        acc = model.score(X_test, Y_test)
        sparsity = export_text(model, feature_names=X_train.columns.tolist()).count('(')
        
    return acc, sparsity

In [None]:
runs = 10
accuracies = {}
sparsities = {}
for learner in rule_learners:
    accuracies[learner] = []
    sparsities[learner] = []
    
for run in range(runs):
    print(f'run {run+1}') 
    for learner in rule_learners:
        acc, sparsity = run_learner(learner)
        
        accuracies[learner].append(acc)
        sparsities[learner].append(sparsity)

run 1


Epoch: 100%|██████████| 1000/1000 [08:50<00:00,  1.88it/s, rules cancelled=73, loss=0.55, epoch accu=0.827, test accu=0.833, num rules=11, sparsity=0.879]


run 2


Epoch:  77%|███████▋  | 766/1000 [06:39<02:00,  1.95it/s, rules cancelled=77, loss=0.579, epoch accu=0.781, test accu=0.783, num rules=6, sparsity=0.887] 

In [6]:
for learner in rule_learners:
    print(learner, np.mean(accuracies[learner]), np.std(accuracies[learner]), np.mean(sparsities[learner]), np.std(sparsities[learner]))

r2ntab 0.6485662191281286 0.12073547438471621 98.9 37.98275924679512
ripper 0.8280291728824796 0.003417328156895165 250.9 32.352588768134154
cart 0.7873197414221781 0.0015627630268857367 1252.2 2.749545416973504
c4.5 0.7851649262390186 0.0016698532298878572 1089.6 1.9595917942265426


In [None]:
import json

with open('adult.txt', 'w') as file:
    file.write(json.dumps(rule_learners))