In [18]:
import wittgenstein as rule
import torch
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_text
from datasets.dataset import transform_dataset, kfold_dataset
from R2Ntab import train as train_r2ntab, R2Ntab

In [34]:
# Load the chosen dataset, take the first fold produced by kfold_dataset, and
# wrap the train/test parts of that fold as torch TensorDatasets.
name = 'house'
X, Y, X_headers, Y_headers = transform_dataset(
    name,
    method='onehot-compare',
    negations=False,
    labels='binary',
)
datasets = kfold_dataset(X, Y, shuffle=1)
X_train, X_test, Y_train, Y_test = datasets[0]

# Same construction for both parts: features come from a DataFrame
# (hence .to_numpy()), labels are handed to torch.Tensor as they are.
train_set, test_set = (
    torch.utils.data.TensorDataset(torch.Tensor(features.to_numpy()), torch.Tensor(labels))
    for features, labels in ((X_train, Y_train), (X_test, Y_test))
)

In [35]:
# Column-sorted DataFrame views of the train/test features; these are the
# inputs given to the RIPPER learner.
RX_train = pd.DataFrame(X_train).sort_index(axis=1)
RX_test = pd.DataFrame(X_test).sort_index(axis=1)

In [36]:
rule_learners = ['r2ntab', 'ripper', 'cart', 'c4.5']


def _run_decision_tree(criterion='gini'):
    """Fit a scikit-learn decision tree with the given split criterion.

    Reads the module-level ``X_train``/``Y_train``/``X_test``/``Y_test``.

    Returns:
        (acc, sparsity): test-set score of the fitted tree and the number of
        '(' characters in its ``export_text`` rendering.
    """
    model = DecisionTreeClassifier(criterion=criterion)
    model.fit(X_train, Y_train)
    acc = model.score(X_test, Y_test)
    # NOTE(review): export_text itself emits no parentheses, so this count
    # presumably relies on the feature names containing '(' -- confirm.
    sparsity = export_text(model, feature_names=X_train.columns.tolist()).count('(')
    return acc, sparsity


def run_learner(rule_learner):
    """Train one rule learner on the current fold and evaluate it.

    The data is taken from the notebook's module-level variables
    (``train_set``, ``test_set``, ``X_train``, ``RX_train``, ...).

    Args:
        rule_learner: one of the names listed in ``rule_learners``.

    Returns:
        (acc, sparsity): accuracy on the test split and a size measure of the
        learned model (summed rule lengths, or the '(' count of the exported
        tree text for the tree learners).

    Raises:
        ValueError: if ``rule_learner`` is not a known learner name.
    """
    if rule_learner == 'r2ntab':
        # First argument is the width of the feature tensor.
        # NOTE(review): 50 and 1 are presumably the number of rules and the
        # number of outputs -- confirm against the R2Ntab constructor.
        model = R2Ntab(train_set[:][0].size(1), 50, 1)
        train_r2ntab(model, train_set, test_set=test_set, device='cpu', lr_rules=1e-2, lr_cancel=1e-2,
                epochs=2000, batch_size=400, and_lam=1e-2, or_lam=1e-5, cancel_lam=1e-5, num_alter=500)
        acc = (model.predict(np.array(X_test)) == Y_test).mean()
        sparsity = sum(map(len, model.get_rules(X_headers)))
    elif rule_learner == 'ripper':
        model = rule.RIPPER()
        model.fit(RX_train, Y_train)
        acc = model.score(RX_test, Y_test)
        # Loop variable deliberately not called `rule`, which is the alias of
        # the wittgenstein module.
        sparsity = sum(len(learned_rule) for learned_rule in model.ruleset_)
    elif rule_learner == 'cart':
        # 'gini' is DecisionTreeClassifier's default criterion.
        acc, sparsity = _run_decision_tree(criterion='gini')
    elif rule_learner == 'c4.5':
        acc, sparsity = _run_decision_tree(criterion='entropy')
    else:
        # Previously this fell through to the return statement and failed
        # with an UnboundLocalError on `acc`.
        raise ValueError(
            f'unknown rule learner {rule_learner!r}; expected one of {rule_learners}'
        )

    return acc, sparsity

In [37]:
# Repeat the experiment `runs` times and collect, per learner, one accuracy
# and one sparsity value for every run.
runs = 1
accuracies = {learner: [] for learner in rule_learners}
sparsities = {learner: [] for learner in rule_learners}

for run in range(runs):
    print(f'run {run+1}')
    for learner in rule_learners:
        acc, sparsity = run_learner(learner)
        accuracies[learner].append(acc)
        sparsities[learner].append(sparsity)

run 1


In [38]:
# One line per learner: name, mean/std of accuracy, mean/std of sparsity.
for learner in rule_learners:
    summary = [
        stat(results[learner])
        for results in (accuracies, sparsities)
        for stat in (np.mean, np.std)
    ]
    print(learner, *summary)

ripper 0.8053543998244459 0.0 234.0 0.0
cart 0.8240070221637041 0.0 1792.0 0.0
c4.5 0.82642089093702 0.0 1690.0 0.0


In [7]:
import json

# Save the collected results under the name of the dataset that was actually
# loaded (`name`), rather than a hard-coded 'heloc.txt' that would receive --
# and overwrite -- results belonging to a different dataset.
# Each dictionary is written on its own line: two json.dumps payloads with no
# separator produce '{...}{...}', which json.load cannot parse.
with open(f'{name}.txt', 'w') as file:
    file.write(json.dumps(accuracies) + '\n')
    file.write(json.dumps(sparsities) + '\n')