In [None]:
# All dependencies of this notebook

# third-party imports
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier

# local imports
from lib.model import OSDT
from lib.model_selection import train_cross_validate
from lib.data_processing import read_dataset

# Using COMPAS as an example
dataset = read_dataset('data/preprocessed/compas-binary.csv')
(n, _m) = dataset.shape
# Every column except the last goes into X; the last column goes into y.
X = dataset.values[:,:-1]
y = dataset.values[:,-1]

# Keyword arguments forwarded unchanged to the OSDT constructor.
hyperparameters = {
    'regularizer': 0.005,
    'compress': True
}
model = OSDT(**hyperparameters)
model.fit(X, y)
model.predict(X)

# X is already the result of `dataset.values[...]`, so it is indexed directly.
# The earlier `X.values[:, 0]` fails when X is a NumPy array (no `.values`
# attribute) -- assumes read_dataset returns a pandas DataFrame; TODO confirm.
X[:, 0]

In [6]:
import numpy as np
from time import time
# Test out the cluster library
from lib.data_processing import read_dataset
from lib.osdt_definition import OSDT

# Load the monk2 training set; the file is read with ';' as the separator.
dataset = read_dataset('data/preprocessed/monk2-train.csv', sep=';')

# Every column except the last goes into X; the last column goes into y.
X = dataset.values[:, :-1]
y = dataset.values[:, -1]
# Third positional argument to OSDT; presumably the regularization weight -- TODO confirm.
lamb = 0.005

# The timed span covers building the problem, the banner print, and solve().
start = time()
problem = OSDT(X, y, lamb)
# The banner names the dataset loaded above (it previously said "COMPAS").
print("\nRunning OSDT monk2 consistency test")
model = problem.solve(clients=2, servers=1, visualize=True)
finish = time()
print('Training Time = {} seconds'.format(round(finish - start, 3)))

print('Optimal Model:\n', model.visualization)
print('Optimal Risk:\n', model.risk)


Running OSDT COMPAS consistency test
Worker 1 Done
Server 0 Done
Worker 0 Done
Training Time = 120.608 seconds
Optimal Model:
 (_,_,0,_,0,_,_,0,_,_,0) => predict 0, risk contribution 0.01091715976331361
(0,_,1,_,0,0,_,0,_,_,0) => predict 0, risk contribution 0.005
(1,_,1,_,0,0,_,0,_,_,0) => predict 1, risk contribution 0.005
(_,_,1,_,0,1,_,0,_,_,0) => predict 1, risk contribution 0.005
(0,_,_,_,0,_,_,1,_,_,0) => predict 1, risk contribution 0.005
(1,_,0,_,0,_,_,1,_,_,0) => predict 1, risk contribution 0.005
(1,_,1,_,0,_,_,1,_,_,0) => predict 0, risk contribution 0.005
(0,_,0,_,0,0,_,0,_,_,1) => predict 0, risk contribution 0.005
(0,_,0,_,0,0,_,1,_,_,1) => predict 1, risk contribution 0.005
(1,_,0,_,0,0,_,0,_,_,1) => predict 1, risk contribution 0.005
(1,_,0,_,0,0,_,1,_,_,1) => predict 0, risk contribution 0.005
(0,_,0,_,0,1,_,0,_,_,1) => predict 1, risk contribution 0.005
(0,_,0,_,0,1,_,1,_,_,1) => predict 0, risk contribution 0.005
(1,_,0,_,0,1,_,_,_,_,1) => predict 0, risk contribut

In [1]:
import numpy as np
from time import time
# Test out the cluster library
from lib.data_processing import read_dataset
from lib.osdt_definition import OSDT

# Load the split fixture; the file is read with ';' as the separator.
dataset = read_dataset('tests/fixtures/split.csv', sep=';')

# Read the value table once: all columns but the last go into X, the last into y.
table = dataset.values
X, y = table[:, :-1], table[:, -1]
# Third positional argument to OSDT; presumably the regularization weight -- TODO confirm.
lamb = 0.05

# The timed span covers building the problem, the banner print, and solve().
start = time()
problem = OSDT(X, y, lamb)
print("\nRunning OSDT split consistency test")
model = problem.solve(clients=2, servers=1, visualize=True)
finish = time()

# Report wall-clock seconds rounded to three decimals, then the solved model.
elapsed_seconds = round(finish - start, 3)
print('Training Time = {} seconds'.format(elapsed_seconds))
print(model.visualization)
print(model.risk)


Running OSDT split consistency test
Worker 1 Done
Server 0 Done
Worker 0 Done
Training Time = 0.1 seconds
(_,_,_,_,_,_,_,_,0,_,_,_) => predict 0, risk contribution 0.1611111111111111
(_,_,_,_,_,_,_,_,1,_,_,_) => predict 1, risk contribution 0.1611111111111111
0.3222222222222222
