# Accuracy Comparison

# Scalability Comparison

# Convergence Rate

# Memory Profile

# Runtime Profile

In [1]:
import cProfile

# local imports
from lib.parallel_osdt_classifier import ParallelOSDTClassifier
from lib.data_processing import read_dataset

# Load the preprocessed COMPAS dataset as a worked example.
dataset = read_dataset('data/preprocessed/compas-binary.csv', sep=";")
(n, m) = dataset.shape
# Every column except the last is used as a feature; the last column is the label.
X, y = dataset.values[:n, :-1], dataset.values[:n, -1]

hyperparameters = dict(
    # Regularization coefficient which affects the penalty on model complexity
    regularization=0.005,

    # User-specified limit on the model
    max_depth=float('Inf'),
    # User-specified limit on the runtime
    max_time=float('Inf'),

    # Both vary based on how much computational resource is available
    clients=1,
    servers=1,

    # Toggle whether a rule-list visualization is rendered
    visualize=False,
    # Toggle whether event messages are printed
    verbose=False,
    # Toggle whether client processes log to logs/work_<id>.log files
    log=False,

    # More configurations around toggling optimizations and prioritization options
    configuration=dict(
        # Decides how tasks are prioritized
        priority_metric='uncertainty',
        # Decides how much to push back a task if it has pending dependencies
        deprioritization=0.01,

        # NOTE(review): the original note here about the Leaf Permutation Bound
        # (Theorem 6) was left unfinished -- complete or remove it.

        # Toggles the assumption about objective independence when composing
        # subtrees (Theorem 1).
        # Disabling this actually breaks convergence due to information loss.
        hierarchical_lowerbound=True,
        # Toggles whether problems are pruned based on insufficient accuracy
        # (compared to other results) (Lemma 2)
        look_ahead=True,
        # Toggles whether a split is avoided based on insufficient support
        # (proxy for accuracy gain) (Theorem 3)
        support_lowerbound=True,
        # Toggles whether a split is avoided based on insufficient potential
        # accuracy gain (Theorem 4)
        incremental_accuracy_lowerbound=True,
        # Toggles whether a problem is pruned based on insufficient accuracy
        # (in general) (Theorem 5)
        accuracy_lowerbound=True,
        # Toggles whether problem equivalence is based solely on the capture
        # set (similar to Corollary 6)
        capture_equivalence=True,
        # Hamming distance used to propagate bounding information of similar
        # problems (Theorem 7 + some more...)
        similarity_threshold=0,
        # Toggles whether equivalent points contribute to the lowerbound
        # (Proposition 8 and Theorem 9)
        equivalent_point_lowerbound=True,

        # Toggles compression of dataset based on equivalent point aggregation
        equivalent_point_compression=True,
        # Toggles whether asynchronous tasks can be cancelled after being issued
        task_cancellation=True,
        # Toggles whether look_ahead prunes using objective upperbounds
        # (this builds on top of look_ahead)
        interval_look_ahead=True,
        # Cooldown timer (seconds) on synchronization operations
        synchronization_cooldown=0.1,
    ),
)

model = ParallelOSDTClassifier(**hyperparameters)
# The model is fitted once directly and then fitted again under the profiler.
# NOTE(review): confirm the un-profiled first fit is an intentional warm-up;
# otherwise it only doubles the cell's runtime.
model.fit(X, y)
# Profile a full fit, with the report sorted by time spent inside each function.
cProfile.run('model.fit(X, y)', sort='tottime')

         40795655 function calls (40744695 primitive calls) in 82.740 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       40    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:416(parent)
       40    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:997(_handle_fromlist)
        1    0.048    0.048   82.741   82.741 <string>:1(<module>)
   169318    0.190    0.000    0.344    0.000 <string>:12(__new__)
       24    0.000    0.000    0.001    0.000 __init__.py:8(_make_name)
        1    0.000    0.000    0.000    0.000 _weakrefset.py:38(_remove)
        4    0.000    0.000    0.000    0.000 _weakrefset.py:81(add)
        1    0.000    0.000   82.655   82.655 client.py:12(__run__)
        1    0.000    0.000    0.000    0.000 cluster.py:10(__init__)
        1    0.000    0.000   82.667   82.667 cluster.py:16(compute)
        1    0.000    0.000    0.000    0.000 cluster.py:17(<genexpr>)
       