In [1]:
from utils import prepare_jupyter
# Project-local notebook setup hook imported from `utils`; what it configures
# is not visible in this notebook — TODO confirm before relying on it.
prepare_jupyter()

In [2]:
import numpy as np

from data import Dataset
from structure import CatboostEnsemble
from measures import bin_q, bin_corr, bin_entropy, bin_kw

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [3]:
# Load version 2 of the 'boston' dataset from OpenML and split it with a
# ratio of 0.5 into the two parts used below.
# NOTE(review): no seed is passed here; whether the split is deterministic
# depends on Dataset.split — confirm.
train_set, val_set = (
    Dataset
    .from_openml('boston', version=2)
    .split(0.5)
)

Downloading boston set.


In [4]:
# Build the ensemble from an explicit parameter dict and fit it on the
# training part of the split.
clf = CatboostEnsemble({
    'n_estimators': 5,
    'max_depth': 4,
})
clf.fit(train_set)

In [5]:
# One prediction vector per ensemble member, all on the validation features.
preds = [
    member.predict(val_set.X, labeled_result=True)
    for member in clf.trees
]

# The unpacking requires exactly five members in `clf.trees`.
pred_a, pred_b, pred_c, pred_d, pred_e = preds

# |pred_b - 1| — presumably flips 0/1 labels to get a fully disagreeing
# vector; assumes pred_b is a binary numeric array — TODO confirm.
pred_b_inv = np.abs(pred_b - 1)

In [6]:
from pycm import ConfusionMatrix



# Pairwise confusion matrices between member predictions: one member's output
# is passed as the "actual" vector and another's as the "predict" vector.
# `cm_same` pairs pred_b with itself and `cm_diff` pairs it with pred_b_inv.
cm_1, cm_2, cm_3, cm_same, cm_diff = (
    ConfusionMatrix(actual_vector=reference, predict_vector=compared)
    for reference, compared in (
        (pred_a, pred_b),
        (pred_a, pred_c),
        (pred_b, pred_c),
        (pred_b, pred_b),
        (pred_b, pred_b_inv),
    )
)

def relevant_stats(cm):
    """Print a short summary of statistics taken from a confusion matrix.

    Parameters
    ----------
    cm : object
        Must expose ``Chi_Squared`` and ``CrossEntropy`` attributes and an
        ``overall_stat`` mapping containing the keys ``"Pearson C"``,
        ``"Conditional Entropy"`` and ``"Kappa"`` (in this notebook, a
        ``pycm.ConfusionMatrix``).

    Returns
    -------
    None
        The values are written to stdout, one per line, followed by a
        blank separator line.
    """
    overall = cm.overall_stat
    print(f'ChiSquared = {cm.Chi_Squared}')
    print(f'CrossEntropy = {cm.CrossEntropy}')
    print(f'Pearson = {overall["Pearson C"]}')
    print(f'Cond. Entropy = {overall["Conditional Entropy"]}')
    # This row reports Kappa; it previously reused the "Cond. Entropy" label,
    # which made the two rows indistinguishable in the printed report.
    print(f'Kappa = {overall["Kappa"]}')
    print()

# Show the validation feature matrix shape, then one report per confusion
# matrix: the self-comparison and the inverted comparison first, followed by
# the three member pairs.
print(val_set.X.shape)
for matrix in (cm_same, cm_diff, cm_1, cm_2, cm_3):
    relevant_stats(matrix)

# list(cm_1.overall_stat.keys())

(253, 13)
ChiSquared = 253.0
CrossEntropy = 0.9656361333706098
Pearson = 0.7071067811865476
Cond. Entropy = -0.0
Cond. Entropy = 1.0

ChiSquared = 253.0
CrossEntropy = 1.1042078552434993
Pearson = 0.7071067811865476
Cond. Entropy = -0.0
Cond. Entropy = -0.9097472924187725

ChiSquared = 162.60531950751468
CrossEntropy = 0.9706751050750786
Pearson = 0.6254992967192065
Cond. Entropy = 0.4462377649494061
Cond. Entropy = 0.8015815959741193

ChiSquared = 109.90810120557771
CrossEntropy = 0.9807921631611685
Pearson = 0.5503215516480571
Cond. Entropy = 0.6542077301908472
Cond. Entropy = 0.6542947027233149

ChiSquared = 178.02166458915138
CrossEntropy = 0.9788945261032005
Pearson = 0.642668300427614
Cond. Entropy = 0.383870192116421
Cond. Entropy = 0.8309415470773539



In [7]:
# bin_q for three member pairs against the validation labels, plus pred_a
# paired with itself.
for first, second in (
    (pred_a, pred_b),
    (pred_a, pred_c),
    (pred_b, pred_c),
    (pred_a, pred_a),
):
    print(bin_q(val_set.y, first, second))

0.9580190226303706
0.8013937282229965
0.9639699749791493
1.0


In [8]:
# bin_corr for three member pairs against the validation labels, plus pred_b
# paired with pred_b_inv.
for first, second in (
    (pred_a, pred_b),
    (pred_a, pred_c),
    (pred_b, pred_c),
    (pred_b, pred_b_inv),
):
    print(bin_corr(val_set.y, first, second))

0.662348314980628
0.40450497864947405
0.6857752503744251
-1.0


In [9]:
# preds
# print(bin_entropy(val_set.y, preds))

In [10]:
# Print each ensemble-level measure on the validation set, in a fixed order.
# Methods are looked up one at a time so each is resolved only when called.
for measure in ('entropy', 'q', 'corr', 'df', 'kw'):
    print(getattr(clf, measure)(val_set))

0.3537549407114624
0.5025976524062725
0.28054244880259727
0.08142292490118577
0.001937227577371932
