In [4]:
import sys
import os

# Put the parent of the current working directory on the import path so the
# `training`, `models` and `evaluation` packages imported below can be found.
# NOTE(review): this depends on the kernel's working directory at run time —
# presumably the folder that holds this notebook; confirm before running elsewhere.
current_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
# Only append when missing: re-running this cell would otherwise add the same
# directory to sys.path again on every execution.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from training.testing_intervals import testing_intervals
from models.price_evaluator_decision_tree import PriceClassifierBasicModel, DecisionTreeHyperparams
from evaluation.AbstractEvaluationResults import MetricEnum
from models.price_evaluator_xgboost_clasifier import (
    PriceClassifierXGBoostModelHyperparams,
    PriceClassifierXGBoostModel,
)
from models.price_evaluator_xgboost_regression import (
    PriceRegressorXGBoostModel,
    PriceRegressorXGBoostModelHyperparams
)
from models.price_evaluator_random_forest_regression import (
    PriceRegressorRandomForestModel,
    PriceRegressorRandomForestHyperparams
)

In [5]:
def _interval_curve(exp_rate, shifted_scale, shifted_power, plain_scale, plain_power):
    """Return a one-argument curve built from five coefficients.

    The returned callable evaluates
    ``10 ** (exp_rate * x) + shifted_scale * (x + 1) ** shifted_power
    + plain_scale * x ** plain_power``.
    """
    return lambda x: (
        10 ** (exp_rate * x)
        + shifted_scale * (x + 1) ** shifted_power
        + plain_scale * x ** plain_power
    )


# One coefficient row per curve:
# (exp_rate, shifted_scale, shifted_power, plain_scale, plain_power).
# NOTE(review): in the recorded output the first interval widths (1501 and 601)
# equal the first two curves evaluated at x = 0, so each curve appears to give
# the width of the x-th price interval — confirm against testing_intervals.
functions = [
    _interval_curve(*coefficients)
    for coefficients in (
        (0.66, 1500, 1.3, 800, 1.1),
        (0.25, 600, 1.15, 200, 0.75),
        (0.19, 80, 1.02, 150, 0.4),
        (0.13, 50, 1.01, 100, 0.35),
        (0.1, 30, 1.005, 90, 0.3),
        (0.08, 15, 1.001, 60, 0.3),
        (0.06, 15, 1.001, 40, 0.25),
        (0.05, 10, 1.002, 30, 0.2),
    )
]

In [6]:
# Decision-tree hyperparameters held constant for this run.
decision_tree_const_params = dict(
    criterion="entropy",
    splitter="best",
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=1,
    min_weight_fraction_leaf=0.0,
    max_features="sqrt",
    max_leaf_nodes=37,
    min_impurity_decrease=0.0,
    ccp_alpha=0.0,
)

# Run the interval experiment for the basic decision-tree classifier, passing
# the candidate curves in `functions` and scoring with MEAN_CLASTERS_ERROR.
testing_intervals(
    model_name="basic_tree_classifier",
    model_class=PriceClassifierBasicModel,
    hyperparameters_class=DecisionTreeHyperparams,
    metric=MetricEnum.MEAN_CLASTERS_ERROR,
    intervals_functions=functions,
    category_encoding=True,
    const_params=decision_tree_const_params,
)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Price intervals:
585 - 2086
2086 - 6584
6584 - 14576
14576 - 26444
26444 - 42711
42711 - 64810
64810 - 98496
98496 - 169378
169378 - 393901
393901 - 717877
DecisionTreeHyperparams(criterion='entropy', splitter='best', max_depth=20, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='sqrt', max_leaf_nodes=37, min_impurity_decrease=0.0, ccp_alpha=0.0)
Mean_clasters_error: 0.5934838888255695
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
Price intervals:
585 - 1186
1186 - 2719
2719 - 5180
5180 - 8596
8596 - 12990
12990 - 18386
18386 - 24807
24807 - 32280
32280 - 40839
40839 - 50531
50531 - 61428
61428 - 73650
73650 - 87399
87399 - 103026
103026 - 121145
121145 - 142843
142843 - 170044
170044 - 206162
206162 - 257262
257262 - 334123
334123 - 455907
455907 - 656683
656683 - 717877
DecisionTreeHyperparams(criterion='entropy', splitter='best', max_depth=20, min_samples_split=2, min_samples_leaf

In [7]:
# XGBoost classifier hyperparameters held constant for this run.
xgb_classifier_const_params = dict(
    learning_rate=0.12921360029190918,
    reg_alpha=8.440813134554679,
    reg_lambda=6.254500283477481,
    max_depth=15,
    n_estimators=486,
    min_child_weight=6,
    gamma=0.6304338645485753,
    subsample=0.8040953946703557,
    colsample_bytree=0.9651088241133051,
    max_delta_step=4.410373590930435,
    colsample_bynode=0.8065733591903823,
    colsample_bylevel=0.7476323660748343,
)

# Run the interval experiment for the XGBoost classifier with GPU mode off,
# passing the candidate curves in `functions` and scoring with MEAN_CLASTERS_ERROR.
testing_intervals(
    model_name="xgboost_classifier",
    model_class=PriceClassifierXGBoostModel,
    hyperparameters_class=PriceClassifierXGBoostModelHyperparams,
    metric=MetricEnum.MEAN_CLASTERS_ERROR,
    intervals_functions=functions,
    gpu_mode=False,
    const_params=xgb_classifier_const_params,
)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Price intervals:
585 - 2086
2086 - 6584
6584 - 14576
14576 - 26444
26444 - 42711
42711 - 64810
64810 - 98496
98496 - 169378
169378 - 393901
393901 - 717877
PriceClassifierXGBoostModelHyperparams(learning_rate=0.12921360029190918, reg_alpha=8.440813134554679, reg_lambda=6.254500283477481, max_depth=15, n_estimators=486, min_child_weight=6, gamma=0.6304338645485753, subsample=0.8040953946703557, colsample_bytree=0.9651088241133051, max_delta_step=4.410373590930435, colsample_bynode=0.8065733591903823, colsample_bylevel=0.7476323660748343)
Mean_clasters_error: 0.24905156552745983
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
Price intervals:
585 - 1186
1186 - 2719
2719 - 5180
5180 - 8596
8596 - 12990
12990 - 18386
18386 - 24807
24807 - 32280
32280 - 40839
40839 - 50531
50531 - 61428
61428 - 73650
73650 - 87399
87399 - 103026
103026 - 121145
121145 - 142843
142843 - 170044
170044 - 206162
206162 - 257262
257262 - 334123
33

In [8]:
# XGBoost regressor hyperparameters held constant for this run; the objective
# is set to "reg:tweedie".
xgb_regressor_const_params = dict(
    learning_rate=0.24099746618946757,
    reg_alpha=1.8343478986616382,
    reg_lambda=7.796910002727695,
    max_depth=10,
    n_estimators=251,
    min_child_weight=2,
    gamma=2.296244459829336,
    subsample=0.666854305569511,
    colsample_bytree=0.5714334089609704,
    max_delta_step=6.50888472948853,
    colsample_bynode=0.5282057895135501,
    colsample_bylevel=0.8609993861334124,
    objective="reg:tweedie",
)

# Run the interval experiment for the XGBoost regressor with GPU mode off,
# passing the candidate curves in `functions` and scoring with MEAN_CLASTERS_ERROR.
testing_intervals(
    model_name="xgboost_regressor",
    model_class=PriceRegressorXGBoostModel,
    hyperparameters_class=PriceRegressorXGBoostModelHyperparams,
    metric=MetricEnum.MEAN_CLASTERS_ERROR,
    intervals_functions=functions,
    gpu_mode=False,
    const_params=xgb_regressor_const_params,
)

Classification metrics:
   Accuracy: 0.8473
  Precision: 0.8477
  Recall: 0.8473
  F1 Score: 0.8473
  Mean Classes Error: 0.1576
  Ideal Distance: 0.3052
Classification metrics:
   Accuracy: 0.7020
  Precision: 0.7030
  Recall: 0.7020
  F1 Score: 0.7021
  Mean Classes Error: 0.3399
  Ideal Distance: 0.5954
Classification metrics:
   Accuracy: 0.6333
  Precision: 0.6363
  Recall: 0.6333
  F1 Score: 0.6339
  Mean Classes Error: 0.4769
  Ideal Distance: 0.7316
Classification metrics:
   Accuracy: 0.5352
  Precision: 0.5392
  Recall: 0.5352
  F1 Score: 0.5362
  Mean Classes Error: 0.6823
  Ideal Distance: 0.9271
Classification metrics:
   Accuracy: 0.4719
  Precision: 0.4765
  Recall: 0.4719
  F1 Score: 0.4731
  Mean Classes Error: 0.8655
  Ideal Distance: 1.0532
Classification metrics:
   Accuracy: 0.4396
  Precision: 0.4443
  Recall: 0.4396
  F1 Score: 0.4407
  Mean Classes Error: 0.9959
  Ideal Distance: 1.1179
Classification metrics:
   Accuracy: 0.3457
  Precision: 0.3522
  Recall: 0.

In [9]:
# Random-forest regressor hyperparameters held constant for this run.
random_forest_const_params = dict(
    n_estimators=60,
    max_depth=17,
    min_samples_split=5,
    min_samples_leaf=1,
    max_features="log2",
    bootstrap=True,
)

# Run the interval experiment for the random-forest regressor (registered
# under the name "basic_regressor") with GPU mode off and category encoding on.
testing_intervals(
    model_name="basic_regressor",
    model_class=PriceRegressorRandomForestModel,
    hyperparameters_class=PriceRegressorRandomForestHyperparams,
    metric=MetricEnum.MEAN_CLASTERS_ERROR,
    intervals_functions=functions,
    gpu_mode=False,
    const_params=random_forest_const_params,
    category_encoding=True,
)

Classification metrics:
   Accuracy: 0.7593
  Precision: 0.7597
  Recall: 0.7593
  F1 Score: 0.7528
  Mean Classes Error: 0.2540
  Ideal Distance: 0.4845
Classification metrics:
   Accuracy: 0.5597
  Precision: 0.5546
  Recall: 0.5597
  F1 Score: 0.5523
  Mean Classes Error: 0.5392
  Ideal Distance: 0.8869
Classification metrics:
   Accuracy: 0.5003
  Precision: 0.4940
  Recall: 0.5003
  F1 Score: 0.4950
  Mean Classes Error: 0.8122
  Ideal Distance: 1.0053
Classification metrics:
   Accuracy: 0.4021
  Precision: 0.3973
  Recall: 0.4021
  F1 Score: 0.3980
  Mean Classes Error: 1.1534
  Ideal Distance: 1.2003
Classification metrics:
   Accuracy: 0.3413
  Precision: 0.3368
  Recall: 0.3413
  F1 Score: 0.3379
  Mean Classes Error: 1.4685
  Ideal Distance: 1.3213
Classification metrics:
   Accuracy: 0.3189
  Precision: 0.3144
  Recall: 0.3189
  F1 Score: 0.3158
  Mean Classes Error: 1.7165
  Ideal Distance: 1.3660
Classification metrics:
   Accuracy: 0.2360
  Precision: 0.2347
  Recall: 0.