In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
import sys
import os

current_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
sys.path.append(parent_dir)

In [3]:
from models.price_evaluator_tree import PriceClassifierModel
from training.hyperparameter_tuning import hyperparameter_tuning
from evaluation.evaluate_classification import evaluate_classification
from parameters.model_hyperparameters import XGBoostHyperparams
from utils.classification_range_generator import generate_price_intervals
from utils.mapping import intervals_to_labels, labels_to_intervals
from utils.classify import classify
from utils.class_reduction import class_reduction
from data_processing.dtype_mapping import dtype_mapping

In [4]:
data = pd.read_csv("../data/processed_car_sale_ads.csv", low_memory=False)
data = dtype_mapping(data)
data.head(10)

Unnamed: 0,Index,Price,Currency,Condition,Vehicle_brand,Vehicle_model,Vehicle_generation,Production_year,Mileage_km,Power_HP,...,Shift paddles,Speed limiter,Start-Stop system,Sunroof,TV tuner,Tinted windows,Twilight sensor,USB socket,Velor upholstery,Xenon lights
0,0,86200,PLN,New,Abarth,595,Unknown,2021,1,145,...,0,0,0,0,0,0,0,0,0,0
1,1,43500,PLN,Used,Abarth,Other,Unknown,1974,59000,75,...,0,0,0,0,0,0,0,0,0,0
2,2,44900,PLN,Used,Abarth,500,Unknown,2018,52000,180,...,0,0,0,0,0,0,0,0,0,0
3,3,39900,PLN,Used,Abarth,500,Unknown,2012,29000,160,...,0,0,0,0,0,0,0,1,0,1
4,4,97900,PLN,New,Abarth,595,Unknown,2021,600,165,...,0,0,0,0,0,1,0,1,0,0
5,5,62950,PLN,Used,Abarth,595,Unknown,2016,46060,180,...,0,0,0,0,0,1,1,1,0,0
6,6,69900,PLN,Used,Abarth,595,Unknown,2021,2900,145,...,0,0,0,0,0,0,0,0,0,0
7,7,69900,PLN,Used,Abarth,500,Unknown,2019,20000,145,...,0,0,0,0,0,0,0,1,0,1
8,8,120750,PLN,New,Abarth,595,Unknown,2021,1,180,...,0,0,1,0,0,1,1,1,0,1
9,9,93100,PLN,New,Abarth,595,Unknown,2021,1,145,...,0,0,1,0,0,1,1,1,0,1


In [5]:
intervals = generate_price_intervals(data["Price"].min(), data["Price"].max(), 10000, 10000, 500)
labels = classify(data, "Price", intervals)
labels, intervals = class_reduction(labels, intervals)

In [6]:
X = data.iloc[:, 3:]

X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.3, random_state=42)

In [7]:
param_grid = {
    "learning_rate": [0.01, 0.1, 0.2],
    "max_depth": [3, 5, 7],
    "n_estimators": [50, 100, 200],
    "min_child_weight": [1, 3, 5],
    "gamma": [0, 0.1, 0.2],
    "subsample": [0.8, 1.0],
    "colsample_bytree": [0.8, 1.0],
    "reg_alpha": [0, 0.01, 0.1, 1.0],
    "reg_lambda": [0, 1.0, 2.0, 5.0],
}

In [8]:
best_parameters = hyperparameter_tuning(
    PriceClassifierModel,
    param_grid,
    evaluate_classification,
    X_train,
    y_train,
    X_test,
    y_test,
    XGBoostHyperparams
)
best_parameters

Parameters: { "use_label_encoder" } are not used.



TypeError: '<' not supported between instances of 'ClassificationEvaluationResults' and 'float'