In [None]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from src.preprocessing import Preprocessor
from catboost import CatBoostRegressor
import pandas as pd
import numpy as np  

In [17]:
# Read the CSV at this relative path into a DataFrame; every later cell
# (splitting, preprocessing, training) derives from `dataset`.
dataset = pd.read_csv("data/pf_suvs_i302_1s2025.csv")

In [18]:
# Hold out 20 % of the rows, then cut that hold-out in half: the result is an
# 80 / 10 / 10 train / validation / test partition. Both splits use seed 42
# so the partition is reproducible across runs.
train, test = train_test_split(
    dataset,
    test_size=0.2,
    random_state=42,
)
val, test = train_test_split(
    test,
    test_size=0.5,
    random_state=42,
)

In [19]:
# Build the project's Preprocessor from the training split only; the validation
# and test splits are not passed in here.
# NOTE(review): presumably this is where any statistics/encodings are fitted,
# which would keep them free of validation/test leakage — confirm in
# src/preprocessing.
preprocessor = Preprocessor(train)

In [20]:
# Run the training split through the preprocessor.
# NOTE(review): `train` is rebound to the result, so re-running this cell
# passes the already-preprocessed frame back into `preprocess` — confirm that
# is harmless, or re-run from the split cell before executing this one again.
train = preprocessor.preprocess(train)

In [21]:
# Apply the same preprocessor object (constructed from the training split) to
# the validation split.
# NOTE(review): `val` is rebound to the result, so re-running this cell passes
# the already-preprocessed frame back into `preprocess` — confirm that is
# harmless, or re-run from the split cell before executing this one again.
val = preprocessor.preprocess(val)

In [22]:
# Separate the regression target ("Precio") from the feature columns for the
# training and validation splits.
x_train, y_train = train.drop(columns=["Precio"]), train["Precio"]
x_val, y_val = val.drop(columns=["Precio"]), val["Precio"]

In [23]:
# Learning-rate sweep for CatBoost.
#
# For each candidate learning rate a fresh CatBoostRegressor is fitted on the
# training split, using the validation split both for early stopping and for
# the reported metrics. Because the validation split drives early stopping,
# the metrics below are selection metrics, not an unbiased generalisation
# estimate.
#
# Besides printing the per-run metrics, every run is recorded in `lr_records`
# and summarised in `lr_summary` (sorted by validation RMSE), and the model
# with the lowest validation RMSE is kept in `best_cboost`. After the loop,
# `cboost` still refers to the model from the LAST learning rate in the list,
# which is not necessarily the best one.
learning_rates = [0.01, 0.05, 0.1, 0.2, 0.5]

lr_records = []            # one dict of metrics per learning rate
best_cboost = None         # fitted model with the lowest validation RMSE so far
best_rmse = float("inf")

for lr in learning_rates:
    cboost = CatBoostRegressor(
        iterations=1500,
        learning_rate=lr,
        depth=8,
        loss_function='RMSE',
        eval_metric='RMSE',
        random_seed=42,
        verbose=100,  # log every 100th iteration
        text_features=['Versión', 'Título', 'Descripción'],
        bootstrap_type='Bernoulli',
        subsample=0.75,
        min_data_in_leaf=10,
        colsample_bylevel=0.75,
    )

    # Validation data is the eval set: training stops early after 100 rounds
    # without improvement and the model is kept at its best iteration.
    cboost.fit(
        x_train, y_train,
        eval_set=(x_val, y_val),
        use_best_model=True,
        early_stopping_rounds=100
    )

    best_iteration = cboost.get_best_iteration()
    print(f"Learning Rate: {lr}, Best Iteration: {best_iteration}")

    y_pred = cboost.predict(x_val)
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))
    r2 = r2_score(y_val, y_pred)
    mae = mean_absolute_error(y_val, y_pred)
    print(f"RMSE: {rmse}, R2: {r2}, MAE: {mae}\n")

    # Keep the numbers so the sweep can be compared after the loop instead of
    # being read back from the printed log.
    lr_records.append(
        {
            "learning_rate": lr,
            "best_iteration": best_iteration,
            "rmse": rmse,
            "r2": r2,
            "mae": mae,
        }
    )

    # Remember only the best model rather than all five, to bound memory use.
    if rmse < best_rmse:
        best_rmse = rmse
        best_cboost = cboost

# Built once from the list of records; best (lowest RMSE) run first.
lr_summary = (
    pd.DataFrame(lr_records)
    .sort_values("rmse")
    .reset_index(drop=True)
)
lr_summary

0:	learn: 20024.7529510	test: 22298.4709064	best: 22298.4709064 (0)	total: 433ms	remaining: 10m 49s
100:	learn: 12667.4830232	test: 15310.7751220	best: 15310.7751220 (100)	total: 1m 3s	remaining: 14m 40s
200:	learn: 9905.7559609	test: 12087.9241086	best: 12087.9241086 (200)	total: 2m 5s	remaining: 13m 30s
300:	learn: 8803.3286311	test: 10565.9595777	best: 10565.9595777 (300)	total: 3m 8s	remaining: 12m 31s
400:	learn: 8146.8122211	test: 9563.3752639	best: 9563.3752639 (400)	total: 4m 10s	remaining: 11m 25s
500:	learn: 7671.5852659	test: 8892.5873899	best: 8892.5873899 (500)	total: 5m 14s	remaining: 10m 27s
600:	learn: 7283.5375861	test: 8403.3530358	best: 8403.3530358 (600)	total: 6m 15s	remaining: 9m 21s
700:	learn: 6991.7634884	test: 8057.1581628	best: 8057.1581628 (700)	total: 7m 21s	remaining: 8m 23s
800:	learn: 6741.2974414	test: 7828.0116790	best: 7828.0116790 (800)	total: 8m 22s	remaining: 7m 18s
900:	learn: 6520.9114901	test: 7626.1779535	best: 7626.1779535 (900)	total: 9m 21s	