In [8]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split

# --- Configuration -----------------------------------------------------------
# Path of the input CSV, relative to the notebook's working directory.
DATA_PATH = '../data/final_dataset_1.csv'
# Rows whose 'price' exceeds this cap are dropped before modelling.
MAX_PRICE = 1300000
# Seed shared by both splits below so the partitioning is reproducible.
SPLIT_SEED = 2

# --- Load and filter ---------------------------------------------------------
data = pd.read_csv(DATA_PATH)

# Keep only rows priced at or below the cap (the comparison is inclusive).
data = data[data['price'] <= MAX_PRICE]

# read_csv already returns a DataFrame, so no conversion is required; take an
# explicit copy so that `df` is independent of the filtered `data` frame.
df = data.copy()

# --- Features / target -------------------------------------------------------
# Every column except 'price' is used as a feature.
X = df.drop(columns='price')
y = df['price']

# --- Train / validation / test split ----------------------------------------
# First hold out 30 % of the rows, then halve that hold-out, which yields
# roughly 70 % train, 15 % validation and 15 % test.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=SPLIT_SEED)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=SPLIT_SEED)

# Sanity check: the three partitions must account for every row exactly once.
assert len(X_train) + len(X_val) + len(X_test) == len(X)

In [9]:
# --- Search space ------------------------------------------------------------
# Every entry is a finite sequence, so the space is a discrete grid of
# 7 * 25 * 10 * 10 * 10 = 175,000 candidate combinations.
params = {
    'max_depth': range(3, 10),                        # 3..9 inclusive
    'learning_rate': np.linspace(0.01, 0.2, 25),      # 25 evenly spaced values
    'n_estimators': list(range(100, 1001, 100)),      # 100, 200, ..., 1000
    'subsample': np.linspace(0.5, 1, 10),
    'colsample_bytree': np.linspace(0.5, 1, 10)
}

# --- Search budget -----------------------------------------------------------
# N_ITER candidates x CV_FOLDS folds = 12,000 model fits (plus the final refit
# of the best candidate), so this cell is expensive to re-run.
N_ITER = 1500
CV_FOLDS = 8
SEARCH_SEED = 42

model = xgb.XGBRegressor()

# Configure the randomized search; candidates are ranked by mean
# cross-validated R².
search = RandomizedSearchCV(
    model,
    params,
    n_iter=N_ITER,
    cv=CV_FOLDS,
    scoring='r2',
    random_state=SEARCH_SEED,
)

# Fit on the training split only and search for the best hyperparameters.
search.fit(X_train, y_train)

# Report the best hyperparameters and their cross-validated score.
print("Best parameters:", search.best_params_)
print("Best score:", search.best_score_)

# Predict on the held-out test split; `search.predict` delegates to the
# best estimator found by the search.
y_pred = search.predict(X_test)

# Evaluate the model. RMSE is reported next to MSE because it is expressed in
# the same units as the target, which makes the error magnitude readable.
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
print("R² Score:", r2)

Best parameters: {'subsample': 0.9444444444444444, 'n_estimators': 700, 'max_depth': 7, 'learning_rate': 0.03375, 'colsample_bytree': 0.5}
Best score: 0.6991265662614875
Mean Squared Error: 14535121284.08566
R² Score: 0.704100093878931
