In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from xgboost import XGBClassifier
from scipy.stats import randint, uniform


In [4]:
# Load the reservations and replace the raw room price with a three-class label:
#   0 -> price <= 85,  1 -> 85 < price <= 115,  2 -> everything else.
# The label is 2 minus the number of thresholds the price stays at or below (each
# comparison contributes 1 when true). A price for which both comparisons are
# false -- NaN included -- therefore lands in class 2.
# The raw price is dropped afterwards so the label's source is not a feature,
# together with the booking identifier and the other listed columns.
hotel = (
    pd.read_csv('../hotel_reservations.csv')
    .assign(
        label_avg_price_per_room=lambda frame: (
            2
            - (frame['avg_price_per_room'] <= 85).astype('int64')
            - (frame['avg_price_per_room'] <= 115).astype('int64')
        )
    )
    .drop(columns=[
        'avg_price_per_room',
        'no_of_children',
        'required_car_parking_space',
        'repeated_guest',
        'no_of_previous_cancellations',
        'no_of_previous_bookings_not_canceled',
        'Booking_ID',
    ])
)

In [5]:
# Separate the target (the price-band label) from the feature matrix (every
# remaining column), then hold out 20% of the rows as a test set.
# The fixed random_state keeps the split reproducible across runs.
y = hotel['label_avg_price_per_room']
X = hotel.drop(columns=['label_avg_price_per_room'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [6]:
# Route each feature to a preprocessing step by dtype: object-typed columns are
# treated as categorical, every other column as numeric.
categorical_cols = [cname for cname in X.columns if X[cname].dtype == "object"]
numeric_cols = [cname for cname in X.columns if cname not in categorical_cols]

# Numeric columns are standardised; categorical columns are one-hot encoded.
# handle_unknown='ignore' encodes a category that was absent at fit time as an
# all-zero row instead of raising. With the default ('error'), a rare category
# that appears only in a validation fold or in the test set makes transform()
# fail during the hyperparameter search or at predict time.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

# Gradient-boosted classifier evaluated with multi-class log loss; seeded for
# reproducibility.
# NOTE(review): use_label_encoder appears to be deprecated in recent XGBoost
# releases -- confirm against the installed version before removing it.
model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)

# Preprocessing and model are bundled into one estimator, so whatever fits the
# pipeline also fits the scaler and the encoder on the same rows. The step name
# 'model' is the prefix used to address the classifier's hyperparameters.
pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])

# Ajustar o espaço de busca para hiperparâmetros


In [7]:
# Search space sampled by the randomized search. The "model__" prefix routes
# each parameter to the pipeline step named 'model'.
# scipy conventions: randint(low, high) draws integers from [low, high), and
# uniform(loc, scale) draws floats from [loc, loc + scale] -- the second
# argument is a width, not an upper bound.
param_dist = dict(
    model__n_estimators=randint(100, 300),      # integers 100..299
    model__learning_rate=uniform(0.01, 0.2),    # floats 0.01..0.21
    model__max_depth=randint(3, 10),            # integers 3..9
    model__min_child_weight=randint(1, 10),     # integers 1..9
    model__subsample=uniform(0.5, 0.5),         # floats 0.5..1.0
)


# Configurar RandomizedSearchCV


In [8]:
# Randomized hyperparameter search over the whole pipeline: 50 sampled
# candidates, each scored by 5-fold cross-validated accuracy, with n_jobs=-1
# to run on all available processors. The fixed random_state makes the sampled
# candidates reproducible.
random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_dist,
    n_iter=50,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42,
)


In [9]:
# Run the randomized hyperparameter search on the training split only; the
# test split is not touched here.
random_search.fit(X_train, y_train)

# Report the best cross-validated score (accuracy, as a percentage) and the
# hyperparameter combination that achieved it.
print(f"Melhor pontuação: {random_search.best_score_ * 100:.2f}%")
print("Melhores hiperparâmetros:", random_search.best_params_)

Melhor pontuação: 85.28%
Melhores hiperparâmetros: {'model__learning_rate': 0.11454656587639882, 'model__max_depth': 9, 'model__min_child_weight': 3, 'model__n_estimators': 162, 'model__subsample': 0.9478817978367597}


In [10]:
# Evaluate the tuned model on the held-out test set, using the estimator the
# search selected as best.
y_pred = random_search.best_estimator_.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# NOTE(review): the printed label reads "Precisão" (precision), but the value
# reported is accuracy_score -- confirm whether the wording should change.
print(f"Precisão no conjunto de teste: {accuracy * 100:.2f}%")


Precisão no conjunto de teste: 86.00%
