In [None]:
!pip install sagemaker pandas scikit-learn joblib

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

In [None]:
# Read the raw reservations table from a CSV in the working directory.
file_path = 'Hotel Reservations.csv'
reservations_raw = pd.read_csv(file_path)

# Turn the continuous room price into a 3-class target:
#   1 -> price <= 85,  2 -> 85 < price <= 115,  3 -> price > 115
# (pd.cut intervals are closed on the right by default).
price_classes = pd.cut(
    reservations_raw['avg_price_per_room'],
    bins=[-np.inf, 85, 115, np.inf],
    labels=[1, 2, 3],
)

# Append the label as a new column and remove the raw price column it was
# derived from.
df = (
    reservations_raw
    .assign(label_avg_price_per_room=price_classes)
    .drop(columns=['avg_price_per_room'])
)

In [None]:
# Separate the target from the features. The label column is categorical
# after pd.cut, so it is cast to plain integers for the classifier.
target_col = 'label_avg_price_per_room'
y = df[target_col].astype(int)
X = df.drop(columns=[target_col])

In [None]:
# Split the feature columns by dtype so each group gets its own preprocessing.
# Numeric columns are identified first; every remaining column (object
# strings, but also e.g. bool or category) is treated as categorical.
# Selecting only `object` here would leave such columns in neither list, and a
# ColumnTransformer with the default remainder='drop' would then silently
# discard them.
numerical_cols = X.select_dtypes(include=['number']).columns
categorical_cols = X.select_dtypes(exclude=['number']).columns

In [None]:
# Encoder for the categorical group; categories not seen during fit are
# ignored at transform time instead of raising.
category_encoder = OneHotEncoder(handle_unknown='ignore')

# Numeric columns are forwarded untouched, categorical columns are one-hot
# encoded.
column_transformers = [
    ('num', 'passthrough', numerical_cols),
    ('cat', category_encoder, categorical_cols),
]
preprocessor = ColumnTransformer(transformers=column_transformers)

In [None]:
# Random forest with 100 trees; the fixed seed keeps training reproducible.
forest_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Chain preprocessing and the classifier so both are fitted, applied and
# serialized together as a single estimator.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', forest_classifier),
    ]
)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit the preprocessing steps and the classifier on the training split only.
# Left as the cell's trailing expression so the fitted pipeline is displayed.
pipeline.fit(X=X_train, y=y_train)

In [None]:
# Score the fitted pipeline on the held-out split and report plain accuracy
# (fraction of correctly predicted price classes).
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Acurácia do modelo: {accuracy:.2f}')

In [None]:
# Persist the whole fitted pipeline (preprocessing + classifier) with joblib
# so it can be reloaded later without retraining.
joblib_file = "hotel_reservations_rf_model.joblib"
joblib.dump(value=pipeline, filename=joblib_file)
print(f'Modelo salvo como {joblib_file}')

In [None]:
# Round-trip check: reload the pipeline that was just written to disk.
# NOTE: joblib.load is pickle-based and must only be used on trusted files;
# here the file is the one this notebook saved itself.
loaded_model = joblib.load(joblib_file)

# Evaluate the reloaded pipeline on the same held-out split and report its
# accuracy.
y_loaded_pred = loaded_model.predict(X_test)
loaded_accuracy = accuracy_score(y_true=y_test, y_pred=y_loaded_pred)
print(f'Acurácia do modelo carregado: {loaded_accuracy:.2f}')