In [5]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

import pickle

In [6]:
df = pd.read_csv("hotel_bookings_with_tiers.csv")

print("Dataset shape:", df.shape)
df.head()

Dataset shape: (119209, 39)


Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,total_of_special_requests,reservation_status,reservation_status_date,name,email,phone-number,credit_card,TotalSpend,Tier,NextTier
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,...,0,Check-Out,2015-07-01,Ernest Barnes,Ernest.Barnes31@outlook.com,669-792-1661,************4322,0.0,Copper,Copper
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,...,0,Check-Out,2015-07-01,Andrea Baker,Andrea_Baker94@aol.com,858-637-6955,************9157,0.0,Copper,Copper
2,Resort Hotel,0,7,2015,July,27,1,0,1,1,...,0,Check-Out,2015-07-02,Rebecca Parker,Rebecca_Parker@comcast.net,652-885-2745,************3734,75.0,Copper,Copper
3,Resort Hotel,0,13,2015,July,27,1,0,1,1,...,0,Check-Out,2015-07-02,Laura Murray,Laura_M@gmail.com,364-656-8427,************5677,75.0,Copper,Copper
4,Resort Hotel,0,14,2015,July,27,1,0,2,2,...,1,Check-Out,2015-07-03,Linda Hines,LHines@verizon.com,713-226-5883,************5498,196.0,Copper,Copper


In [7]:
features = [
    "stays_in_week_nights",
    "stays_in_weekend_nights",
    "adr",
    "previous_cancellations",
    "Tier"
]

X = df[features]
y = df["NextTier"]

In [8]:
tier_encoder = LabelEncoder()

X["Tier"] = tier_encoder.fit_transform(X["Tier"])
y = tier_encoder.fit_transform(y)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["Tier"] = tier_encoder.fit_transform(X["Tier"])


In [9]:
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42
)

In [10]:
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42
)

model.fit(X_train, y_train)

print("Model training completed")

Model training completed


In [11]:
y_pred = model.predict(X_test)

print("Model Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

Model Accuracy: 0.9991192014092778

Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12014
           1       0.99      1.00      0.99      1666
           2       1.00      1.00      1.00      1616
           3       1.00      1.00      1.00      8546

    accuracy                           1.00     23842
   macro avg       1.00      1.00      1.00     23842
weighted avg       1.00      1.00      1.00     23842



In [13]:
pickle.dump(model, open("tier_model.pkl", "wb"))
pickle.dump(tier_encoder, open("tier_encoder.pkl", "wb"))

print("✅ Model and encoder saved")

✅ Model and encoder saved


In [14]:
def validate_input(week_nights, weekend_nights, adr, cancellations):
    if not (0 <= week_nights <= 30):
        return False, "Week nights out of range"
    if not (0 <= weekend_nights <= 10):
        return False, "Weekend nights out of range"
    if not (20 <= adr <= 500):
        return False, "ADR out of range"
    if not (0 <= cancellations <= 20):
        return False, "Cancellations out of range"
    return True, "Valid input"

In [15]:
def predict_next_tier(
    week_nights,
    weekend_nights,
    adr,
    cancellations,
    current_tier
):
    valid, msg = validate_input(
        week_nights, weekend_nights, adr, cancellations
    )

    if not valid:
        return f"Invalid Input: {msg}"

    tier_encoded = tier_encoder.transform([current_tier])[0]

    input_data = np.array([[
        week_nights,
        weekend_nights,
        adr,
        cancellations,
        tier_encoded
    ]])

    prediction = model.predict(input_data)
    return tier_encoder.inverse_transform(prediction)[0]

In [16]:
result = predict_next_tier(
    week_nights=3,
    weekend_nights=2,
    adr=200,
    cancellations=0,
    current_tier="Silver"
)

print("Predicted Next Tier:", result)


Predicted Next Tier: Gold


