In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import os



In [2]:
def load_data():
    df = pd.read_csv("data/reservations.csv", parse_dates=["booking_date", "checkin_date"])
    df["days_until_checkin"] = (df["checkin_date"] - df["booking_date"]).dt.days
    df["room_type_encoded"] = df["room_type"].astype("category").cat.codes
    return df



In [7]:
def train_model(df, target_column):
    features = df[["lead_time", "num_guests", "days_until_checkin", "room_type_encoded"]]
    labels = df[target_column]
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(f"✅ {target_column.upper()} Prediction Report")
    print(classification_report(y_test, y_pred))



In [8]:
def main():
    df = load_data()
    train_model(df, "canceled")
    train_model(df, "no_show")

if __name__ == "__main__":
    main()

✅ CANCELED Prediction Report
              precision    recall  f1-score   support

           0       0.85      1.00      0.92     13831
           1       0.00      0.00      0.00      2411

    accuracy                           0.85     16242
   macro avg       0.43      0.50      0.46     16242
weighted avg       0.73      0.85      0.78     16242

✅ NO_SHOW Prediction Report
              precision    recall  f1-score   support

           0       0.96      1.00      0.98     15524
           1       0.00      0.00      0.00       718

    accuracy                           0.96     16242
   macro avg       0.48      0.50      0.49     16242
weighted avg       0.91      0.96      0.93     16242

