In [11]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [12]:
# Price charged per day of stay for each room category.
ROOM_PRICES = {
    "Standard": 2000,
    "Deluxe": 4000,
    "Executive": 8000,
    "Suite": 12000,
}

# Room categories, in the insertion order of ROOM_PRICES, so the two
# definitions cannot drift apart.
ROOM_TYPES = list(ROOM_PRICES)

In [13]:
def generate_random_date(start_date, end_date):
    """
    Return a random date between start_date and end_date, both inclusive.

    The result is start_date shifted forward by a whole number of days
    drawn from NumPy's global random generator.

    Parameters
    ----------
    start_date : datetime
        Earliest value that can be returned.
    end_date : datetime
        Upper bound of the range; must not be earlier than start_date.

    Returns
    -------
    datetime
        start_date plus k days, where 0 <= k <= (end_date - start_date).days.

    Raises
    ------
    ValueError
        If end_date is earlier than start_date.
    """
    delta_days = (end_date - start_date).days
    if delta_days < 0:
        raise ValueError("end_date must not be earlier than start_date")

    # np.random.randint excludes its upper bound, so use delta_days + 1 to make
    # end_date reachable and to keep a zero-length range (start == end) valid.
    # int() guarantees timedelta receives a built-in integer.
    random_days = int(np.random.randint(0, delta_days + 1))
    return start_date + timedelta(days=random_days)


In [14]:
# Date range handed to generate_random_date when drawing check-in dates.
START_DATE = datetime(year=2024, month=1, day=1)
END_DATE = datetime(year=2025, month=12, day=31)

In [15]:
# Number of synthetic booking rows to generate.
NUM_RECORDS = 10_000

In [16]:
# Seed NumPy's global generator so that re-running this cell (or the whole
# notebook from a fresh kernel) always produces the same synthetic bookings.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# One list per booking; the field order must match the column labels used
# when the DataFrame is built from these rows.
records = []

for booking_id in range(1, NUM_RECORDS + 1):

    room_type = np.random.choice(ROOM_TYPES)
    room_price = ROOM_PRICES[room_type]  # looked up once, reused below

    check_in = generate_random_date(START_DATE, END_DATE)
    stay_duration = np.random.randint(1, 8)  # upper bound is exclusive: 1 to 7 days
    check_out = check_in + timedelta(days=stay_duration)

    room_service_cost = np.random.randint(0, 3000)  # upper bound is exclusive: 0 to 2999

    # Bill = per-day room price for every day of the stay, plus room service.
    total_bill = room_price * stay_duration + room_service_cost

    records.append([
        booking_id,
        room_type,
        check_in,
        check_out,
        stay_duration,
        room_price,
        room_service_cost,
        total_bill
    ])


In [17]:
# Column labels, listed in the same order as the fields of each row in `records`.
columns = [
    "Booking_id", "Room_type",
    "Check_in_date", "Check_out_date", "Stay_duration",
    "Room_price", "Room_service_cost", "Total_bill",
]

# Build the frame in a single call from the accumulated rows.
df = pd.DataFrame(data=records, columns=columns)


In [18]:
# Preview the first five generated bookings.
df.head(n=5)

Unnamed: 0,Booking_id,Room_type,Check_in_date,Check_out_date,Stay_duration,Room_price,Room_service_cost,Total_bill
0,1,Standard,2025-07-23,2025-07-26,3,2000,1928,7928
1,2,Suite,2024-06-30,2024-07-03,3,12000,1692,37692
2,3,Suite,2025-04-30,2025-05-02,2,12000,43,24043
3,4,Standard,2024-07-27,2024-08-03,7,2000,1358,15358
4,5,Standard,2024-06-14,2024-06-18,4,2000,1413,9413


In [19]:
# Summarise the frame: row count, per-column non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Booking_id         10000 non-null  int64         
 1   Room_type          10000 non-null  object        
 2   Check_in_date      10000 non-null  datetime64[ns]
 3   Check_out_date     10000 non-null  datetime64[ns]
 4   Stay_duration      10000 non-null  int64         
 5   Room_price         10000 non-null  int64         
 6   Room_service_cost  10000 non-null  int64         
 7   Total_bill         10000 non-null  int64         
dtypes: datetime64[ns](2), int64(5), object(1)
memory usage: 625.1+ KB


In [20]:
# Write the generated bookings to disk; index=False leaves the row index out of the file.
df.to_csv(path_or_buf="hotel_bookings.csv", index=False)