### Step 1: Load the Data
Load the dummy data from the CSV file.

In [2]:
import pandas as pd

# Read the synthetic reservation records from disk and preview the first five rows
dummy_df = pd.read_csv('dummy_reservations.csv')
print(dummy_df.head(n=5))


   reservation_id  unit_id unit_type        date  head_count  host_user  \
0               1       67     type2  2023-07-01         128     930700   
1               2       58     type1  2023-08-11          80     516551   
2               3       38     type1  2023-05-14          42     465393   
3               4       19     type2  2023-01-15          49     801761   
4               5       70     type3  2023-05-13         107     216315   

  option1  option2  prop_id   reserve_code  slot_length  slot_minutes  \
0    Seat      NaN       96  202301-1282-1           10            30   
1    Seat      NaN       34  202301-1282-2           15            60   
2     NaN      NaN       34  202301-1282-3           15            60   
3    Seat      NaN       97  202301-1282-4           15            60   
4     NaN      NaN        8  202301-1282-5           15            60   

  special_req     status      time  time_slots  user_id  
0   Confirmed    Pending  14:00:00           3     8

#### Columns description:
- reservation_id: Unique identifier for the reservation
- unit_id: Unique identifier for the table
- unit_type: Type of the table
- date: Date of the reservation
- head_count: Number of people in the reservation
- host_user: Unique identifier for the host
- option1: Option 1
- option2: Option 2
- prop_id: Unique identifier for the property
- reserve_code: Reservation code
- slot_length: Slot length
- slot_minutes: Slot minutes
- special_req: Special request
- status: Status of the reservation
- time: Time of the reservation
- time_slots: Time slots
- user_id: Unique identifier for the user

In [5]:
# (rows, columns) of the reservations frame.
# NOTE(review): the saved output reports 20 columns, while the raw CSV preview shows 17;
# the extra three are day/month/hour, so this cell was last executed after
# preprocess_data had already run (In [5] vs. In [3]).
dummy_df.shape

(500, 20)

In [7]:
# Check the data type of every column.
# NOTE(review): the saved output lists label-encoded int32 columns and the derived
# day/month/hour fields, i.e. it was captured after the preprocessing cell ran;
# a fresh top-to-bottom run will show different output here.
dummy_df.dtypes

reservation_id             int64
unit_id                    int64
unit_type                  int32
date              datetime64[ns]
head_count                 int64
host_user                  int64
option1                    int32
option2                  float64
prop_id                    int64
reserve_code              object
slot_length                int64
slot_minutes               int64
special_req                int32
status                     int32
time                      object
time_slots                 int64
user_id                    int64
day                        int32
month                      int32
hour                       int32
dtype: object

In [9]:
# Count missing values per column.
# NOTE(review): "option1" shows 0 here although the raw preview contains NaN in it;
# the saved output was captured after label encoding had mapped those NaN to an integer code.
dummy_df.isna().sum()

reservation_id      0
unit_id             0
unit_type           0
date                0
head_count          0
host_user           0
option1             0
option2           500
prop_id             0
reserve_code        0
slot_length         0
slot_minutes        0
special_req         0
status              0
time                0
time_slots          0
user_id             0
day                 0
month               0
hour                0
dtype: int64

In [None]:
# Deleting unnecessary columns
# Dropping columns like "option1"



### Step 2: Preprocess the Data
This step includes converting date and time columns to the appropriate formats, extracting new features from them, and encoding categorical variables.

In [3]:
from sklearn.preprocessing import LabelEncoder

def preprocess_data(df: pd.DataFrame):
    """Derive date/time features and label-encode the categorical columns.

    The work is done on a copy, so the frame passed in is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Reservation records. Must contain the columns ``date``, ``time``
        (parsed with the ``%H:%M:%S`` format), ``unit_type``, ``special_req``,
        ``status`` and ``option1``.

    Returns
    -------
    tuple
        ``(df, le_unit_type, le_special_req, le_status, le_option1)``: the
        processed copy followed by the fitted ``LabelEncoder`` for each encoded
        column, in that order. The copy gains ``day``, ``month`` and ``hour``
        columns; ``date`` becomes datetime and ``time`` holds time-of-day objects.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    ValueError
        If a ``time`` value does not match ``%H:%M:%S``.
    """
    # Work on a copy so the caller's raw frame is never mutated.
    df = df.copy()

    df['date'] = pd.to_datetime(df['date'])

    # Parse the clock time exactly once; both the time-of-day column and the
    # hour feature come from this single parsed series rather than from
    # re-parsing the already-converted datetime.time objects.
    parsed_time = pd.to_datetime(df['time'], format='%H:%M:%S')
    df['time'] = parsed_time.dt.time

    # Calendar / clock features, appended in this order.
    df['day'] = df['date'].dt.day
    df['month'] = df['date'].dt.month
    df['hour'] = parsed_time.dt.hour

    # One independent encoder per categorical column; all four are returned.
    encoders = {}
    for column in ('unit_type', 'special_req', 'status', 'option1'):
        encoder = LabelEncoder()
        df[column] = encoder.fit_transform(df[column])
        encoders[column] = encoder

    return (
        df,
        encoders['unit_type'],
        encoders['special_req'],
        encoders['status'],
        encoders['option1'],
    )

# Run the preprocessing step, keeping the fitted encoder of each categorical column
(
    dummy_df,
    le_unit_type,
    le_special_req,
    le_status,
    le_option1,
) = preprocess_data(dummy_df)
print(dummy_df.head(n=5))


   reservation_id  unit_id  unit_type       date  head_count  host_user  \
0               1       67          1 2023-07-01         128     930700   
1               2       58          0 2023-08-11          80     516551   
2               3       38          0 2023-05-14          42     465393   
3               4       19          1 2023-01-15          49     801761   
4               5       70          2 2023-05-13         107     216315   

   option1  option2  prop_id   reserve_code  slot_length  slot_minutes  \
0        0      NaN       96  202301-1282-1           10            30   
1        0      NaN       34  202301-1282-2           15            60   
2        1      NaN       34  202301-1282-3           15            60   
3        0      NaN       97  202301-1282-4           15            60   
4        1      NaN        8  202301-1282-5           15            60   

   special_req  status      time  time_slots  user_id  day  month  hour  
0            1       2  14:00: