In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, accuracy_score

# Load the dummy data
dummy_df = pd.read_csv('dummy_reservations.csv')

# Data Preparation
def prepare_data(df):
    # Convert date and time to datetime
    df['date'] = pd.to_datetime(df['date'])
    df['time'] = pd.to_datetime(df['time'], format='%H:%M:%S').dt.time
    
    # Extract features from date and time
    df['day'] = df['date'].dt.day
    df['month'] = df['date'].dt.month
    df['hour'] = pd.to_datetime(df['time'], format='%H:%M:%S').dt.hour
    
    # Encode categorical variables
    le_unit_type = LabelEncoder()
    df['unit_type'] = le_unit_type.fit_transform(df['unit_type'])
    
    le_special_req = LabelEncoder()
    df['special_req'] = le_special_req.fit_transform(df['special_req'])
    
    le_status = LabelEncoder()
    df['status'] = le_status.fit_transform(df['status'])
    
    le_option1 = LabelEncoder()
    df['option1'] = le_option1.fit_transform(df['option1'])
    
    return df, le_unit_type, le_special_req, le_status, le_option1

# Prepare data
dummy_df, le_unit_type, le_special_req, le_status, le_option1 = prepare_data(dummy_df)

# Split data into features and target variables
X = dummy_df[['unit_id', 'unit_type', 'day', 'month', 'hour', 'host_user', 'option1', 'prop_id', 'slot_length', 'slot_minutes', 'special_req', 'status', 'time_slots', 'user_id']]
y_head_count = dummy_df['head_count']
y_unit_id = dummy_df['unit_id']

# Split into training and testing sets
X_train, X_test, y_head_count_train, y_head_count_test = train_test_split(X, y_head_count, test_size=0.2, random_state=42)
X_train_cls, X_test_cls, y_unit_id_train, y_unit_id_test = train_test_split(X, y_unit_id, test_size=0.2, random_state=42)

# Train Regression Model for Head Count Prediction
regressor = RandomForestRegressor(n_estimators=100, random_state=42)
regressor.fit(X_train, y_head_count_train)

# Train Classification Model for Most Demanded Table Prediction
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train_cls, y_unit_id_train)

# Predict Total Head Count
y_head_count_pred = regressor.predict(X_test)
mse = mean_squared_error(y_head_count_test, y_head_count_pred)
print(f"Mean Squared Error for Head Count Prediction: {mse}")

# Predict Most Demanded Table
y_unit_id_pred = classifier.predict(X_test_cls)
accuracy = accuracy_score(y_unit_id_test, y_unit_id_pred)
print(f"Accuracy for Most Demanded Table Prediction: {accuracy}")

# Function to predict total head count and most demanded table at a given date and time
def predict_head_count_and_demanded_table(df, date, time):
    # Prepare input data
    date = pd.to_datetime(date)
    time = pd.to_datetime(time, format='%H:%M:%S').time()
    
    day = date.day
    month = date.month
    hour = pd.to_datetime(time, format='%H:%M:%S').hour
    
    # Create a single row dataframe for prediction
    input_data = {
        'unit_id': [1],  # Dummy value, not used for prediction
        'unit_type': [0],  # Dummy value, replace with appropriate encoded value
        'day': [day],
        'month': [month],
        'hour': [hour],
        'host_user': [100000],  # Dummy value, not used for prediction
        'option1': [0],  # Dummy value, replace with appropriate encoded value
        'prop_id': [1],  # Dummy value, not used for prediction
        'slot_length': [10],  # Dummy value, replace with appropriate value
        'slot_minutes': [30],  # Dummy value, replace with appropriate value
        'special_req': [0],  # Dummy value, replace with appropriate encoded value
        'status': [0],  # Dummy value, replace with appropriate encoded value
        'time_slots': [1],  # Dummy value, not used for prediction
        'user_id': [1000]  # Dummy value, not used for prediction
    }
    
    input_df = pd.DataFrame(input_data)
    
    # Predict total head count
    total_head_count = regressor.predict(input_df)[0]
    
    # Predict most demanded table
    most_demanded_table = classifier.predict(input_df)[0]
    
    return total_head_count, most_demanded_table

# Example usage
date = '2024-04-19'
time = '21:00:00'
total_head_count, most_demanded_table = predict_head_count_and_demanded_table(dummy_df, date, time)

print(f"Total Head Count on {date} at {time}: {total_head_count}")
print(f"Most Demanded Table on {date} at {time}: {most_demanded_table}")


Mean Squared Error for Head Count Prediction: 1786.9252399999998
Accuracy for Most Demanded Table Prediction: 0.12
Total Head Count on 2024-04-19 at 21:00:00: 81.91
Most Demanded Table on 2024-04-19 at 21:00:00: 1
