In [18]:
import numpy as np
import pandas as pd
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from datetime import datetime, timedelta
import random

In [28]:
# Initialize dishes
dishes = ["Burger", "Pizza", "Sandwich", "Pasta"]


In [29]:
def simulate_hourly_orders(hour, dish_index):
    """
    Simulate actual data for a dish based on day, hour, and randomness.
    """
    base_demand = {
        0: 50,  # Burger base demand
        1: 40,  # Pizza base demand
        2: 30,  # Sandwich base demand
        3: 20,  # Pasta base demand
    }
    # Adjust demand by hour of day (higher during lunch/dinner hours)
    if 11 <= hour <= 14 or 18 <= hour <= 21:
        return int(base_demand[dish_index] * random.uniform(0.8, 1.2))
    else:
        return int(base_demand[dish_index] * random.uniform(0.4, 0.6))

In [30]:
# Generate actual data with Date
def simulate_actual_data(start_date, num_days, dishes):
    """
    Generate actual data with 'Date', 'Hour', 'Dish', and 'Amount Ordered'.
    """
    data = []
    for day_offset in range(num_days):
        current_date = start_date + timedelta(days=day_offset)
        day_of_week = current_date.weekday()  # 0 = Monday, ..., 6 = Sunday
        for hour in range(24):
            for dish_index, dish in enumerate(dishes):
                # Simulate amount ordered
                amount = simulate_hourly_orders(hour, dish_index)
                data.append([current_date, day_of_week, hour, dish_index, amount])

    return pd.DataFrame(data, columns=["Date", "Day", "Hour", "Dish", "Amount Ordered"])

In [31]:

# Continuous training setup (SGDRegressor)
# Preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ("day_hour", StandardScaler(), ["Day", "Hour"]),
        ("dish", OneHotEncoder(), ["Dish"]),
    ]
)

# Create pipeline
sgd_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("regressor", SGDRegressor(max_iter=1000, tol=1e-3, learning_rate="adaptive")),
])

In [32]:
def continuous_training_pipeline(initial_data, pipeline, prediction_horizon=7, max_days=15):
    """
    Perform continuous training and prediction with separate prediction dataset.
    Stops after a specified number of simulation days (max_days).
    """
    # Initialize training data (first 7 days)
    X_train = initial_data[["Day", "Hour", "Dish"]]
    y_train = initial_data["Amount Ordered"]

    print(X_train.shape)
    
    # Train the model on the initial 7 days
    pipeline.fit(X_train, y_train)
    print("Initial training complete.")
    
    # Create a prediction dataset for the next 7 days (for day of week, hour, and dish combinations)
    prediction_data = []
    for day_of_week in range(7):  # 7 days in the week (Day of the Week)
        for hour in range(24):  # 24 hours in a day
            for dish_index in range(len(dishes)):  # Assuming you have a list of dishes
                prediction_data.append([day_of_week, hour, dish_index])
    prediction_data = pd.DataFrame(prediction_data, columns=["Day", "Hour", "Dish"])
    prediction_data["Predicted Amount"] = pipeline.predict(prediction_data[["Day", "Hour", "Dish"]])
    
    print("\nInitial Predictions:")
    print(prediction_data)
    
    # Store actual data for each day (simulating real-time data collection)
    common_actual_data = initial_data.copy()
    
    # Initialize current_date to the first day of the next 7-day prediction period
    current_date = datetime(2025, 1, 8)  # Example: starting from 1/8/2025, after 1/1 to 1/7 for initial training
    
    # Simulate continuous training for a specified number of days (max_days)
    day_counter = 0
    while day_counter < max_days:
        # Simulate receiving actual data for the next day
        actual_date = current_date + timedelta(days=1)
        
        # Simulate actual data (this would come from real-time data)
        actual_data = simulate_actual_data(actual_date, 1, dishes)
        
        # Add the actual data of the current day to the common_actual_data
        common_actual_data = pd.concat([common_actual_data, actual_data], ignore_index=True)
        
        # Compare actual data with predictions for the current day only
        # We no longer care about the date, only the Day, Hour, Dish combinations
        actual_for_current_day = actual_data
        
        # Merge actual data and predicted data for comparison (only for the current day)
        comparison = pd.merge(actual_for_current_day, prediction_data, on=["Day", "Hour", "Dish"], suffixes=("_Actual", "_Predicted"))
        print(f"\nComparison for {actual_date}:")
        print(comparison)
        
        # Update the model with the actual data for the current day
        X_actual = actual_for_current_day[["Day", "Hour", "Dish"]]
        y_actual = actual_for_current_day["Amount Ordered"]
        
        # Preprocess the input data before updating the model
        X_actual_preprocessed = pipeline.named_steps["preprocessor"].transform(X_actual)
        pipeline.named_steps["regressor"].partial_fit(X_actual_preprocessed, y_actual)
        print(f"Model updated with actual data for {actual_date}.")
        
        # Update the predictions for the current day (based on the Day, Hour, Dish combinations)
        for _, row in actual_for_current_day.iterrows():
            day_of_week = row["Day"]
            hour = row["Hour"]
            dish = row["Dish"]
            
            # Convert this into a DataFrame with column names for prediction
            input_data = pd.DataFrame([[day_of_week, hour, dish]], columns=["Day", "Hour", "Dish"])
            
            # Predict the amount for this specific day, hour, and dish
            predicted_amount = pipeline.predict(input_data)[0]
            
            # Update the corresponding row in the prediction_data DataFrame
            prediction_data.loc[(prediction_data["Day"] == day_of_week) & 
                                 (prediction_data["Hour"] == hour) & 
                                 (prediction_data["Dish"] == dish), "Predicted Amount"] = predicted_amount
        
        print(f"Predictions updated for {actual_date}.")
        
        # Move to the next day
        current_date = actual_date  # Increment current_date by 1 day
        day_counter += 1

    print(f"Training stopped after {max_days} days.")
    return prediction_data


In [33]:
# Generate 14 days of data (7 for initial training + simulate next days)
start_date = datetime(2025, 1, 1)
all_data = simulate_actual_data(start_date, 7, dishes)
all_data.head()

Unnamed: 0,Date,Day,Hour,Dish,Amount Ordered
0,2025-01-01,2,0,0,21
1,2025-01-01,2,0,1,17
2,2025-01-01,2,0,2,17
3,2025-01-01,2,0,3,10
4,2025-01-01,2,1,0,29


In [34]:

# Split into initial training data (7 days)
initial_data = all_data[all_data["Date"] < start_date + timedelta(days=7)]


In [35]:
pred = continuous_training_pipeline(all_data, sgd_pipeline, max_days=15)


(672, 3)
Initial training complete.

Initial Predictions:
     Day  Hour  Dish  Predicted Amount
0      0     0     0         26.290739
1      0     0     1         19.365719
2      0     0     2         12.885727
3      0     0     3          6.232287
4      0     1     0         26.830184
..   ...   ...   ...               ...
667    6    22     3         18.887851
668    6    23     0         39.485748
669    6    23     1         32.560728
670    6    23     2         26.080735
671    6    23     3         19.427295

[672 rows x 4 columns]

Comparison for 2025-01-09 00:00:00:
         Date  Day  Hour  Dish  Amount Ordered  Predicted Amount
0  2025-01-09    3     0     0              24         26.684635
1  2025-01-09    3     0     1              18         19.759615
2  2025-01-09    3     0     2              17         13.279622
3  2025-01-09    3     0     3              10          6.626183
4  2025-01-09    3     1     0              24         27.224079
..        ...  ...   ..

In [27]:
pred

Unnamed: 0,Day,Hour,Dish,Predicted Amount
0,0,0,0,25.750795
1,0,0,1,19.489869
2,0,0,2,12.243768
3,0,0,3,6.123714
4,0,1,0,26.265319
...,...,...,...,...
667,6,22,3,19.093298
668,6,23,0,39.391312
669,6,23,1,33.043057
670,6,23,2,25.882377
