In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from datetime import datetime, timedelta
import random

In [3]:
# Initialize dishes
dishes = ["Burger", "Pizza", "Sandwich", "Pasta"]


In [11]:
def simulate_hourly_orders(hour, dish_index):
    """
    Simulate actual data for a dish based on day, hour, and randomness.
    """
    base_demand = {
        0: 50,  # Burger base demand
        1: 40,  # Pizza base demand
        2: 30,  # Sandwich base demand
        3: 20,  # Pasta base demand
    }
    # Adjust demand by hour of day (higher during lunch/dinner hours)
    if 11 <= hour <= 14 or 18 <= hour <= 21:
        return int(base_demand[dish_index] * random.uniform(0.8, 1.2))
    else:
        return int(base_demand[dish_index] * random.uniform(0.4, 0.6))

In [10]:
# Generate actual data with Date
def simulate_actual_data(start_date, num_days, dishes):
    """
    Generate actual data with 'Date', 'Hour', 'Dish', and 'Amount Ordered'.
    """
    data = []
    for day_offset in range(num_days):
        current_date = start_date + timedelta(days=day_offset)
        day_of_week = current_date.weekday()  # 0 = Monday, ..., 6 = Sunday
        for hour in range(24):
            for dish_index, dish in enumerate(dishes):
                # Simulate amount ordered
                amount = simulate_hourly_orders(hour, dish_index)
                data.append([current_date, day_of_week, hour, dish_index, amount])
    return pd.DataFrame(data, columns=["Date", "Day", "Hour", "Dish", "Amount Ordered"])

In [12]:
# Generate actual data for 7 days
start_date = datetime(2025, 1, 1)  # Start on January 1, 2025
num_days = 7
actual_data = simulate_actual_data(start_date, num_days, dishes)

In [14]:
actual_data.tail(100)

Unnamed: 0,Date,Day,Hour,Dish,Amount Ordered
572,2025-01-06,0,23,0,21
573,2025-01-06,0,23,1,19
574,2025-01-06,0,23,2,16
575,2025-01-06,0,23,3,8
576,2025-01-07,1,0,0,23
...,...,...,...,...,...
667,2025-01-07,1,22,3,10
668,2025-01-07,1,23,0,25
669,2025-01-07,1,23,1,17
670,2025-01-07,1,23,2,12


In [15]:
# Preprocess data for the model
X = actual_data[["Day", "Hour", "Dish"]]
y = actual_data["Amount Ordered"]

# Split into training (first 6 days) and test (last day for predictions)
train_data = actual_data[actual_data["Date"] < start_date + timedelta(days=6)]
test_data = actual_data[actual_data["Date"] >= start_date + timedelta(days=6)]

X_train = train_data[["Day", "Hour", "Dish"]]
y_train = train_data["Amount Ordered"]

X_test = test_data[["Day", "Hour", "Dish"]]
y_test = test_data["Amount Ordered"]

In [25]:

# Continuous training setup (SGDRegressor)
# Preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ("day_hour", StandardScaler(), ["Day", "Hour"]),
        ("dish", OneHotEncoder(), ["Dish"]),
    ]
)

# Create pipeline
sgd_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("regressor", SGDRegressor(max_iter=1000, tol=1e-3, learning_rate="adaptive")),
])

In [31]:
# Continuous learning function
def continuous_training_pipeline(initial_data, pipeline, prediction_horizon=7):
    """
    Perform continuous training and prediction with separate prediction dataset.
    """
    # Initialize training data (first 7 days)
    X_train = initial_data[["Day", "Hour", "Dish"]]
    y_train = initial_data["Amount Ordered"]
    
    # Train the model on the initial 7 days
    pipeline.fit(X_train, y_train)
    print("Initial training complete.")
    
    # Create a prediction dataset for the next 7 days
    current_date = initial_data["Date"].max()
    prediction_data = []
    for day_offset in range(1, prediction_horizon + 1):
        prediction_date = current_date + timedelta(days=day_offset)
        day_of_week = prediction_date.weekday()
        for hour in range(24):
            for dish_index, dish in enumerate(dishes):
                prediction_data.append([prediction_date, day_of_week, hour, dish_index])
    prediction_data = pd.DataFrame(prediction_data, columns=["Date", "Day", "Hour", "Dish"])
    prediction_data["Predicted Amount"] = pipeline.predict(prediction_data[["Day", "Hour", "Dish"]])
    
    print("\nInitial Predictions:")
    print(prediction_data)
    
    # Simulate continuous training
    while True:
        # Simulate receiving actual data for the next day
        actual_date = current_date + timedelta(days=1)
        actual_data = simulate_actual_data(actual_date, 1, dishes)
        
        # Compare actual data with predictions
        comparison = pd.merge(actual_data, prediction_data, on=["Day", "Hour", "Dish"], suffixes=("_Actual", "_Predicted"))
        print(f"\nComparison for {actual_date}:")
        print(comparison)
        
        # Update the model with the actual data
        X_actual = actual_data[["Day", "Hour", "Dish"]]
        y_actual = actual_data["Amount Ordered"]
        pipeline.named_steps["regressor"].partial_fit(X_actual, y_actual)
        print(f"Model updated with actual data for {actual_date}.")
        
        # Update predictions for the next 7 days
        prediction_data = []
        for day_offset in range(1, prediction_horizon + 1):
            prediction_date = actual_date + timedelta(days=day_offset)
            day_of_week = prediction_date.weekday()
            for hour in range(24):
                for dish_index, dish in enumerate(dishes):
                    prediction_data.append([prediction_date, day_of_week, hour, dish_index])
        prediction_data = pd.DataFrame(prediction_data, columns=["Date", "Day", "Hour", "Dish"])
        prediction_data["Predicted Amount"] = pipeline.predict(prediction_data[["Day", "Hour", "Dish"]])
        print(f"\nUpdated Predictions:")
        print(prediction_data)
        
        # Move to the next day
        current_date = actual_date

In [32]:
# Generate 14 days of data (7 for initial training + simulate next days)
start_date = datetime(2025, 1, 1)
all_data = simulate_actual_data(start_date, 14, dishes)

In [33]:
# Split into initial training data (7 days)
initial_data = all_data[all_data["Date"] < start_date + timedelta(days=7)]


In [34]:
# Run the continuous training pipeline
continuous_training_pipeline(initial_data, sgd_pipeline)

Initial training complete.

Predictions for 2025-01-08 00:00:00 to 2025-01-14 00:00:00:
     Day  Hour  Dish  Predicted Amount
0      2     0     0         26.603633
1      2     0     1         19.670511
2      2     0     2         13.047567
3      2     0     3          6.528882
4      2     1     0         27.157738
..   ...   ...   ...               ...
667    1    22     3         18.683897
668    1    23     0         39.312753
669    1    23     1         32.379632
670    1    23     2         25.756688
671    1    23     3         19.238003

[672 rows x 4 columns]




ValueError: X has 3 features, but SGDRegressor is expecting 6 features as input.

In [22]:
# Predict for the next 7 days
future_days = 7
future_data = []
for future_day_offset in range(7):
    future_date = start_date + timedelta(days=6 + future_day_offset)
    future_day_of_week = future_date.weekday()
    for hour in range(24):
        for dish_index, dish in enumerate(dishes):
            future_data.append([future_day_of_week, hour, dish_index])

future_df = pd.DataFrame(future_data, columns=["Day", "Hour", "Dish"])
future_predictions = sgd_pipeline.predict(future_df)

In [23]:
# Add predictions to the future dataset
future_df["Predicted Amount"] = future_predictions




Predictions for Next 7 Days:
    Day  Hour  Dish  Predicted Amount
0    1     0     0         26.364147
1    1     0     1         19.422799
2    1     0     2         13.194011
3    1     0     3          6.187893
4    1     1     0         26.936111


In [24]:
# Display predictions for the next 7 days
print("\nPredictions for Next 7 Days:\n", future_df.head(20))


Predictions for Next 7 Days:
     Day  Hour  Dish  Predicted Amount
0     1     0     0         26.364147
1     1     0     1         19.422799
2     1     0     2         13.194011
3     1     0     3          6.187893
4     1     1     0         26.936111
5     1     1     1         19.994763
6     1     1     2         13.765976
7     1     1     3          6.759857
8     1     2     0         27.508075
9     1     2     1         20.566727
10    1     2     2         14.337940
11    1     2     3          7.331822
12    1     3     0         28.080039
13    1     3     1         21.138691
14    1     3     2         14.909904
15    1     3     3          7.903786
16    1     4     0         28.652003
17    1     4     1         21.710655
18    1     4     2         15.481868
19    1     4     3          8.475750
