In [156]:
import pandas as pd
import numpy as np

In [157]:
sales = pd.read_csv(r"F:\GUVI\Project\Dominos - Predictive Purchase Order System\Cleaned_DATA\Pizza_sales.csv", index_col=0)
ingredients = pd.read_csv(r"F:\GUVI\Project\Dominos - Predictive Purchase Order System\Cleaned_DATA\ingredients_data.csv", index_col=0)

In [158]:
sales.head()

Unnamed: 0,pizza_id,order_id,pizza_name_id,quantity,order_date,order_time,unit_price,total_price,pizza_size,pizza_category,pizza_ingredients,pizza_name
0,1,1,hawaiian_m,1,1/1/2015,11:38:36,13.25,13.25,M,Classic,"Sliced Ham, Pineapple, Mozzarella Cheese",The Hawaiian Pizza
1,2,2,classic_dlx_m,1,1/1/2015,11:57:40,16.0,16.0,M,Classic,"Pepperoni, Mushrooms, Red Onions, Red Peppers,...",The Classic Deluxe Pizza
2,3,2,five_cheese_l,1,1/1/2015,11:57:40,18.5,18.5,L,Veggie,"Mozzarella Cheese, Provolone Cheese, Smoked Go...",The Five Cheese Pizza
3,4,2,ital_supr_l,1,1/1/2015,11:57:40,20.75,20.75,L,Supreme,"Calabrese Salami, Capocollo, Tomatoes, Red Oni...",The Italian Supreme Pizza
4,5,2,mexicana_m,1,1/1/2015,11:57:40,16.0,16.0,M,Veggie,"Tomatoes, Red Peppers, Jalapeno Peppers, Red O...",The Mexicana Pizza


In [159]:
ingredients.head()

Unnamed: 0,pizza_name_id,pizza_name,pizza_ingredients,Items_Qty_In_Grams
0,bbq_ckn_l,The Barbecue Chicken Pizza,Barbecued Chicken,40.0
1,bbq_ckn_l,The Barbecue Chicken Pizza,Red Peppers,15.0
2,bbq_ckn_l,The Barbecue Chicken Pizza,Green Peppers,20.0
3,bbq_ckn_l,The Barbecue Chicken Pizza,Tomatoes,30.0
4,bbq_ckn_l,The Barbecue Chicken Pizza,Red Onions,60.0


In [160]:
# Convert order_date to datetime
sales['order_date'] = pd.to_datetime(sales['order_date'], format='mixed')

In [161]:
sales.info()

<class 'pandas.core.frame.DataFrame'>
Index: 48620 entries, 0 to 48619
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   pizza_id           48620 non-null  int64         
 1   order_id           48620 non-null  int64         
 2   pizza_name_id      48620 non-null  object        
 3   quantity           48620 non-null  int64         
 4   order_date         48620 non-null  datetime64[ns]
 5   order_time         48620 non-null  object        
 6   unit_price         48620 non-null  float64       
 7   total_price        48620 non-null  float64       
 8   pizza_size         48620 non-null  object        
 9   pizza_category     48620 non-null  object        
 10  pizza_ingredients  48620 non-null  object        
 11  pizza_name         48620 non-null  object        
dtypes: datetime64[ns](1), float64(2), int64(3), object(6)
memory usage: 4.8+ MB


In [162]:
# Feature Engineering: Create new columns
sales['day_of_week'] = sales['order_date'].dt.day_name()
sales['month'] = sales['order_date'].dt.month
sales['week_of_year'] = sales['order_date'].dt.isocalendar().week

In [163]:
sales.head()

Unnamed: 0,pizza_id,order_id,pizza_name_id,quantity,order_date,order_time,unit_price,total_price,pizza_size,pizza_category,pizza_ingredients,pizza_name,day_of_week,month,week_of_year
0,1,1,hawaiian_m,1,2015-01-01,11:38:36,13.25,13.25,M,Classic,"Sliced Ham, Pineapple, Mozzarella Cheese",The Hawaiian Pizza,Thursday,1,1
1,2,2,classic_dlx_m,1,2015-01-01,11:57:40,16.0,16.0,M,Classic,"Pepperoni, Mushrooms, Red Onions, Red Peppers,...",The Classic Deluxe Pizza,Thursday,1,1
2,3,2,five_cheese_l,1,2015-01-01,11:57:40,18.5,18.5,L,Veggie,"Mozzarella Cheese, Provolone Cheese, Smoked Go...",The Five Cheese Pizza,Thursday,1,1
3,4,2,ital_supr_l,1,2015-01-01,11:57:40,20.75,20.75,L,Supreme,"Calabrese Salami, Capocollo, Tomatoes, Red Oni...",The Italian Supreme Pizza,Thursday,1,1
4,5,2,mexicana_m,1,2015-01-01,11:57:40,16.0,16.0,M,Veggie,"Tomatoes, Red Peppers, Jalapeno Peppers, Red O...",The Mexicana Pizza,Thursday,1,1


In [164]:
# Aggregate sales data by day
sales_data = sales.groupby(['order_date', 'pizza_name_id'])['quantity'].sum().reset_index()
sales_data.head()

Unnamed: 0,order_date,pizza_name_id,quantity
0,2015-01-01,bbq_ckn_l,6
1,2015-01-01,bbq_ckn_m,4
2,2015-01-01,bbq_ckn_s,1
3,2015-01-01,big_meat_s,5
4,2015-01-01,calabrese_m,1


In [165]:
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_percentage_error

In [166]:
# Prepare the data for Prophet (group by pizza_id)
forecast_results = [] # Initialize list to store forecast results
actuals = []  # Initialize list to store actual test values for MAPE calculation
predictions = []  # Initialize list to store predicted values

In [167]:
# Loop through each pizza_name_id and forecast
for pizza_id in sales_data['pizza_name_id'].unique():
    pizza_sales = sales_data[sales_data['pizza_name_id'] == pizza_id]
    
    # Check if there are enough data points
    if len(pizza_sales) > 5:  # Ensure we have sufficient data points

        # Split the data into training and testing sets
        train_size = int(len(pizza_sales) * 0.8)  # Use 80% of data for training
        train, test = pizza_sales['quantity'][:train_size], pizza_sales['quantity'][train_size:]


        # Fit the model
        model = ARIMA(train, order=(1, 0, 1))
        model_fit = model.fit()

        # Forecast for the next period (length of test set)
        forecasted_data = model_fit.forecast(steps=len(test))

        # Store actual and predicted values for MAPE calculation
        actuals.extend(test)
        predictions.extend(forecasted_data)

        # Forecast for the next 7 days (or your desired period)
        forecast_next_7_days = model_fit.forecast(steps=7)  # Forecasting the next 7 time points

        # Create a DataFrame for the forecast
        forecast_df = pd.DataFrame({
            'order_date': pd.date_range(start=pizza_sales['order_date'].max() + pd.Timedelta(days=1), periods=7),  # Create new dates
            'forecasted_sales': forecast_next_7_days,
            'pizza_name_id': pizza_id
        })
        
        # Append the results to the list
        forecast_results.append(forecast_df)

# Concatenate all forecast results into a single DataFrame
final_forecast_df = pd.concat(forecast_results, ignore_index=True)


  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return 

In [168]:
# Calculate overall MAPE
overall_mape = mean_absolute_percentage_error(actuals, predictions)
print(f"Overall MAPE across all pizza types: {overall_mape:.5f}")

Overall MAPE across all pizza types: 0.54102


In [169]:
final_forecast_df.head(10)

Unnamed: 0,order_date,forecasted_sales,pizza_name_id
0,2016-01-01,3.105511,bbq_ckn_l
1,2016-01-02,3.051928,bbq_ckn_l
2,2016-01-03,3.041888,bbq_ckn_l
3,2016-01-04,3.040006,bbq_ckn_l
4,2016-01-05,3.039654,bbq_ckn_l
5,2016-01-06,3.039588,bbq_ckn_l
6,2016-01-07,3.039575,bbq_ckn_l
7,2016-01-01,3.131519,bbq_ckn_m
8,2016-01-02,2.895041,bbq_ckn_m
9,2016-01-03,2.88181,bbq_ckn_m


In [170]:
# Initialize a dictionary to hold the total ingredient requirements
ingredient_requirements = {}

# Loop through the forecasted quantities to calculate ingredient needs
for index, row in final_forecast_df.iterrows():
    pizza_id = row['pizza_name_id']
    predicted_quantity = row['forecasted_sales']
    
    # Find the ingredients for the corresponding pizza_id
    pizza_ingredients = ingredients[ingredients['pizza_name_id'] == pizza_id]
    
    for _, ingredient_row in pizza_ingredients.iterrows():
        ingredient = ingredient_row['pizza_ingredients']
        qty_per_pizza = ingredient_row['Items_Qty_In_Grams']
        
        total_qty = predicted_quantity * qty_per_pizza
        
        if ingredient not in ingredient_requirements:
            ingredient_requirements[ingredient] = 0
        ingredient_requirements[ingredient] += total_qty

# Convert ingredient requirements to a DataFrame for easier handling
ingredient_order = pd.DataFrame(list(ingredient_requirements.items()), columns=['Ingredient', 'Total_Quantity'])


In [171]:
ingredient_order

Unnamed: 0,Ingredient,Total_Quantity
0,Barbecued Chicken,1730.269795
1,Red Peppers,4483.173916
2,Green Peppers,2437.559190
3,Tomatoes,15861.997335
4,Red Onions,23225.110981
...,...,...
59,Brie Carre Cheese,385.845570
60,Prosciutto,385.845570
61,Caramelized Onions,0.000000
62,Pears,128.615190
