In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose

In [2]:
# Load the dataset
forecasting_data = pd.read_csv('../data/processed/dataset.csv')

# Filter for the product 'Milk' and location 'Koramangala'
product_data = forecasting_data[
    (forecasting_data['Product'] == 'Milk') &
    (forecasting_data['Location'] == 'Koramangala')
]

# Combine Date and Hour into a DateTime column
product_data['DateTime'] = pd.to_datetime(product_data['Date']) + pd.to_timedelta(product_data['Hour'], unit='h')

# Aggregate data by DateTime
aggregated_data = product_data.groupby('DateTime')['Order_Volume'].sum()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  product_data['DateTime'] = pd.to_datetime(product_data['Date']) + pd.to_timedelta(product_data['Hour'], unit='h')


In [7]:
def filter_data(data, product, location, frequency):
    # Filter by product and location
    filtered_data = data[(data["Product"] == product) & (data["Location"] == location)]

    # Convert 'Date' to datetime for time-based resampling
    filtered_data["Date"] = pd.to_datetime(filtered_data["Date"])

    # Handle frequency
    if frequency == "daily":
        resampled_data = filtered_data.groupby("Date")["Order_Volume"].sum().reset_index()
    elif frequency == "hourly":
        # Combine Date and Hour into a single datetime column
        filtered_data["Datetime"] = filtered_data["Date"] + pd.to_timedelta(filtered_data["Hour"], unit="h")
        resampled_data = filtered_data.groupby("Datetime")["Order_Volume"].sum().reset_index()
    elif frequency == "monthly":
        # Extract month and year for grouping
        filtered_data["Month"] = filtered_data["Date"].dt.to_period("M")
        resampled_data = filtered_data.groupby("Month")["Order_Volume"].sum().reset_index()
        resampled_data["Month"] = resampled_data["Month"].dt.to_timestamp()  # Convert period to timestamp
    else:
        raise ValueError("Invalid frequency. Choose from 'daily', 'hourly', or 'monthly'.")

    return resampled_data

# Example Usage
product = "Milk"
location = "Koramangala"
frequency = "daily"  # Can be 'daily', 'hourly', or 'monthly'
filtered_data = filter_data(forecasting_data, product, location, frequency)
print(filtered_data.head())


        Date  Order_Volume
0 2023-01-01          5019
1 2023-01-02          3714
2 2023-01-03          3839
3 2023-01-04          4911
4 2023-01-05          4970


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data["Date"] = pd.to_datetime(filtered_data["Date"])


In [8]:
!pip install prophet

Collecting prophet
  Downloading prophet-1.1.6-py3-none-win_amd64.whl.metadata (3.6 kB)
Collecting cmdstanpy>=1.0.4 (from prophet)
  Downloading cmdstanpy-1.2.5-py3-none-any.whl.metadata (4.0 kB)
Collecting holidays<1,>=0.25 (from prophet)
  Downloading holidays-0.64-py3-none-any.whl.metadata (26 kB)
Collecting tqdm>=4.36.1 (from prophet)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
     ---------------------------------------- 0.0/57.7 kB ? eta -:--:--
     ------------- ------------------------ 20.5/57.7 kB 682.7 kB/s eta 0:00:01
     --------------------------------- ---- 51.2/57.7 kB 890.4 kB/s eta 0:00:01
     -------------------------------------- 57.7/57.7 kB 608.1 kB/s eta 0:00:00
Collecting stanio<2.0.0,>=0.4.0 (from cmdstanpy>=1.0.4->prophet)
  Downloading stanio-0.5.1-py3-none-any.whl.metadata (1.6 kB)
Downloading prophet-1.1.6-py3-none-win_amd64.whl (13.3 MB)
   ---------------------------------------- 0.0/13.3 MB ? eta -:--:--
   ----------------------------

In [10]:
def filter_data(data, products=None, locations=None, frequency="daily"):
    # Filter by products and locations
    if products and products != "all":
        data = data[data["Product"].isin(products)]
    if locations and locations != "all":
        data = data[data["Location"].isin(locations)]

    # Convert 'Date' to datetime for time-based resampling
    data["Date"] = pd.to_datetime(data["Date"])

    # Handle frequency
    if frequency == "daily":
        resampled_data = data.groupby(["Date", "Product", "Location"])["Order_Volume"].sum().reset_index()
    elif frequency == "hourly":
        # Combine Date and Hour into a single datetime column
        data["Datetime"] = data["Date"] + pd.to_timedelta(data["Hour"], unit="h")
        resampled_data = data.groupby(["Datetime", "Product", "Location"])["Order_Volume"].sum().reset_index()
    elif frequency == "monthly":
        # Extract month and year for grouping
        data["Month"] = data["Date"].dt.to_period("M")
        resampled_data = data.groupby(["Month", "Product", "Location"])["Order_Volume"].sum().reset_index()
        resampled_data["Month"] = resampled_data["Month"].dt.to_timestamp()  # Convert period to timestamp
    else:
        raise ValueError("Invalid frequency. Choose from 'daily', 'hourly', or 'monthly'.")

    return resampled_data

# Example Usage
products = ["Milk", "Bread"]  # List of products
locations = ["Koramangala", "Indiranagar"]  # List of locations
frequency = "daily"  # Can be 'daily', 'hourly', or 'monthly'
filtered_data = filter_data(forecasting_data, products, locations, frequency)
print(filtered_data.head())


        Date Product     Location  Order_Volume
0 2023-01-01   Bread  Indiranagar          4833
1 2023-01-01   Bread  Koramangala          4960
2 2023-01-01    Milk  Indiranagar          4953
3 2023-01-01    Milk  Koramangala          5019
4 2023-01-02   Bread  Indiranagar          3693


In [11]:
from prophet import Prophet

def forecast_for_all(filtered_data, frequency):
    forecasts = {}
    
    # Group by Product and Location
    grouped_data = filtered_data.groupby(["Product", "Location"])

    for (product, location), group in grouped_data:
        group = group[["Date" if frequency == "daily" else "Datetime", "Order_Volume"]]
        group.columns = ["ds", "y"]  # Rename columns for Prophet

        # Train and forecast
        model = Prophet()
        model.fit(group)
        future = model.make_future_dataframe(periods=30, freq="D" if frequency == "daily" else "H")
        forecast = model.predict(future)

        # Save the forecast for this product-location pair
        forecasts[(product, location)] = forecast[["ds", "yhat"]].to_dict(orient="records")

    return forecasts

# Example Usage
frequency = "daily"
forecast_results = forecast_for_all(filtered_data, frequency)
print(forecast_results)  # Contains forecasts for all product-location pairs


18:08:03 - cmdstanpy - INFO - Chain [1] start processing
18:08:03 - cmdstanpy - INFO - Chain [1] done processing
18:08:03 - cmdstanpy - INFO - Chain [1] start processing
18:08:03 - cmdstanpy - INFO - Chain [1] done processing
18:08:04 - cmdstanpy - INFO - Chain [1] start processing
18:08:04 - cmdstanpy - INFO - Chain [1] done processing
18:08:04 - cmdstanpy - INFO - Chain [1] start processing
18:08:04 - cmdstanpy - INFO - Chain [1] done processing


{('Bread', 'Indiranagar'): [{'ds': Timestamp('2023-01-01 00:00:00'), 'yhat': 4933.747274128648}, {'ds': Timestamp('2023-01-02 00:00:00'), 'yhat': 3790.178228309451}, {'ds': Timestamp('2023-01-03 00:00:00'), 'yhat': 3848.550609446311}, {'ds': Timestamp('2023-01-04 00:00:00'), 'yhat': 4967.55178834245}, {'ds': Timestamp('2023-01-05 00:00:00'), 'yhat': 4968.668591166777}, {'ds': Timestamp('2023-01-06 00:00:00'), 'yhat': 3805.7967795406585}, {'ds': Timestamp('2023-01-07 00:00:00'), 'yhat': 4936.924495315373}, {'ds': Timestamp('2023-01-08 00:00:00'), 'yhat': 4934.257635298876}, {'ds': Timestamp('2023-01-09 00:00:00'), 'yhat': 3790.688589358542}, {'ds': Timestamp('2023-01-10 00:00:00'), 'yhat': 3849.060970377468}, {'ds': Timestamp('2023-01-11 00:00:00'), 'yhat': 4968.062148624971}, {'ds': Timestamp('2023-01-12 00:00:00'), 'yhat': 4969.1789508105185}, {'ds': Timestamp('2023-01-13 00:00:00'), 'yhat': 3806.3071386636143}, {'ds': Timestamp('2023-01-14 00:00:00'), 'yhat': 4937.434853918796}, {'ds

In [13]:
import requests

response = requests.get("http://127.0.0.1:8000/forecast?products=Milk&locations=Koramangala&frequency=daily")
data = response.json()
print(data)


{'status': 'success', 'data': [{'product': 'Milk', 'location': 'Koramangala', 'forecast': [{'ds': '2023-05-20', 'yhat': 4949.878153602387}, {'ds': '2023-05-21', 'yhat': 4946.268761846841}, {'ds': '2023-05-22', 'yhat': 3805.722863801574}, {'ds': '2023-05-23', 'yhat': 3831.2751750303205}, {'ds': '2023-05-24', 'yhat': 4933.776608698036}, {'ds': '2023-05-25', 'yhat': 4968.714096231776}, {'ds': '2023-05-26', 'yhat': 3845.155398533226}, {'ds': '2023-05-27', 'yhat': 4948.728870539021}, {'ds': '2023-05-28', 'yhat': 4945.119478783585}, {'ds': '2023-05-29', 'yhat': 3804.5735807377623}, {'ds': '2023-05-30', 'yhat': 3830.125891964181}, {'ds': '2023-05-31', 'yhat': 4932.627325631173}, {'ds': '2023-06-01', 'yhat': 4967.564813166619}, {'ds': '2023-06-02', 'yhat': 3844.006115469245}, {'ds': '2023-06-03', 'yhat': 4947.579587475653}, {'ds': '2023-06-04', 'yhat': 4943.9701957181705}, {'ds': '2023-06-05', 'yhat': 3803.424297672656}, {'ds': '2023-06-06', 'yhat': 3828.9766089000295}, {'ds': '2023-06-07', 'y