### Convert product_sales.csv into ForecastedInformation.csv

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import ray
from prophet import Prophet

Importing plotly failed. Interactive plots will not work.


In [None]:
# Define policy optimization period for forecasting: the number of future
# weekly periods (freq='W') that forecast_dates asks Prophet to predict.
policy_optimization_period = 5

### Clean the data

In [None]:
df = pd.read_csv("product_sales.csv")
# Drop random columns
df = df.drop(columns=["Unnamed: 107", "94"])
# Melt data from wide (one column per week) to long (one row per
# store / sku / price / week) and remove strings for week names
df = df.melt(id_vars=["Scode", "Pcode", "Price"], var_name="Week", value_name="Quantity Sold")
# Keep only the digits of each week label. The pattern is a raw string so
# "\d" is not treated as an (invalid) string escape, and expand=False makes
# extract() return a Series rather than a one-column DataFrame.
df["Week"] = df["Week"].str.extract(r"(\d+)", expand=False).astype(int)
stores = df["Scode"].unique()
# How many stores do we want to forecast for
stores = stores[:1]
# Will contain (store, sku) pairs, ordered by store and then by the order in
# which each sku first appears for that store
all_unique_combos = [
    (store, sku)
    for store in stores
    for sku in df.loc[df["Scode"] == store, "Pcode"].unique()
]


### Compute forecasts in parallel

In [None]:
# Parallelize this forecasting for speed
@ray.remote
def forecast_dates(df, pair):
    """Fit a Prophet model to one (store, sku) sales series and forecast it.

    Args:
        df: Long-format sales frame with "Scode", "Pcode" and
            "Quantity Sold" columns.
        pair: ``(store, sku)`` tuple selecting the series to model.

    Returns:
        A ``(data, forecast)`` tuple: ``data`` is the frame the model was
        fitted on (columns ``ds`` and ``y``), and ``forecast`` is the output
        of ``model.predict`` on the frame built by ``make_future_dataframe``
        with ``policy_optimization_period`` weekly periods.
    """
    store_code, sku_code = pair[0], pair[1]
    selected = (df["Pcode"] == sku_code) & (df["Scode"] == store_code)
    quantities = df.loc[selected, "Quantity Sold"].reset_index(drop=True)

    # The source data carries no calendar dates, so the series is laid onto
    # synthetic weekly timestamps beginning at an arbitrary start date.
    weekly_dates = pd.date_range(start="2023-01-01", periods=len(quantities), freq='W')
    data = pd.DataFrame({'ds': weekly_dates, 'y': quantities})

    model = Prophet(weekly_seasonality=True, yearly_seasonality=True)
    model.fit(data)

    future = model.make_future_dataframe(periods=policy_optimization_period, freq='W')
    return data, model.predict(future)

In [None]:
ray.init(log_to_driver=False, ignore_reinit_error=True)
# Store the DataFrame in Ray's object store once and hand every task the same
# reference. This avoids re-serializing the frame per task and guarantees the
# remote function receives a pandas DataFrame (rather than an internal
# ray.data block obtained through a non-public API).
df_ref = ray.put(df)
forecast_futures = [
    forecast_dates.remote(df_ref, pair)
    for pair in all_unique_combos
]
# ray.get on a list returns results in the same order as the futures, i.e.
# results[i] belongs to all_unique_combos[i].
results = ray.get(forecast_futures)

### Append forecasts to the original DataFrame

In [None]:
store = "Store1"
# start with all info from store1
store1df = df[df["Scode"] == store]
skus = store1df["Pcode"].unique()

# Look forecasts up by (store, sku) instead of by list position, so a
# mismatch between the forecasted pairs and Store1's skus raises a KeyError
# rather than silently attaching another sku's forecast.
forecast_by_pair = dict(zip(all_unique_combos, results))

forecast_frames = []
for sku in skus:
    # SKU info
    sku_rows = store1df[store1df["Pcode"] == sku]
    price = sku_rows.iloc[0]["Price"]

    # Get new week info: everything in the forecast after the fitted history.
    # Using len(history) avoids hard-coding the number of observed weeks.
    history, forecast = forecast_by_pair[(store, sku)]
    future_rows = forecast.iloc[len(history):]

    # Continue the week numbering from the last observed week so forecast
    # rows never reuse an existing week number, whatever the first week is.
    first_new_week = sku_rows["Week"].max() + 1
    forecast_frames.append(
        pd.DataFrame(
            {
                "Scode": store,
                "Pcode": sku,
                "Price": price,
                "Week": range(first_new_week, first_new_week + len(future_rows)),
                "Quantity Sold": future_rows["yhat"].to_numpy(),
            }
        )
    )

# Concatenate once instead of growing the frame inside the loop.
store1df = pd.concat([store1df, *forecast_frames]).reset_index(drop=True)

### Write all the Store 1 data (history plus forecasts) to a CSV file

In [None]:
# Remove rows that are exact duplicates across every column before export.
store1df = store1df.drop_duplicates()
# Write the result to disk; index=False keeps the pandas row index out of
# the CSV file.
store1df.to_csv("ForecastedInformation.csv", index=False)