In [None]:
pip install Prophet



In [None]:
import warnings

import pandas as pd
from prophet import Prophet

In [None]:
# Load the retail data.
# parse_dates=True would only ask pandas to parse the index, leaving 'date' as
# plain strings; naming the column makes it datetime64 from the start.
df_raw = pd.read_csv("/content/retail-usa-clothing.csv", parse_dates=['date'])

# Columns at which forecasts are produced
hierarchy_levels = ['country','region', 'state', 'item']

In [None]:
# Define function for hierarchical forecasting using Prophet
def hierarchical_forecast_prophet(df, hierarchy_level, periods=120):
    """Fit a Prophet model to summed quantities and forecast ahead.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'date' and 'quantity' columns plus every column listed
        in ``hierarchy_level``.
    hierarchy_level : list of str
        Columns that, together with 'date', define the groups whose
        'quantity' values are summed before fitting.
    periods : int, default 120
        Number of future periods passed to ``make_future_dataframe``
        (days at Prophet's default frequency).

    Returns
    -------
    pandas.DataFrame
        Columns 'ds', 'yhat', 'yhat_lower', 'yhat_upper' as produced by
        ``model.predict`` on the future frame.

    Notes
    -----
    Exactly one model is fitted to the grouped frame. If ``df`` holds more
    than one distinct combination of ``hierarchy_level`` values, the grouped
    frame has several rows per date and the model is fitted across all of
    them at once; a warning is emitted in that case. Filter ``df`` down to a
    single series before calling to get a per-series forecast.
    """
    # Group the data by hierarchy level and sum the quantity sold
    df_grouped = df.groupby(hierarchy_level + ['date'])['quantity'].sum().reset_index()

    # Rename columns to match Prophet's requirements
    df_grouped = df_grouped.rename(columns={'date': 'ds', 'quantity': 'y'})

    # Several rows on one date means several series are being mixed into one fit
    if df_grouped['ds'].duplicated().any():
        warnings.warn(
            "hierarchical_forecast_prophet: multiple rows share the same date after "
            f"grouping by {hierarchy_level}; a single model will be fitted across all "
            "of them. Filter df to one series first for a per-series forecast.",
            stacklevel=2,
        )

    # Instantiate Prophet model
    model = Prophet()

    # Fit the model to the data
    model.fit(df_grouped)

    # Extend the timeline by `periods` steps beyond the fitted data
    future = model.make_future_dataframe(periods=periods)

    # Make predictions
    forecast = model.predict(future)

    return forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

In [None]:
# Build one forecast per hierarchy column, keyed by the column name.
# NOTE(review): every call receives the full df_raw, so for a column with
# several distinct values the single model is fitted on all of them together.
hierarchy_forecasts = {
    level: hierarchical_forecast_prophet(df_raw, [level])
    for level in hierarchy_levels
}


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpqjyjqww_/05d94uff.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpqjyjqww_/kph4bzrs.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=86073', 'data', 'file=/tmp/tmpqjyjqww_/05d94uff.json', 'init=/tmp/tmpqjyjqww_/kph4bzrs.json', 'output', 'file=/tmp/tmpqjyjqww_/prophet_model6arl0knt/prophet_model-20240214081358.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
08:13:58 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
08:14:01 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpqj

In [None]:
# Forecasts keyed by (item, label), where label is 'Country', a region name,
# or "<region>_<state>"
item_hierarchy_forecasts = {}

for item in df_raw['item'].unique():
    item_df = df_raw.loc[df_raw['item'] == item]

    # Country-level model for this item
    item_hierarchy_forecasts[(item, 'Country')] = hierarchical_forecast_prophet(item_df, ['country'])

    # One model per region in which the item appears
    for region in item_df['region'].unique():
        in_region = item_df['region'] == region
        item_hierarchy_forecasts[(item, region)] = hierarchical_forecast_prophet(
            item_df.loc[in_region], ['region']
        )

    # One model per state, within each region that state is recorded under
    for state in item_df['state'].unique():
        in_state = item_df['state'] == state
        for region in item_df.loc[in_state, 'region'].unique():
            state_rows = item_df.loc[in_state & (item_df['region'] == region)]
            state_forecast = hierarchical_forecast_prophet(state_rows, ['state'])
            key = (item, "_".join((region, state)))
            item_hierarchy_forecasts[key] = state_forecast

# Show the last rows of every stored forecast
for key, forecast in item_hierarchy_forecasts.items():
    print(f'Forecast for {key}:', forecast.tail(), sep='\n')

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpqjyjqww_/oa1r7_dh.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpqjyjqww_/a28_v0wb.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=79963', 'data', 'file=/tmp/tmpqjyjqww_/oa1r7_dh.json', 'init=/tmp/tmpqjyjqww_/a28_v0wb.json', 'output', 'file=/tmp/tmpqjyjqww_/prophet_model0ydt_21x/prophet_model-20240214081550.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
08:15:50 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
08:15:51 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpqj

Forecast for ('mens_clothing', 'Country'):
             ds        yhat  yhat_lower  yhat_upper
4379 2009-11-21  524.768884  432.966293  607.330120
4380 2009-11-22  525.204584  432.194536  611.749221
4381 2009-11-23  525.153841  437.851749  610.503673
4382 2009-11-24  525.187048  436.983537  608.431721
4383 2009-11-25  525.577923  436.829251  619.971958
Forecast for ('mens_clothing', 'Mid-Alantic'):
             ds       yhat  yhat_lower  yhat_upper
4379 2009-11-21  94.549013   73.484919  115.278544
4380 2009-11-22  94.750957   72.215479  117.286777
4381 2009-11-23  94.735803   72.878672  116.483671
4382 2009-11-24  94.917934   73.710784  116.416095
4383 2009-11-25  94.962995   73.839407  117.309637
Forecast for ('mens_clothing', 'SouthCentral'):
             ds        yhat  yhat_lower  yhat_upper
4379 2009-11-21  123.561235   98.053969  151.297533
4380 2009-11-22  123.761031   98.133877  149.181259
4381 2009-11-23  123.885166   97.189120  148.720675
4382 2009-11-24  124.124078   98.107

In [None]:
# Series to inspect: (item, "<region>_<state>")
key_of_interest = ('kids_clothing', 'SouthCentral_Kentucky')

# Look up its stored forecast
forecast_of_interest = item_hierarchy_forecasts[key_of_interest]

# Standalone frame holding the four forecast columns
testing_df = forecast_of_interest[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].copy()

# Show the last forecast rows
print(f'Forecast for {key_of_interest}:', testing_df.tail(), sep='\n')

Forecast for ('kids_clothing', 'SouthCentral_Kentucky'):
             ds       yhat  yhat_lower  yhat_upper
4379 2009-11-21  92.013882   86.354110   97.173137
4380 2009-11-22  92.009841   86.533537   97.422477
4381 2009-11-23  92.011025   86.326163   97.332585
4382 2009-11-24  92.027451   86.579982   97.484586
4383 2009-11-25  92.050047   86.379955   97.455959


In [None]:
# Keep forecast rows dated from 2000-01-01 through 2000-12-31 (both bounds inclusive)
in_2000 = testing_df['ds'].between('2000-01-01', '2000-12-31')
forecast_2000_df = testing_df[in_2000]

# Show the year-2000 slice
print(f'Forecast for {key_of_interest} in the year 2000:', forecast_2000_df, sep='\n')

Forecast for ('kids_clothing', 'SouthCentral_Kentucky') in the year 2000:
             ds       yhat  yhat_lower  yhat_upper
767  2000-01-01  28.570215   23.375398   33.331738
768  2000-01-02  28.494077   23.560475   33.454710
769  2000-01-03  28.430864   23.478882   32.867912
770  2000-01-04  28.390565   23.830051   33.154376
771  2000-01-05  28.363922   23.446585   33.142032
...         ...        ...         ...         ...
1128 2000-12-27  28.993966   24.080422   33.573847
1129 2000-12-28  29.072795   24.495170   33.652781
1130 2000-12-29  28.875396   24.099910   33.431302
1131 2000-12-30  29.024722   23.994239   33.947454
1132 2000-12-31  28.967950   24.323854   33.701805

[366 rows x 4 columns]


In [None]:
# Inspect the raw input table (the notebook display elides the middle rows)
df_raw

Unnamed: 0,date,state,item,quantity,region,country
0,1997-11-25,NewYork,mens_clothing,8,Mid-Alantic,USA
1,1997-11-26,NewYork,mens_clothing,9,Mid-Alantic,USA
2,1997-11-27,NewYork,mens_clothing,11,Mid-Alantic,USA
3,1997-11-28,NewYork,mens_clothing,11,Mid-Alantic,USA
4,1997-11-29,NewYork,mens_clothing,10,Mid-Alantic,USA
...,...,...,...,...,...,...
388019,2009-07-24,NewYork,mens_shoes,16,Mid-Alantic,USA
388020,2009-07-25,NewYork,mens_shoes,17,Mid-Alantic,USA
388021,2009-07-26,NewYork,mens_shoes,19,Mid-Alantic,USA
388022,2009-07-27,NewYork,mens_shoes,17,Mid-Alantic,USA


In [None]:
# Make sure 'date' is datetime so the .dt accessor below is available
df_raw['date'] = pd.to_datetime(df_raw['date'])

# Actual observations for kids_clothing in SouthCentral / Kentucky during 2000
is_target = (
    df_raw['item'].eq('kids_clothing')
    & df_raw['region'].eq('SouthCentral')
    & df_raw['state'].eq('Kentucky')
    & df_raw['date'].dt.year.eq(2000)
)
filtered_df = df_raw[is_target]

# Show the matching rows
print(filtered_df)

            date     state           item  quantity        region country
56199 2000-01-01  Kentucky  kids_clothing        29  SouthCentral     USA
56200 2000-01-02  Kentucky  kids_clothing        28  SouthCentral     USA
56201 2000-01-03  Kentucky  kids_clothing        27  SouthCentral     USA
56202 2000-01-04  Kentucky  kids_clothing        27  SouthCentral     USA
56203 2000-01-05  Kentucky  kids_clothing        26  SouthCentral     USA
...          ...       ...            ...       ...           ...     ...
56560 2000-12-27  Kentucky  kids_clothing        28  SouthCentral     USA
56561 2000-12-28  Kentucky  kids_clothing        30  SouthCentral     USA
56562 2000-12-29  Kentucky  kids_clothing        30  SouthCentral     USA
56563 2000-12-30  Kentucky  kids_clothing        29  SouthCentral     USA
56564 2000-12-31  Kentucky  kids_clothing        29  SouthCentral     USA

[366 rows x 6 columns]
