In [1]:
import pandas as pd
from prophet import Prophet

In [2]:
# Load your dataset
df = pd.read_csv('STR Detail Report.csv')  # Ensure the path is correct and the file exists


In [3]:
# Check if 'Product Code' and 'Color' columns exist
if 'Product Code' in df.columns and 'Color' in df.columns:
    # Create the n+c column using Product Code and Color
    df['n+c'] = df['Product Code'].astype(str) + '-' + df['Color'].str.upper()
else:
    raise ValueError("Columns 'Product Code' and/or 'Color' do not exist in the dataset")


In [4]:
# Filter the necessary columns
filtered_df = df[['STR Date', 'n+c', 'Received Shop', 'STR Qty']]


In [5]:
# Convert 'STR Date' to datetime
filtered_df['STR Date'] = pd.to_datetime(filtered_df['STR Date'], errors='coerce')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['STR Date'] = pd.to_datetime(filtered_df['STR Date'], errors='coerce')


In [6]:
# Check for null values in 'STR Date' and 'STR Qty' columns
null_counts = filtered_df[['STR Date', 'STR Qty']].isnull().sum()
print("Null values counts:\n", null_counts)


Null values counts:
 STR Date     1
STR Qty     34
dtype: int64


In [7]:
# Filter out rows with null values in 'STR Date' or 'STR Qty'
filtered_df = filtered_df.dropna(subset=['STR Date', 'STR Qty'])


In [8]:
# Number of non-null rows
non_null_count = len(filtered_df)
print("Number of non-null rows in filtered_df:", non_null_count)


Number of non-null rows in filtered_df: 193142


In [9]:
# Rename columns for Prophet
filtered_df.rename(columns={'STR Date': 'ds', 'STR Qty': 'y'}, inplace=True)


In [10]:
# One-hot encode 'n+c' and 'Received Shop' columns
filtered_df = pd.get_dummies(filtered_df, columns=['n+c', 'Received Shop'])

In [11]:
# Initialize a Prophet model
model = Prophet()

In [12]:
# Add regressors
for column in filtered_df.columns:
    if column.startswith('n+c_') or column.startswith('Received Shop_'):
        model.add_regressor(column)


In [None]:
# Fit the model
model.fit(filtered_df)

In [None]:
# Create a DataFrame with future dates (e.g., for the next 30 days)
future_dates = pd.date_range(start=filtered_df['ds'].max(), periods=30, freq='D')
future = pd.DataFrame({'ds': future_dates})

In [None]:
# Get unique values for 'n+c' and 'Received Shop' to create future DataFrame
unique_n_c = filtered_df.filter(like='n+c_').columns
unique_received_shop = filtered_df.filter(like='Received Shop_').columns

In [None]:
# Initialize future DataFrame with zeros for one-hot encoded columns
for column in unique_n_c:
    future[column] = 0
for column in unique_received_shop:
    future[column] = 0

In [None]:
# Example assignment for future predictions (use actual logic instead)
future.loc[:, unique_n_c[0]] = 1  # Replace with your logic
future.loc[:, unique_received_shop[0]] = 1  # Replace with your logic

In [None]:
# Make future predictions
forecast = model.predict(future)

In [None]:
# Display the forecasted values
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])