# Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.cm as cm
from tensorflow.keras.layers import Bidirectional
import warnings
from matplotlib.ticker import StrMethodFormatter       # to set a comma format
import matplotlib.dates as mdates

# Ignore all warnings
warnings.filterwarnings('ignore')

In [None]:
# Notebook-wide figure defaults: 10x6 inch canvas rendered at 300 dpi.
plt.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 300,
})

In [None]:
# Load your dataset
data = pd.read_csv('/kaggle/input/productdemandforecasting/Historical Product Demand.csv')  # Replace with your file path

# Display the first few rows of the dataset
data


In [None]:
df=data.copy()

In [None]:
# Discard any row that has no date or no demand value.
df.dropna(subset=['Date', 'Order_Demand'], inplace=True)

In [None]:
# Order_Demand is text at this point; strip '(' and ')' so it can be cast to integers.
# regex=False makes the replacement literal: both characters are regex
# metacharacters and the default of `regex` is not the same across pandas versions.
# NOTE(review): this keeps only the magnitude of parenthesised values. If the
# parentheses are meant to mark negative quantities, the sign is lost here — confirm.
df['Order_Demand'] = (
    df['Order_Demand']
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .astype('int64')
)

In [None]:
df['Date'] = pd.to_datetime(df['Date'])  # Ensure Date column is datetime
df.sort_values('Date', inplace=True)  # Sort by date

In [None]:
df['Year'] = df['Date'].dt.year

In [None]:
df['Month'] = df['Date'].dt.month_name()

In [None]:
df1 = df[['Year', 'Warehouse', 'Order_Demand']].groupby(['Year', 'Warehouse'], as_index=False).count()

In [None]:
df1  = df1.pivot(index='Year', columns='Warehouse', values='Order_Demand')

In [None]:
df1['Total'] = df1.sum(axis=1)

# Years' Distribution

In [None]:
# Pie chart of the per-year totals held in df1['Total'].
values = df1['Total']
labels = df1.index

# One colour per slice from the 'Paired' palette.
# plt.get_cmap is used instead of matplotlib.cm.get_cmap, which was deprecated
# in Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('Paired', len(values))
colors = cmap(range(len(values)))

# NOTE(review): explode_list is defined but never passed to plt.pie, and it
# assumes exactly seven slices.
explode_list = [0.2, 0, 0, 0, 0, 0, 0.2]
plt.figure(figsize=(10, 6))

plt.pie(values,labels=labels, autopct='%1.1f%%',
            shadow=True, startangle=90, colors=colors, textprops={'fontsize': 12, "fontweight" : "bold", "color":"darkblue"},  wedgeprops=
           {'edgecolor':'darkblue'} , labeldistance=1.15)

plt.title('Order_Demand Trend [2011 - 2017]', y=1.12, fontsize=16)  # scale the title font size up
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle

plt.legend(labels=labels, loc='upper left', bbox_to_anchor=(0, 1))
plt.tight_layout()
plt.show()

# Month's Distribution

In [None]:
# Total demand per calendar month (all years pooled), arranged January..December
# rather than in the alphabetical order groupby produces.
month_order = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December'
]
monthly_demand = df.groupby('Month')['Order_Demand'].sum().reindex(month_order)

# Average of the twelve monthly totals, drawn as a reference line below
mean_demand = monthly_demand.mean()

plt.figure(figsize=(10, 6))
sns.barplot(x=monthly_demand.index, y=monthly_demand.values, palette='Set2')
plt.axhline(mean_demand, color='red', linestyle='--', label='Mean Order Demand')

# Titles, axis labels and layout
plt.title('Total Order Demand by Month Across All Years', fontsize=16)
plt.xlabel('Month', fontsize=12)
plt.ylabel('Total Order Demand', fontsize=12)
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.show()

# Warehouse plots

In [None]:
df_ware=df.groupby(['Warehouse'], observed = False)['Order_Demand'].count().reset_index().copy()

In [None]:
# Pie chart of the per-warehouse row counts held in df_ware.
values = df_ware['Order_Demand']
labels = df_ware['Warehouse']

# One colour per slice from the 'Paired' palette.
# plt.get_cmap is used instead of matplotlib.cm.get_cmap, which was deprecated
# in Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('Paired', len(values))
colors = cmap(range(len(values)))

# NOTE(review): explode_list is defined but never passed to plt.pie, and its
# length (7) is unrelated to the number of warehouses.
explode_list = [0.2, 0, 0, 0, 0, 0, 0.2]
plt.figure(figsize=(10, 6))

plt.pie(values,labels=labels, autopct='%1.1f%%',
            shadow=True, startangle=90, colors=colors, textprops={'fontsize': 12, "fontweight" : "bold", "color":"darkblue"},  wedgeprops=
           {'edgecolor':'darkblue'} , labeldistance=1.15)

plt.title('Warehouse Order_Demand Trend', y=1.12, fontsize=16)  # scale the title font size up
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle

plt.legend(labels=labels, loc='upper left', bbox_to_anchor=(0, 1))
plt.tight_layout()
plt.show()

In [None]:
df2=df.copy()

In [None]:
df2['Date'] = df2['Date'].dt.to_period('M').dt.to_timestamp()

In [None]:
df2.rename(columns = {'Date':'date_m'}, inplace = True)

In [None]:
df2 = df2[ (df2['date_m'] >= '2012-01-01') & (df2['date_m'] < '2017-01-01') ]

In [None]:
df_month_cat = df2.groupby(['date_m', 'Warehouse', 'Product_Category'], observed = False)['Order_Demand'].sum().reset_index().copy()
tt_m = df_month_cat.groupby('date_m', observed = False)['Order_Demand'].sum().reset_index().copy()
twh = df_month_cat.groupby(['date_m', 'Warehouse'], observed = False)['Order_Demand'].sum().reset_index().copy()

In [None]:
# Line + marker plot of the total monthly demand in tt_m.

# Apply the theme before creating the figure: seaborn style settings only affect
# axes created afterwards, so setting it after plt.subplots leaves this plot unstyled.
sns.set_theme(style = 'whitegrid', palette = 'Set2')

fig, ax = plt.subplots(figsize = (10, 6))

sns.lineplot(x = "date_m", y = "Order_Demand", data = tt_m, ax = ax)
sns.scatterplot(x = "date_m", y = "Order_Demand", data = tt_m, ax = ax)

# yaxis with comma
ax.yaxis.set_major_formatter( StrMethodFormatter('{x:,.0f}') )

# set Minor ticks for every month in xaxis
minor_locator = mdates.MonthLocator( bymonth = range(1, 13) )
ax.xaxis.set_minor_locator(minor_locator)

# set the size of ticks
ax.tick_params(axis = 'x', which = 'major', length = 10, width = 2)      # Major ticks
ax.tick_params(axis = 'x', which = 'minor', length = 5, width = 1)       # Minor ticks

ax.set_title('Total Product Orders per Month', pad = 15)
ax.set_xlabel('Months')
ax.set_ylabel('Order Demands')
sns.despine( ax = ax )


# Monthly demand for each warehouse

In [None]:
# Small-multiples view of twh: one panel per warehouse, two panels per row.
sns.set_theme(style="whitegrid", palette="Set2")

# Create a FacetGrid for plotting
g = sns.FacetGrid(twh, col="Warehouse", col_wrap=2, height=4, aspect=1.5)
# Each panel gets a line plus markers for its own warehouse
g.map_dataframe(sns.lineplot, x='date_m', y='Order_Demand', hue='Warehouse')
g.map_dataframe(sns.scatterplot, x='date_m', y='Order_Demand', hue='Warehouse', s=50)

# Format y-axis
for ax in g.axes.flat:
    ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))  # thousands separator, no decimals

# Add titles and labels
g.set_axis_labels('Date', 'Total Order Demand')
# NOTE(review): recent seaborn releases expose the figure as `g.figure`; `g.fig` may be deprecated — confirm
g.fig.suptitle('Total Order Demand by Warehouse', y=1.02)

# Adjust the spacing between graphs
plt.subplots_adjust(top=0.9, hspace=0.2, wspace=0.1)


# Pre Processing

In [None]:
# Get the list of unique warehouses
warehouses = data['Warehouse'].unique()
warehouses

In [None]:
# One independent frame per warehouse, taken from the raw `data` frame.
# .copy() matters: the following cells modify these frames in place (drop, sort,
# column assignment); doing that on a bare boolean-mask selection of `data`
# triggers pandas' SettingWithCopyWarning and is not guaranteed to behave as intended.
data_1 = data[data['Warehouse'] == 'Whse_A'].copy()
data_2 = data[data['Warehouse'] == 'Whse_C'].copy()
data_3 = data[data['Warehouse'] == 'Whse_S'].copy()
data_4 = data[data['Warehouse'] == 'Whse_J'].copy()

Project Title: DemandXbert
Project Overview:
The goal of this project is to develop a demand forecasting model for a manufacturing company with a global presence. The company distributes thousands of products across dozens of categories, shipping them from four central warehouses. These warehouses are responsible for supplying products to their respective regions, with shipping times that typically exceed one month due to the global distribution of manufacturing sites.

By accurately forecasting monthly demand for the month after next for each product within each warehouse, the company can better manage inventory, reduce costs, and streamline logistics. This project aims to develop a forecasting system that will provide precise demand predictions, allowing the company to plan for shipments more efficiently and optimize its operations.

# Whse_A

In [None]:
data_1.drop(['Warehouse', 'Product_Category'], axis=1, inplace=True)


In [None]:
data_1

In [None]:
# Preprocess the Whse_A subset: parse dates, order rows chronologically, make demand numeric
data_1['Date'] = pd.to_datetime(data_1['Date'])  # Ensure Date column is datetime
data_1.sort_values('Date', inplace=True)  # Sort by date

# Ensure Order_Demand is numeric; errors='coerce' turns unparseable values into NaN
# NOTE(review): the exploratory section strips '(' and ')' from Order_Demand before
# casting, whereas here such values would presumably become NaN and be dropped in the
# next cell — confirm that the difference is intended.
data_1['Order_Demand'] = pd.to_numeric(data_1['Order_Demand'], errors='coerce')

# Display data types and check for null values
print(data_1.dtypes)
print(data_1.isnull().sum())


In [None]:
# Rows missing either the date or the demand value are dropped rather than filled.
data_1.dropna(subset=['Date', 'Order_Demand'], inplace=True)

# Per-column count of remaining missing values
print(data_1.isnull().sum())


In [None]:
dataTest = data_1.copy()

In [None]:
# Extract year from the Date column
dataTest['Year'] = data_1['Date'].dt.year

# Group by the Year and count the number of rows for each year
rows_per_year = dataTest.groupby('Year').size()

# Display the number of rows per year
print(rows_per_year)

In [None]:
# Assuming 'Date' column is already in datetime format
data_1['Year'] = data_1['Date'].dt.year

# Keep the years 2013 through 2016 (inclusive on both ends), then drop the helper
# 'Year' column from the result. drop() returns a new frame here; the previous
# in-place drop operated on a slice of data_1 and raised SettingWithCopyWarning.
filtered_data_1 = data_1[data_1['Year'].between(2013, 2016)].drop(columns=['Year'])


In [None]:
filtered_data_1['Product_Code'].value_counts()

In [None]:
# Number of rows per product code in the filtered Whse_A data
product_code_counts = filtered_data_1['Product_Code'].value_counts()

# Filter to get only those Product_Codes with counts greater than 1000
product_codes_greater_than_1000 = product_code_counts[product_code_counts > 1000]

# Display the result
print(product_codes_greater_than_1000)

In [None]:
# Minimum number of rows a product needs (strictly more than) to be kept
threshold = 1000

# Count once and reuse the result; the counts were previously computed twice
code_counts = filtered_data_1['Product_Code'].value_counts()
frequent_products = code_counts[code_counts > threshold].index

# Filter the dataset to keep only the frequent products
filtered_data_1 = filtered_data_1[filtered_data_1['Product_Code'].isin(frequent_products)]

# Display the filtered data
print(filtered_data_1.head())


In [None]:
filtered_data_1

In [None]:
#  Aggregate the data by Product_Code and Date
data_aggregated_1 = filtered_data_1.groupby(['Product_Code', 'Date']).sum().reset_index()

# Display the aggregated data
data_aggregated_1


In [None]:
data_aggregated_1['Product_Code'].value_counts()

In [None]:
data_pivot_1 = data_aggregated_1.pivot(index='Date', columns='Product_Code', values='Order_Demand').fillna(0)

# Display the pivoted data
print(data_pivot_1)

# Resample the pivoted data weekly and fill missing values with 0
data_pivot_resampled = data_pivot_1.resample('W').sum().fillna(0)


In [None]:
data_resampled_weekly = data_pivot_1.resample('W').sum()
data_resampled_weekly

In [None]:
data_pivot_1

In [None]:
# Step 1: Resample to weekly totals. With 'W' each bin ends on a Sunday and is
# labelled by that end date.
data_resampled_weekly = data_pivot_1.resample('W').sum()

# Step 2: Build a daily index that covers every day of every weekly bin. The first
# bin is labelled by its last day, so the range has to start six days earlier.
daily_index = pd.date_range(start=data_resampled_weekly.index.min() - pd.Timedelta(days=6),
                            end=data_resampled_weekly.index.max(), freq='D')

# Back-fill so each week's total lands on the seven days ending at its label, then
# divide by 7 to spread it evenly. A forward fill would instead push every total
# onto the following week and leave the final week with a single day.
data_resampled_daily = data_resampled_weekly.reindex(daily_index).bfill() / 7

# Step 3: Display the resampled daily data
print(data_resampled_daily.head())

In [None]:
filtered_data_1['Product_Code'].value_counts()

In [None]:
# Count the occurrences of each product code
product_counts = filtered_data_1['Product_Code'].value_counts()

# Create a bar plot for product distribution
plt.figure(figsize=(10, 6))
sns.barplot(x=product_counts.index, y=product_counts.values)
plt.title('Distribution of Product Codes')
plt.xlabel('Product Code')
plt.ylabel('Count')
plt.xticks(rotation=90)  # Rotate x-axis labels for better readability
plt.tight_layout()
plt.show()

# model_1

In [None]:
# Rescale every product column with MinMaxScaler (default output range 0..1).
# NOTE(review): the scaler is fitted on the whole series, before the train/test split
# made further down, so test-period values influence the scaling — consider fitting
# on the training portion only.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data_resampled_daily)

In [None]:
scaled_data

In [None]:
# Prepare data for LSTM
def create_sequences(data, time_steps=1):
    """Cut ``data`` into sliding windows and their next-step targets.

    Window ``k`` holds rows ``k .. k + time_steps - 1``; its target is row
    ``k + time_steps``. Returns ``(X, y)`` as NumPy arrays. When ``data`` has
    no more than ``time_steps`` rows both arrays are empty.
    """
    n_windows = len(data) - time_steps
    windows = [data[start:start + time_steps] for start in range(n_windows)]
    targets = [data[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Set time steps for LSTM
time_steps = 30  # e.g., 30 days
X, y = create_sequences(scaled_data, time_steps)

# Display the shapes of the data
print(X.shape, y.shape)


In [None]:
#Split the data into training and testing sets
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]


# Display the shapes of the training and testing data
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.optimizers import Adam

# Single LSTM layer followed by a dense layer that emits one value per input feature.
model = Sequential()

# Declare the input shape (time steps, features) with an explicit Input layer;
# passing input_shape= to the first layer is deprecated in Keras 3.
model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(50, return_sequences=False))

# Output Layer (for each product in multi-product forecasting)
model.add(Dense(X_train.shape[2]))  # Number of outputs (one per product)

# Compile with Adam and MSE loss. Note that learning_rate=0.02 is well above
# Adam's Keras default of 0.001 — lower it if training is unstable.
optimizer = Adam(learning_rate=0.02)
model.compile(optimizer=optimizer, loss='mean_squared_error')

# Print model summary
model.summary()




In [None]:


# Early stopping to stop training when validation loss doesn't improve after 5 epochs
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Fit the model.
# Validation uses the last 10% of the training windows (validation_split takes the
# tail of the data before shuffling) instead of the test set: early stopping and
# best-weight restoration select the model on the validation data, so using
# X_test/y_test there would make the later test-set evaluation optimistic.
history = model.fit(
    X_train, y_train,  # Training data
    epochs=50,  # You can adjust the number of epochs based on your data and model's performance
    batch_size=32,  # Batch size, tune this for better results
    validation_split=0.1,  # Hold-out taken from the end of the training data
    callbacks=[early_stopping],  # Early stopping callback
    verbose=1  # Shows the training process
)







# After training, you can plot the history to see how the model has learned over epochs

In [None]:
# Evaluate training history
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
# Make predictions
y_pred = model.predict(X_test)


# Display the shape of predictions

print(y_pred.shape)


In [None]:
# Inverse transform the predictions and actual values

y_pred_inverse = scaler.inverse_transform(y_pred)

y_test_inverse = scaler.inverse_transform(y_test)

# Display the shapes of inverse transformed predictions and actual values
print(y_pred_inverse.shape, y_test_inverse.shape)

In [None]:
selected_products =  ['Product_1539', 'Product_1470', 'Product_1496', 'Product_1410',
        'Product_1424']

# Create a DataFrame for actual and predicted values
results_df = pd.DataFrame({
    'Date': data_resampled_daily.index[-len(y_test):]  # Get the corresponding dates for testing
})

# Extract actual and predicted values for each selected product
for product in selected_products:
    product_index = data_resampled_daily.columns.get_loc(product)  # Get the index of the product
    results_df[f'Actual_{product}'] = y_test_inverse[:, product_index]  # Actual values
    results_df[f'Predicted_{product}'] = y_pred_inverse[:, product_index]  # Predicted values

# Set 'Date' as index
results_df.set_index('Date', inplace=True)

# Resample to monthly frequency (sum of orders in each month)
results_df_monthly = results_df.resample('M').sum()

# Plot actual vs predicted for each selected product (monthly)
plt.figure(figsize=(15, 10))
for i, product in enumerate(selected_products):
    plt.subplot(len(selected_products), 1, i + 1)
    plt.plot(results_df_monthly.index, results_df_monthly[f'Actual_{product}'],
             label='Actual', color='blue')
    plt.plot(results_df_monthly.index, results_df_monthly[f'Predicted_{product}'],
             label='Predicted', color='orange')
    plt.title(f'Monthly Actual vs Predicted for {product}')
    plt.xlabel('Date')
    plt.ylabel('Order Demand')
    plt.legend()

plt.tight_layout()
plt.show()

# Forecasting

In [None]:
# Forecast for the next 40 days
def forecast_next_days(model, last_data, days=40):
    """Roll ``model`` forward ``days`` steps, feeding each prediction back as input.

    Parameters
    ----------
    model : object exposing ``predict``; it is called with an array of shape
        ``(1, window_length, n_features)`` and the first row of its result is
        taken as the next step.
    last_data : 2-D array-like ``(window_length, n_features)`` used as the seed window.
    days : number of steps to generate.

    Returns
    -------
    numpy.ndarray with one row per generated step.
    """
    window = np.asarray(last_data)
    # Take the window length from the seed itself rather than from the notebook-level
    # `time_steps` variable, so the function works for any window size.
    n_steps, n_features = window.shape
    predictions = []
    for _ in range(days):
        next_pred = model.predict(window.reshape(1, n_steps, n_features))[0]
        predictions.append(next_pred)
        # Slide the window: drop the oldest row, append the new prediction
        window = np.concatenate([window[1:], [next_pred]], axis=0)
    return np.array(predictions)

# Seed window: the last `time_steps` rows of the full scaled series
# (scaled_data covers the test period as well, not only the training set)
last_input = scaled_data[-time_steps:]

# Generate the forecast
forecast = forecast_next_days(model, last_input)


In [None]:
# Inverse transform the scaled predictions to get values in the original units
forecast_inverse = scaler.inverse_transform(forecast)

# One date per forecast row, starting the day after the last date in the daily frame.
# The number of periods comes from the forecast itself instead of a hard-coded 40,
# so this stays correct if the forecast horizon is changed.
forecast_dates = pd.date_range(start=data_resampled_daily.index[-1] + pd.Timedelta(days=1),
                               periods=len(forecast_inverse))
forecast_df = pd.DataFrame(data=forecast_inverse, index=forecast_dates, columns=data_resampled_daily.columns)

# Display the forecast results
print(forecast_df)


In [None]:
# Define the products to visualize
selected_products =  ['Product_1539', 'Product_1470', 'Product_1496', 'Product_1410',
        'Product_1424']
# Filter the forecast_df to include only the selected products
forecast_selected_df = forecast_df[selected_products]

# Weekly totals of the forecast ('W' is a weekly frequency, not a monthly one).
# The variable keeps its existing name; it is not used by the plot below.
forecast_monthly_df = forecast_selected_df.resample('W').sum()  # You can also use 'mean' instead of 'sum' if needed

# Plot the day-by-day forecast for the selected products. The title now says
# "Daily" because the plotted frame is the un-aggregated daily forecast.
plt.figure(figsize=(12, 6))
for product in forecast_selected_df.columns:
    plt.plot(forecast_selected_df.index, forecast_selected_df[product], label=product)
plt.title('Daily Forecasted Demand for Selected Products')
plt.xlabel('Date')
plt.ylabel('Forecasted Order Demand')
plt.legend()
plt.show()


# Whse_C

In [None]:
data_2.drop(['Warehouse', 'Product_Category'], axis=1, inplace=True)


In [None]:
# Preprocess the Whse_C subset: parse dates, order rows chronologically, make demand numeric
data_2['Date'] = pd.to_datetime(data_2['Date'])  # Ensure Date column is datetime
data_2.sort_values('Date', inplace=True)  # Sort by date

# Ensure Order_Demand is numeric; errors='coerce' turns unparseable values into NaN
# NOTE(review): values wrapped in parentheses would presumably be coerced to NaN and
# dropped in the next cell, unlike in the exploratory section — confirm this is intended.
data_2['Order_Demand'] = pd.to_numeric(data_2['Order_Demand'], errors='coerce')

# Display data types and check for null values
print(data_2.dtypes)
print(data_2.isnull().sum())


In [None]:
# Rows missing either the date or the demand value are dropped rather than filled.
data_2.dropna(subset=['Date', 'Order_Demand'], inplace=True)

# Per-column count of remaining missing values
print(data_2.isnull().sum())


In [None]:
dataTest = data_2.copy()

In [None]:
# Extract year from the Date column
dataTest['Year'] = data_2['Date'].dt.year

# Group by the Year and count the number of rows for each year
rows_per_year = dataTest.groupby('Year').size()

# Display the number of rows per year
print(rows_per_year)

In [None]:
# Assuming 'Date' column is already in datetime format
data_2['Year'] = data_2['Date'].dt.year

# Keep the years 2013 through 2016 (inclusive on both ends), then drop the helper
# 'Year' column from the result. drop() returns a new frame here; the previous
# in-place drop operated on a slice of data_2 and raised SettingWithCopyWarning.
filtered_data_2 = data_2[data_2['Year'].between(2013, 2016)].drop(columns=['Year'])


In [None]:
filtered_data_2['Product_Code'].value_counts()

In [None]:
# Number of rows per product code in the filtered Whse_C data
product_code_counts = filtered_data_2['Product_Code'].value_counts()

# Filter to get only those Product_Codes with counts greater than 400
product_codes_greater_than_400 = product_code_counts[product_code_counts > 400]

# Display the result
print(product_codes_greater_than_400)

In [None]:
# Minimum number of rows a product needs (strictly more than) to be kept
threshold = 400

# Count once and reuse the result; the counts were previously computed twice
code_counts = filtered_data_2['Product_Code'].value_counts()
frequent_products = code_counts[code_counts > threshold].index

# Filter the dataset to keep only the frequent products
filtered_data_2 = filtered_data_2[filtered_data_2['Product_Code'].isin(frequent_products)]

# Display the filtered data
print(filtered_data_2.head())

In [None]:
#  Aggregate the data by Product_Code and Date
data_aggregated_2 = filtered_data_2.groupby(['Product_Code', 'Date']).sum().reset_index()

# Display the aggregated data
data_aggregated_2


In [None]:
data_aggregated_2['Product_Code'].value_counts()

In [None]:
data_pivot_2 = data_aggregated_2.pivot(index='Date', columns='Product_Code', values='Order_Demand').fillna(0)

# Display the pivoted data
print(data_pivot_2)


# Resample the pivoted data weekly and fill missing values with 0
data_pivot_resampled = data_pivot_2.resample('W').sum().fillna(0)


In [None]:
data_pivot_2

In [None]:
# Step 1: Resample to weekly totals. With 'W' each bin ends on a Sunday and is
# labelled by that end date.
data_resampled_weekly = data_pivot_2.resample('W').sum()

# Step 2: Build a daily index that covers every day of every weekly bin. The first
# bin is labelled by its last day, so the range has to start six days earlier.
daily_index = pd.date_range(start=data_resampled_weekly.index.min() - pd.Timedelta(days=6),
                            end=data_resampled_weekly.index.max(), freq='D')

# Back-fill so each week's total lands on the seven days ending at its label, then
# divide by 7 to spread it evenly. A forward fill would instead push every total
# onto the following week and leave the final week with a single day.
data_resampled_daily = data_resampled_weekly.reindex(daily_index).bfill() / 7

# Step 3: Display the resampled daily data
print(data_resampled_daily.head())

In [None]:
# Count the occurrences of each product code
product_counts = filtered_data_2['Product_Code'].value_counts()

# Create a bar plot for product distribution
plt.figure(figsize=(10, 6))
sns.barplot(x=product_counts.index, y=product_counts.values)
plt.title('Distribution of Product Codes')
plt.xlabel('Product Code')
plt.ylabel('Count')
plt.xticks(rotation=90)  # Rotate x-axis labels for better readability
plt.tight_layout()
plt.show()

# Model 2

In [None]:
# Rescale every product column with MinMaxScaler (default output range 0..1).
# NOTE(review): the scaler is fitted on the whole series, before the train/test split
# made further down, so test-period values influence the scaling — consider fitting
# on the training portion only.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data_resampled_daily)

In [None]:
scaled_data

In [None]:
# Prepare data for LSTM
def create_sequences(data, time_steps=1):
    """Return ``(X, y)``: sliding windows of ``time_steps`` rows and the row after each.

    ``X[k]`` is ``data[k : k + time_steps]`` and ``y[k]`` is ``data[k + time_steps]``.
    Both results are NumPy arrays; they are empty when ``data`` is too short.
    """
    starts = range(len(data) - time_steps)
    X = np.array([data[s:s + time_steps] for s in starts])
    y = np.array([data[s + time_steps] for s in starts])
    return X, y

# Set time steps for LSTM
time_steps = 30  # e.g., 30 days
X, y = create_sequences(scaled_data, time_steps)

# Display the shapes of the data
print(X.shape, y.shape)


In [None]:
#Split the data into training and testing sets
split = int(len(X) * 0.9)


X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]


# Display the shapes of the training and testing data
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

In [None]:
# Imported in this cell so it does not depend on the Whse_A model cell having been
# run first: Adam is not among the notebook's top-level imports.
from tensorflow.keras.layers import Input
from tensorflow.keras.optimizers import Adam

# Define the model: one LSTM layer followed by a dense layer with one output per feature
model = Sequential()

# Declare the input shape (time steps, features) with an explicit Input layer;
# passing input_shape= to the first layer is deprecated in Keras 3.
model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(50, return_sequences=False))

# Output Layer (for each product in multi-product forecasting)
model.add(Dense(X_train.shape[2]))  # Number of outputs (one per product)

# Compile with Adam and MSE loss. Note that learning_rate=0.02 is well above
# Adam's Keras default of 0.001 — lower it if training is unstable.
optimizer = Adam(learning_rate=0.02)
model.compile(optimizer=optimizer, loss='mean_squared_error')

# Print model summary
model.summary()




In [None]:


# Early stopping to stop training when validation loss doesn't improve after 5 epochs
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Fit the model.
# Validation uses the last 10% of the training windows (validation_split takes the
# tail of the data before shuffling) instead of the test set: early stopping and
# best-weight restoration select the model on the validation data, so using
# X_test/y_test there would make the later test-set evaluation optimistic.
history = model.fit(
    X_train, y_train,  # Training data
    epochs=50,  # You can adjust the number of epochs based on your data and model's performance
    batch_size=32,  # Batch size, tune this for better results
    validation_split=0.1,  # Hold-out taken from the end of the training data
    callbacks=[early_stopping],  # Early stopping callback
    verbose=1  # Shows the training process
)






In [None]:
# Evaluate training history
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
# Make predictions
y_pred = model.predict(X_test)



# Display the shape of predictions
print(y_pred.shape)

In [None]:
# Inverse transform the predictions and actual values
y_pred_inverse = scaler.inverse_transform(y_pred)
y_test_inverse = scaler.inverse_transform(y_test)

# Display the shapes of inverse transformed predictions and actual values
print(y_pred_inverse.shape, y_test_inverse.shape)

In [None]:
filtered_data_2['Product_Code'].value_counts()

In [None]:
selected_products =  ['Product_0349', 'Product_2167', 'Product_0191', 'Product_1342',
        'Product_1432']

# Create a DataFrame for actual and predicted values.
# The test rows were cut from the scaled version of data_resampled_daily, so the
# matching dates are the tail of that frame's index. data_pivot_2 only contains the
# dates present in the aggregated order data, so its tail does not line up with y_test.
results_df = pd.DataFrame({
    'Date': data_resampled_daily.index[-len(y_test):]  # Get the corresponding dates for testing
})

# Extract actual and predicted values for each selected product
for product in selected_products:
    # Column position in the frame the scaler was fitted on
    product_index = data_resampled_daily.columns.get_loc(product)
    results_df[f'Actual_{product}'] = y_test_inverse[:, product_index]  # Actual values
    results_df[f'Predicted_{product}'] = y_pred_inverse[:, product_index]  # Predicted values

# Set 'Date' as index
results_df.set_index('Date', inplace=True)

# Resample to monthly frequency (sum of orders in each month)
results_df_monthly = results_df.resample('M').sum()

# Plot actual vs predicted for each selected product (monthly), one subplot per product
plt.figure(figsize=(15, 10))
for i, product in enumerate(selected_products):
    plt.subplot(len(selected_products), 1, i + 1)
    plt.plot(results_df_monthly.index, results_df_monthly[f'Actual_{product}'],
             label='Actual', color='blue')
    plt.plot(results_df_monthly.index, results_df_monthly[f'Predicted_{product}'],
             label='Predicted', color='orange')
    plt.title(f'Monthly Actual vs Predicted for {product}')
    plt.xlabel('Date')
    plt.ylabel('Order Demand')
    plt.legend()

plt.tight_layout()
plt.show()

# Forecasting

In [None]:
# Forecast for the next 40 days
def forecast_next_days(model, last_data, days=40):
    """Generate ``days`` successive predictions, each fed back into the input window.

    Parameters
    ----------
    model : object exposing ``predict``; it is called with an array of shape
        ``(1, window_length, n_features)`` and the first row of its result is
        taken as the next step.
    last_data : 2-D array-like ``(window_length, n_features)`` used as the seed window.
    days : number of steps to generate.

    Returns
    -------
    numpy.ndarray with one row per generated step.
    """
    window = np.asarray(last_data)
    # Take the window length from the seed itself rather than from the notebook-level
    # `time_steps` variable, so the function works for any window size.
    n_steps, n_features = window.shape
    predictions = []
    for _ in range(days):
        next_pred = model.predict(window.reshape(1, n_steps, n_features))[0]
        predictions.append(next_pred)
        # Slide the window: drop the oldest row, append the new prediction
        window = np.concatenate([window[1:], [next_pred]], axis=0)
    return np.array(predictions)

# Seed window: the last `time_steps` rows of the full scaled series
# (scaled_data covers the test period as well, not only the training set)
last_input = scaled_data[-time_steps:]

# Generate the forecast
forecast = forecast_next_days(model, last_input)


In [None]:
# Inverse transform the scaled predictions to get values in the original units
forecast_inverse = scaler.inverse_transform(forecast)

# One date per forecast row, starting the day after the last date in the daily frame.
# The number of periods comes from the forecast itself instead of a hard-coded 40,
# so this stays correct if the forecast horizon is changed.
forecast_dates = pd.date_range(start=data_resampled_daily.index[-1] + pd.Timedelta(days=1),
                               periods=len(forecast_inverse))
forecast_df = pd.DataFrame(data=forecast_inverse, index=forecast_dates, columns=data_resampled_daily.columns)

# Display the forecast results
print(forecast_df)


In [None]:
# Define the products to visualize
selected_products =  ['Product_0349', 'Product_2167', 'Product_0191', 'Product_1342',
        'Product_1432']

# Filter the forecast_df to include only the selected products
forecast_selected_df = forecast_df[selected_products]

# Weekly totals of the forecast ('W' is a weekly frequency, not a monthly one).
# The variable keeps its existing name; it is not used by the plot below.
forecast_monthly_df = forecast_selected_df.resample('W').sum()  # You can also use 'mean' instead of 'sum' if needed

# Plot the day-by-day forecast for the selected products. The title now says
# "Daily" because the plotted frame is the un-aggregated daily forecast.
plt.figure(figsize=(12, 6))
for product in forecast_selected_df.columns:
    plt.plot(forecast_selected_df.index, forecast_selected_df[product], label=product)
plt.title('Daily Forecasted Demand for Selected Products')
plt.xlabel('Date')
plt.ylabel('Forecasted Order Demand')
plt.legend()
plt.show()


# Whse_S

In [None]:
data_3.drop(['Warehouse', 'Product_Category'], axis=1, inplace=True)


In [None]:
# Preprocess the Whse_S subset: parse dates, order rows chronologically, make demand numeric
data_3['Date'] = pd.to_datetime(data_3['Date'])  # Ensure Date column is datetime
data_3.sort_values('Date', inplace=True)  # Sort by date

# Ensure Order_Demand is numeric; errors='coerce' turns unparseable values into NaN
# NOTE(review): values wrapped in parentheses would presumably be coerced to NaN and
# dropped in the next cell, unlike in the exploratory section — confirm this is intended.
data_3['Order_Demand'] = pd.to_numeric(data_3['Order_Demand'], errors='coerce')

# Display data types and check for null values
print(data_3.dtypes)
print(data_3.isnull().sum())



In [None]:
# Rows missing either the date or the demand value are dropped rather than filled.
data_3.dropna(subset=['Date', 'Order_Demand'], inplace=True)

# Per-column count of remaining missing values
print(data_3.isnull().sum())


In [None]:
dataTest = data_3.copy()

In [None]:
# Extract year from the Date column
dataTest['Year'] = data_3['Date'].dt.year

# Group by the Year and count the number of rows for each year
rows_per_year = dataTest.groupby('Year').size()

# Display the number of rows per year
print(rows_per_year)

In [None]:
# Assuming 'Date' column is already in datetime format
data_3['Year'] = data_3['Date'].dt.year

# Keep the years 2013 through 2016 (inclusive on both ends), then drop the helper
# 'Year' column from the result. drop() returns a new frame here; the previous
# in-place drop operated on a slice of data_3 and raised SettingWithCopyWarning.
filtered_data_3 = data_3[data_3['Year'].between(2013, 2016)].drop(columns=['Year'])


In [None]:
filtered_data_3['Product_Code'].value_counts()

In [None]:
# Number of rows per product code in the filtered Whse_S data
product_code_counts = filtered_data_3['Product_Code'].value_counts()

# Filter to get only those Product_Codes with counts greater than 200
# (the variable name still says 400; the cut-off actually applied is 200)
product_codes_greater_than_400 = product_code_counts[product_code_counts > 200]

# Display the result
print(product_codes_greater_than_400)

In [None]:
# Minimum number of rows a product needs (strictly more than) to be kept
threshold = 200

# Count once and reuse the result; the counts were previously computed twice
code_counts = filtered_data_3['Product_Code'].value_counts()
frequent_products = code_counts[code_counts > threshold].index

# Filter the dataset to keep only the frequent products
filtered_data_3 = filtered_data_3[filtered_data_3['Product_Code'].isin(frequent_products)]

# Display the filtered data
print(filtered_data_3.head())


In [None]:
#  Aggregate the data by Product_Code and Date
data_aggregated_3 = filtered_data_3.groupby(['Product_Code', 'Date']).sum().reset_index()

# Display the aggregated data
data_aggregated_3


In [None]:
data_aggregated_3['Product_Code'].value_counts()


In [None]:
data_pivot_3 = data_aggregated_3.pivot(index='Date', columns='Product_Code', values='Order_Demand').fillna(0)

# Display the pivoted data
print(data_pivot_3)

# Resample the pivoted data weekly and fill missing values with 0
data_pivot_resampled = data_pivot_3.resample('W').sum().fillna(0)


In [None]:
# Step 1: Resample to weekly totals. With 'W' each bin ends on a Sunday and is
# labelled by that end date.
data_resampled_weekly = data_pivot_3.resample('W').sum()

# Step 2: Build a daily index that covers every day of every weekly bin. The first
# bin is labelled by its last day, so the range has to start six days earlier.
daily_index = pd.date_range(start=data_resampled_weekly.index.min() - pd.Timedelta(days=6),
                            end=data_resampled_weekly.index.max(), freq='D')

# Back-fill so each week's total lands on the seven days ending at its label, then
# divide by 7 to spread it evenly. A forward fill would instead push every total
# onto the following week and leave the final week with a single day.
data_resampled_daily = data_resampled_weekly.reindex(daily_index).bfill() / 7

# Step 3: Display the resampled daily data
print(data_resampled_daily.head())

In [None]:
data_resampled_daily


In [None]:
# Count the occurrences of each product code
product_counts = filtered_data_3['Product_Code'].value_counts()

# Create a bar plot for product distribution
plt.figure(figsize=(10, 6))
sns.barplot(x=product_counts.index, y=product_counts.values)
plt.title('Distribution of Product Codes')
plt.xlabel('Product Code')
plt.ylabel('Count')
plt.xticks(rotation=90)  # Rotate x-axis labels for better readability
plt.tight_layout()
plt.show()

# Model 3

In [None]:
# Rescale every product column with MinMaxScaler (default output range 0..1).
# NOTE(review): the scaler is fitted on the whole series, before the train/test split
# made further down, so test-period values influence the scaling — consider fitting
# on the training portion only.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data_resampled_daily)

In [None]:
# Prepare data for LSTM
def create_sequences(data, time_steps=1):
    """Pair every run of ``time_steps`` consecutive rows with the row that follows it.

    Returns two NumPy arrays: the stacked input windows and the stacked targets.
    Nothing is produced when ``data`` has ``time_steps`` rows or fewer.
    """
    inputs, targets = [], []
    for end in range(time_steps, len(data)):
        inputs.append(data[end - time_steps:end])
        targets.append(data[end])
    return np.array(inputs), np.array(targets)

# Set time steps for LSTM
time_steps = 30  # e.g., 30 days
X, y = create_sequences(scaled_data, time_steps)

# Display the shapes of the data
print(X.shape, y.shape)


In [None]:
# Chronological split (no shuffling): the first 90% of the windows are used
# for training, the remaining 10% are held out.
split = int(0.9 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# Report the shapes of both partitions
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

In [None]:
# Single-layer LSTM regressor: 100 recurrent units read each
# (time_steps, n_products) window and a Dense head emits one value per product.
# NOTE(review): `Adam` is not among the imports at the top of the notebook —
# presumably it is imported in an earlier cell; confirm.
model = Sequential([
    LSTM(100, return_sequences=False, input_shape=X_train.shape[1:]),
    Dense(X_train.shape[-1]),
])

# Adam optimiser with an explicit learning rate, minimising mean squared error
optimizer = Adam(learning_rate=0.001)
model.compile(loss='mean_squared_error', optimizer=optimizer)

# Print the layer-by-layer summary
model.summary()



In [None]:


# Stop once val_loss has failed to improve for 5 consecutive epochs and
# restore the best weights seen.
# NOTE(review): the held-out test split doubles as the validation set here, so
# early stopping is tuned on the same data that is later used for evaluation.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Train for at most 50 epochs in mini-batches of 32, logging every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)







In [None]:
# Training vs. validation loss, one point per epoch
plt.figure(figsize=(12, 6))
for curve, legend_name in (('loss', 'Train Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[curve], label=legend_name)
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Run the trained network on the held-out windows
y_pred = model.predict(x=X_test)

# Report the shape of the prediction array
print(y_pred.shape)

In [None]:
# Undo the min-max scaling for both the predictions and the true targets
y_pred_inverse, y_test_inverse = (
    scaler.inverse_transform(scaled) for scaled in (y_pred, y_test)
)

# Report both shapes side by side
print(y_pred_inverse.shape, y_test_inverse.shape)

In [None]:
filtered_data_3['Product_Code'].value_counts().head(10)

In [None]:
selected_products = ['Product_1263', 'Product_2138', 'Product_2132', 'Product_0033',
                     'Product_1341']

# The test targets are the final len(y_test) rows of the daily series, so
# those dates label the actual/predicted values.
results_df = pd.DataFrame({'Date': data_resampled_daily.index[-len(y_test):]})

# One "Actual_*" and one "Predicted_*" column per selected product
for product in selected_products:
    column_position = data_resampled_daily.columns.get_loc(product)
    results_df[f'Actual_{product}'] = y_test_inverse[:, column_position]
    results_df[f'Predicted_{product}'] = y_pred_inverse[:, column_position]

# Index by date so the frame can be resampled
results_df.set_index('Date', inplace=True)

# Monthly totals ('M' bins)
results_df_monthly = results_df.resample('M').sum()

# One stacked panel per product: monthly actual vs. predicted demand
plt.figure(figsize=(15, 10))
for panel, product in enumerate(selected_products, start=1):
    plt.subplot(len(selected_products), 1, panel)
    for series_kind, line_color in (('Actual', 'blue'), ('Predicted', 'orange')):
        plt.plot(results_df_monthly.index, results_df_monthly[f'{series_kind}_{product}'],
                 label=series_kind, color=line_color)
    plt.title(f'Monthly Actual vs Predicted for {product}')
    plt.xlabel('Date')
    plt.ylabel('Order Demand')
    plt.legend()

plt.tight_layout()
plt.show()

# Forecasting

In [None]:
# Forecast for the next 40 days
def forecast_next_days(model, last_data, days=40):
    """Roll the model forward ``days`` steps, feeding each prediction back in.

    Parameters
    ----------
    model : object exposing ``predict``
        Called with a ``(1, window, n_features)`` array; the first row of its
        result is taken as the next step.
    last_data : array-like of shape (window, n_features)
        Seed window. Its own length defines the window size, so the function
        no longer depends on a global ``time_steps``.
    days : int, default 40
        Number of steps to forecast.

    Returns
    -------
    numpy.ndarray of shape (days, n_features)
        The predictions in order; an empty ``(0, n_features)`` array when
        ``days`` is 0.
    """
    window = np.asarray(last_data)
    n_steps, n_features = window.shape
    predictions = []
    for _ in range(days):
        next_pred = model.predict(window.reshape(1, n_steps, n_features))[0]
        predictions.append(next_pred)
        # Slide the window: drop the oldest row, append the newest prediction
        window = np.vstack([window[1:], next_pred])
    if not predictions:
        # Keep the result two-dimensional so downstream code still works
        return np.empty((0, n_features))
    return np.array(predictions)

# Seed window: the most recent `time_steps` rows of the full scaled series
# (the tail of the whole dataset, not only of the training split)
last_input = scaled_data[-time_steps:]

# Roll the model forward from that window (default horizon: 40 days)
forecast = forecast_next_days(model, last_data=last_input)


In [None]:
# Undo the min-max scaling so the forecast is expressed in demand units
forecast_inverse = scaler.inverse_transform(forecast)

# Date the forecast rows starting the day after the last observed day. The
# number of periods is taken from the forecast itself instead of a hard-coded
# 40, so the index always matches the number of predicted rows.
forecast_dates = pd.date_range(start=data_resampled_daily.index[-1] + pd.Timedelta(days=1),
                               periods=len(forecast_inverse))
forecast_df = pd.DataFrame(data=forecast_inverse, index=forecast_dates, columns=data_resampled_daily.columns)

# Display the forecast results
print(forecast_df)


In [None]:
# Products to visualize
selected_products = ['Product_1263', 'Product_2138', 'Product_2132', 'Product_0033',
                     'Product_1341']

# Restrict the forecast to those products
forecast_selected_df = forecast_df[selected_products]

# Weekly totals of the daily forecast ('W' bins). The variable keeps its
# historical name, but it holds weekly — not monthly — data and is not used
# by the plot below.
forecast_monthly_df = forecast_selected_df.resample('W').sum()

# Plot the daily forecast, one line per product (the title previously said
# "Monthly" although daily values are drawn)
plt.figure(figsize=(12, 6))
for product in forecast_selected_df.columns:
    plt.plot(forecast_selected_df.index, forecast_selected_df[product], label=product)
plt.title('Daily Forecasted Demand for Selected Products')
plt.xlabel('Date')
plt.ylabel('Forecasted Order Demand')
plt.legend()
plt.show()


# Whse_J

In [None]:
data_4.drop(['Warehouse', 'Product_Category'], axis=1, inplace=True)


In [None]:
# Preprocess the dataset
data_4['Date'] = pd.to_datetime(data_4['Date'])  # Ensure Date column is datetime
data_4.sort_values('Date', inplace=True)  # Sort by date

# Ensure Order_Demand is numeric
data_4['Order_Demand'] = pd.to_numeric(data_4['Order_Demand'], errors='coerce')

# Display data types and check for null values
print(data_4.dtypes)
print(data_4.isnull().sum())


In [None]:
# Drop rows with missing dates
data_4.dropna(subset=['Date'], inplace=True)

# For Order_Demand, we can either drop the missing values or fill them
# Here, we will drop rows with missing Order_Demand values
data_4.dropna(subset=['Order_Demand'], inplace=True)

# Check if there are still missing values
print(data_4.isnull().sum())


In [None]:
dataTest = data_4.copy()

In [None]:
# Calendar year of every order, stored on the scratch copy only
dataTest['Year'] = data_4['Date'].dt.year

# Number of rows recorded in each year
rows_per_year = dataTest.groupby(by='Year').size()

# Show the yearly row counts
print(rows_per_year)

In [None]:
# Derive the calendar year of each order ('Date' is already datetime); the
# column stays on data_4, as before.
data_4['Year'] = data_4['Date'].dt.year

# Keep the orders from 2013 through 2016 (inclusive) and remove the helper
# 'Year' column from the result. A non-inplace drop is used because dropping
# in place on a filtered slice triggers pandas' SettingWithCopyWarning.
filtered_data_4 = data_4[data_4['Year'].between(2013, 2016)].drop(columns=['Year'])

In [None]:
filtered_data_4['Product_Code'].value_counts()

In [None]:
# Row count per product code
product_code_counts = filtered_data_4['Product_Code'].value_counts()

# Codes that appear in more than 500 rows (the variable name says 1000, but
# the cut-off actually applied is 500)
product_codes_greater_than_1000 = product_code_counts.loc[product_code_counts.gt(500)]

# Display the result
print(product_codes_greater_than_1000)

In [None]:
threshold = 500

# Product codes whose row count exceeds the threshold (counts computed once)
frequent_products = (
    filtered_data_4['Product_Code']
    .value_counts()
    .loc[lambda counts: counts > threshold]
    .index
)

# Keep only the rows that belong to those frequent products
is_frequent = filtered_data_4['Product_Code'].isin(frequent_products)
filtered_data_4 = filtered_data_4[is_frequent]

# Preview the filtered data
print(filtered_data_4.head())

In [None]:
# Total demand per (Product_Code, Date) pair, with both keys kept as columns
data_aggregated_4 = filtered_data_4.groupby(['Product_Code', 'Date'], as_index=False).sum()

# Display the aggregated data
data_aggregated_4


In [None]:
# Reshape the long (Product_Code, Date, Order_Demand) table into a wide frame:
# one row per Date, one column per Product_Code; missing combinations become 0.
data_pivot_4 = (
    data_aggregated_4
    .pivot(index='Date', columns='Product_Code', values='Order_Demand')
    .fillna(0)
)

# Show the wide table
print(data_pivot_4)

# Weekly totals per product ('W' bins), with any remaining gaps set to 0
data_pivot_resampled = (
    data_pivot_4
    .resample('W')
    .sum()
    .fillna(0)
)


In [None]:
# Step 1: Resample to weekly totals. pandas labels each 'W' bin with its
# closing Sunday, so the row dated Sunday D holds the demand of the seven
# days ending on D.
data_resampled_weekly = data_pivot_4.resample('W').sum()

# Step 2: Build a daily index that also covers the six days leading up to the
# first week label, so every week (including the first) expands to 7 days.
daily_index = pd.date_range(start=data_resampled_weekly.index.min() - pd.Timedelta(days=6),
                            end=data_resampled_weekly.index.max(), freq='D')

# Spread each weekly total evenly over the days it actually covers: back-fill
# from the week-ending label. (A forward fill would push every week's demand
# onto the following week and leave the final week with a single day.)
data_resampled_daily = data_resampled_weekly.reindex(daily_index).bfill() / 7

# Step 3: Display the resampled daily data
print(data_resampled_daily.head())

In [None]:
# Number of rows each product code contributes after filtering
product_counts = filtered_data_4['Product_Code'].value_counts()

# Bar chart of those frequencies, one bar per product code
plt.figure(figsize=(10, 6))
ax = sns.barplot(x=product_counts.index, y=product_counts.values)
ax.set_title('Distribution of Product Codes')
ax.set_xlabel('Product Code')
ax.set_ylabel('Count')
plt.xticks(rotation=90)  # vertical tick labels so the codes do not overlap
plt.tight_layout()
plt.show()

# Model 4

In [None]:
# Min-max scale every product column of the daily series.
# NOTE(review): the scaler is fitted on the whole series, i.e. before the
# train/test split performed further down — confirm this is intended.
scaler = MinMaxScaler()
scaler.fit(data_resampled_daily)
scaled_data = scaler.transform(data_resampled_daily)

In [None]:
# Prepare data for LSTM
def create_sequences(data, time_steps=1):
    """Cut a series into sliding windows and their next-step targets.

    Window ``k`` is ``data[k:k + time_steps]`` and its target is
    ``data[k + time_steps]``; there are ``len(data) - time_steps`` such pairs
    (none when the series is not longer than ``time_steps``).

    Returns a tuple ``(X, y)`` of NumPy arrays built from the windows and
    the targets respectively.
    """
    n_windows = len(data) - time_steps
    windows = [data[start:start + time_steps] for start in range(n_windows)]
    targets = [data[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Window length: each sample consists of 30 consecutive daily rows
time_steps = 30
X, y = create_sequences(scaled_data, time_steps=time_steps)

# Sanity-check the shapes of the windows and their targets
print(X.shape, y.shape)


In [None]:
# Chronological split (no shuffling): the first 90% of the windows are used
# for training, the remaining 10% are held out.
split = int(0.9 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# Report the shapes of both partitions
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)


In [None]:
# Single-layer LSTM regressor: 50 recurrent units read each
# (time_steps, n_products) window and a Dense head emits one value per product.
# NOTE(review): `Adam` is not among the imports at the top of the notebook —
# presumably it is imported in an earlier cell; confirm.
model = Sequential([
    LSTM(50, return_sequences=False, input_shape=X_train.shape[1:]),
    Dense(X_train.shape[-1]),
])

# Adam optimiser with an explicit learning rate, minimising mean squared error
optimizer = Adam(learning_rate=0.001)
model.compile(loss='mean_squared_error', optimizer=optimizer)

# Print the layer-by-layer summary
model.summary()



In [None]:


# Stop once val_loss has failed to improve for 5 consecutive epochs and
# restore the best weights seen.
# NOTE(review): the held-out test split doubles as the validation set here, so
# early stopping is tuned on the same data that is later used for evaluation.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Train for at most 50 epochs in mini-batches of 32, logging every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)


In [None]:
# Training vs. validation loss, one point per epoch
plt.figure(figsize=(12, 6))
for curve, legend_name in (('loss', 'Train Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[curve], label=legend_name)
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Run the trained network on the held-out windows
y_pred = model.predict(x=X_test)

# Report the shape of the prediction array
print(y_pred.shape)

In [None]:
# Undo the min-max scaling for both the predictions and the true targets
y_pred_inverse, y_test_inverse = (
    scaler.inverse_transform(scaled) for scaled in (y_pred, y_test)
)

# Report both shapes side by side
print(y_pred_inverse.shape, y_test_inverse.shape)

In [None]:
filtered_data_4['Product_Code'].value_counts()

In [None]:
selected_products = ['Product_1295', 'Product_1382', 'Product_1286', 'Product_1378',
                     'Product_1359']

# The model was trained on data_resampled_daily, so the test targets are the
# final len(y_test) rows of THAT frame. Taking the dates from data_pivot_4
# (the raw order dates) mislabels every row, so the daily index is used here.
results_df = pd.DataFrame({
    'Date': data_resampled_daily.index[-len(y_test):]
})

# One "Actual_*" and one "Predicted_*" column per selected product; column
# positions are looked up in the frame the scaler/model actually saw.
for product in selected_products:
    product_index = data_resampled_daily.columns.get_loc(product)
    results_df[f'Actual_{product}'] = y_test_inverse[:, product_index]
    results_df[f'Predicted_{product}'] = y_pred_inverse[:, product_index]

# Index by date so the frame can be resampled
results_df.set_index('Date', inplace=True)

# Monthly totals ('M' bins)
results_df_monthly = results_df.resample('M').sum()

# One stacked panel per product: monthly actual vs. predicted demand
plt.figure(figsize=(15, 10))
for i, product in enumerate(selected_products):
    plt.subplot(len(selected_products), 1, i + 1)
    plt.plot(results_df_monthly.index, results_df_monthly[f'Actual_{product}'],
             label='Actual', color='blue')
    plt.plot(results_df_monthly.index, results_df_monthly[f'Predicted_{product}'],
             label='Predicted', color='orange')
    plt.title(f'Monthly Actual vs Predicted for {product}')
    plt.xlabel('Date')
    plt.ylabel('Order Demand')
    plt.legend()

plt.tight_layout()
plt.show()

# Forecasting

In [None]:
# Forecast for the next 40 days
def forecast_next_days(model, last_data, days=40):
    """Roll the model forward ``days`` steps, feeding each prediction back in.

    Parameters
    ----------
    model : object exposing ``predict``
        Called with a ``(1, window, n_features)`` array; the first row of its
        result is taken as the next step.
    last_data : array-like of shape (window, n_features)
        Seed window. Its own length defines the window size, so the function
        no longer depends on a global ``time_steps``.
    days : int, default 40
        Number of steps to forecast.

    Returns
    -------
    numpy.ndarray of shape (days, n_features)
        The predictions in order; an empty ``(0, n_features)`` array when
        ``days`` is 0.
    """
    window = np.asarray(last_data)
    n_steps, n_features = window.shape
    predictions = []
    for _ in range(days):
        next_pred = model.predict(window.reshape(1, n_steps, n_features))[0]
        predictions.append(next_pred)
        # Slide the window: drop the oldest row, append the newest prediction
        window = np.vstack([window[1:], next_pred])
    if not predictions:
        # Keep the result two-dimensional so downstream code still works
        return np.empty((0, n_features))
    return np.array(predictions)

# Seed window: the most recent `time_steps` rows of the full scaled series
# (the tail of the whole dataset, not only of the training split)
last_input = scaled_data[-time_steps:]

# Roll the model forward from that window (default horizon: 40 days)
forecast = forecast_next_days(model, last_data=last_input)


In [None]:
# Undo the min-max scaling so the forecast is expressed in demand units
forecast_inverse = scaler.inverse_transform(forecast)

# Date the forecast rows starting the day after the last observed day. The
# number of periods is taken from the forecast itself instead of a hard-coded
# 40, so the index always matches the number of predicted rows.
forecast_dates = pd.date_range(start=data_resampled_daily.index[-1] + pd.Timedelta(days=1),
                               periods=len(forecast_inverse))
forecast_df = pd.DataFrame(data=forecast_inverse, index=forecast_dates, columns=data_resampled_daily.columns)

# Display the forecast results
print(forecast_df)


In [None]:
# Products to visualize
selected_products = ['Product_1295', 'Product_1382', 'Product_1286', 'Product_1378',
                     'Product_1359']

# Restrict the forecast to those products
forecast_selected_df = forecast_df[selected_products]

# Weekly totals of the daily forecast ('W' bins). The variable keeps its
# historical name, but it holds weekly — not monthly — data and is not used
# by the plot below.
forecast_monthly_df = forecast_selected_df.resample('W').sum()

# Plot the daily forecast, one line per product (the title previously said
# "Monthly" although daily values are drawn)
plt.figure(figsize=(12, 6))
for product in forecast_selected_df.columns:
    plt.plot(forecast_selected_df.index, forecast_selected_df[product], label=product)
plt.title('Daily Forecasted Demand for Selected Products')
plt.xlabel('Date')
plt.ylabel('Forecasted Order Demand')
plt.legend()
plt.show()


# Model For Our Factory Order Demand

In [None]:
# Strip the parentheses that wrap some Order_Demand strings, then cast to int.
# regex=False makes the literal match explicit: the default of `regex` differs
# between pandas 1.x and 2.x, and '(' / ')' are regex metacharacters.
# NOTE(review): parentheses may be accounting notation for negative
# quantities; this keeps the original behaviour of reading them as positive
# values — confirm against the data dictionary.
data['Order_Demand'] = (
    data['Order_Demand']
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .astype('int64')
)

In [None]:
# Parse the dates and derive the calendar year of each order
data['Date'] = pd.to_datetime(data['Date'])
data['Year'] = data['Date'].dt.year

# Total ordered quantity per year
demand_per_year = data['Order_Demand'].groupby(data['Year']).sum()

# Show the yearly totals
print(demand_per_year)

In [None]:
demand_per_year

In [None]:
import matplotlib.pyplot as plt

# Make sure 'Date' is a datetime column
data['Date'] = pd.to_datetime(data['Date'])

# Monthly period of each order (e.g. '2020-01')
data['YearMonth'] = data['Date'].dt.to_period('M')

# Total demand per calendar month
demand_per_month = data.groupby('YearMonth')['Order_Demand'].sum()

# Years to plot, one stacked panel each
years = [2013, 2014, 2015, 2016]
plt.figure(figsize=(12, 10))

for panel, year in enumerate(years, start=1):
    # Months that belong to the current year
    in_year = demand_per_month.index.year == year
    demand_year = demand_per_month[in_year]

    # Draw this year's monthly totals in its own panel
    plt.subplot(len(years), 1, panel)
    demand_year.plot(kind='line', marker='o', color='b')

    # Panel labelling
    plt.title(f'Total Demand for {year}')
    plt.xlabel('Month')
    plt.ylabel('Total Order Demand')
    plt.grid(True)
    plt.xticks(rotation=45)

# Avoid overlapping panels, then render
plt.tight_layout()
plt.show()


In [None]:
data

In [None]:
dataaa =  data.drop(['Warehouse' ,'Year' , 'YearMonth' , 'Product_Category' ] , axis = 1)

In [None]:
# Parse the dates and order the rows chronologically (in place)
dataaa['Date'] = pd.to_datetime(dataaa['Date'])
dataaa.sort_values(by='Date', inplace=True)

# Coerce Order_Demand to numbers; entries that cannot be parsed become NaN
dataaa['Order_Demand'] = pd.to_numeric(dataaa['Order_Demand'], errors='coerce')

# Inspect the column dtypes and the number of missing values per column
print(dataaa.dtypes)
print(dataaa.isna().sum())



In [None]:
# Discard every row that lacks a date or a demand value (rows are dropped
# rather than imputed)
dataaa.dropna(subset=['Date', 'Order_Demand'], inplace=True)

# Confirm that no missing values remain
print(dataaa.isna().sum())


In [None]:
dataTest = dataaa.copy()

In [None]:
# Calendar year of every order, stored on the scratch copy only
dataTest['Year'] = dataaa['Date'].dt.year

# Number of rows recorded in each year
rows_per_year = dataTest.groupby(by='Year').size()

# Show the yearly row counts
print(rows_per_year)

In [None]:
# Derive the calendar year of each order ('Date' is already datetime); the
# column stays on dataaa, as before.
dataaa['Year'] = dataaa['Date'].dt.year

# Keep the orders from 2013 through 2016 (inclusive) and remove the helper
# 'Year' column from the result. A non-inplace drop is used because dropping
# in place on a filtered slice triggers pandas' SettingWithCopyWarning.
filtered_data_all = dataaa[dataaa['Year'].between(2013, 2016)].drop(columns=['Year'])


In [None]:
filtered_data_all['Product_Code'].value_counts()

In [None]:
# Row count per product code
product_code_counts = filtered_data_all['Product_Code'].value_counts()

# Codes that appear in more than 200 rows
product_codes_greater_than_200 = product_code_counts.loc[product_code_counts.gt(200)]

# Display the result
print(product_codes_greater_than_200)

In [None]:
threshold = 200

# Product codes whose row count exceeds the threshold (counts computed once)
frequent_products = (
    filtered_data_all['Product_Code']
    .value_counts()
    .loc[lambda counts: counts > threshold]
    .index
)

# Keep only the rows that belong to those frequent products
is_frequent = filtered_data_all['Product_Code'].isin(frequent_products)
filtered_data_all = filtered_data_all[is_frequent]

# Row counts of the products that remain
filtered_data_all['Product_Code'].value_counts()


In [None]:
# Total demand per (Product_Code, Date) pair, with both keys kept as columns
data_aggregated_all = filtered_data_all.groupby(['Product_Code', 'Date'], as_index=False).sum()

# Number of distinct dates recorded for each product
data_aggregated_all['Product_Code'].value_counts()


In [None]:
# Reshape the long (Product_Code, Date, Order_Demand) table into a wide frame:
# one row per Date, one column per Product_Code; missing combinations become 0.
data_pivot_all = (
    data_aggregated_all
    .pivot(index='Date', columns='Product_Code', values='Order_Demand')
    .fillna(0)
)

# Show the wide table
print(data_pivot_all)

# Weekly totals per product ('W' bins), with any remaining gaps set to 0
data_pivot_resampled = (
    data_pivot_all
    .resample('W')
    .sum()
    .fillna(0)
)


In [None]:
# Step 1: Resample to weekly totals. pandas labels each 'W' bin with its
# closing Sunday, so the row dated Sunday D holds the demand of the seven
# days ending on D.
data_resampled_weekly = data_pivot_all.resample('W').sum()

# Step 2: Build a daily index that also covers the six days leading up to the
# first week label, so every week (including the first) expands to 7 days.
daily_index = pd.date_range(start=data_resampled_weekly.index.min() - pd.Timedelta(days=6),
                            end=data_resampled_weekly.index.max(), freq='D')

# Spread each weekly total evenly over the days it actually covers: back-fill
# from the week-ending label. (A forward fill would push every week's demand
# onto the following week and leave the final week with a single day.)
data_resampled_daily = data_resampled_weekly.reindex(daily_index).bfill() / 7

# Step 3: Display the resampled daily data
print(data_resampled_daily.head())

In [None]:
data_resampled_daily

In [None]:
# Min-max scale every product column of the daily series.
# NOTE(review): the scaler is fitted on the whole series, i.e. before the
# train/test split performed further down — confirm this is intended.
scaler = MinMaxScaler()
scaler.fit(data_resampled_daily)
scaled_data = scaler.transform(data_resampled_daily)

In [None]:
# Prepare data for LSTM
def create_sequences(data, time_steps=1):
    """Cut a series into sliding windows and their next-step targets.

    Window ``k`` is ``data[k:k + time_steps]`` and its target is
    ``data[k + time_steps]``; there are ``len(data) - time_steps`` such pairs
    (none when the series is not longer than ``time_steps``).

    Returns a tuple ``(X, y)`` of NumPy arrays built from the windows and
    the targets respectively.
    """
    n_windows = len(data) - time_steps
    windows = [data[start:start + time_steps] for start in range(n_windows)]
    targets = [data[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Window length: each sample consists of 30 consecutive daily rows
time_steps = 30
X, y = create_sequences(scaled_data, time_steps=time_steps)

# Sanity-check the shapes of the windows and their targets
print(X.shape, y.shape)


In [None]:
# Chronological split (no shuffling): the first 90% of the windows are used
# for training, the remaining 10% are held out.
split = int(0.9 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# Report the shapes of both partitions
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

In [None]:
# Single-layer LSTM regressor: 100 recurrent units read each
# (time_steps, n_products) window and a Dense head emits one value per product.
# NOTE(review): `Adam` is not among the imports at the top of the notebook —
# presumably it is imported in an earlier cell; confirm.
model = Sequential([
    LSTM(100, return_sequences=False, input_shape=X_train.shape[1:]),
    Dense(X_train.shape[-1]),
])

# Adam optimiser with an explicit learning rate, minimising mean squared error
optimizer = Adam(learning_rate=0.001)
model.compile(loss='mean_squared_error', optimizer=optimizer)

# Print the layer-by-layer summary
model.summary()



In [None]:


# Stop once val_loss has failed to improve for 5 consecutive epochs and
# restore the best weights seen.
# NOTE(review): the held-out test split doubles as the validation set here, so
# early stopping is tuned on the same data that is later used for evaluation.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Train for at most 50 epochs in mini-batches of 32, logging every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)







In [None]:
# Training vs. validation loss, one point per epoch
plt.figure(figsize=(12, 6))
for curve, legend_name in (('loss', 'Train Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[curve], label=legend_name)
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Run the trained network on the held-out windows
y_pred = model.predict(x=X_test)

# Report the shape of the prediction array
print(y_pred.shape)

In [None]:
# Undo the min-max scaling for both the predictions and the true targets
y_pred_inverse, y_test_inverse = (
    scaler.inverse_transform(scaled) for scaled in (y_pred, y_test)
)

# Report both shapes side by side
print(y_pred_inverse.shape, y_test_inverse.shape)

In [None]:
selected_products = ['Product_1263', 'Product_0374', 'Product_0349', 'Product_0033',
                     'Product_1341']

# The test targets are the final len(y_test) rows of the daily series, so
# those dates label the actual/predicted values.
results_df = pd.DataFrame({'Date': data_resampled_daily.index[-len(y_test):]})

# One "Actual_*" and one "Predicted_*" column per selected product
for product in selected_products:
    column_position = data_resampled_daily.columns.get_loc(product)
    results_df[f'Actual_{product}'] = y_test_inverse[:, column_position]
    results_df[f'Predicted_{product}'] = y_pred_inverse[:, column_position]

# Index by date so the frame can be resampled
results_df.set_index('Date', inplace=True)

# Monthly totals ('M' bins)
results_df_monthly = results_df.resample('M').sum()

# One stacked panel per product: monthly actual vs. predicted demand
plt.figure(figsize=(15, 10))
for panel, product in enumerate(selected_products, start=1):
    plt.subplot(len(selected_products), 1, panel)
    for series_kind, line_color in (('Actual', 'blue'), ('Predicted', 'orange')):
        plt.plot(results_df_monthly.index, results_df_monthly[f'{series_kind}_{product}'],
                 label=series_kind, color=line_color)
    plt.title(f'Monthly Actual vs Predicted for {product}')
    plt.xlabel('Date')
    plt.ylabel('Order Demand')
    plt.legend()

plt.tight_layout()
plt.show()

# Forecasting

In [None]:
# Forecast for the next 40 days
def forecast_next_days(model, last_data, days=40):
    """Roll the model forward ``days`` steps, feeding each prediction back in.

    Parameters
    ----------
    model : object exposing ``predict``
        Called with a ``(1, window, n_features)`` array; the first row of its
        result is taken as the next step.
    last_data : array-like of shape (window, n_features)
        Seed window. Its own length defines the window size, so the function
        no longer depends on a global ``time_steps``.
    days : int, default 40
        Number of steps to forecast.

    Returns
    -------
    numpy.ndarray of shape (days, n_features)
        The predictions in order; an empty ``(0, n_features)`` array when
        ``days`` is 0.
    """
    window = np.asarray(last_data)
    n_steps, n_features = window.shape
    predictions = []
    for _ in range(days):
        next_pred = model.predict(window.reshape(1, n_steps, n_features))[0]
        predictions.append(next_pred)
        # Slide the window: drop the oldest row, append the newest prediction
        window = np.vstack([window[1:], next_pred])
    if not predictions:
        # Keep the result two-dimensional so downstream code still works
        return np.empty((0, n_features))
    return np.array(predictions)

# Seed window: the most recent `time_steps` rows of the full scaled series
# (the tail of the whole dataset, not only of the training split)
last_input = scaled_data[-time_steps:]

# Roll the model forward from that window (default horizon: 40 days)
forecast = forecast_next_days(model, last_data=last_input)


In [None]:
# Undo the min-max scaling so the forecast is expressed in demand units
forecast_inverse = scaler.inverse_transform(forecast)

# Date the forecast rows starting the day after the last observed day. The
# number of periods is taken from the forecast itself instead of a hard-coded
# 40, so the index always matches the number of predicted rows.
forecast_dates = pd.date_range(start=data_resampled_daily.index[-1] + pd.Timedelta(days=1),
                               periods=len(forecast_inverse))
forecast_df = pd.DataFrame(data=forecast_inverse, index=forecast_dates, columns=data_resampled_daily.columns)

# Display the forecast results
print(forecast_df)


In [None]:
# Products to visualize
selected_products = ['Product_1263', 'Product_0374', 'Product_0349', 'Product_0033', 'Product_1341']

# Restrict the forecast to those products
forecast_selected_df = forecast_df[selected_products]

# Weekly totals of the daily forecast ('W' bins). The variable keeps its
# historical name, but it holds weekly — not monthly — data and is not used
# by the plot below.
forecast_monthly_df = forecast_selected_df.resample('W').sum()

# Plot the daily forecast, one line per product (the title previously said
# "Monthly" although daily values are drawn)
plt.figure(figsize=(12, 6))
for product in forecast_selected_df.columns:
    plt.plot(forecast_selected_df.index, forecast_selected_df[product], label=product)
plt.title('Daily Forecasted Demand for Selected Products')
plt.xlabel('Date')
plt.ylabel('Forecasted Order Demand')
plt.legend()
plt.show()
