# Demand Forecasting (regression)

In [None]:
import pandas as pd
# Load the raw demand history from the CSV next to the notebook.
data = pd.read_csv('demand.csv')

# DataFrame.info() prints its summary and returns None, so it is called on its
# own line instead of being packed into a tuple (which displayed a stray `None`
# and forced head() into a plain-text repr). head() is left as the cell's last
# expression so it is rendered as the cell output.
data.info()
data.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1029518 entries, 0 to 1029517
Data columns (total 5 columns):
 #   Column            Non-Null Count    Dtype 
---  ------            --------------    ----- 
 0   Product_Code      1029518 non-null  object
 1   Warehouse         1029518 non-null  object
 2   Product_Category  1029517 non-null  object
 3   Date              1018278 non-null  object
 4   Order_Demand      1029517 non-null  object
dtypes: object(5)
memory usage: 39.3+ MB


(   Product_Code Warehouse Product_Category       Date Order_Demand
 0  Product_0993    Whse_J     Category_028  2012/7/27         100 
 1  Product_0979    Whse_J     Category_028  2012/1/19         500 
 2  Product_0979    Whse_J     Category_028   2012/2/3         500 
 3  Product_0979    Whse_J     Category_028   2012/2/9         500 
 4  Product_0979    Whse_J     Category_028   2012/3/2         500 ,
 None)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Columns holding string identifiers that are replaced by integer codes below.
categorical_columns = ['Product_Code', 'Warehouse', 'Product_Category']

# Build the cleaned frame as a NEW object instead of overwriting `data`.
# Overwriting made this cell fail when re-run (Order_Demand was no longer a
# string column and Date had already been dropped); deriving from the untouched
# raw frame keeps the cell idempotent.
demand_clean = (
    data
    .assign(
        # Strip every non-digit character, then parse what is left as a number.
        # NOTE(review): this also removes signs, decimal points and parentheses,
        # so a value such as "-5" or "(5)" becomes 5 - confirm that is intended.
        Order_Demand=lambda frame: pd.to_numeric(
            frame['Order_Demand'].str.replace('[^0-9]', '', regex=True)
        )
    )
    # Drop rows with missing values in Date or Order_Demand.
    .dropna(subset=['Date', 'Order_Demand'])
    # Date is dropped for simplicity (date features could be extracted later).
    .drop(columns=['Date'])
)

# Label encode categorical features. Each column gets its own fitted encoder,
# stored by column name, so the integer codes can be mapped back to the original
# labels later. After the loop `label_encoder` refers to the encoder fitted on
# the last column (Product_Category), as it did before.
label_encoders = {}
for column in categorical_columns:
    label_encoder = LabelEncoder()
    demand_clean[column] = label_encoder.fit_transform(demand_clean[column])
    label_encoders[column] = label_encoder

# Split data into features (X) and target (y).
X = demand_clean.drop(columns=['Order_Demand'])
y = demand_clean['Order_Demand']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Displayed as the cell output: shapes of the four splits.
X_train.shape, X_test.shape, y_train.shape, y_test.shape


((814622, 3), (203656, 3), (814622,), (203656,))

In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Re-read the CSV so this cell starts from the raw data and does not rely on
# whatever an earlier cell left in `data`.
data = pd.read_csv('demand.csv')

# Order_Demand is cleaned as text first: keep only the digit characters,
# then parse the remainder as a number.
digits_only = data['Order_Demand'].str.replace('[^0-9]', '', regex=True)
data['Order_Demand'] = pd.to_numeric(digits_only)

# Keep only the rows that have both a Date and a parsed Order_Demand.
data = data.dropna(subset=['Date', 'Order_Demand'])

# Replace each categorical column with integer codes. The single encoder is
# refit for every column, so afterwards it holds the classes of the last one.
label_encoder = LabelEncoder()
for column in ('Product_Code', 'Warehouse', 'Product_Category'):
    data[column] = label_encoder.fit_transform(data[column])

# Date is not used as a feature.
data = data.drop(columns=['Date'])

# Features are every remaining column except the target.
target_column = 'Order_Demand'
X = data.drop(columns=[target_column])
y = data[target_column]

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Seed Python, NumPy and TensorFlow before any weights are created so that
# weight initialisation and batch shuffling repeat from run to run. The value
# matches the random_state used for the train/test split.
tf.keras.utils.set_random_seed(42)

# Define the neural network model: a small fully connected regressor.
# The input width is declared with an explicit Input layer rather than
# `input_dim=` on the first Dense layer, which Keras warns against for
# Sequential models.
model = tf.keras.models.Sequential([
    # One input per feature column.
    tf.keras.Input(shape=(X_train.shape[1],)),
    # First hidden layer
    tf.keras.layers.Dense(64, activation='relu'),
    # Second hidden layer
    tf.keras.layers.Dense(32, activation='relu'),
    # Output layer: a single unbounded value, i.e. the predicted demand.
    tf.keras.layers.Dense(1, activation='linear'),
])

# Compile the model: optimise mean squared error, report mean absolute error.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Carve a validation set out of the training data. Previously the test set was
# passed as validation_data and then reused for the final evaluation, so the
# per-epoch validation numbers and the reported test score came from the same
# rows. Keeping the test set out of fit() leaves it as an untouched hold-out.
# Note that the model is now fitted on 90% of the training rows.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42
)

# Train the model; `history` records loss/mae and val_loss/val_mae per epoch.
history = model.fit(X_fit, y_fit, epochs=10, batch_size=32, validation_data=(X_val, y_val), verbose=1)

# Evaluate the model once on the held-out test data.
# evaluate() returns the loss followed by the compiled metrics (here: mae).
loss, mae = model.evaluate(X_test, y_test)

# Get model weights and biases (a list of arrays, one kernel and one bias per layer).
weights = model.get_weights()

print("Mean Absolute Error on Test Set:", mae)
print("Weights and Biases:", weights)


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m25457/25457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 2ms/step - loss: 848787264.0000 - mae: 6923.2915 - val_loss: 708329984.0000 - val_mae: 7448.5234
Epoch 2/10
[1m25457/25457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 2ms/step - loss: 844987456.0000 - mae: 7023.0728 - val_loss: 704720768.0000 - val_mae: 7277.2812
Epoch 3/10
[1m25457/25457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 2ms/step - loss: 899160128.0000 - mae: 6904.3594 - val_loss: 701836352.0000 - val_mae: 6356.4160
Epoch 4/10
[1m25457/25457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 2ms/step - loss: 833016128.0000 - mae: 6887.4883 - val_loss: 701095360.0000 - val_mae: 6349.6035
Epoch 5/10
[1m25457/25457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 2ms/step - loss: 924110848.0000 - mae: 6872.3809 - val_loss: 706111360.0000 - val_mae: 7917.6426
Epoch 6/10
[1m25457/25457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 2ms/step - loss: 840091072.0000 - 