In [61]:
import pandas as pd
import os
import numpy as np

In [135]:
root = os.path.join("..","Data")
df = pd.read_excel(os.path.join(root, "DEMAND_FILE_PHASE2.xlsx"))
df = df.iloc[0:2].T.reset_index(drop=True)
df.columns = ['Date','Sales']
df.drop(0, inplace=True)
df['Sales'] = df['Sales'].astype('float')
df['Date'] = pd.to_datetime(df['Date'])
full_date_range = pd.date_range(start=df['Date'].min(), end=df['Date'].max(), freq='D')
df = df.set_index('Date').reindex(full_date_range)
df['Sales'] = df['Sales'].ffill()
df.reset_index(inplace=True)
df.rename(columns={'index': 'Date'}, inplace=True)
train_df = df[df['Date']<"2024-01-01"].reset_index(drop=True)
test_df = df[df['Date'] >= "2024-01-01"].reset_index(drop=True)
    

In [136]:
train_df

Unnamed: 0,Date,Sales
0,2020-04-01,27116.257595
1,2020-04-02,28710.388430
2,2020-04-03,30728.165560
3,2020-04-04,31766.156375
4,2020-04-05,24258.566380
...,...,...
1365,2023-12-27,272767.549675
1366,2023-12-28,268037.346080
1367,2023-12-29,268176.608700
1368,2023-12-30,298067.576955


In [137]:
root = os.path.join("..","Data")
df = pd.read_csv(os.path.join(root, "Crude Oil Prices.csv"))
df.columns = ['Date','Value']
df['Date'] = pd.to_datetime(df['Date'],format='%d-%m-%y')
df['Value'] = df['Value'].astype('float')

full_date_range = pd.date_range(start=df['Date'].min(), end=df['Date'].max(), freq='D')
df = df.set_index('Date').reindex(full_date_range)
df['Value'] = df['Value'].ffill()
df.reset_index(inplace=True)
df.rename(columns={'index': 'Date'}, inplace=True)
train_prices_df = df[(df['Date']>="2020-04-01") & (df['Date']<"2024-01-01")].reset_index(drop=True)
test_prices_df = df[df['Date'] >= "2024-01-01"].reset_index(drop=True)

In [131]:
train_prices_df

Unnamed: 0,Date,Value
0,2020-04-01,20.31
1,2020-04-02,25.32
2,2020-04-03,28.34
3,2020-04-04,28.34
4,2020-04-05,28.34
...,...,...
1365,2023-12-27,74.11
1366,2023-12-28,71.77
1367,2023-12-29,71.33
1368,2023-12-30,71.33


In [139]:
def create_sliding_window_sets(sales_df,prices_df, window_size=30):
    sales = sales_df['Sales'].values
    dates = sales_df['Date'].values
    prices = prices_df['Value'].values
    X, Y, X_dates = [], [], []

    for i in range(window_size, len(sales) - window_size):
        x_window = np.concatenate((sales[i - window_size:i],prices[i - window_size:i]))        # previous 30 days
        y_window = sales[i + 1:i + 1 + window_size] # next 30 days
        today_date = dates[i]                       # today

        X.append(x_window)
        Y.append(y_window)
        X_dates.append(today_date)

    return np.array(X), np.array(Y), np.array(X_dates)


In [142]:
train_prices_df

Unnamed: 0,Date,Value
0,2020-04-01,20.31
1,2020-04-02,25.32
2,2020-04-03,28.34
3,2020-04-04,28.34
4,2020-04-05,28.34
...,...,...
1365,2023-12-27,74.11
1366,2023-12-28,71.77
1367,2023-12-29,71.33
1368,2023-12-30,71.33


In [150]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping

num_days = 30
X_train, Y_train, dates_train = create_sliding_window_sets(train_df,train_prices_df,num_days)
X_test, Y_test, dates_test = create_sliding_window_sets(test_df,test_prices_df,num_days)

def mape(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    # Avoid division by zero by adding a small epsilon where y_true is zero
    epsilon = tf.keras.backend.epsilon()
    y_true = tf.where(tf.equal(y_true, 0), epsilon, y_true)

    return tf.reduce_mean(tf.abs((y_true - y_pred) / y_true)) * 100

# Build FFNN model
model = models.Sequential([
    layers.Input(shape=(num_days*2,)),
    layers.Dense(128, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(512, activation='relu'),
    layers.Dense(512, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_days) 
])
early_stopping = EarlyStopping(
monitor='mape',
mode = 'min',
patience=15,
restore_best_weights=True )

model.compile(optimizer='adam', loss='mse', metrics=[mape])

# Train the model
history = model.fit(X_train, Y_train, epochs=100, batch_size=4, verbose=1,callbacks=[early_stopping])


Epoch 1/100
[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 10011797504.0000 - mape: 29.9328
Epoch 2/100
[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 2998282240.0000 - mape: 21.7928
Epoch 3/100
[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 2941817088.0000 - mape: 21.7579
Epoch 4/100
[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 2630269440.0000 - mape: 19.4431
Epoch 5/100
[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 3064475648.0000 - mape: 21.9916
Epoch 6/100
[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 2800909568.0000 - mape: 20.8206
Epoch 7/100
[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 2834983424.0000 - mape: 19.9873
Epoch 8/100
[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 2611241216.0000

In [66]:
Y_pred = model.predict(X_test, verbose=0)

In [152]:
import plotly.graph_objects as go

start_date = dates_test[0]
date = '2024-02-10' # can go upto 2024-12-01
i = (pd.to_datetime(date) - pd.to_datetime(start_date)).days

x = pd.date_range(start=pd.to_datetime(date) + pd.Timedelta(days=1), periods=30)
fig = go.Figure()

fig.add_trace(go.Scatter(x=x, y=Y_pred[i], mode='lines+markers', name=f'Prediction'))
fig.add_trace(go.Scatter(x=x, y=Y_test[i], mode='lines+markers', name=f'True values'))
fig.update_layout(title=f'Prediction at {date}. Mape = {mape(Y_pred[i],Y_test[i])}', legend_title='Legend')
fig.show()


In [81]:
import plotly.graph_objects as go

mapes=[]
for i in range(len(Y_pred)):
    mapes.append(mape(Y_pred[i],Y_test[i]))
mapes=np.array(mapes)
print("The mean MAPE is :",mapes.mean())
print("The median MAPE is : ", np.median(mapes))

fig = go.Figure(data=[go.Histogram(x=mapes, nbinsx=50)])
fig.update_layout(title=f'Histogram of Average MAPE. Average MAPE = {mapes.mean()}', xaxis_title='Daily MAPE', yaxis_title='Count')
fig.show()

The mean MAPE is : 10.939718
The median MAPE is :  9.339051


In [82]:
import plotly.graph_objects as go

dates = pd.date_range(start=dates_test[0], periods=len(X_test))
mape_df = pd.DataFrame({'Date':dates,'Mape': mapes})

fig = go.Figure()
fig.add_trace(go.Line(x = mape_df['Date'],
                    y = mape_df['Mape'],
                    mode = 'lines+markers',
                    name = 'Average MAPE'))

fig.update_layout(xaxis_title = 'Date', yaxis_title = 'MAPE', title = 'MAPE vs Date')
fig.show()


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.


