In [3]:
import pandas as pd
import os

# Preparing the data.

In [4]:
data_root = os.path.join("..","Data")
sales_df = pd.read_excel(os.path.join(data_root, 'DEMAND_FILE.xlsx'), sheet_name='Daily CGD Volume - Ahmedabad')
sales_df = sales_df.iloc[0:3].T.reset_index(drop=True)
sales_df.columns = ['Date','Weekday','Sales']
sales_df = sales_df.drop(index = 0).reset_index(drop=True)
sales_df['Weekday'] = sales_df['Weekday'].apply(lambda x: 0 if x in ['Sunday', 'Saturday'] else 1)
sales_df['Date'] = pd.to_datetime(sales_df['Date']).dt.date

for i in range(1, 8):  # Loop to create 7 new columns
    sales_df[f'Sales_Day_{i}'] = sales_df['Sales'].shift(i)
sales_df = sales_df.dropna().reset_index(drop=True)
sales_df

Unnamed: 0,Date,Weekday,Sales,Sales_Day_1,Sales_Day_2,Sales_Day_3,Sales_Day_4,Sales_Day_5,Sales_Day_6,Sales_Day_7
0,2023-04-08,0,542228.194163,409583.985034,494010.905488,493338.999138,456306.3432,463311.3997,491107.744591,489076.113688
1,2023-04-09,0,478717.394779,542228.194163,409583.985034,494010.905488,493338.999138,456306.3432,463311.3997,491107.744591
2,2023-04-10,1,483230.277816,478717.394779,542228.194163,409583.985034,494010.905488,493338.999138,456306.3432,463311.3997
3,2023-04-11,1,485970.14154,483230.277816,478717.394779,542228.194163,409583.985034,494010.905488,493338.999138,456306.3432
4,2023-04-12,1,484442.664286,485970.14154,483230.277816,478717.394779,542228.194163,409583.985034,494010.905488,493338.999138
...,...,...,...,...,...,...,...,...,...,...
263,2023-12-27,1,571390.999704,599715.663484,592294.579128,556015.517748,565619.81448,586486.401692,587184.746016,583960.393272
264,2023-12-28,1,576890.976666,571390.999704,599715.663484,592294.579128,556015.517748,565619.81448,586486.401692,587184.746016
265,2023-12-29,1,572104.814138,576890.976666,571390.999704,599715.663484,592294.579128,556015.517748,565619.81448,586486.401692
266,2023-12-30,0,585843.459244,572104.814138,576890.976666,571390.999704,599715.663484,592294.579128,556015.517748,565619.81448


In [5]:
weather_df = pd.read_csv(os.path.join(data_root,'Ahmedabad_Weather_Data_Cleaned.csv')).drop(range(0,7)).reset_index(drop=True)
weather_df

Unnamed: 0,Date,Temp,Humidity,Rain
0,2023-04-08,88.9,29.8,0.0
1,2023-04-09,89.9,29.1,0.0
2,2023-04-10,91.4,28.9,0.0
3,2023-04-11,92.0,30.7,0.0
4,2023-04-12,90.0,33.3,0.0
...,...,...,...,...
263,2023-12-27,71.2,72.4,0.0
264,2023-12-28,72.5,60.0,0.0
265,2023-12-29,71.4,64.1,0.0
266,2023-12-30,71.9,69.3,0.0


In [6]:
combined_df = pd.concat([sales_df,weather_df],axis = 1)
combined_df

Unnamed: 0,Date,Weekday,Sales,Sales_Day_1,Sales_Day_2,Sales_Day_3,Sales_Day_4,Sales_Day_5,Sales_Day_6,Sales_Day_7,Date.1,Temp,Humidity,Rain
0,2023-04-08,0,542228.194163,409583.985034,494010.905488,493338.999138,456306.3432,463311.3997,491107.744591,489076.113688,2023-04-08,88.9,29.8,0.0
1,2023-04-09,0,478717.394779,542228.194163,409583.985034,494010.905488,493338.999138,456306.3432,463311.3997,491107.744591,2023-04-09,89.9,29.1,0.0
2,2023-04-10,1,483230.277816,478717.394779,542228.194163,409583.985034,494010.905488,493338.999138,456306.3432,463311.3997,2023-04-10,91.4,28.9,0.0
3,2023-04-11,1,485970.14154,483230.277816,478717.394779,542228.194163,409583.985034,494010.905488,493338.999138,456306.3432,2023-04-11,92.0,30.7,0.0
4,2023-04-12,1,484442.664286,485970.14154,483230.277816,478717.394779,542228.194163,409583.985034,494010.905488,493338.999138,2023-04-12,90.0,33.3,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
263,2023-12-27,1,571390.999704,599715.663484,592294.579128,556015.517748,565619.81448,586486.401692,587184.746016,583960.393272,2023-12-27,71.2,72.4,0.0
264,2023-12-28,1,576890.976666,571390.999704,599715.663484,592294.579128,556015.517748,565619.81448,586486.401692,587184.746016,2023-12-28,72.5,60.0,0.0
265,2023-12-29,1,572104.814138,576890.976666,571390.999704,599715.663484,592294.579128,556015.517748,565619.81448,586486.401692,2023-12-29,71.4,64.1,0.0
266,2023-12-30,0,585843.459244,572104.814138,576890.976666,571390.999704,599715.663484,592294.579128,556015.517748,565619.81448,2023-12-30,71.9,69.3,0.0


# Training the Model.

In [7]:
features = ['Weekday','Sales_Day_1', 'Sales_Day_2', 'Sales_Day_3',
       'Sales_Day_4', 'Sales_Day_5', 'Sales_Day_6', 'Sales_Day_7',
       'Temp', 'Humidity', 'Rain',]
target = 'Sales'

train_size = int(0.7 * len(combined_df))  # 70% for training
val_size = int(0.85 * len(combined_df)) # 15% for validation
train_df, val_df, test_df = combined_df[:train_size], combined_df[train_size:val_size], combined_df[val_size:]

X_train, y_train = train_df[features].values.astype('float'), train_df[target].values.astype('float')
X_val, y_val = val_df[features].values.astype('float'), val_df[target].values.astype('float')


In [8]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.losses import Huber
from tensorflow.keras.callbacks import EarlyStopping

def mape(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    # Avoid division by zero by adding a small epsilon where y_true is zero
    epsilon = tf.keras.backend.epsilon()
    y_true = tf.where(tf.equal(y_true, 0), epsilon, y_true)

    return tf.reduce_mean(tf.abs((y_true - y_pred) / y_true)) * 100

early_stopping = EarlyStopping(
    monitor='val_mape',
    mode = 'min',
    patience=15,
    restore_best_weights=True )

# Define FFNN model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)
])

# Compile and train
model.compile(optimizer='adam', loss='mse' , metrics=[mape])
history = model.fit(X_train, y_train, epochs=100, batch_size=8, validation_data=(X_val, y_val),callbacks=[early_stopping])



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 52515131392.0000 - mape: 37.1322 - val_loss: 7086704128.0000 - val_mape: 12.6824
Epoch 2/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 2129038336.0000 - mape: 9.1062 - val_loss: 3277701632.0000 - val_mape: 6.8836
Epoch 3/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1400737024.0000 - mape: 7.9611 - val_loss: 2818854144.0000 - val_mape: 6.8195
Epoch 4/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 985496256.0000 - mape: 7.9759 - val_loss: 3081659392.0000 - val_mape: 6.7615
Epoch 5/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1293849856.0000 - mape: 8.1208 - val_loss: 3047203584.0000 - val_mape: 6.7340
Epoch 6/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 834516800.0000 - mape: 7.1832 - val_loss: 2811440

In [None]:
import plotly.graph_objects as go

history_df = pd.DataFrame.from_dict(history.history)
history_df['epoch'] = history_df.index + 1

fig = go.Figure()
fig.add_trace(go.Line(x=history_df['epoch'], y = history_df['loss'], mode = 'lines+markers', name='Training Loss'))
fig.add_trace(go.Line(x=history_df['epoch'], y = history_df['val_loss'], mode = 'lines+markers', name = 'Validation Loss'))
fig.update_layout(title="Training and Validation Loss",
                  xaxis_title="Epochs", yaxis_title="Loss")
fig.show()

fig = go.Figure()
fig.add_trace(go.Line(x=history_df['epoch'], y = history_df['mape'], mode = 'lines+markers', name='Training MAPE'))
fig.add_trace(go.Line(x=history_df['epoch'], y = history_df['val_mape'], mode = 'lines+markers', name = 'Validation MAPE'))
fig.update_layout(title="Training and Validation MAPE",
                  xaxis_title="Epochs", yaxis_title="MAPE")
fig.show()