In [1]:
import pandas as pd
import os

# Preparing the data.

In [6]:
root = os.path.join("..","Data")
df = pd.read_excel(os.path.join(root, "DEMAND_FILE_PHASE2.xlsx"))
df = df.iloc[0:2].T.reset_index(drop=True)
df.columns = ['Date','Sales']
df.drop(0, inplace=True)
df['Sales'] = df['Sales'].astype('float')
df['Date'] = pd.to_datetime(df['Date'])
df


Unnamed: 0,Date,Sales
1,2020-04-01,27116.257595
2,2020-04-02,28710.388430
3,2020-04-03,30728.165560
4,2020-04-04,31766.156375
5,2020-04-05,24258.566380
...,...,...
1728,2024-12-27,267775.000000
1729,2024-12-28,274362.000000
1730,2024-12-29,244200.000000
1731,2024-12-30,274584.000000


In [7]:
import calendar
def days_in_month(row):
    return calendar.monthrange(row['Year'], row['Month'])[1]

df['Day'] = df['Date'].dt.day
df['Weekday'] = df['Date'].dt.weekday
df['Month'] = df['Date'].dt.month
df['Year'] = df['Date'].dt.year
df['DayOfYear'] = df['Date'].dt.dayofyear
df['IsWeekend'] = (df['Weekday'] >= 5).astype(int)
df['DaysInMonth'] = df.apply(days_in_month, axis=1)
df


Unnamed: 0,Date,Sales,Day,Weekday,Month,Year,DayOfYear,IsWeekend,DaysInMonth
1,2020-04-01,27116.257595,1,2,4,2020,92,0,30
2,2020-04-02,28710.388430,2,3,4,2020,93,0,30
3,2020-04-03,30728.165560,3,4,4,2020,94,0,30
4,2020-04-04,31766.156375,4,5,4,2020,95,1,30
5,2020-04-05,24258.566380,5,6,4,2020,96,1,30
...,...,...,...,...,...,...,...,...,...
1728,2024-12-27,267775.000000,27,4,12,2024,362,0,31
1729,2024-12-28,274362.000000,28,5,12,2024,363,1,31
1730,2024-12-29,244200.000000,29,6,12,2024,364,1,31
1731,2024-12-30,274584.000000,30,0,12,2024,365,0,31


In [8]:
import numpy as np
# Apply Cyclical Encoding
df['Day_sin'] = np.sin(2 * np.pi * df['Day'] / df['DaysInMonth'])
df['Day_cos'] = np.cos(2 * np.pi * df['Day'] / df['DaysInMonth'])

df['Month_sin'] = np.sin(2 * np.pi * df['Month'] / 12)
df['Month_cos'] = np.cos(2 * np.pi * df['Month'] / 12)

df['Weekday_sin'] = np.sin(2 * np.pi * df['Weekday'] / 7)
df['Weekday_cos'] = np.cos(2 * np.pi * df['Weekday'] / 7)

df['DayOfYear_sin'] = np.sin(2 * np.pi * df['DayOfYear'] / 365)
df['DayOfYear_cos'] = np.cos(2 * np.pi * df['DayOfYear'] / 365)

# Drop raw columns if necessary
df = df.drop(columns=['Weekday', 'Month', 'DayOfYear', 'DaysInMonth','Day','Year'])
df

Unnamed: 0,Date,Sales,IsWeekend,Day_sin,Day_cos,Month_sin,Month_cos,Weekday_sin,Weekday_cos,DayOfYear_sin,DayOfYear_cos
1,2020-04-01,27116.257595,0,2.079117e-01,0.978148,8.660254e-01,-0.5,0.974928,-0.222521,9.999167e-01,-0.012910
2,2020-04-02,28710.388430,0,4.067366e-01,0.913545,8.660254e-01,-0.5,0.433884,-0.900969,9.995463e-01,-0.030120
3,2020-04-03,30728.165560,0,5.877853e-01,0.809017,8.660254e-01,-0.5,-0.433884,-0.900969,9.988797e-01,-0.047321
4,2020-04-04,31766.156375,1,7.431448e-01,0.669131,8.660254e-01,-0.5,-0.974928,-0.222521,9.979172e-01,-0.064508
5,2020-04-05,24258.566380,1,8.660254e-01,0.500000,8.660254e-01,-0.5,-0.781831,0.623490,9.966589e-01,-0.081676
...,...,...,...,...,...,...,...,...,...,...,...
1728,2024-12-27,267775.000000,0,-7.247928e-01,0.688967,-2.449294e-16,1.0,-0.433884,-0.900969,-5.161967e-02,0.998667
1729,2024-12-28,274362.000000,1,-5.712682e-01,0.820763,-2.449294e-16,1.0,-0.974928,-0.222521,-3.442161e-02,0.999407
1730,2024-12-29,244200.000000,1,-3.943559e-01,0.918958,-2.449294e-16,1.0,-0.781831,0.623490,-1.721336e-02,0.999852
1731,2024-12-30,274584.000000,0,-2.012985e-01,0.979530,-2.449294e-16,1.0,0.000000,1.000000,6.432491e-16,1.000000


In [9]:
df['Sales_MA_7'] = df['Sales'].rolling(window=7).mean().shift(1)  # 7-day moving avg
df = df.dropna().reset_index(drop=True)
df

Unnamed: 0,Date,Sales,IsWeekend,Day_sin,Day_cos,Month_sin,Month_cos,Weekday_sin,Weekday_cos,DayOfYear_sin,DayOfYear_cos,Sales_MA_7
0,2020-04-08,37042.335435,0,9.945219e-01,-0.104528,8.660254e-01,-0.5,0.974928,-0.222521,9.911141e-01,-0.133015,27235.338575
1,2020-04-09,37964.938745,0,9.510565e-01,-0.309017,8.660254e-01,-0.5,0.433884,-0.900969,9.886776e-01,-0.150055,28653.349695
2,2020-04-10,44276.781385,0,8.660254e-01,-0.500000,8.660254e-01,-0.5,-0.433884,-0.900969,9.859481e-01,-0.167052,29975.428311
3,2020-04-11,47721.269465,1,7.431448e-01,-0.669131,8.660254e-01,-0.5,-0.974928,-0.222521,9.829266e-01,-0.183998,31910.944858
4,2020-04-12,42818.685600,1,5.877853e-01,-0.809017,8.660254e-01,-0.5,-0.781831,0.623490,9.796137e-01,-0.200891,34190.246728
...,...,...,...,...,...,...,...,...,...,...,...,...
1720,2024-12-27,267775.000000,0,-7.247928e-01,0.688967,-2.449294e-16,1.0,-0.433884,-0.900969,-5.161967e-02,0.998667,256319.714286
1721,2024-12-28,274362.000000,1,-5.712682e-01,0.820763,-2.449294e-16,1.0,-0.974928,-0.222521,-3.442161e-02,0.999407,253353.000000
1722,2024-12-29,244200.000000,1,-3.943559e-01,0.918958,-2.449294e-16,1.0,-0.781831,0.623490,-1.721336e-02,0.999852,252778.714286
1723,2024-12-30,274584.000000,0,-2.012985e-01,0.979530,-2.449294e-16,1.0,0.000000,1.000000,6.432491e-16,1.000000,256156.000000


In [10]:
sales_df = df

In [7]:
weather_df = pd.read_csv(os.path.join(root,'Ahmedabad_Weather_Data_Cleaned.csv')).drop(range(0,7)).reset_index(drop=True)
weather_df

Unnamed: 0,Date,Temp,Humidity,Rain
0,2023-04-08,88.9,29.8,0.0
1,2023-04-09,89.9,29.1,0.0
2,2023-04-10,91.4,28.9,0.0
3,2023-04-11,92.0,30.7,0.0
4,2023-04-12,90.0,33.3,0.0
...,...,...,...,...
263,2023-12-27,71.2,72.4,0.0
264,2023-12-28,72.5,60.0,0.0
265,2023-12-29,71.4,64.1,0.0
266,2023-12-30,71.9,69.3,0.0


In [8]:
combined_df = pd.concat([sales_df,weather_df.iloc[:,1:]],axis = 1)
combined_df = combined_df[['Date','IsWeekend', 'Day_sin', 'Day_cos', 'Month_sin', 'Month_cos', 'Weekday_sin',
                           'Weekday_cos', 'DayOfYear_sin', 'DayOfYear_cos','Temp','Humidity','Rain','Sales_MA_7',
                           'Sales']]
combined_df = combined_df.dropna().reset_index(drop=True)
combined_df

Unnamed: 0,Date,IsWeekend,Day_sin,Day_cos,Month_sin,Month_cos,Weekday_sin,Weekday_cos,DayOfYear_sin,DayOfYear_cos,Temp,Humidity,Rain,Sales_MA_7,Sales
0,2023-04-08,1,9.945219e-01,-0.104528,8.660254e-01,-0.5,-0.974928,-0.222521,9.932568e-01,-0.115935,88.9,29.8,0.0,470962.212977,542228.194163
1,2023-04-09,1,9.510565e-01,-0.309017,8.660254e-01,-0.5,-0.781831,0.623490,9.911141e-01,-0.133015,89.9,29.1,0.0,478555.367331,478717.394779
2,2023-04-10,0,8.660254e-01,-0.500000,8.660254e-01,-0.5,0.000000,1.000000,9.886776e-01,-0.150055,91.4,28.9,0.0,476785.317357,483230.277816
3,2023-04-11,0,7.431448e-01,-0.669131,8.660254e-01,-0.5,0.781831,0.623490,9.859481e-01,-0.167052,92.0,30.7,0.0,479630.871374,485970.141540
4,2023-04-12,0,5.877853e-01,-0.809017,8.660254e-01,-0.5,0.974928,-0.222521,9.829266e-01,-0.183998,90.0,33.3,0.0,483868.556851,484442.664286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
263,2023-12-27,0,-7.247928e-01,0.688967,-2.449294e-16,1.0,0.974928,-0.222521,-6.880243e-02,0.997630,71.2,72.4,0.0,581611.016546,571390.999704
264,2023-12-28,0,-5.712682e-01,0.820763,-2.449294e-16,1.0,0.433884,-0.900969,-5.161967e-02,0.998667,72.5,60.0,0.0,579815.388893,576890.976666
265,2023-12-29,0,-3.943559e-01,0.918958,-2.449294e-16,1.0,-0.433884,-0.900969,-3.442161e-02,0.999407,71.4,64.1,0.0,578344.850415,572104.814138
266,2023-12-30,1,-2.012985e-01,0.979530,-2.449294e-16,1.0,-0.974928,-0.222521,-1.721336e-02,0.999852,71.9,69.3,0.0,576290.337907,585843.459244


In [11]:
combined_df = sales_df

In [21]:
combined_df

Unnamed: 0,Date,Sales,IsWeekend,Day_sin,Day_cos,Month_sin,Month_cos,Weekday_sin,Weekday_cos,DayOfYear_sin,DayOfYear_cos,Sales_MA_7
0,2020-04-08,37042.335435,0,9.945219e-01,-0.104528,8.660254e-01,-0.5,0.974928,-0.222521,9.911141e-01,-0.133015,27235.338575
1,2020-04-09,37964.938745,0,9.510565e-01,-0.309017,8.660254e-01,-0.5,0.433884,-0.900969,9.886776e-01,-0.150055,28653.349695
2,2020-04-10,44276.781385,0,8.660254e-01,-0.500000,8.660254e-01,-0.5,-0.433884,-0.900969,9.859481e-01,-0.167052,29975.428311
3,2020-04-11,47721.269465,1,7.431448e-01,-0.669131,8.660254e-01,-0.5,-0.974928,-0.222521,9.829266e-01,-0.183998,31910.944858
4,2020-04-12,42818.685600,1,5.877853e-01,-0.809017,8.660254e-01,-0.5,-0.781831,0.623490,9.796137e-01,-0.200891,34190.246728
...,...,...,...,...,...,...,...,...,...,...,...,...
1720,2024-12-27,267775.000000,0,-7.247928e-01,0.688967,-2.449294e-16,1.0,-0.433884,-0.900969,-5.161967e-02,0.998667,256319.714286
1721,2024-12-28,274362.000000,1,-5.712682e-01,0.820763,-2.449294e-16,1.0,-0.974928,-0.222521,-3.442161e-02,0.999407,253353.000000
1722,2024-12-29,244200.000000,1,-3.943559e-01,0.918958,-2.449294e-16,1.0,-0.781831,0.623490,-1.721336e-02,0.999852,252778.714286
1723,2024-12-30,274584.000000,0,-2.012985e-01,0.979530,-2.449294e-16,1.0,0.000000,1.000000,6.432491e-16,1.000000,256156.000000


# Training the Model.

In [24]:

import plotly.graph_objects as go

def get_train_val_test_dfs(combined_df):
    train_df = combined_df[combined_df['Date']<"2024-01-01"].reset_index(drop=True)
    test_df = combined_df[combined_df['Date']>="2024-01-01"].reset_index(drop=True)
    return train_df, test_df

def plot_training_graphs(history):
    history_df = pd.DataFrame.from_dict(history.history)
    history_df['epoch'] = history_df.index + 1

    fig = go.Figure()
    fig.add_trace(go.Line(x=history_df['epoch'], y = history_df['loss'], mode = 'lines+markers', name='Training Loss'))
    fig.add_trace(go.Line(x=history_df['epoch'], y = history_df['val_loss'], mode = 'lines+markers', name = 'Validation Loss'))
    fig.update_layout(title="Training and Validation Loss",
                    xaxis_title="Epochs", yaxis_title="Loss")
    fig.show()

    fig = go.Figure()
    fig.add_trace(go.Line(x=history_df['epoch'], y = history_df['mape'], mode = 'lines+markers', name='Training MAPE'))
    fig.add_trace(go.Line(x=history_df['epoch'], y = history_df['val_mape'], mode = 'lines+markers', name = 'Validation MAPE'))
    fig.update_layout(title="Training and Validation MAPE",
                    xaxis_title="Epochs", yaxis_title="MAPE")
    fig.show()

def plot_test_graphs(test_df):
    fig = go.Figure()

    fig.add_trace(go.Line(x = test_df['Date'],
                        y = test_df['Sales'],
                        mode = 'lines+markers',
                        name = 'Actual Sales'))
    fig.add_trace(go.Line(x = test_df['Date'],
                        y = test_df['Predicted_Sales'],
                        mode = 'lines+markers',
                        name = 'Predicted Sales'))

    fig.update_layout(xaxis_title = 'Date', yaxis_title = 'Sales', title = 'CNG Sales')
    fig.show()



In [32]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Input
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.losses import Huber
from tensorflow.keras.callbacks import EarlyStopping

def mape(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    # Avoid division by zero by adding a small epsilon where y_true is zero
    epsilon = tf.keras.backend.epsilon()
    y_true = tf.where(tf.equal(y_true, 0), epsilon, y_true)

    return tf.reduce_mean(tf.abs((y_true - y_pred) / y_true)) * 100

def model_train(train_df, features, target):
    
    X_train, y_train = train_df[features].values, train_df[target].values
    # X_val, y_val = val_df[features].values, val_df[target].values
    
    early_stopping = EarlyStopping(
    monitor='mape',
    mode = 'min',
    patience=15,
    restore_best_weights=True )

    # Define FFNN model
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1)
    ])

    # Compile and train
    model.compile(optimizer='adam', loss='mse' , metrics=[mape])
    history = model.fit(X_train, y_train, epochs=100, batch_size=4, callbacks=[early_stopping])

    return model, history


In [15]:
import numpy as np

def model_test(model, test_df, features, target):
    test_df['Sales_MA_7'] = np.nan
    test_df['Predicted_Sales'] = np.nan

    for row in test_df.itertuples(index=True, name='Rows'):
        if row.Index >= 7:
            test_df.loc[row.Index, 'Sales_MA_7'] = test_df.iloc[row.Index-7:row.Index]['Sales'].mean()
            X_test = np.array(test_df.loc[row.Index, features].values, dtype = np.float64)
            test_df.loc[row.Index, 'Predicted_Sales'] = model.predict(X_test.reshape(1,len(features)))
    test_df = test_df.dropna().reset_index(drop=True)
    return test_df

In [33]:
features = ['IsWeekend',   'Sales_MA_7', 
         ]
target = 'Sales'
plot_graphs = True

train_df, test_df = get_train_val_test_dfs(combined_df)
model, history = model_train(train_df,  features, target)
# if(plot_graphs):
#     plot_training_graphs(history)

test_df = model_test(model, test_df,features, target)
print(f"MAPE for test data: {mape(test_df['Sales'],test_df['Predicted_Sales'])}")

if(plot_graphs):
    plot_test_graphs(test_df)


Epoch 1/100
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 13829957632.0000 - mape: 45.1231
Epoch 2/100
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1539121152.0000 - mape: 32.3863
Epoch 3/100
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1442191744.0000 - mape: 34.0599
Epoch 4/100
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1579573632.0000 - mape: 33.1489
Epoch 5/100
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1249132672.0000 - mape: 34.8035
Epoch 6/100
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1294899072.0000 - mape: 32.1576
Epoch 7/100
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1594014464.0000 - mape: 34.1951
Epoch 8/100
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1667610240.0000


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




In [26]:
errors = pd.DataFrame()
errors['Date'] = test_df['Date']
errors['Sales'] = test_df['Sales']
errors['Predicted_Sales'] = test_df['Predicted_Sales']
errors['IsWeekend'] = test_df['IsWeekend']
errors['APE'] = abs(errors['Sales'] - errors['Predicted_Sales'])*100/errors['Sales']
errors = errors.dropna()


In [27]:
errors['IsWeekend'] = errors['IsWeekend'].replace(0,'Weekday')
errors['IsWeekend'] = errors['IsWeekend'].replace(1,'Weekend')
errors

Unnamed: 0,Date,Sales,Predicted_Sales,IsWeekend,APE
0,2024-01-08,261946.412130,257328.343750,Weekday,1.762982
1,2024-01-09,232532.318540,259538.156250,Weekday,11.613800
2,2024-01-10,276852.706715,258889.140625,Weekday,6.488492
3,2024-01-11,258202.915770,261450.125000,Weekday,1.257619
4,2024-01-12,262595.085770,261590.265625,Weekday,0.382650
...,...,...,...,...,...
354,2024-12-27,267775.000000,262330.593750,Weekday,2.033202
355,2024-12-28,274362.000000,259277.375000,Weekend,5.498074
356,2024-12-29,244200.000000,258673.406250,Weekend,5.926866
357,2024-12-30,274584.000000,262125.515625,Weekday,4.537222


In [28]:
import plotly.express as px

fig = px.scatter(errors, x='Date', y='APE', color=errors['IsWeekend'], 
                 title="APE vs Date", color_discrete_map={'Weekday': 'blue', 'Weekend': 'red'})

fig.add_scatter(x=errors['Date'], y=errors['APE'], mode='lines', name='APE Trend')

fig.show()