In [1]:
import pandas as pd
import os

# Preparing the data.

In [2]:
# Locate and read the Ahmedabad daily CGD sheet of the demand workbook.
root = os.path.join("..", "Data")
demand_path = os.path.join(root, 'DEMAND_FILE.xlsx')
df = pd.read_excel(demand_path, sheet_name='Daily CGD Volume - Ahmedabad')

# Keep only the first three sheet rows, then transpose so that every date becomes a row.
df = df.iloc[:3, :].T

# After the transpose the first row holds the field labels: promote it to the header.
df.columns = df.iloc[0]
df = df.iloc[1:].reset_index(drop=True)

# Strip the time-of-day part (keep the text before the first whitespace) and parse as datetime.
date_text = df['Date'].astype('str').str.split().str[0]
df['Date'] = pd.to_datetime(date_text)

# Shorter name for the sales column, stored as float.
df = df.rename(columns={'CNG Sales Ahmedabad (SCM/day)': 'Sales'})
df['Sales'] = df['Sales'].astype('float64')
df.columns.name = None
df

Unnamed: 0,Date,Weekday,Sales
0,2023-04-01,Saturday,489076.113688
1,2023-04-02,Sunday,491107.744591
2,2023-04-03,Monday,463311.399700
3,2023-04-04,Tuesday,456306.343200
4,2023-04-05,Wednesday,493338.999138
...,...,...,...
270,2023-12-27,Wednesday,571390.999704
271,2023-12-28,Thursday,576890.976666
272,2023-12-29,Friday,572104.814138
273,2023-12-30,Saturday,585843.459244


In [3]:
import calendar
def days_in_month(row):
    """Return the number of days in the month given by row['Year'] and row['Month']."""
    # monthrange yields (weekday of the 1st, number of days); only the length is needed.
    _first_weekday, month_length = calendar.monthrange(row['Year'], row['Month'])
    return month_length

# Calendar features derived from Date through the vectorised .dt accessor.
df['Day'] = df['Date'].dt.day
df['Weekday'] = df['Date'].dt.weekday  # Monday=0 ... Sunday=6; overwrites the weekday-name column
df['Month'] = df['Date'].dt.month
df['Year'] = df['Date'].dt.year
df['DayOfYear'] = df['Date'].dt.dayofyear
df['IsWeekend'] = (df['Weekday'] >= 5).astype(int)  # Saturday (5) and Sunday (6)
# Month length straight from the datetime accessor instead of a Python-level row-wise apply.
df['DaysInMonth'] = df['Date'].dt.days_in_month
df


Unnamed: 0,Date,Weekday,Sales,Day,Month,Year,DayOfYear,IsWeekend,DaysInMonth
0,2023-04-01,5,489076.113688,1,4,2023,91,1,30
1,2023-04-02,6,491107.744591,2,4,2023,92,1,30
2,2023-04-03,0,463311.399700,3,4,2023,93,0,30
3,2023-04-04,1,456306.343200,4,4,2023,94,0,30
4,2023-04-05,2,493338.999138,5,4,2023,95,0,30
...,...,...,...,...,...,...,...,...,...
270,2023-12-27,2,571390.999704,27,12,2023,361,0,31
271,2023-12-28,3,576890.976666,28,12,2023,362,0,31
272,2023-12-29,4,572104.814138,29,12,2023,363,0,31
273,2023-12-30,5,585843.459244,30,12,2023,364,1,31


In [4]:
import numpy as np
# Apply cyclical encoding: map each periodic calendar value onto the unit circle
# (sine and cosine of its phase) so that the end of a period sits next to its start.
day_phase = 2 * np.pi * df['Day'] / df['DaysInMonth']
df['Day_sin'] = np.sin(day_phase)
df['Day_cos'] = np.cos(day_phase)

month_phase = 2 * np.pi * df['Month'] / 12
df['Month_sin'] = np.sin(month_phase)
df['Month_cos'] = np.cos(month_phase)

weekday_phase = 2 * np.pi * df['Weekday'] / 7
df['Weekday_sin'] = np.sin(weekday_phase)
df['Weekday_cos'] = np.cos(weekday_phase)

# Use the real length of each row's year: dividing by a fixed 365 would push
# day 366 of a leap year past a full cycle.
days_in_year = np.where(df['Date'].dt.is_leap_year, 366, 365)
day_of_year_phase = 2 * np.pi * df['DayOfYear'] / days_in_year
df['DayOfYear_sin'] = np.sin(day_of_year_phase)
df['DayOfYear_cos'] = np.cos(day_of_year_phase)

# Drop the raw calendar columns now that their encoded versions exist.
df = df.drop(columns=['Weekday', 'Month', 'DayOfYear', 'DaysInMonth', 'Day', 'Year'])
df

Unnamed: 0,Date,Sales,IsWeekend,Day_sin,Day_cos,Month_sin,Month_cos,Weekday_sin,Weekday_cos,DayOfYear_sin,DayOfYear_cos
0,2023-04-01,489076.113688,1,2.079117e-01,0.978148,8.660254e-01,-0.5,-0.974928,-0.222521,9.999907e-01,0.004304
1,2023-04-02,491107.744591,1,4.067366e-01,0.913545,8.660254e-01,-0.5,-0.781831,0.623490,9.999167e-01,-0.012910
2,2023-04-03,463311.399700,0,5.877853e-01,0.809017,8.660254e-01,-0.5,0.000000,1.000000,9.995463e-01,-0.030120
3,2023-04-04,456306.343200,0,7.431448e-01,0.669131,8.660254e-01,-0.5,0.781831,0.623490,9.988797e-01,-0.047321
4,2023-04-05,493338.999138,0,8.660254e-01,0.500000,8.660254e-01,-0.5,0.974928,-0.222521,9.979172e-01,-0.064508
...,...,...,...,...,...,...,...,...,...,...,...
270,2023-12-27,571390.999704,0,-7.247928e-01,0.688967,-2.449294e-16,1.0,0.974928,-0.222521,-6.880243e-02,0.997630
271,2023-12-28,576890.976666,0,-5.712682e-01,0.820763,-2.449294e-16,1.0,0.433884,-0.900969,-5.161967e-02,0.998667
272,2023-12-29,572104.814138,0,-3.943559e-01,0.918958,-2.449294e-16,1.0,-0.433884,-0.900969,-3.442161e-02,0.999407
273,2023-12-30,585843.459244,1,-2.012985e-01,0.979530,-2.449294e-16,1.0,-0.974928,-0.222521,-1.721336e-02,0.999852


In [5]:
# Trailing 7-day mean of Sales, shifted by one row so each day only sees earlier days.
weekly_mean = df['Sales'].rolling(window=7).mean()
df['Sales_MA_7'] = weekly_mean.shift(1)

# Remove rows containing missing values (the first seven have no moving average yet)
# and renumber the remaining rows from zero.
df = df.dropna()
df = df.reset_index(drop=True)
df

Unnamed: 0,Date,Sales,IsWeekend,Day_sin,Day_cos,Month_sin,Month_cos,Weekday_sin,Weekday_cos,DayOfYear_sin,DayOfYear_cos,Sales_MA_7
0,2023-04-08,542228.194163,1,9.945219e-01,-0.104528,8.660254e-01,-0.5,-0.974928,-0.222521,9.932568e-01,-0.115935,470962.212977
1,2023-04-09,478717.394779,1,9.510565e-01,-0.309017,8.660254e-01,-0.5,-0.781831,0.623490,9.911141e-01,-0.133015,478555.367331
2,2023-04-10,483230.277816,0,8.660254e-01,-0.500000,8.660254e-01,-0.5,0.000000,1.000000,9.886776e-01,-0.150055,476785.317357
3,2023-04-11,485970.141540,0,7.431448e-01,-0.669131,8.660254e-01,-0.5,0.781831,0.623490,9.859481e-01,-0.167052,479630.871374
4,2023-04-12,484442.664286,0,5.877853e-01,-0.809017,8.660254e-01,-0.5,0.974928,-0.222521,9.829266e-01,-0.183998,483868.556851
...,...,...,...,...,...,...,...,...,...,...,...,...
263,2023-12-27,571390.999704,0,-7.247928e-01,0.688967,-2.449294e-16,1.0,0.974928,-0.222521,-6.880243e-02,0.997630,581611.016546
264,2023-12-28,576890.976666,0,-5.712682e-01,0.820763,-2.449294e-16,1.0,0.433884,-0.900969,-5.161967e-02,0.998667,579815.388893
265,2023-12-29,572104.814138,0,-3.943559e-01,0.918958,-2.449294e-16,1.0,-0.433884,-0.900969,-3.442161e-02,0.999407,578344.850415
266,2023-12-30,585843.459244,1,-2.012985e-01,0.979530,-2.449294e-16,1.0,-0.974928,-0.222521,-1.721336e-02,0.999852,576290.337907


In [6]:
# Give the engineered sales frame a descriptive name. This binds the same object as df (no copy is made).
sales_df = df

In [7]:
# Read the cleaned weather table, then discard its first seven rows and renumber from zero.
# NOTE(review): presumably the seven rows match the sales rows lost to the 7-day moving average - confirm.
weather_path = os.path.join(root, 'Ahmedabad_Weather_Data_Cleaned.csv')
weather_df = pd.read_csv(weather_path)
weather_df = weather_df.drop(index=range(7)).reset_index(drop=True)
weather_df

Unnamed: 0,Date,Temp,Humidity,Rain
0,2023-04-08,88.9,29.8,0.0
1,2023-04-09,89.9,29.1,0.0
2,2023-04-10,91.4,28.9,0.0
3,2023-04-11,92.0,30.7,0.0
4,2023-04-12,90.0,33.3,0.0
...,...,...,...,...
263,2023-12-27,71.2,72.4,0.0
264,2023-12-28,72.5,60.0,0.0
265,2023-12-29,71.4,64.1,0.0
266,2023-12-30,71.9,69.3,0.0


In [8]:
# Join weather onto sales by calendar date instead of by row position, so a missing or
# extra day in either table cannot silently pair a date with another day's weather.
# The weather dates are parsed first so that both join keys are datetimes.
weather_by_date = weather_df.assign(Date=pd.to_datetime(weather_df['Date']))
combined_df = sales_df.merge(
    weather_by_date,
    on='Date',
    how='inner',              # keep only days present in both tables
    validate='one_to_one',    # fail loudly if a date is duplicated on either side
)

# Column order: date, model inputs, and finally the target.
combined_df = combined_df[['Date', 'IsWeekend', 'Day_sin', 'Day_cos', 'Month_sin', 'Month_cos', 'Weekday_sin',
                           'Weekday_cos', 'DayOfYear_sin', 'DayOfYear_cos', 'Temp', 'Humidity', 'Rain', 'Sales_MA_7',
                           'Sales']]
combined_df = combined_df.dropna().reset_index(drop=True)
combined_df

Unnamed: 0,Date,IsWeekend,Day_sin,Day_cos,Month_sin,Month_cos,Weekday_sin,Weekday_cos,DayOfYear_sin,DayOfYear_cos,Temp,Humidity,Rain,Sales_MA_7,Sales
0,2023-04-08,1,9.945219e-01,-0.104528,8.660254e-01,-0.5,-0.974928,-0.222521,9.932568e-01,-0.115935,88.9,29.8,0.0,470962.212977,542228.194163
1,2023-04-09,1,9.510565e-01,-0.309017,8.660254e-01,-0.5,-0.781831,0.623490,9.911141e-01,-0.133015,89.9,29.1,0.0,478555.367331,478717.394779
2,2023-04-10,0,8.660254e-01,-0.500000,8.660254e-01,-0.5,0.000000,1.000000,9.886776e-01,-0.150055,91.4,28.9,0.0,476785.317357,483230.277816
3,2023-04-11,0,7.431448e-01,-0.669131,8.660254e-01,-0.5,0.781831,0.623490,9.859481e-01,-0.167052,92.0,30.7,0.0,479630.871374,485970.141540
4,2023-04-12,0,5.877853e-01,-0.809017,8.660254e-01,-0.5,0.974928,-0.222521,9.829266e-01,-0.183998,90.0,33.3,0.0,483868.556851,484442.664286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
263,2023-12-27,0,-7.247928e-01,0.688967,-2.449294e-16,1.0,0.974928,-0.222521,-6.880243e-02,0.997630,71.2,72.4,0.0,581611.016546,571390.999704
264,2023-12-28,0,-5.712682e-01,0.820763,-2.449294e-16,1.0,0.433884,-0.900969,-5.161967e-02,0.998667,72.5,60.0,0.0,579815.388893,576890.976666
265,2023-12-29,0,-3.943559e-01,0.918958,-2.449294e-16,1.0,-0.433884,-0.900969,-3.442161e-02,0.999407,71.4,64.1,0.0,578344.850415,572104.814138
266,2023-12-30,1,-2.012985e-01,0.979530,-2.449294e-16,1.0,-0.974928,-0.222521,-1.721336e-02,0.999852,71.9,69.3,0.0,576290.337907,585843.459244


# Training the Model.

In [9]:

import plotly.graph_objects as go

def get_train_val_test_dfs(combined_df, train_frac=0.7):
    """Split a frame by row order into a leading training part and a trailing test part.

    Rows are not shuffled. Despite the function name, no validation frame is
    produced: exactly two frames are returned.

    Parameters
    ----------
    combined_df : pandas.DataFrame
        Frame to split.
    train_frac : float, default 0.7
        Fraction of rows (rounded down) assigned to the training part.
        Must lie strictly between 0 and 1.

    Returns
    -------
    (train_df, test_df) : tuple of pandas.DataFrame
        Both parts, each with a fresh 0-based index.

    Raises
    ------
    ValueError
        If ``train_frac`` is not strictly between 0 and 1.
    """
    if not 0 < train_frac < 1:
        raise ValueError(f"train_frac must be strictly between 0 and 1, got {train_frac!r}")

    train_size = int(train_frac * len(combined_df))
    # .iloc makes the positional (row-number) slicing explicit.
    train_df = combined_df.iloc[:train_size].reset_index(drop=True)
    test_df = combined_df.iloc[train_size:].reset_index(drop=True)
    return train_df, test_df

def _plot_metric_history(history_df, metric, label, title):
    """Show one figure with the training curve of `metric` and, if recorded, its validation curve."""
    fig = go.Figure()
    # go.Scatter replaces the deprecated go.Line trace constructor.
    fig.add_trace(go.Scatter(x=history_df['epoch'], y=history_df[metric],
                             mode='lines+markers', name=f'Training {label}'))

    # The 'val_*' series are only present when training ran with validation data,
    # so they are plotted only if the history actually contains them.
    val_metric = f'val_{metric}'
    if val_metric in history_df.columns:
        fig.add_trace(go.Scatter(x=history_df['epoch'], y=history_df[val_metric],
                                 mode='lines+markers', name=f'Validation {label}'))

    fig.update_layout(title=title, xaxis_title="Epochs", yaxis_title=label)
    fig.show()


def plot_training_graphs(history):
    """Plot per-epoch loss and MAPE curves from a training history.

    Parameters
    ----------
    history : object with a ``history`` mapping
        Mapping of metric name to a list of per-epoch values. The keys
        'loss' and 'mape' are required; 'val_loss' and 'val_mape' are
        plotted as well when present.

    Two figures are shown (loss, then MAPE); nothing is returned.
    """
    history_df = pd.DataFrame.from_dict(history.history)
    history_df['epoch'] = history_df.index + 1  # 1-based epoch numbers for the x axis

    _plot_metric_history(history_df, 'loss', 'Loss', "Training and Validation Loss")
    _plot_metric_history(history_df, 'mape', 'MAPE', "Training and Validation MAPE")

def plot_test_graphs(test_df):
    """Plot actual and predicted sales against date in a single figure.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Must contain the columns 'Date', 'Sales' and 'Predicted_Sales'.

    The figure is shown; nothing is returned.
    """
    fig = go.Figure()

    # go.Scatter replaces the deprecated go.Line trace constructor.
    for column, label in (('Sales', 'Actual Sales'), ('Predicted_Sales', 'Predicted Sales')):
        fig.add_trace(go.Scatter(x=test_df['Date'],
                                 y=test_df[column],
                                 mode='lines+markers',
                                 name=label))

    fig.update_layout(xaxis_title='Date', yaxis_title='Sales', title='CNG Sales')
    fig.show()



In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Input
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.losses import Huber
from tensorflow.keras.callbacks import EarlyStopping

def mape(y_true, y_pred):
    """Mean absolute percentage error between targets and predictions, in percent.

    Both inputs are cast to float32 and flattened, so they must hold the same
    number of elements. Targets equal to zero are replaced by the Keras epsilon
    before dividing.

    Returns a scalar tensor.
    """
    # Flatten both sides: 1-D targets of shape (batch,) compared with predictions of
    # shape (batch, 1) would otherwise broadcast to a (batch, batch) matrix and the
    # mean would be taken over every target/prediction pair instead of matching pairs.
    y_true = tf.reshape(tf.cast(y_true, tf.float32), [-1])
    y_pred = tf.reshape(tf.cast(y_pred, tf.float32), [-1])

    # Avoid division by zero by substituting a small epsilon where y_true is zero.
    epsilon = tf.keras.backend.epsilon()
    y_true = tf.where(tf.equal(y_true, 0), epsilon, y_true)

    return tf.reduce_mean(tf.abs((y_true - y_pred) / y_true)) * 100

def model_train(train_df, features, target, val_df=None, epochs=100, batch_size=4, patience=15):
    """Build and fit a small feed-forward regression network.

    The network is Input -> Dense(64, relu) -> Dense(32, relu) -> Dense(1),
    compiled with the Adam optimizer, MSE loss and the `mape` metric.
    Feature and target values are passed to the network as they are
    (no scaling is applied here).

    Parameters
    ----------
    train_df : pandas.DataFrame
        Training rows; must contain every column in `features` and `target`.
    features : list of str
        Names of the input columns.
    target : str
        Name of the column to predict.
    val_df : pandas.DataFrame, optional
        Validation rows with the same columns. When given, it is passed to
        `fit` as validation data and early stopping monitors 'val_mape';
        otherwise early stopping monitors the training 'mape'.
    epochs : int, default 100
        Maximum number of training epochs.
    batch_size : int, default 4
        Mini-batch size.
    patience : int, default 15
        Epochs without improvement of the monitored metric before stopping.

    Returns
    -------
    (model, history)
        The fitted model and the history object returned by `fit`.
    """
    X_train, y_train = train_df[features].values, train_df[target].values

    # Only hand validation data to fit() when the caller supplied some, and
    # monitor the matching metric.
    fit_options = {}
    monitored_metric = 'mape'
    if val_df is not None:
        fit_options['validation_data'] = (val_df[features].values, val_df[target].values)
        monitored_metric = 'val_mape'

    # Stop once the monitored metric stalls and roll back to the best weights seen.
    early_stopping = EarlyStopping(
        monitor=monitored_metric,
        mode='min',
        patience=patience,
        restore_best_weights=True,
    )

    # Define FFNN model
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1)
    ])

    # Compile and train
    model.compile(optimizer='adam', loss='mse', metrics=[mape])
    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                        callbacks=[early_stopping], **fit_options)

    return model, history


In [12]:
import numpy as np

def model_test(model, test_df, features, target, window=7):
    """Score held-out rows with a fitted model, using only test-period sales history.

    'Sales_MA_7' is recomputed for every row as the mean of the `window`
    preceding 'Sales' values of `test_df` itself (the column keeps this name
    whatever `window` is). The first `window` rows have no full history; they
    receive no prediction and are removed from the result.

    Parameters
    ----------
    model : object with a ``predict(X, verbose=...)`` method
        Fitted model; `predict` must return one value per input row.
    test_df : pandas.DataFrame
        Rows to score, in time order. Must contain 'Sales' and every column in
        `features` other than 'Sales_MA_7'. The frame is not modified.
    features : list of str
        Input column names, in the order the model was trained on.
    target : str
        Unused; kept so existing calls keep working.
    window : int, default 7
        Number of preceding rows averaged into 'Sales_MA_7'.

    Returns
    -------
    pandas.DataFrame
        Copy of the scored rows with 'Sales_MA_7' and 'Predicted_Sales' filled
        in, rows containing any missing value dropped, and a fresh 0-based index.
    """
    # Work on a renumbered copy so the caller's frame is left untouched and
    # row labels coincide with row positions.
    scored_df = test_df.reset_index(drop=True).copy()
    row_count = len(scored_df)

    # Mean of the previous `window` sales for each row; shift(1) keeps the row's own
    # sales out of its feature. Rows without a full window are masked out.
    trailing_mean = scored_df['Sales'].rolling(window=window, min_periods=1).mean().shift(1)
    has_full_window = np.arange(row_count) >= window
    scored_df['Sales_MA_7'] = trailing_mean.where(has_full_window)
    scored_df['Predicted_Sales'] = np.nan

    # One batched predict call instead of one call per row.
    if has_full_window.any():
        X_test = scored_df.loc[has_full_window, features].to_numpy(dtype=np.float64)
        predictions = np.asarray(model.predict(X_test, verbose=0), dtype=np.float64).reshape(-1)
        scored_df.loc[has_full_window, 'Predicted_Sales'] = predictions

    return scored_df.dropna().reset_index(drop=True)

In [19]:
# Model inputs (weekend flag, cyclical calendar encodings, weather readings,
# trailing sales mean) and the column to predict.
features = [
    'IsWeekend',
    'Day_sin', 'Day_cos',
    'Month_sin', 'Month_cos',
    'Weekday_sin', 'Weekday_cos',
    'DayOfYear_sin', 'DayOfYear_cos',
    'Temp', 'Humidity', 'Rain',
    'Sales_MA_7',
]
target = 'Sales'
plot_graphs = True

# Split by row order, then fit on the leading part.
train_df, test_df = get_train_val_test_dfs(combined_df)
model, history = model_train(train_df, features, target)
# Training-curve plots are currently switched off:
# if plot_graphs:
#     plot_training_graphs(history)

# Score the trailing part and report its error.
test_df = model_test(model, test_df, features, target)
print(f"MAPE for test data: {mape(test_df['Sales'],test_df['Predicted_Sales'])}")

if plot_graphs:
    plot_test_graphs(test_df)


Epoch 1/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 158248796160.0000 - mape: 72.3572 
Epoch 2/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 5184754176.0000 - mape: 12.2005
Epoch 3/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 887343232.0000 - mape: 6.7554
Epoch 4/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1103346304.0000 - mape: 6.9433
Epoch 5/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1144019712.0000 - mape: 6.7971
Epoch 6/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1238882048.0000 - mape: 7.1611
Epoch 7/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1196305152.0000 - mape: 7.7058
Epoch 8/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 997515200.0000 - mape: 7.3384
Epoch


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




In [20]:
# Per-day error table: take the relevant test columns, then add the
# absolute percentage error (APE) of each prediction.
errors = test_df[['Date', 'Sales', 'Predicted_Sales', 'IsWeekend']].copy()
absolute_error = (errors['Sales'] - errors['Predicted_Sales']).abs()
errors['APE'] = absolute_error * 100 / errors['Sales']
errors = errors.dropna()


In [21]:
# Turn the 0/1 weekend flag into readable labels; any other value is left unchanged.
weekend_labels = {0: 'Weekday', 1: 'Weekend'}
errors['IsWeekend'] = errors['IsWeekend'].replace(weekend_labels)
errors

Unnamed: 0,Date,Sales,Predicted_Sales,IsWeekend,APE
0,2023-10-19,591696.920272,590272.5625,Weekday,0.240724
1,2023-10-20,591895.789056,590592.6250,Weekday,0.220168
2,2023-10-21,608842.275723,589203.7500,Weekend,3.225552
3,2023-10-22,526493.937779,595462.7500,Weekend,13.099640
4,2023-10-23,547643.104569,590892.7500,Weekday,7.897414
...,...,...,...,...,...
69,2023-12-27,571390.999704,584632.8750,Weekday,2.317481
70,2023-12-28,576890.976666,582827.4375,Weekday,1.029044
71,2023-12-29,572104.814138,581349.0000,Weekday,1.615820
72,2023-12-30,585843.459244,579284.1875,Weekend,1.119629


In [22]:
import plotly.express as px

# Daily APE as points coloured by weekday/weekend, with a line joining the points.
weekend_palette = {'Weekday': 'blue', 'Weekend': 'red'}
fig = px.scatter(
    errors,
    x='Date',
    y='APE',
    color=errors['IsWeekend'],
    title="APE vs Date",
    color_discrete_map=weekend_palette,
)
fig.add_scatter(x=errors['Date'], y=errors['APE'], mode='lines', name='APE Trend')
fig.show()