<a href="https://colab.research.google.com/github/srikarraju/eGrocery_Demand_Prediction/blob/main/eGrocery_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

In [None]:
import gym
from gym import spaces
import numpy as np

class eGroceryEnv(gym.Env):
  """Gym environment simulating daily replenishment of perishable products.

  Each product keeps a list of stock batches, one slot per day of shelf life,
  ordered oldest first. On every step the oldest batch is discarded (and
  counted as wastage), the new order is added as the freshest batch, and the
  day's demand is served from the oldest remaining batch onwards.

  Args:
    df: DataFrame with one row per day. Rows are looked up with
      ``df.loc[step]``, so the index is assumed to be the default 0..n-1
      integer range -- TODO confirm against the data loader.
    shelf_life: Per-product shelf life in days (sequence of ints).
    shortage_cost: Per-product cost of one unit of unmet demand.
    wastage_cost: Per-product cost of one discarded unit.
    box_size: Per-product number of units in one ordered box.
    products_count: Number of products managed by the environment.
    max_demand: Number of discrete order levels per product in the
      action space.
    demand_cols: Optional column names in ``df`` holding the daily demand,
      one per product. Defaults to ``prod<id>`` for the ids in
      ``DEFAULT_PRODUCT_IDS``.

  Raises:
    ValueError: If fewer demand columns than ``products_count`` are available.
  """

  # Product ids whose ``prod<id>`` columns are used as demand by default.
  DEFAULT_PRODUCT_IDS = (8, 11, 15, 17, 94, 95, 96, 110, 112, 128)
  # Upper bound on the number of units a single order can add to stock.
  MAX_ORDER_UNITS = 1000

  def __init__(self, df, shelf_life, shortage_cost, wastage_cost, box_size, products_count=10, max_demand=100, demand_cols=None):
    super().__init__()

    self.df = df
    self.products_count = products_count
    self.shelf_life = shelf_life
    self.shortage_cost = shortage_cost
    self.wastage_cost = wastage_cost
    self.box_size = box_size

    if demand_cols is None:
      demand_cols = ['prod' + str(pid) for pid in self.DEFAULT_PRODUCT_IDS]
    demand_cols = list(demand_cols)[:products_count]
    if len(demand_cols) < products_count:
      raise ValueError(
          'need %d demand columns, got %d' % (products_count, len(demand_cols)))
    self.demand_cols = demand_cols

    # Observation = 3 leading columns + 4 columns per product taken from the
    # data row, followed by every stock slot and the shelf-life vector.
    self.n_features = int(3 + 4*products_count + len(shelf_life) + sum(shelf_life))

    # ``action_space`` is the attribute name Gym expects; ``action_spaces``
    # is kept as an alias because earlier code exposed it under that name.
    self.action_space = spaces.MultiDiscrete([max_demand]*self.products_count)
    self.action_spaces = self.action_space
    self.observation_space = spaces.Box(low = 0, high = 20000, shape=(self.n_features,), dtype = np.float16)

    self._reset_episode_state()

  def _reset_episode_state(self):
    """Clear all per-episode trackers and empty the stock."""
    self.ind_shortages = []   # per-step list of per-product shortages
    self.ind_wastages = []    # per-step list of per-product wastages
    self.all_shortages = []   # per-step total shortage
    self.all_wastages = []    # per-step total wastage

    self.rewards = []
    self.total_loss = []
    self.curr_step = 0

    # One zero-filled slot per day of shelf life; index 0 is the oldest batch.
    self.curr_stock = [
        [0] * int(self.shelf_life[i]) for i in range(self.products_count)
    ]

  def step(self, action):
    """Advance the simulation by one day.

    Args:
      action: Per-product order quantity in boxes; fractional values are
        accepted and the resulting unit count is truncated to an int.

    Returns:
      Tuple ``(obs, done, reward, info)``. NOTE: this order differs from the
      usual Gym ``(obs, reward, done, info)`` convention; it is kept because
      the training loop in this file unpacks the result in this order.
      ``obs`` is the next day's data row followed by the flattened stock and
      the shelf-life vector; ``reward`` is the negative total cost of the
      day's wastage and shortage.
    """
    # Discard the batch that reached the end of its shelf life and put the
    # new order in as the freshest batch.
    wastages = []
    for i in range(self.products_count):
      wastages.append(self.curr_stock[i].pop(0))
      ordered_units = int(action[i]*self.box_size[i])
      # Clamp to [0, MAX_ORDER_UNITS]: a negative order would create negative
      # stock, which would later be counted as negative waste.
      self.curr_stock[i].append(min(max(ordered_units, 0), self.MAX_ORDER_UNITS))
    self.ind_wastages.append(wastages)
    self.all_wastages.append(sum(wastages))

    # Serve today's demand from the oldest batch onwards; whatever cannot be
    # served is recorded as shortage.
    demands = [int(d) for d in self.df.loc[self.curr_step, self.demand_cols].tolist()]
    shortages = []
    for i in range(self.products_count):
      batches = self.curr_stock[i]
      remaining = demands[i]
      for j in range(len(batches)):
        served = min(batches[j], remaining)
        batches[j] -= served
        remaining -= served
        if remaining == 0:
          break
      shortages.append(int(remaining))
    self.ind_shortages.append(shortages)
    self.all_shortages.append(sum(shortages))

    # Reward is the negative cost-weighted sum of wastage and shortage.
    self.curr_step += 1
    reward = -(np.dot(self.wastage_cost, np.asarray(wastages))
               + np.dot(self.shortage_cost, np.asarray(shortages)))
    self.rewards.append(reward)

    # Next observation: next day's data row + flattened stock + shelf life.
    row = self.df.loc[self.curr_step]
    flat_stock = []
    for batches in self.curr_stock:
      flat_stock += batches
    obs = list(row) + flat_stock + list(self.shelf_life)

    # The episode ends once the observation row is the last row of ``df``.
    done = (self.curr_step < 0) or (self.curr_step > self.df.shape[0]-2)

    return obs, done, reward, {}

  def reset(self):
    """Reset the environment to its initial state.

    Returns:
      A fixed initial observation of length ``n_features``: ``[1, 0, 1]``
      followed by zeros.
    """
    self._reset_episode_state()
    return [1,0,1]+[0]*(self.n_features-3)

In [None]:
import torch
import torch.nn as nn
class DemandPredictorNN(nn.Module):
  """Two-layer fully connected network with ReLU after each layer.

  The final ReLU keeps every output non-negative.

  Args:
    input_size: Number of input features.
    hidden_size: Width of the hidden layer.
    output_size: Number of outputs.
    lr: Not used by the module; accepted only to keep the signature stable.
    activation: Not used by the module; ReLU is always applied.
  """

  def __init__(self, input_size, hidden_size, output_size, lr=0.01, activation='relu'):
    super().__init__()
    self.linear1 = nn.Linear(input_size,hidden_size)
    self.linear2 = nn.Linear(hidden_size,output_size)
    self.relu = nn.ReLU()

  def forward(self, input):
    """Return ``relu(linear2(relu(linear1(input))))``."""
    hidden = self.relu(self.linear1(input))
    return self.relu(self.linear2(hidden))

  def init_weights(self):
    """Re-initialise both weight matrices with Xavier-uniform values.

    Biases are left untouched. Uses the in-place ``xavier_uniform_``; the
    un-suffixed ``xavier_uniform`` is a deprecated alias.
    """
    for layer in (self.linear1, self.linear2):
      torch.nn.init.xavier_uniform_(layer.weight)

In [None]:
# Problem configuration: ten products, each with its own shelf life (days)
# and box size (units per ordered box).
products_count = 10
shelf_life = np.array([4, 3, 5, 10, 7, 2, 1, 3, 8, 6], dtype=np.int32)
box_size = np.asarray([6, 10, 15, 4, 6, 2, 7, 50, 2, 30], dtype=np.int32)

# Unit cost for every product, for both wasted and missing units.
wastage_cost = np.ones(products_count, dtype=np.float16)
shortage_cost = np.ones(products_count, dtype=np.float16)

# Train / test data live on the mounted Google Drive.
train_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/eGrocery_Project/final_data_trainx.csv')
test_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/eGrocery_Project/final_data_testx.csv')

# Both environments share every setting and differ only in the data frame.
_env_kwargs = dict(
    shelf_life=shelf_life,
    shortage_cost=shortage_cost,
    wastage_cost=wastage_cost,
    box_size=box_size,
    products_count=10,
    max_demand=100,
)
env1 = eGroceryEnv(train_df, **_env_kwargs)
env2 = eGroceryEnv(test_df, **_env_kwargs)

In [None]:
# Display the column labels of the training frame.
train_df.columns

Index(['monthday', 'weekday', 'month', 'prod8', 'prod11', 'prod15', 'prod17',
       'prod94', 'prod95', 'prod96', 'prod110', 'prod112', 'prod128',
       'prod8avg7', 'prod8avg15', 'prod8avg30', 'prod11avg7', 'prod11avg15',
       'prod11avg30', 'prod15avg7', 'prod15avg15', 'prod15avg30', 'prod17avg7',
       'prod17avg15', 'prod17avg30', 'prod94avg7', 'prod94avg15',
       'prod94avg30', 'prod95avg7', 'prod95avg15', 'prod95avg30', 'prod96avg7',
       'prod96avg15', 'prod96avg30', 'prod110avg7', 'prod110avg15',
       'prod110avg30', 'prod112avg7', 'prod112avg15', 'prod112avg30',
       'prod128avg7', 'prod128avg15', 'prod128avg30'],
      dtype='object')

In [None]:
# Product ids and the column names derived from them.
window_size = 30
products = [8,11,15,17,94,95,96,110,112,128]
col_demand = [f'prod{pid}' for pid in products]
col_avg_demand = [f'{name}avg{window_size}' for name in col_demand]
# Demand columns with the first row dropped, so positional row k holds the
# demand of data row k+1.
actual_predictions = train_df.loc[:, col_demand].iloc[1:]

In [None]:
# Set up the model, loss, optimizer and learning-rate scheduler.
# The input width is taken from the environment so the network stays in sync
# with the observation length instead of relying on a hard-coded number.
predictor = DemandPredictorNN(env1.n_features, 256, len(products))
predictor.init_weights()
num_epochs = 1000
epoch = 1  # 1-based epoch counter; advanced by the training loop
loss_fn = nn.MSELoss()
opt = torch.optim.Adam(predictor.parameters(),lr=0.001)
# opt = torch.optim.SGD(predictor.parameters(),lr=0.01)
# Multiply the learning rate by 0.999 on every scheduler step.
scheduler = torch.optim.lr_scheduler.ExponentialLR(opt, gamma=0.999)
loss_tracker = []    # summed loss per epoch
reward_tracker = []  # summed reward per epoch



In [None]:
# Regression targets in boxes (demand units divided by box size), computed
# once up front instead of being rebuilt from the DataFrame on every step.
targets = torch.tensor(actual_predictions.to_numpy() / box_size, dtype=torch.float32)

# ``epoch`` lives at module level, so re-running this cell resumes from the
# last completed epoch instead of starting over.
while epoch<=num_epochs:
  state = env1.reset()
  done = False
  step = 0
  # Gradients are accumulated over every step of the episode and applied
  # with a single optimizer update at the end of the epoch.
  opt.zero_grad()
  curr_loss = 0
  epoch_rewards = []
  while not done:
    preds = predictor(torch.Tensor(state))
    loss = loss_fn(preds, targets[step])
    loss.backward()
    curr_loss += loss.item()
    # The raw predictions (in boxes) are used directly as the order action.
    # env1.step returns (obs, done, reward, info) in that order.
    state, done, reward,_ = env1.step(preds.tolist())
    epoch_rewards.append(reward)
    step += 1
  opt.step()
  scheduler.step()
  total_reward = sum(epoch_rewards)
  loss_tracker.append(curr_loss)
  reward_tracker.append(total_reward)
  # Report before advancing the counter so the log shows the epoch that
  # actually just finished.
  print('Epoch:',epoch,' ',curr_loss, total_reward)
  epoch += 1

Epoch: 2   25935496.006721497 -3416737.0
Epoch: 3   8781190.589234352 -2096552.0
Epoch: 4   4645579.721150586 -1535294.0
Epoch: 5   2553153.3127332805 -1174555.0
Epoch: 6   1025448.6749508737 -854170.0
Epoch: 7   306702.93456917297 -346766.0
Epoch: 8   62512.64976288693 -234582.0
Epoch: 9   28403.597230890235 -220331.0
Epoch: 10   5644.668177019736 -207025.0
Epoch: 11   5631.569393390622 -207114.0
Epoch: 12   5625.08182989201 -207072.0
Epoch: 13   5613.632444904601 -207006.0
Epoch: 14   5606.3069963567905 -206960.0
Epoch: 15   5606.191274315097 -206942.0
Epoch: 16   5598.603310868995 -206880.0
Epoch: 17   5589.380110884529 -206817.0
Epoch: 18   5585.089047295706 -206796.0
Epoch: 19   5583.239655395919 -206783.0
Epoch: 20   5581.842532735596 -206789.0
Epoch: 21   5581.495446363348 -206807.0
Epoch: 22   5581.562432234464 -206821.0
Epoch: 23   5580.989976450037 -206830.0
Epoch: 24   5580.674630506419 -206839.0
Epoch: 25   5580.597085003124 -206845.0
Epoch: 26   5580.5142252847945 -206854.

KeyboardInterrupt: ignored