<a href="https://colab.research.google.com/github/srikarraju/eGrocery_Demand_Prediction/blob/main/Moving_Average_Agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
cd /content/drive/MyDrive/Colab Notebooks/eGrocery_Project

/content/drive/MyDrive/Colab Notebooks/eGrocery_Project


In [None]:
import pandas as pd
import numpy as np

#from eGroceryEnv import eGroceryEnv

In [None]:
import gym
from gym import spaces
import numpy as np

class eGroceryEnv(gym.Env):
  """Inventory environment for perishable products driven by a demand table.

  Every call to ``step`` ages the stock by one slot, receives the ordered
  boxes, serves the current row's demand starting from the oldest stock,
  and returns the negative cost of expired and unserved units.

  Args:
    df: Table addressed by step number through ``df.loc[step]``. It must
      contain one ``prod<id>`` demand column per product; the full row of
      the next step forms the first part of the observation.
    shelf_life: Per-product number of stock slots. A unit ordered now is
      removed as wastage after this many further steps if it is not sold.
    shortage_cost: Per-product cost of one unit of unmet demand.
    wastage_cost: Per-product cost of one expired unit.
    box_size: Per-product number of units contained in one ordered box.
    products_count: Number of products managed by the environment.
    max_demand: Size of each component of the ``MultiDiscrete`` action
      space (number of boxes ordered per product).
    product_ids: Ids used to build the ``prod<id>`` demand column names.
      Defaults to ``DEFAULT_PRODUCT_IDS``, the ids that were previously
      hard-coded in ``step``.
  """

  # Ids of the products whose demand columns are read when the caller does
  # not supply ``product_ids``.
  DEFAULT_PRODUCT_IDS = (8, 11, 15, 17, 94, 95, 96, 110, 112, 128)

  def __init__(self, df, shelf_life, shortage_cost, wastage_cost, box_size, products_count=10, max_demand=100, product_ids=None):
    self.df = df
    # Observation length = df row + one entry per stock slot + the shelf lives.
    # NOTE(review): assumes a df row has 3 + 4*products_count columns -- confirm
    # against the data files.
    self.n_features = 3 + 4*products_count + len(shelf_life) + sum(shelf_life)
    self.products_count = products_count
    self.shelf_life = shelf_life
    self.shortage_cost = shortage_cost
    self.wastage_cost = wastage_cost
    self.box_size = box_size

    # Demand column names are fixed for the lifetime of the environment, so
    # build them once here instead of on every step.
    ids = self.DEFAULT_PRODUCT_IDS if product_ids is None else product_ids
    self.product_ids = list(ids)
    self.demand_cols = ['prod'+str(i) for i in self.product_ids]

    super(eGroceryEnv,self).__init__()

    # ``action_space`` is the attribute name Gym tooling looks for;
    # ``action_spaces`` is kept as an alias for existing code that used the
    # original (misspelled) name.
    self.action_space = spaces.MultiDiscrete([max_demand]*self.products_count)
    self.action_spaces = self.action_space
    self.observation_space = spaces.Box(low = 0, high = 20000, shape=(self.n_features,), dtype = np.float16)

    self._reset_episode_state()

  def _reset_episode_state(self):
    """Clear all per-episode bookkeeping and empty every stock slot."""
    self.ind_shortages = []   # per-step list of per-product shortages
    self.ind_wastages = []    # per-step list of per-product wastages
    self.all_shortages = []   # per-step total shortage
    self.all_wastages = []    # per-step total wastage

    self.rewards = []
    self.total_loss = []
    self.curr_step = 0

    # curr_stock[i][0] is the oldest slot of product i, the last entry the newest.
    self.curr_stock = [[0]*int(self.shelf_life[i]) for i in range(self.products_count)]

  def step(self, action):
    """Advance the simulation by one step.

    Args:
      action: Sequence with the number of boxes ordered for each product.

    Returns:
      Tuple ``(obs, done, reward, info)``. Note that ``done`` comes BEFORE
      ``reward``, unlike the usual Gym order; the order is kept because
      existing callers unpack it this way. ``obs`` is the next df row
      followed by the flattened stock and the shelf lives, ``reward`` is
      the negative weighted sum of this step's wastages and shortages, and
      ``info`` is an empty dict.
    """
    # Age the stock: the oldest slot expires (counted as wastage) and the
    # newly ordered units enter as the freshest slot.
    wastages = []
    for i in range(self.products_count):
      wastages.append(self.curr_stock[i].pop(0))
      self.curr_stock[i].append(action[i]*self.box_size[i])
    self.ind_wastages.append(wastages)
    self.all_wastages.append(sum(wastages))

    # Fulfil the current step's demand, oldest stock first, and track what
    # could not be served.
    demands = self.df.loc[self.curr_step,self.demand_cols].tolist()
    demands = [int(demand) for demand in demands]
    shortages = []
    for i in range(self.products_count):
      stock = self.curr_stock[i]
      for j in range(self.shelf_life[i]):
        if stock[j] >= demands[i]:
          stock[j] -= demands[i]
          demands[i] = 0
          break
        # This slot is not enough: use it up and move on to the next one.
        demands[i] -= stock[j]
        stock[j] = 0
      shortages.append(int(demands[i]))
    self.ind_shortages.append(shortages)
    self.all_shortages.append(sum(shortages))

    # Calculate the overall reward (negative total cost of this step).
    self.curr_step += 1
    reward = -1*(np.dot(self.wastage_cost,np.asarray(wastages))+np.dot(self.shortage_cost,np.asarray(shortages)))
    self.rewards.append(reward)

    # Build the next state: next df row + flattened stock + shelf lives.
    obs = self.df.loc[self.curr_step]
    next_state = [units for product_stock in self.curr_stock for units in product_stock]
    obs = list(obs) + next_state + list(self.shelf_life)

    # The episode ends once the row just read is the last row of df.
    done = (self.curr_step < 0) or (self.curr_step > self.df.shape[0]-2)

    return obs, done, reward, {}

  def reset(self):
    """Reset the environment to its initial state.

    Returns:
      A list of ``n_features`` zeros used as the initial observation.
    """
    # Shared with __init__ so every tracking list (including all_wastages,
    # which the previous misspelled assignment never cleared) is reset.
    self._reset_episode_state()

    return [0]*self.n_features

In [None]:
# Simulation parameters: one entry per product, ten products in total.
products_count = 10
shelf_life = np.asarray([4, 3, 5, 10, 7, 2, 1, 3, 8, 6], dtype=np.int32)
box_size = np.array([6, 10, 15, 4, 6, 2, 7, 50, 2, 30], dtype=np.int32)

# Unit cost of one wasted / one missing item; every product is weighted equally.
wastage_cost = np.ones(products_count, dtype=np.float16)
shortage_cost = np.ones(products_count, dtype=np.float16)

In [None]:
# Load the training and test tables from the project folder.
train_df, test_df = (
  pd.read_csv(csv_path)
  for csv_path in (
    '/content/drive/MyDrive/Colab Notebooks/eGrocery_Project/final_data_trainx.csv',
    '/content/drive/MyDrive/Colab Notebooks/eGrocery_Project/final_data_testx.csv',
  )
)

In [None]:
# Two separate, identically configured environments over the test data, so
# each ordering strategy is evaluated on its own fresh stock.
env1, env2 = (
  eGroceryEnv(
    test_df,
    shelf_life=shelf_life,
    shortage_cost=shortage_cost,
    wastage_cost=wastage_cost,
    box_size=box_size,
    products_count=10,
    max_demand=100,
  )
  for _ in range(2)
)

In [None]:
# Forecast source: the precomputed `prod<id>avg<window_size>` columns.
products = [8, 11, 15, 17, 94, 95, 96, 110, 112, 128]
window_size = 30
cols = [f'prod{pid}avg{window_size}' for pid in products]
# The last training row provides the forecast for the first test step.
initial_predictions = train_df.loc[len(train_df) - 1, cols].tolist()

In [None]:
# Window of 1: the forecast is simply the raw `prod<id>` value of the
# previous row.
products = [8, 11, 15, 17, 94, 95, 96, 110, 112, 128]
window_size = 1
cols = [f'prod{pid}' for pid in products]
# The last training row provides the forecast for the first test step.
initial_predictions = train_df.loc[len(train_df) - 1, cols].tolist()

In [None]:
# Evaluate the moving-average agent, rounding every order DOWN to whole boxes
# (the shipped quantity never exceeds the predicted need).
state = env1.reset()
total_reward = 0
actions_taken = []
curr_predictions = train_df.loc[len(train_df) - 1, cols].tolist()
for i in range(test_df.shape[0] - 1):
  # Order only what the forecast requires beyond the stock already on hand.
  quant_to_ship = [
    max(int(predicted) - sum(stock), 0)
    for predicted, stock in zip(curr_predictions, env1.curr_stock)
  ]
  action = [needed // units_per_box for needed, units_per_box in zip(quant_to_ship, box_size)]
  actions_taken.append(action)
  next_state, done, reward, _ = env1.step(np.asarray(action))
  total_reward += reward
  # The row just played becomes the forecast for the next step.
  curr_predictions = test_df.loc[i, cols].tolist()
print(total_reward)

-2097.0


In [None]:
# Totals over every step and product of the env1 run.
shortages = np.asarray(env1.ind_shortages)
wastages = np.asarray(env1.ind_wastages)
print('Shortages:', shortages.sum())
print('Wastages:', wastages.sum())

Shortages: 2028
Wastages: 69


In [None]:
# Evaluate the moving-average agent, rounding every order UP to whole boxes
# (the shipped quantity is always at least the predicted need).
state = env2.reset()
total_reward = 0
actions_taken = []
curr_predictions = train_df.loc[len(train_df) - 1, cols].tolist()
for i in range(test_df.shape[0] - 1):
  # Order only what the forecast requires beyond the stock already on hand.
  quant_to_ship = [
    max(int(predicted) - sum(stock), 0)
    for predicted, stock in zip(curr_predictions, env2.curr_stock)
  ]
  # Ceiling division: one extra box whenever the need is not a whole number of boxes.
  action = [
    full_boxes + 1 if remainder != 0 else full_boxes
    for full_boxes, remainder in (
      divmod(needed, units_per_box)
      for needed, units_per_box in zip(quant_to_ship, box_size)
    )
  ]
  actions_taken.append(action)
  next_state, done, reward, _ = env2.step(np.asarray(action))
  total_reward += reward
  # The row just played becomes the forecast for the next step.
  curr_predictions = test_df.loc[i, cols].tolist()
print(total_reward)

-819.0


In [None]:
# Totals over every step and product of the env2 run.
shortages = np.asarray(env2.ind_shortages)
wastages = np.asarray(env2.ind_wastages)
print('Shortages:', shortages.sum())
print('Wastages:', wastages.sum())

Shortages: 681
Wastages: 138


As one would expect, shortages come down when orders are rounded up so that the boxes sent hold at least the predicted quantity, whereas wastage goes up.