# Markov-Chain - MCMC Supermarket Simulation

In [2]:
# Import all required packages
import pandas as pd
import numpy as np
import datetime 
import glob
from datetime import datetime as date
import matplotlib.pyplot as plt

In [3]:
# Read in all csv-files
all_files = glob.glob("data\*.CSV")

day_list = ["monday.csv", "tuesday.csv", "wednesday.csv", "thursday.csv", "friday.csv"]

df_list = []
for f in day_list:
    cur_df = pd.read_csv(f"data\{f}", header=0, delimiter=";")
    cur_df.columns = cur_df.columns.str.strip()
    df_list.append(cur_df)
df = pd.concat(df_list)

In [4]:
# Define function to gerneate date variables
def datetime_gen(df, dt_list, var='datetime'):  
    for i in dt_list:
        df[i]=eval("pd.DatetimeIndex(df[var])." + i)
    if "weekday" in dt_list:
        days = { 0 : 'mon' ,1 : 'tue' ,2 : 'wed' ,3 : 'thu' ,4 : 'fri'}
        df["dayofweek"] = df['weekday'].apply(lambda x: days[x])
    df[var]=pd.DatetimeIndex(df[var])
    return df

In [5]:
# Dummy function
def create_dummies(df, column,dummy_na=False,drop_first=False):
    import pandas as pd
    dummies = pd.get_dummies(df[column], dummy_na=dummy_na, drop_first=drop_first)
    df = pd.concat([df, dummies], axis=1, ignore_index=False)
    return df

In [6]:
# Apply function
dt_list = ['weekday', 'hour', 'minute'] # 'day', 'month', 'year',
df = datetime_gen(df, dt_list, var='timestamp')
df["cust_id"] = df["dayofweek"] + df["customer_no"].astype(str)

In [7]:
# Generate revenue variable
df["revenue"] = 0
rev_dic = {"fruit": 4, "spices": 3, "dairy": 5, "drinks": 6}
for i in rev_dic.keys():
    df.loc[df["location"]==i, "revenue"] = rev_dic[i]

In [8]:
# add total duration of customer in supermarket
df["time_min"]=df.groupby("cust_id").transform("min")["timestamp"]
df["time_max"]=df.groupby("cust_id").transform("max")["timestamp"]
df["time_total"]=df["time_max"]-df["time_min"]

In [9]:
# Indicator for first location of cust_id
df["first_loc"] = (df.groupby('cust_id').cumcount() == 0).astype(int)

In [10]:
# add before / afer structure
df["next_loc"]= df.groupby("cust_id")["location"].shift(-1)
df = create_dummies(df, "location")

In [11]:
# Transition probabilities
trans_prob=pd.crosstab(df['next_loc'], df['location'], normalize=1)
trans_prob["checkout"] = 0.0
trans_prob["checkout"].iloc[[0,0]] = 1.0
loc_list = ['checkout', 'dairy', 'drinks', 'fruit', 'spices']
trans_prob = trans_prob[loc_list]
trans_prob

location,checkout,dairy,drinks,fruit,spices
next_loc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
checkout,1.0,0.391211,0.537023,0.499511,0.251
dairy,0.0,0.0,0.027159,0.238319,0.323553
drinks,0.0,0.223151,0.0,0.136266,0.27314
fruit,0.0,0.189925,0.219062,0.0,0.152307
spices,0.0,0.195713,0.216756,0.125904,0.0


In [12]:
# get initial values for a customer
df_first = df[df["first_loc"]==1]
initial = np.array(df_first[loc_list][df_first["cust_id"]=="mon1"])
trans_prob_arr=np.array(trans_prob)
choices = np.dot(trans_prob, initial.T)
# loc_list.index(random.choices(loc_list, weights=choices, k=1))

In [13]:
import random
choices = np.dot(trans_prob_arr, initial.T)
while choices[0] != 1.0:
    next_step = np.zeros(5) 
    next_step[np.where(choices==list(random.choices(choices, weights=choices, k=1)))[0]]=1
    choices = np.dot(trans_prob_arr, next_step.T)
    print(next_step)

[1. 0. 0. 0. 0.]


In [31]:
class Customer:

   def __init__(self, id, state, transition_mat):
      self.id = id
      self.state = state
      self.transition_mat = transition_mat
      self.weights = np.dot(self.transition_mat, self.state.T)

   # def __repr__(self):
   #    """
   #    Returns a csv string for that customer.
   #    """
   #    return f'<Customer({self.id}, {self.state}), {self.weights})>'

   def is_active(self):
      """
      Returns True if the customer has not reached the checkout
      for the second time yet, False otherwise.
      """
      a = list(self.state != 1)
      if a[0] != "True":
         return True

   def next_state(self):
      """
      Propagates the customer to the next state
      using a weighted random choice from the transition probabilities
      conditional on the current state.
      Returns nothing.
      """
      import random
      next_step      = np.zeros(5) 
      next_step[np.where(self.weights==list(random.choices(self.weights, weights=self.weights, k=1)))[0]] = 1
      self.state     = next_step 
      self.weights   = np.dot(self.transition_mat, self.state.T)

In [29]:
# Test function
df_first        =   df[df["first_loc"]==1]
trans_prob_arr  =   np.array(trans_prob)
state           =   np.array(df_first[loc_list][df_first["cust_id"]=="mon1"])
c = Customer("mon1", state, trans_prob_arr)

#c.is_active()
c.is_active()


True

In [39]:
# get initial values for a customer
trans_prob_arr  =   np.array(trans_prob)
time_dic        =   {}

for t in set(df['timestamp']):

    df_time         =   df[df["timestamp"]==t]
    df_first        =   df_time[df_time["first_loc"]==1]
    
    for c in set(df_first["cust_id"]): 
        for j in time_dic.keys():
            if j.is_active():  
                state          =   np.array(df_first[loc_list][df_first["cust_id"]==j])
                j.next_state()           
                time_dic[j]    =   j.state
            else:
                del time_dic[j]

        state          =   np.array(df_first[loc_list][df_first["cust_id"]==c])
        c              =   Customer(c, state, trans_prob_arr)
        time_dic[c]    =   list(c.state)[0]
    print(time_dic)


# loc_list.index(random.choices(loc_list, weights=choices, k=1))

{<__main__.Customer object at 0x0000024442879850>: array([0, 0, 0, 1, 0], dtype=uint8)}
{<__main__.Customer object at 0x0000024442879850>: array([1., 0., 0., 0., 0.]), <__main__.Customer object at 0x0000024442BA43A0>: array([0., 1., 0., 0., 0.]), <__main__.Customer object at 0x0000024442ADD640>: array([0, 0, 1, 0, 0], dtype=uint8)}
{<__main__.Customer object at 0x0000024442879850>: array([1., 0., 0., 0., 0.]), <__main__.Customer object at 0x0000024442BA43A0>: array([0., 1., 0., 0., 0.]), <__main__.Customer object at 0x0000024442ADD640>: array([0, 0, 1, 0, 0], dtype=uint8)}
{<__main__.Customer object at 0x0000024442879850>: array([1., 0., 0., 0., 0.]), <__main__.Customer object at 0x0000024442BA43A0>: array([0., 1., 0., 0., 0.]), <__main__.Customer object at 0x0000024442ADD640>: array([0, 0, 1, 0, 0], dtype=uint8)}
{<__main__.Customer object at 0x0000024442879850>: array([1., 0., 0., 0., 0.]), <__main__.Customer object at 0x0000024442BA43A0>: array([0., 1., 0., 0., 0.]), <__main__.Custo

# Moving customers in shop

In [17]:
from PIL import Image
import numpy as np

im = Image.open('supermarket.png')
im2 = Image.open('tiles.png')
market = np.array(im)
tiles = np.array(im2)
print(market.shape, market.dtype)
# print(tiles.shape, tiles.dtype)

(384, 576, 3) uint8


In [18]:
x = 4 * 32   # 5th column starting from 0
y = 1 * 32   # 2nd row
apple = tiles[y:y+32, x:x+32]

In [19]:
tx = 13 * 32
ty = 2 * 32
market[ty:ty+32, tx:tx+32] = apple

In [20]:
im = Image.fromarray(market)
im.save('supermarket_filled.png')