This notebook creates data for the pandas/pizza shop activity.  A new timestamped output file is created each time the notebook is run.

Data is generated for pizza sales in 2022.

In [1]:
import numpy as np
import datetime
import pandas as pd

In [2]:
pizza_format_dict = {'medium': 13.0, 'large': 18.0, 'large_sicilian': 22.0}
pizza_format_weight = [0.2, 0.6, 0.2]
slice_format_dict = {'slice': 2.5, 'sicilian_slice': 3.0}
slice_format_weight = [0.8, 0.2]
cheese = ['regular', 'white', 'fresh_mozzarella']
cheese_weight = [0.7, 0.2, 0.1]
toppings = ['anchovy', 'garlic', 'meatball', 'sausage', 'mushroom', 'red_onion', 'ricotta',
            'spinach', 'extra_cheese', 'extra_sauce', 'pepperoni', 'green_peppers', 
            'banana_peppers', 'olives', 'basil']
np.random.seed(4065)
toppings_weight = np.random.randint(1, high=20, size=len(toppings))
print(toppings_weight)
toppings_weight = toppings_weight / np.sum(toppings_weight)
n_toppings_weight = [0.2, 0.25, 0.25, 0.2, 0.10] 
drink_size_dict = {'12 oz': 1.5, '20 oz': 2.0, '32 oz': 3.0}
drink_size_weight = [0.15, 0.55, 0.3]
drink_type = ['cola', 'diet_cola', 'lemon_lime', 'orange', 'sweet_tea', 'not_sweet_tea', 'ridge_rage']
drink_type_weight = [0.3, 0.15, 0.15, 0.1, 0.08, 0.02, 0.2]

[ 2  7  7  6 14 16  7 17  3 14  5  8  7  6  4]


In [3]:
class order:
    def __init__(self, number, date, time, customer='unk'):
        self.items = []
        self.sale_total = 0
        self.number = number
        self.customer = customer
        self.date = date
        self.time = time
        self.cols = ['order_no', 'sale_total', 'customer', 'date', 'time',
                     'type', 'subtotal', 'format', 'cheese', 'toppings',
                     'size', 'flavor']
               
    def add_item(self, item):
        self.items.append(item)
        self.sale_total += item.price
        
    def gen_items(self, pw, sw):
        n_pies = np.random.choice(np.arange(0, len(pw)), p=pw)
        for n in range(n_pies):
            p = pie_item()
            p.gen_item()
            rder.add_item(p)

        slices_too = np.random.rand()
        n_slices = 0
        if (n_pies < 3 and slices_too < 0.12) or n_pies == 0:
            n_slices = np.random.choice(np.arange(1, len(sw)+1), p=sw)
            for n in range(n_slices):
                s = slice_item()
                s.gen_item()
                rder.add_item(s)
    
        n_drinks = max(0,(n_slices + n_pies) + np.random.randint(-4, 0))
        for n in range(n_drinks):
            d = drink_item()
            d.gen_item()
            rder.add_item(d)
            
    def dict_item(self, item):
        d = {'order_no': self.number, 'sale_total': self.sale_total, 'customer': self.customer,
             'date': self.date, 'time': self.time}
        di = item.dict_item()
        d.update(di)
        return d
       
    def print_order(self):
        print('number: ' + str(self.number))
        print('customer: ' + str(self.customer))
        print('date: ' + str(self.date))
        print('time: ' + str(self.time))
        print('n items: ' + str(len(self.items)))
        print('total: $' + str(self.sale_total))
        for it in self.items:
            it.print_item()
        print()

In [4]:
class slice_item:
    def __init__(self):
        self.type = 'slice'
        self.price = 0
        self.slice_format = None
        self.cheese = None
        self.n_toppings = 0
        self.toppings = []
        
    def gen_item(self):
        self.slice_format = np.random.choice(list(slice_format_dict.keys()), p=slice_format_weight)
        self.n_toppings = np.random.choice([0, 1, 2, 3, 4], p=n_toppings_weight)
        self.toppings_price = 0.5 * self.n_toppings 
        self.toppings = np.random.choice(toppings, self.n_toppings, p=toppings_weight)
        self.cheese = np.random.choice(cheese, p=cheese_weight)
        self.price += slice_format_dict[self.slice_format]
        self.price += self.toppings_price      
        
    def dict_item(self):
        d = {'type': self.type, 'subtotal': self.price, 'format': self.slice_format,
             'cheese': self.cheese, 'toppings': self.toppings, 'size': None, 'flavor': None}
        return d
    
    def print_item(self):
        print('-- begin slice')
        print('\tslice format: ' + str(self.slice_format))
        print('\tcheese: ' + str(self.cheese))
        print('\ttoppings: ' + ','.join(map(str, self.toppings)))
        print('\tsubtotal: ' + str(self.price))

In [5]:
class pie_item:
    def __init__(self):
        self.type = 'pie'
        self.price = 0
        self.pie_format = None
        self.cheese = None
        self.n_toppings = 0
        self.toppings = []
        
    def gen_item(self):
        self.pie_format = np.random.choice(list(pizza_format_dict.keys()), p=pizza_format_weight)
        self.n_toppings = np.random.choice([0, 1, 2, 3, 4], p=n_toppings_weight)
        self.toppings_price = 0.5 * self.n_toppings * 3.0
        self.toppings = np.random.choice(toppings, self.n_toppings, p=toppings_weight)
        self.cheese = np.random.choice(cheese, p=cheese_weight)
        self.price += pizza_format_dict[self.pie_format]
        self.price += self.toppings_price
        
    def dict_item(self):
        d = {'type': self.type, 'subtotal': self.price, 'format': self.pie_format,
             'cheese': self.cheese, 'toppings': self.toppings, 'size': None, 'flavor': None}
        return d

    def print_item(self):
        print('-- begin pie')
        print('\tpie format: ' + str(self.pie_format))
        print('\tcheese: ' + str(self.cheese))
        print('\ttoppings: ' + ','.join(map(str, self.toppings)))
        print('\tsubtotal: ' + str(self.price))

In [6]:
class drink_item:
    def __init__(self):
        self.type = 'drink'
        self.price = 0
        self.size = None
        self.flavor = None
        
    def gen_item(self):
        self.size = np.random.choice(list(drink_size_dict.keys()), p=drink_size_weight)
        self.price += drink_size_dict[self.size]
        self.flavor = np.random.choice(drink_type, p=drink_type_weight)

    def dict_item(self):
        d = {'type': self.type, 'subtotal': self.price, 'size': self.size,
             'flavor': self.flavor, 
             'format': None, 'cheese': None, 'toppings': None}
        return d

    def print_item(self):
        print('-- begin drink')
        print('\tsize: ' + str(self.size))
        print('\tflavor: ' + str(self.flavor))
        print('\tsubtotal: ' + str(self.price))

In [7]:
day_weights = [0, 0.75, 0.90, 0.72, 0.92, 0.8, 0]
month_days = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
month_weights = np.random.randint(20, high=25, size=12)
month_weights = month_weights / np.sum(month_weights)
def gen_order_date(w=day_weights):
    gen_days = True
    while gen_days: 
        mon = np.random.choice(np.arange(1, 13), p=month_weights)
        d = datetime.date(2022, mon, np.random.randint(1, month_days[mon-1] + 1))
        r = np.random.rand()
        if r < day_weights[d.weekday()]:
            gen_days = False
    return d

In [8]:
def gen_order_time():
    gen_ts = True
    while gen_ts:
        t = np.random.rand()
        c = np.random.rand()*1.2
        if np.sin((t+2)*2*3.1416/12)**2 + 0.2 < c:
            gen_ts = False
    t = t * 720
    return str(11 + int(t/60)) + ':' + ("%02d" % int(t % 60))

In [9]:
def gen_customer(cust_weights):
    cust_no = None
    if np.random.rand() < 0.56:
        cust_no = 'unknown'
    else:
        cust_no = np.random.choice(np.arange(0, len(cust_weights)), p=cust_weights)
    return cust_no

***
***

### OK, let's generate a dataset

Generate customer roll

In [10]:
n_registered_customers = 235
custs = np.arange(1, n_registered_customers+1)
np.random.seed(2032)
cust_weights = np.random.randint(1, high=20, size=len(custs))
cust_weights = cust_weights / np.sum(cust_weights)

slices_weights = [1/(x+1) for x in range(10)]
slices_weights[8] = 0.002
slices_weights[9] = 0.002
slices_weights[0] = slices_weights[1] / 1.5
slices_weights[1] = slices_weights[1] / 2.0
slices_weights = slices_weights / np.sum(slices_weights)

pies_weights = [1/(x+1)**3 for x in range(7)]
pies_weights = pies_weights / np.sum(pies_weights)

In [17]:
df = pd.DataFrame()
n_orders = 19842
for order_number in range(n_orders):
    if order_number % 1000 == 0: print(order_number)
    date = gen_order_date()
    time = gen_order_time()
    cust = gen_customer(cust_weights)
    rder = order(order_number, date, time, cust)
    rder.gen_items(pies_weights, slices_weights)
    for i in rder.items:
        # df2 = pd.DataFrame.from_dict(rder.dict_item(i))#, orient='index', columns=rder.cols)
        df2 = pd.DataFrame([rder.dict_item(i)], columns=rder.cols)
        df = pd.concat([df, df2])

    # rder.print_order()

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000


In [18]:
display(df)

Unnamed: 0,order_no,sale_total,customer,date,time,type,subtotal,format,cheese,toppings,size,flavor
0,0,3.5,161,2022-04-29,13:16,slice,3.5,sicilian_slice,regular,[extra_sauce],,
0,1,24.5,212,2022-12-13,13:56,slice,3.5,slice,regular,"[extra_cheese, mushroom]",,
0,1,24.5,212,2022-12-13,13:56,slice,4.5,sicilian_slice,regular,"[garlic, banana_peppers, anchovy]",,
0,1,24.5,212,2022-12-13,13:56,slice,4.0,slice,regular,"[pepperoni, extra_sauce, mushroom]",,
0,1,24.5,212,2022-12-13,13:56,slice,4.5,slice,regular,"[extra_sauce, olives, olives, garlic]",,
...,...,...,...,...,...,...,...,...,...,...,...,...
0,19841,37.5,unknown,2022-10-01,16:44,slice,4.5,sicilian_slice,regular,"[basil, mushroom, ricotta]",,
0,19841,37.5,unknown,2022-10-01,16:44,drink,2.0,,,,20 oz,ridge_rage
0,19841,37.5,unknown,2022-10-01,16:44,drink,3.0,,,,32 oz,cola
0,19841,37.5,unknown,2022-10-01,16:44,drink,2.0,,,,20 oz,ridge_rage


In [20]:
df.to_csv('./monte_carlos_pizzeria_data.csv')