### This is an optional workbook in case you want to alter the data generation

In [5]:
import pandas as pd
import numpy as np

# ----- Orders dataset (100 rows, unclean dates) -----
data = {
    'Order_ID': [1001, 1002, 1003, 1004, 1005],
    'Customer': ['Kasun', 'Amaya', 'Dilshan', 'Sajantha', 'Dinura'],
    'Order_Date': ['2024/01/02', '2024-01-05', '2024-01-07', '07-01-2024', '2024.01.10'],
    'Pizza_Type': ['Pepperoni', 'Veggie', 'Veggie', 'Margherita', 'Pepperoni'],
    'Size': ['M', 'L', 'S', 'M', 'L'],
    'Branch': ['Colombo', 'Colombo', 'Kandy', 'Galle', 'Kandy']
}

df = pd.DataFrame(data)
np.random.seed(42)

# Mixed date formats
date_formats = ["%Y/%m/%d", "%Y-%m-%d", "%d-%m-%Y", "%Y.%m.%d"]

rows = []
for i in range(100):
    row = df.sample(1).iloc[0].copy()
    row['Order_ID'] = 1001 + i
    orig_date = pd.to_datetime(row['Order_Date'], dayfirst=True)
    new_date = orig_date + pd.to_timedelta(np.random.randint(0, 31), unit='d')
    fmt = np.random.choice(date_formats)
    row['Order_Date'] = new_date.strftime(fmt)
    rows.append(row)

pizza_orders = pd.DataFrame(rows).reset_index(drop=True)

# ----- Add Quantity column -----
pizza_orders['Quantity'] = np.random.randint(1, 6, size=len(pizza_orders))  # 1 to 5 pizzas per order

pizza_orders

Unnamed: 0,Order_ID,Customer,Order_Date,Pizza_Type,Size,Branch,Quantity
0,1001,Amaya,2024/05/08,Veggie,L,Colombo,4
1,1002,Sajantha,29-01-2024,Margherita,M,Galle,2
2,1003,Dinura,2024.10.08,Pepperoni,L,Kandy,2
3,1004,Dinura,2024.10.24,Pepperoni,L,Kandy,3
4,1005,Dinura,2024-10-12,Pepperoni,L,Kandy,2
...,...,...,...,...,...,...,...
95,1096,Sajantha,11-01-2024,Margherita,M,Galle,2
96,1097,Dinura,2024/10/20,Pepperoni,L,Kandy,5
97,1098,Kasun,2024/02/16,Pepperoni,M,Colombo,5
98,1099,Dinura,05-10-2024,Pepperoni,L,Kandy,3


In [6]:
pizza_orders.to_csv('pizza_orders.csv')

In [7]:
# ----- Pizza reference table -----
pizza_prices = pd.DataFrame([
    {'Pizza_Type': 'Pepperoni', 'Size': 'S', 'Price': 1000},
    {'Pizza_Type': 'Pepperoni', 'Size': 'M', 'Price': 1500},
    {'Pizza_Type': 'Pepperoni', 'Size': 'L', 'Price': 2000},
    {'Pizza_Type': 'Veggie', 'Size': 'S', 'Price': 900},
    {'Pizza_Type': 'Veggie', 'Size': 'M', 'Price': 1400},
    {'Pizza_Type': 'Veggie', 'Size': 'L', 'Price': 1900},
    {'Pizza_Type': 'Margherita', 'Size': 'S', 'Price': 800},
    {'Pizza_Type': 'Margherita', 'Size': 'M', 'Price': 1300},
    {'Pizza_Type': 'Margherita', 'Size': 'L', 'Price': 1800},
])

In [8]:
pizza_prices.to_csv('pizza_prices.csv')