# EDA - How does order size affect delta?
The number of items per order is called batch size. For lower batch sizes, the optimality of a configuration plays a bigger role.

In [1]:
import os
import numpy as np
from matplotlib import pyplot as plt
from colorama import Fore, Style, init
import csv
import statistics
import math
import pandas as pd
import random
import json
import glob

In [2]:
from classes.CellIterator import CellIterator
from classes.Item import Item
from classes.Worker import Worker
from classes.Warehouse import Warehouse

### Create the warehouses

In [3]:
# Rack geometry shared by both warehouse instances.
NUM_BAYS = 6
RACK_LENGTH = 15
CELLS_PER_BAY = 20
# Derived value; it is not passed to Warehouse in this cell.
CELL_LENGTH = RACK_LENGTH / CELLS_PER_BAY
RACK_WIDTH = 0.8
BAY_WIDTH = RACK_WIDTH / 2
LEVEL_HEIGHT = 0.2

# Positional arguments handed to Warehouse(...) for both instances, in call order.
_WAREHOUSE_ARGS = (
    NUM_BAYS,
    RACK_LENGTH,
    CELLS_PER_BAY,
    BAY_WIDTH,
    LEVEL_HEIGHT,
)

# WHR backs the "random" runs and WHS the "smart" runs (see wms_eda_ordersize).
WHR = Warehouse(*_WAREHOUSE_ARGS)
WHS = Warehouse(*_WAREHOUSE_ARGS)

### Utility Functions

In [4]:
def log_total_cells_to_csv(costs, file_path):
    """Append one trial row (costs plus summary statistics) to a CSV file.

    The row layout is ``n, cst1..cstK, avg, min, max, std, moe``. The header is
    written only when the file is new (or empty) and is sized by the ``costs``
    of that first call; later calls with a different number of costs produce
    rows that no longer line up with the header.

    Args:
        costs: Non-empty sequence of numeric costs for this trial.
        file_path: Destination CSV path. Missing parent folders are created.

    Raises:
        ValueError: If ``costs`` is empty (nothing is written in that case).
    """
    costs = list(costs)
    if not costs:
        # Fail before touching the filesystem instead of leaving a header-only file.
        raise ValueError("costs must contain at least one value")

    # A bare filename has no directory part, and os.makedirs('') would raise.
    directory = os.path.dirname(file_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Count the existing rows (header included) before opening for append, so the
    # file is never read while a write handle is open on it.
    existing_rows = 0
    if os.path.isfile(file_path):
        with open(file_path, mode='r', newline='') as read_file:
            existing_rows = sum(1 for _ in csv.reader(read_file))

    # With a header present, the row count equals the next trial number.
    needs_header = existing_rows == 0
    trial_number = max(existing_rows, 1)

    sample_size = len(costs)
    avg_cost = sum(costs) / sample_size
    min_cost = min(costs)
    max_cost = max(costs)
    # The sample standard deviation needs at least two points.
    raw_std = statistics.stdev(costs) if sample_size > 1 else 0
    std_cost = round(raw_std, 2)
    # Derive the margin of error from the unrounded deviation; rounding first
    # would compound the rounding error in the reported value.
    moe_cost = round(margin_of_error(raw_std, sample_size), 2)

    with open(file_path, mode='a', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            headers = ['n'] + [f'cst{i+1}' for i in range(sample_size)] + ['avg', 'min', 'max', 'std', 'moe']
            writer.writerow(headers)
        writer.writerow([trial_number] + costs + [avg_cost, min_cost, max_cost, std_cost, moe_cost])

def margin_of_error(std_dev, sample_size, z_score=1.96):
    """Return ``z_score`` times the standard error ``std_dev / sqrt(sample_size)``.

    Args:
        std_dev: Standard deviation of the sample.
        sample_size: Number of observations; must be positive.
        z_score: Multiplier applied to the standard error. Defaults to 1.96
            (presumably chosen as the two-sided 95% normal quantile).
    """
    standard_error = std_dev / math.sqrt(sample_size)
    return z_score * standard_error

def delete_file(file):
    """Remove ``file`` when the path exists; silently do nothing otherwise."""
    if not os.path.exists(file):
        return
    os.remove(file)

def clear_directory(directory_path):
    """Delete the files directly inside ``directory_path``.

    Entries are matched with the glob pattern ``*``, so names starting with a
    dot are not matched. Anything that ``os.remove`` cannot delete (for
    example a sub-directory) is reported on stdout and left in place.

    Args:
        directory_path: Folder to empty. If it is not an existing directory a
            message is printed and nothing else happens.
    """
    if not os.path.isdir(directory_path):
        print(f"Directory {directory_path} does not exist.")
        return

    # Escape the directory part so glob metacharacters in its name ('[', '*', '?')
    # are taken literally; only the trailing '*' acts as a wildcard.
    pattern = os.path.join(glob.escape(os.fspath(directory_path)), '*')

    for file in glob.glob(pattern):
        try:
            os.remove(file)
        except OSError as e:
            # Best effort: report the failure and carry on with the remaining entries.
            print(f"Error deleting file {file}: {e}")

### EDA

In [5]:
# Base folder for result files and a CSV path built from it.
# NOTE(review): results_random_path is not referenced by the other cells shown
# here - confirm it is still needed.
RESULTS_PATH = "results/"
results_random_path = os.path.join(RESULTS_PATH, "wms-random.csv")

In [6]:
# Function to add a new record to the summary DataFrame
def add_summary_record(df):
    """Append a one-row summary of ``df`` to the global ``df_summary`` and save it.

    The new row holds the number of cost columns in ``df`` and the column means
    of ``avg``, ``min``, ``max``, ``std`` and ``moe``. The accumulated summary is
    rewritten to ``results/wms-orderlist.csv`` on every call.

    Args:
        df: DataFrame with an ``n`` column, any number of cost columns, and the
            ``avg``, ``min``, ``max``, ``std`` and ``moe`` columns.

    Side effects:
        Rebinds the module-level ``df_summary`` (created on first use when it
        does not exist yet), writes the CSV file and prints a confirmation.
    """
    global df_summary

    # Every column other than 'n', 'avg', 'min', 'max', 'std' and 'moe' is a cost column.
    num_order_lists = df.shape[1] - 6

    new_record_df = pd.DataFrame([{
        "num_order_lists": num_order_lists,
        "avg": df['avg'].mean(),
        "min": df['min'].mean(),
        "max": df['max'].mean(),
        "std": df['std'].mean(),
        "moe": df['moe'].mean(),
    }])

    # On a fresh kernel nothing has defined df_summary yet; start the summary
    # with this record instead of failing with NameError.
    try:
        previous_summary = df_summary
    except NameError:
        previous_summary = None

    if previous_summary is None:
        df_summary = new_record_df
    else:
        df_summary = pd.concat([previous_summary, new_record_df], ignore_index=True)

    # to_csv does not create missing folders.
    os.makedirs("results", exist_ok=True)
    df_summary.to_csv("results/wms-orderlist.csv", index=False)
    print("New record added successfully!")

## Datagen

In [7]:
# Read the CSV data from the file
def load_order_history(input_file):
    """Load item records from a CSV file and derive per-item pick probabilities.

    Args:
        input_file: Path of a CSV file with the columns ``id``, ``name``,
            ``quantity``, ``volume`` and ``cover``.

    Returns:
        A tuple ``(items, item_probabilities)``. ``items`` is a list of dicts
        with the columns above (all but ``name`` converted to ``int``);
        ``item_probabilities[i]`` is item ``i``'s quantity divided by the sum
        of all quantities. Both lists are empty when the file has no rows.

    Raises:
        ValueError: If the file has rows but their quantities sum to zero.
    """
    # Read the file the caller asked for rather than a hard-coded path.
    with open(input_file, mode='r', newline='') as file:
        items = [
            {
                "id": int(row["id"]),
                "name": row["name"],
                "quantity": int(row["quantity"]),
                "volume": int(row["volume"]),
                "cover": int(row["cover"]),
            }
            for row in csv.DictReader(file)
        ]

    total_quantity = sum(item['quantity'] for item in items)
    if items and total_quantity == 0:
        raise ValueError(f"All quantities in {input_file} are zero; cannot derive probabilities")

    item_probabilities = [item['quantity'] / total_quantity for item in items]

    return items, item_probabilities

# Generate orders
def generate_orders(num_orders_per_file, file_num, items, item_probabilities, order_size):
    """Sample random orders and save them as ``data/orders/orders{file_num}.json``.

    Each order is a dict ``{"items": [...]}`` whose list holds ``order_size``
    entries drawn from ``items`` with replacement, weighted by
    ``item_probabilities``.

    Args:
        num_orders_per_file: Number of orders to write into the file.
        file_num: Suffix used in the output file name.
        items: Population to sample from.
        item_probabilities: One weight per entry of ``items``.
        order_size: Number of items per order.
    """
    orders = [
        {"items": random.choices(items, weights=item_probabilities, k=order_size)}
        for _ in range(num_orders_per_file)
    ]

    # clear_directory() only reports a missing folder, so make sure the target
    # exists before writing into it.
    os.makedirs('data/orders', exist_ok=True)

    # Save the orders to a JSON file
    with open(f'data/orders/orders{file_num}.json', 'w') as json_file:
        json.dump(orders, json_file, indent=4)

def datagen(num_order_lists, num_orders_per_file, order_size):
    """Generate ``num_order_lists`` order files from the small order history.

    The item catalogue and its sampling weights are loaded once and reused for
    every file; files are numbered from 0.
    """
    catalogue, weights = load_order_history("data/order_history_small.csv")
    file_index = 0
    while file_index < num_order_lists:
        generate_orders(num_orders_per_file, file_index, catalogue, weights, order_size)
        file_index += 1

In [8]:
def wms_runner(WH, type, file_num):
    """Run one simulation pass over every order file and log the travel costs.

    The warehouse is populated from ``./data/order_history_small.csv``, laid
    out with the requested strategy, and then each file in ``data/orders`` is
    replayed. The cost of a file is the sum of ``cells_travelled`` over all
    workers. One row with all costs is appended to
    ``results/wms-{type}/{file_num}.csv`` and the warehouse is hard-reset.

    Args:
        WH: Warehouse instance to simulate on.
        type: Placement strategy, ``"random"`` or ``"smart"``.
        file_num: Name (without extension) of the result CSV to append to.

    Raises:
        ValueError: If ``type`` is not a known strategy. Raised before the
            warehouse is touched, rather than simulating an unplaced warehouse
            and discarding the result.
    """
    if type not in ("random", "smart"):
        raise ValueError(f"Unknown placement type {type!r}; expected 'random' or 'smart'")

    WH.generate_item_list_small("./data/order_history_small.csv")

    # create a warehouse configuration (random / smart)
    if type == "random":
        WH.sprinkle_aisle()
    else:
        WH.smart_sprinkle()

    directory = 'data/orders'
    costs = []

    # os.listdir() returns entries in arbitrary order; sorting (lexicographically)
    # keeps the cstN columns aligned with the same order files on every call.
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)
        if not os.path.isfile(file_path):
            continue

        WH.read_orders(file_path)
        WH.dist_batched_order_list()

        # All workers get their order lists prepared before any of them starts picking.
        for worker in WH.workers:
            worker.min_max_order_list(WH.aisle_items)

        total_cells = 0
        for worker in WH.workers:
            worker.fulfill_batched_order()
            total_cells += worker.cells_travelled

        costs.append(total_cells)
        WH.reset("soft")

    log_total_cells_to_csv(costs, f"results/wms-{type}/{file_num}.csv")

    WH.reset("hard")

## EDA Runner

In [9]:
def wms_eda_ordersize(max_order_size, step_size, sample_size, type, num_order_lists=20):
    """Sweep the order size and log simulation costs for one placement strategy.

    For every order size in ``range(1, max_order_size + 1, step_size)`` the
    order files in ``data/orders/`` are regenerated and ``wms_runner`` is run
    on the warehouse for the chosen strategy (``WHR`` for ``"random"``, ``WHS``
    for ``"smart"``). Results of earlier sweeps in ``results/wms-{type}/`` are
    cleared first.

    Args:
        max_order_size: Largest order size to consider (inclusive upper bound
            of the range).
        step_size: Increment between consecutive order sizes.
        sample_size: Number of orders per generated file. For ``"random"`` it
            is also the number of runner repetitions per order size;
            ``"smart"`` runs once per order size.
            NOTE(review): confirm this double use of sample_size is intended.
        type: Placement strategy, ``"random"`` or ``"smart"``.
        num_order_lists: Number of order files generated per order size.

    Raises:
        ValueError: If ``type`` is not a known strategy. Raised before any
            directory is cleared or any data is generated.
    """
    if type not in ("random", "smart"):
        raise ValueError(f"Unknown placement type {type!r}; expected 'random' or 'smart'")

    clear_directory(f"results/wms-{type}/")

    warehouse = WHR if type == "random" else WHS
    warehouse.reset(type="hard")

    # A random layout is re-sampled on every runner call, so it is repeated;
    # the smart layout is run once per order size.
    repetitions = sample_size if type == "random" else 1

    for order_size in range(1, max_order_size + 1, step_size):
        # generate order files
        clear_directory('data/orders/')
        datagen(num_order_lists=num_order_lists, num_orders_per_file=sample_size, order_size=order_size)

        for _ in range(repetitions):
            wms_runner(warehouse, type=type, file_num=order_size)

        print(f"Order size {order_size} completed")

In [10]:
# Smart placement over order sizes 1, 4, 7 and 10 (range(1, 11, 3)). Each generated
# order file holds 300 orders and the runner is invoked once per order size.
wms_eda_ordersize(
    max_order_size=10,
    step_size=3,
    sample_size=300,
    type="smart"
)

[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37mOrder size 1 completed
[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37mOrder size 4 completed
[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37mOrder size 7 completed
[37m[37m[37m[37m[37m[37m

In [11]:
# Random placement over order sizes 1, 4, 7 and 10 (range(1, 11, 3)). Each generated
# order file holds 100 orders and the runner is repeated 100 times per order size.
wms_eda_ordersize(
    max_order_size=10,
    step_size=3,
    sample_size=100,
    type="random"
)

[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m[37m