In [1]:
import random
import os

from neat import DistributedEvaluator, ParallelEvaluator
from tqdm.notebook import tqdm, trange
from pathlib import Path
from random import randint
from ui.python.Layout import Layout
import numpy as np

from helpers.estimation_helpers import *

In [2]:
import pandas as pd

In [3]:
# Worker count for the ParallelEvaluator line that is currently commented out in run().
MAX_WORKERS = 24
# Number of entries taken from check_list when a genome is scored.
SLICE_SIZE = 1000
# Generation count handed to Population.run() in run().
EPOCHS = 40
# Passed as the last argument to calculate_score() during evolution.
# NOTE(review): the meaning of each position is defined by the helper module — confirm.
SCORE_COEFFICIENTS = (1000, 0, 0)

# Preprocessing

In [4]:
# Load the order-line dataset; the path is relative to the kernel's working directory.
df = pd.read_csv('./../data/datasets/ECommerce_consumer behaviour.csv')
df

Unnamed: 0,order_id,user_id,order_number,order_dow,order_hour_of_day,days_since_prior_order,product_id,add_to_cart_order,reordered,department_id,department,product_name
0,2425083,49125,1,2,18,,17,1,0,13,pantry,baking ingredients
1,2425083,49125,1,2,18,,91,2,0,16,dairy eggs,soy lactosefree
2,2425083,49125,1,2,18,,36,3,0,16,dairy eggs,butter
3,2425083,49125,1,2,18,,83,4,0,4,produce,fresh vegetables
4,2425083,49125,1,2,18,,83,5,0,4,produce,fresh vegetables
...,...,...,...,...,...,...,...,...,...,...,...,...
2019496,3390742,199430,16,3,18,5.0,83,8,0,4,produce,fresh vegetables
2019497,458285,128787,42,2,19,3.0,115,1,1,7,beverages,water seltzer sparkling water
2019498,458285,128787,42,2,19,3.0,32,2,1,4,produce,packaged produce
2019499,458285,128787,42,2,19,3.0,32,3,1,4,produce,packaged produce


In [5]:
# Keep only the order, user, department and product columns.
# This rebinds `df`, so the full raw frame is no longer reachable after this cell.
df = df[['order_id', 'user_id', 'order_number', 'department', 'product_id', 'product_name']]
df

Unnamed: 0,order_id,user_id,order_number,department,product_id,product_name
0,2425083,49125,1,pantry,17,baking ingredients
1,2425083,49125,1,dairy eggs,91,soy lactosefree
2,2425083,49125,1,dairy eggs,36,butter
3,2425083,49125,1,produce,83,fresh vegetables
4,2425083,49125,1,produce,83,fresh vegetables
...,...,...,...,...,...,...
2019496,3390742,199430,16,produce,83,fresh vegetables
2019497,458285,128787,42,beverages,115,water seltzer sparkling water
2019498,458285,128787,42,produce,32,packaged produce
2019499,458285,128787,42,produce,32,packaged produce


In [6]:
# Inspect the distinct values of the product_name column.
df['product_name'].unique()

array(['baking ingredients', 'soy lactosefree', 'butter',
       'fresh vegetables', 'yogurt', 'canned meals beans',
       'poultry counter', 'ice cream ice', 'fresh fruits', 'milk',
       'packaged cheese', 'bread', 'tea', 'bakery desserts',
       'frozen breakfast', 'cereal', 'eggs', 'buns rolls', 'cream',
       'water seltzer sparkling water', 'pickled goods olives',
       'packaged poultry', 'other creams cheeses',
       'honeys syrups nectars', 'coffee', 'refrigerated',
       'energy granola bars', 'soft drinks', 'latino foods',
       'plates bowls cups flatware', 'paper goods', 'oral hygiene',
       'diapers wipes', 'food storage', 'nuts seeds dried fruit', 'soap',
       'packaged vegetables fruits', 'hot dogs bacon sausage',
       'lunch meat', 'chips pretzels', 'meat counter',
       'fresh dips tapenades', 'prepared soups salads', 'condiments',
       'juice nectars', 'canned fruit applesauce',
       'preserved dips spreads', 'packaged produce',
       'canned jarr

In [7]:
# Inspect the distinct values of the department column.
df['department'].unique()

array(['pantry', 'dairy eggs', 'produce', 'canned goods', 'meat seafood',
       'frozen', 'bakery', 'beverages', 'breakfast', 'snacks',
       'international', 'household', 'personal care', 'babies', 'deli',
       'dry goods pasta', 'missing', 'alcohol', 'pets', 'bulk', 'other'],
      dtype=object)

In [8]:
# Distinct product ids as a plain Python list.
# NOTE(review): item_list is not referenced by the later cells shown here; they recompute
# the same list directly from df.
item_list = df['product_id'].unique().tolist()
item_list

[17,
 91,
 36,
 83,
 120,
 59,
 35,
 37,
 24,
 84,
 21,
 112,
 94,
 8,
 52,
 121,
 86,
 43,
 53,
 115,
 110,
 49,
 108,
 29,
 26,
 31,
 3,
 77,
 30,
 111,
 54,
 20,
 56,
 85,
 117,
 25,
 123,
 106,
 96,
 107,
 122,
 67,
 1,
 72,
 98,
 99,
 51,
 32,
 81,
 12,
 9,
 116,
 129,
 69,
 131,
 13,
 16,
 130,
 104,
 63,
 58,
 100,
 23,
 57,
 133,
 64,
 78,
 45,
 50,
 128,
 14,
 27,
 75,
 66,
 34,
 38,
 88,
 46,
 11,
 93,
 125,
 101,
 126,
 48,
 4,
 124,
 89,
 105,
 19,
 92,
 44,
 40,
 82,
 79,
 5,
 42,
 55,
 134,
 61,
 114,
 15,
 68,
 119,
 109,
 80,
 62,
 65,
 95,
 70,
 60,
 71,
 2,
 18,
 28,
 127,
 22,
 47,
 90,
 118,
 74,
 7,
 6,
 41,
 76,
 97,
 39,
 103,
 102,
 87,
 33,
 73,
 10,
 132,
 113]

In [9]:
def get_order_items(order_id):
    """Return ``(order_id, items)`` for a single order (check).

    ``items`` holds the ``product_id`` of every row in the global ``df`` whose
    ``order_id`` equals the argument, in row order, each converted to ``str``.
    An order id that matches no rows yields an empty list.
    """
    order_products = df.loc[df['order_id'] == order_id, 'product_id']
    return order_id, [str(product_id) for product_id in order_products.tolist()]

In [10]:
# Build check_list: one (order_id, [product ids as str]) tuple per order, for the first
# NUM_CHECKS distinct order ids in order of first appearance.
#
# The product lists are collected with a single grouped pass over the selected rows
# instead of one full-DataFrame boolean scan per order, which dominated the run time
# on the ~2M-row table. sort=False keeps groups in first-appearance order and rows
# inside a group keep their original order, so the result matches the per-order lookup.
NUM_CHECKS = 2000

check_ids = df['order_id'].unique().tolist()
_selected_ids = check_ids[:NUM_CHECKS]
_items_by_order = (
    df.loc[df['order_id'].isin(_selected_ids)]
    .groupby('order_id', sort=False)['product_id']
    .agg(list)
    .to_dict()
)

check_list = []
for check_id in tqdm(_selected_ids):
    # .get(..., []) mirrors the empty list the per-order filter produced for ids with no rows.
    check_list.append((check_id, [str(product_id) for product_id in _items_by_order.get(check_id, [])]))

  0%|          | 0/2000 [00:00<?, ?it/s]

# Layout functions

In [11]:
def random_layout(layout_path='./../data/layout 18x25_6.json'):
    """Load a layout and fill every level of every rack tile with a random product.

    Product ids are registered and placed as strings, matching the global ``layout``
    setup cell, the ids stored in ``check_list`` and the ``set_item_to_rack`` calls
    made during genome evaluation (the previous version used raw integers here).

    Args:
        layout_path: Layout JSON file to load; defaults to the file used elsewhere
            in this notebook.

    Returns:
        The populated ``Layout`` instance.
    """
    layout = Layout(layout_path)
    layout.set_item_list([str(product_id) for product_id in df['product_id'].unique().tolist()])

    # The candidate list does not change while racks are being filled, so fetch it once.
    candidates = layout.get_item_list()

    for row in range(layout.shape[0]):
        for col in range(layout.shape[1]):
            if layout[row][col].type.name != 'RACK':
                continue
            for lev in range(layout.get_max_rack_level()):
                layout.set_item_to_rack(random.choice(candidates), (row, col), level=lev)
    return layout

In [12]:
# Product ids are registered on the layout as strings.
items = df['product_id'].unique().tolist()
str_item = list(map(str, items))

# Shared starting layout with empty racks; evaluation code works on copies of it.
layout = Layout('./../data/layout 18x25_6.json').get_empty_rack_layout()
layout.set_item_list(str_item)

# NEAT functions

In [13]:
import neat

In [14]:
def create_input_for_genome(layout, i, j, level):
    """Build the flat list of 29 network inputs for the slot at ``(i, j, level)``.

    Returns ``None`` when ``get_tile_info(layout, i, j)`` returns ``None``.

    The list is laid out as:
      * ``level``
      * ``dist_to_cashier``, ``dist_to_exit`` and the two ``orientation`` components
      * four integer pairs taken from ``left_products``, then four from ``right_products``
      * integer codes of the eight neighbouring tiles, in the order
        (i-1, j), (i+1, j), (i, j-1), (i, j+1), (i-1, j-1), (i+1, j+1), (i-1, j+1), (i+1, j-1)

    An empty product list is encoded as four ``(-1, -1)`` pairs; an entry whose first
    element is ``''`` is encoded as ``(-1, 0)``. A non-empty product list with fewer
    than four entries raises ``IndexError``.
    NOTE(review): the two elements of a product entry are presumably (item id, count) — confirm.
    """
    # Code of a tile kind is its position here; any other kind maps to 0, same as 'wall'.
    tile_kinds = ('wall', 'floor', 'rack', 'door', 'cashier')

    def encode_tile(tile):
        for code, kind in enumerate(tile_kinds):
            if tile.type.value == kind:
                return code
        return 0

    def encode_side(products):
        if len(products) == 0:
            return [(-1, -1)] * 4
        encoded = []
        for entry in products:
            if entry[0] == '':
                encoded.append((-1, 0))
            else:
                encoded.append((int(entry[0]), int(entry[1])))
        return encoded

    tile_info = get_tile_info(layout, i, j)
    if tile_info is None:
        return None

    left_side = encode_side(tile_info['left_products'])
    right_side = encode_side(tile_info['right_products'])

    features = [
        level,
        tile_info['dist_to_cashier'],
        tile_info['dist_to_exit'],
        tile_info['orientation'][0],
        tile_info['orientation'][1],
    ]

    # Exactly four entries per side are consumed so the input size stays fixed.
    for side in (left_side, right_side):
        for slot in range(4):
            features.append(side[slot][0])
            features.append(side[slot][1])

    neighbour_offsets = (
        (-1, 0), (1, 0), (0, -1), (0, 1),
        (-1, -1), (1, 1), (-1, 1), (1, -1),
    )
    for d_row, d_col in neighbour_offsets:
        features.append(encode_tile(layout[i + d_row][j + d_col]))

    return features

In [15]:
def eval_genomes(genomes, config):
    """Fitness function for ``Population.run``: score every genome in ``genomes``.

    For each ``(genome_id, genome)`` pair a network is built from the genome and used
    to pick an item for every slot for which ``create_input_for_genome`` returns inputs.
    The filled layout is then evaluated on the first ``SLICE_SIZE`` orders of
    ``check_list`` and the resulting score is stored in ``genome.fitness``.

    Each genome works on its own fresh copy of the global ``layout``. Previously one
    copy was shared by the whole generation, so the inputs seen by a genome depended
    on the items placed by the genomes evaluated before it, which made fitness
    order-dependent and inconsistent with ``eval_genome`` / ``generate_best_layout``.

    Args:
        genomes: Iterable of ``(genome_id, genome)`` pairs.
        config: NEAT configuration used to build each network.
    """
    for genome_id, genome in genomes:
        genome.fitness = 0
        # Fresh copy per genome: placements must not leak between genomes.
        _layout = layout.copy()
        net = neat.nn.FeedForwardNetwork.create(genome, config)
        for i in range(_layout.shape[0]):
            for j in range(_layout.shape[1]):
                for level in range(_layout.get_max_rack_level()):
                    inputs = create_input_for_genome(_layout, i, j, level)
                    if inputs is None:
                        continue
                    output = net.activate(inputs)
                    # Index of the strongest output, shifted by one, is used as the item id.
                    _layout.set_item_to_rack(str(np.argmax(output) + 1), (i, j), level)
        res_dict, processed_layout = thread_func(_layout, check_list[:SLICE_SIZE], use_item_count=True)
        genome.fitness = calculate_score(res_dict, processed_layout, check_list[:SLICE_SIZE], SCORE_COEFFICIENTS)

In [16]:
def eval_genome(genome, config):
    """Score a single genome and return the score.

    Works on a copy of the global ``layout``: the genome's network picks an item for
    every slot for which ``create_input_for_genome`` returns inputs, then the filled
    layout is evaluated on the first ``SLICE_SIZE`` orders of ``check_list``.
    ``genome.fitness`` is reset to 0 as a side effect; the score itself is only returned.
    """
    candidate = layout.copy()
    genome.fitness = 0
    network = neat.nn.FeedForwardNetwork.create(genome, config)

    for row in range(candidate.shape[0]):
        for col in range(candidate.shape[1]):
            for level in range(candidate.get_max_rack_level()):
                features = create_input_for_genome(candidate, row, col, level)
                if features is None:
                    continue
                activations = network.activate(features)
                # Index of the strongest output, shifted by one, is used as the item id.
                chosen_item = str(np.argmax(activations) + 1)
                candidate.set_item_to_rack(chosen_item, (row, col), level)

    results, scored_layout = thread_func(candidate, check_list[:SLICE_SIZE], use_item_count=True)
    return calculate_score(results, scored_layout, check_list[:SLICE_SIZE], SCORE_COEFFICIENTS)

In [17]:
# Placeholder for the winning genome; run() assigns it through a `global` statement.
best = None

In [18]:
def run(config_file):
    """Evolve a NEAT population for ``EPOCHS`` generations and return the winner.

    The winning genome is also stored in the module-level ``best`` so existing cells
    that read that global keep working.

    Args:
        config_file: Path to the NEAT configuration file.

    Returns:
        The genome returned by ``Population.run``.
    """
    global best

    # Load configuration.
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    # Create the population, which is the top-level object for a NEAT run.
    population = neat.Population(config)

    # Report progress to stdout, collect statistics, and write a checkpoint
    # using an interval argument of 5.
    population.add_reporter(neat.StdOutReporter(True))
    population.add_reporter(neat.StatisticsReporter())
    population.add_reporter(neat.Checkpointer(5))

    # Parallel alternative that was left disabled:
    # pe = ParallelEvaluator(MAX_WORKERS, eval_genome)

    # Run for up to EPOCHS generations.
    winner = population.run(eval_genomes, EPOCHS)
    best = winner
    return winner


In [19]:
# NEAT configuration file, resolved relative to the kernel's working directory.
config_file = './neat_config.cfg'

# Run NEAT

In [None]:
# Start the evolution. In a notebook kernel __name__ is '__main__', so this guard passes here.
if __name__ == '__main__':
    run(config_file)


 ****** Running generation 0 ****** 


In [20]:
# Restore the state saved in the 'neat-checkpoint-99' file.
# NOTE(review): with EPOCHS = 40 this file presumably comes from an earlier, longer run —
# confirm it exists before executing this cell on a fresh kernel.
p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-99')

# Evaluate one generation from the restored state and keep the returned genome as `best`.
best = p.run(eval_genomes, 1)


Mean genetic distance 2.017, standard deviation 0.213


In [21]:
def generate_best_layout(layout, genome, config_file):
    """Return a copy of ``layout`` filled with the items chosen by ``genome``.

    A NEAT configuration is loaded from ``config_file`` and used to build the genome's
    network. For every slot for which ``create_input_for_genome`` returns inputs, the
    network is activated and the index of its largest output plus one (as a string)
    is placed there. The ``layout`` argument itself is not modified; only its copy is.
    """
    filled = layout.copy()
    neat_config = neat.Config(
        neat.DefaultGenome,
        neat.DefaultReproduction,
        neat.DefaultSpeciesSet,
        neat.DefaultStagnation,
        config_file,
    )
    network = neat.nn.FeedForwardNetwork.create(genome, neat_config)

    for row in range(filled.shape[0]):
        for col in range(filled.shape[1]):
            for level in range(filled.get_max_rack_level()):
                features = create_input_for_genome(filled, row, col, level)
                if features is None:
                    continue
                activations = network.activate(features)
                chosen_item = str(np.argmax(activations) + 1)
                filled.set_item_to_rack(chosen_item, (row, col), level)
    return filled

In [22]:
# Fill a copy of the empty-rack `layout` using the `best` genome.
# NOTE(review): the recorded output of this cell is an AttributeError about a missing
# 'get_pruned_copy' on DefaultGenome — possibly an installed neat-python version mismatch; confirm.
best_layout = generate_best_layout(layout, best, config_file)

AttributeError: 'DefaultGenome' object has no attribute 'get_pruned_copy'

In [433]:
# Parent of the kernel's working directory, passed to the layout viewer as home_dir.
curr_dir = Path(os.getcwd()).parent
best_layout.display_in_window(home_dir=str(curr_dir), debug=False)

In [434]:
# Evaluate the best layout on the whole check_list (evolution used only the first SLICE_SIZE entries).
score, tested_layout = thread_func(best_layout, check_list, use_item_count=True)

In [435]:
# Show the raw result returned by thread_func (not the final scalar score).
score

{'path': 18, 'invalid': 1999, 'rack_uniformity': 2, 'tile_uniformity': 5}

In [436]:
# NOTE(review): unlike eval_genome / eval_genomes, SCORE_COEFFICIENTS is not passed here, so
# calculate_score presumably uses its own default coefficients — confirm this is intended,
# since the value is then not directly comparable with the fitness used during evolution.
calculated_score = calculate_score(score, tested_layout, check_list)
calculated_score

751.6056338028169

In [437]:
# Display the layout object returned by thread_func alongside the results.
tested_layout.display_in_window(home_dir=str(curr_dir), debug=False)