In [1]:
import random
import os

import neat
from neat import DistributedEvaluator, ParallelEvaluator
from tqdm.notebook import tqdm, trange
from pathlib import Path
from random import randint
from ui.python.Layout import Layout
import numpy as np

from helpers.estimation_helpers import *
import helpers.visualize as visualize

import pandas as pd

In [2]:
# --- Run configuration -------------------------------------------------------
MAX_WORKERS = 10  # size of the ProcessPoolExecutor used by the sweep cell
SLICE_SIZE = 400  # not referenced in the visible cells
EPOCHS = 100  # not referenced in the visible cells
SCORE_COEFFICIENTS = (500, 350, 150)  # forwarded unchanged to calculate_score

# Layout under study, loaded with its item and path counters reset.
layout = Layout('./../data/layouts/genetic/step1-max/layout_4_racks.json').reset_item_count().reset_path_count()

# --- Placeholders filled in by later cells -----------------------------------
item_list = None
check_list = None
check_ids = None
df = None
best = None
# The notebook populates and iterates `str_items` (plural); declare it here so
# every later-assigned name has a placeholder. `str_item` is kept only so that
# anything still referring to the old name keeps working.
str_items = None
str_item = None

# Departments kept when filtering the order data set.
selected_categories = [
    'bakery',
    'beverages',
    'breakfast',
    'canned goods',
    'dairy eggs',
    'deli',
    'dry goods pasta',
    'frozen',
    'household',
    'meat seafood',
    'pantry',
    'produce',
    'snacks',
]

In [3]:
# Sampling plan for the check list: key = number of distinct items in a check,
# value = how many checks of that length to keep.
check_config = {1: 15, 2: 25, 3: 30, 4: 30, 5: 35, 6: 30, 7: 25}

In [4]:
def get_order_items(order_id):
    """Return ``(order_id, product ids as strings)`` for one order.

    Reads the module-level ``df``. Note that this definition is shadowed by
    the ``get_order_items`` redefined in cell In [5], which returns product
    names instead of ids.
    """
    order_rows = df[df['order_id'] == order_id]
    product_ids = [str(product_id) for product_id in order_rows['product_id'].tolist()]
    return order_id, product_ids

def create_input_for_genome(layout, i, j, level):
    """Build the flat numeric feature vector for tile ``(i, j)`` at ``level``.

    The returned list always has 29 entries, in this order:

    * ``level``
    * ``dist_to_cashier`` and ``dist_to_exit`` from ``get_tile_info``
    * the two components of ``orientation``
    * four integer pairs for ``left_products``, then four for ``right_products``
    * the integer codes of the eight neighbouring tiles, in the order
      ``(i-1, j)``, ``(i+1, j)``, ``(i, j-1)``, ``(i, j+1)``,
      ``(i-1, j-1)``, ``(i+1, j+1)``, ``(i-1, j+1)``, ``(i+1, j-1)``

    Args:
        layout: grid indexable as ``layout[row][col]`` whose tiles expose
            ``tile.type.value``.
        i: row index of the tile.
        j: column index of the tile.
        level: rack level; copied verbatim into the first feature.

    Returns:
        The feature list, or ``None`` when ``get_tile_info`` returns ``None``.

    Note:
        Neighbours are read with plain indexing, so ``(i, j)`` is assumed not to
        lie on the border of the grid (``i - 1`` / ``j - 1`` would otherwise wrap
        around via negative indexing) — TODO confirm racks are never on an edge.
    """
    # Each rack side contributes exactly this many product slots to the vector.
    slots_per_side = 4
    empty_slot = (-1, -1)
    tile_codes = {'wall': 0, 'floor': 1, 'rack': 2, 'door': 3, 'cashier': 4}

    def tile_enum_to_int(tile):
        # Any tile kind not listed above is encoded as 0, the same code as a wall.
        return tile_codes.get(tile.type.value, 0)

    def convert_items_to_int(items):
        ids = []
        for item in items:
            if item[0] == '':
                # A slot with an empty item name is encoded as (-1, 0).
                ids.append((-1, 0))
            else:
                ids.append((int(item[0]), int(item[1])))
        # Pad short sides so the fixed slot indexing below cannot raise
        # IndexError; a fully empty side still yields four (-1, -1) pairs.
        ids.extend([empty_slot] * (slots_per_side - len(ids)))
        return ids

    tile_info = get_tile_info(layout, i, j)
    if tile_info is None:
        return None

    left_products = convert_items_to_int(tile_info['left_products'])
    right_products = convert_items_to_int(tile_info['right_products'])

    features = [
        level,
        tile_info['dist_to_cashier'],
        tile_info['dist_to_exit'],
        tile_info['orientation'][0],
        tile_info['orientation'][1],
    ]
    # Flatten the first four pairs of each side: left side first, then right.
    for side in (left_products, right_products):
        for first, second in side[:slots_per_side]:
            features.append(first)
            features.append(second)

    # Orthogonal neighbours first, then the diagonals (order is part of the contract).
    neighbour_offsets = (
        (-1, 0), (1, 0), (0, -1), (0, 1),
        (-1, -1), (1, 1), (-1, 1), (1, -1),
    )
    for di, dj in neighbour_offsets:
        features.append(tile_enum_to_int(layout[i + di][j + dj]))
    return features


In [5]:
# Load the order lines and keep only the columns this notebook works with.
df = pd.read_csv('./../data/datasets/ECommerce_consumer behaviour.csv').loc[
    :, ['order_id', 'user_id', 'order_number', 'department', 'product_id', 'product_name']
]
check_list = []

# Drop every row whose department is not one of the selected categories.
df = df.loc[df['department'].isin(selected_categories)]


def get_order_items(order_id):
    """Return ``(order_id, unique product names)`` for one order.

    Reads the module-level ``df`` and keeps only rows whose ``department`` is in
    ``selected_categories``. Names are returned in order of first appearance;
    an order with no matching rows yields an empty list.
    """
    order = df[df['order_id'] == order_id]
    # Vectorised membership test; the result is a boolean mask even when the
    # order has no rows.
    in_selected = order['department'].isin(selected_categories)
    return order_id, order[in_selected]['product_name'].unique().tolist()

# Build the check list: one (order_id, product names) pair for each of the
# first 10000 distinct order ids, in order of first appearance in df.
check_ids = df['order_id'].unique().tolist()
check_list = [get_order_items(check_id) for check_id in tqdm(check_ids[:10000])]

def get_checks_of_specific_length(check_list, length):
    """Return the checks whose item list (element 1 of each pair) has ``length`` entries.

    The relative order of ``check_list`` is preserved.
    """
    matching = []
    for check in check_list:
        if len(check[1]) == length:
            matching.append(check)
    return matching

def get_checks_of_specific_length_range(check_list, range_dict):
    """Pick a fixed number of checks for each requested check length.

    ``range_dict`` maps a check length to the maximum number of checks of that
    length to keep. Lengths are processed in the dict's iteration order and, for
    each one, the first matching checks of ``check_list`` are taken.
    """
    selected = []
    for length, quota in tqdm(range_dict.items()):
        candidates = get_checks_of_specific_length(check_list, length)
        selected.extend(candidates[:quota])
    return selected

# Sample the checks according to check_config (check length -> number of checks).
tuned_checks = get_checks_of_specific_length_range(check_list, check_config)

# Product ids in the data set are not sequential, so map every distinct
# (product_name, product_id) pair to a dense id starting at 1.
# .copy() makes name_id_df an independent frame before a column is added to it.
name_id_df = df[['product_name', 'product_id']].drop_duplicates().copy()
name_id_df['product_id_norm'] = range(1, len(name_id_df) + 1)

# Build the name -> normalised id lookup once instead of scanning the frame for
# every product. If a name occurs with several ids, the first one wins, which
# matches taking `.values[0]` of a filtered frame.
name_to_norm_id = {}
for product_name, norm_id in zip(name_id_df['product_name'], name_id_df['product_id_norm']):
    name_to_norm_id.setdefault(product_name, norm_id)

# Convert the sampled checks from product names to normalised ids (as strings).
check_list = [
    (order_id, [str(name_to_norm_id[name]) for name in product_names])
    for order_id, product_names in tuned_checks
]

# Register the normalised ids with the layout, then rewrite every rack slot
# from a product name to its normalised id.
str_items = [str(norm_id) for norm_id in name_id_df['product_id_norm'].unique().tolist()]
layout.set_item_list(str_items, reset_items=False)
for i in range(layout.shape[0]):
    for j in range(layout.shape[1]):
        if layout[i][j].type.name != 'RACK':
            continue
        items = layout[i][j].items
        for level in range(layout.get_max_rack_level()):
            # items[level][0] is looked up as a product name; a name missing
            # from the data set raises KeyError here.
            new_item = name_to_norm_id[items[level][0]]
            layout.set_item_to_rack(str(new_item), (i, j), level=level)

best = None

  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

# Data preparation

Idea: take a layout, generate every possible item change for every rack, evaluate each change, and feed the results to a decision tree, with the state–action pair as X and the reward difference as Y.

In [11]:
# Accumulators for the sweep: each key starts with its own empty list.
data = {column: [] for column in ("state", "action", "reward", "info")}

In [12]:
# Baseline: run thread_func on the unmodified layout and score the result.
# Only the first element of the pair returned by thread_func is kept.
# `current_reward` is the reference value that the sweep cell subtracts from
# each candidate's score.
curr_info, _ = thread_func(layout, check_list)
current_reward = calculate_score(curr_info, layout, check_list, SCORE_COEFFICIENTS)

In [13]:
# Reset the layout's item and path counters before the sweep.
# NOTE(review): presumably this clears counters accumulated by the baseline
# thread_func run — confirm against Layout's implementation.
# The last call returns the Layout, which is why the cell echoes its repr.
layout.reset_item_count()
layout.reset_path_count()

<ui.python.Layout.Layout at 0x7f1d2399f0d0>

In [14]:
from concurrent.futures import ProcessPoolExecutor

# Sweep every (rack tile, level) slot: try each item of str_items in that slot,
# evaluate the modified layout in a worker process and record the reward
# difference relative to current_reward.
#
# Layout of `data`: 'state' gets ONE entry per evaluated slot, while 'action',
# 'reward' and 'info' get len(str_items) entries per slot, in str_items order.
# Row k of the action lists therefore belongs to state k // len(str_items).
#
# A single pool is shared by the whole sweep instead of being rebuilt per slot.
with ProcessPoolExecutor(max_workers=MAX_WORKERS) as executor:
    for i in trange(layout.shape[0]):
        for j in trange(layout.shape[1]):
            if layout[i][j].type.name != 'RACK':
                continue
            for level in range(layout.get_max_rack_level()):
                input_data = create_input_for_genome(layout, i, j, level)
                if input_data is None:
                    continue
                data['state'].append(input_data)

                # Keep every future together with the item and the layout copy
                # it was submitted for, so results are attributed to the right
                # action and scored against the right layout.
                pending = []
                for item in str_items:
                    candidate = layout.copy()
                    candidate.set_item_to_rack(item, (i, j), level=level)
                    future = executor.submit(thread_func, candidate, check_list)
                    pending.append((item, candidate, future))

                for item, candidate, future in pending:
                    new_info, _ = future.result()
                    new_reward = calculate_score(new_info, candidate, check_list, SCORE_COEFFICIENTS)
                    data['action'].append(item)
                    data['reward'].append(new_reward - current_reward)
                    data['info'].append(new_info)

  0%|          | 0/18 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

In [19]:
# Report how many entries each column of `data` holds.
for column, entries in data.items():
    print(column, len(entries))

state 568
action 55664
reward 55664
info 55664


In [20]:
# Write the collected `data` dict to disk as a pickle.
import pickle

with open('./../data/datasets/data.pkl', 'wb') as pkl_out:
    pickle.dump(data, pkl_out)

In [21]:
# Read the `data` dict back from the pickle file.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file —
# only load pickles that this notebook wrote itself.
with open('./../data/datasets/data.pkl', 'rb') as pkl_in:
    data = pickle.load(pkl_in)

In [30]:
# create dataframe


[3,
 18,
 28,
 1,
 0,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 42,
 10,
 19,
 10,
 19,
 10,
 19,
 10,
 0,
 1,
 0,
 2,
 0,
 1,
 0,
 2]