In [None]:

import numpy as np
import pandas as pd
from ast import literal_eval

In [None]:
# Create Pivot Table Orderlines by Location
def lines_locations(df_orderlines, filename):
    """Compute the share of order lines per location and export it to Excel.

    Parameters
    ----------
    df_orderlines : pandas.DataFrame
        Order lines; must contain the columns 'Alley_Number', 'Cellule'
        and 'Coord'.
    filename : str or path-like
        Destination handed to ``DataFrame.to_excel``.

    Returns
    -------
    pandas.DataFrame
        Pivot table indexed by 'Cellule' with one column per 'Alley_Number'.
        Each value is the number of non-null 'Coord' entries for that pair
        divided by the grand total; missing pairs are filled with 0.
    """
    # Number of order lines for every (alley, cell) pair
    df_count = (
        df_orderlines.groupby(['Alley_Number', 'Cellule'])['Coord']
        .count()
        .reset_index()
    )

    # The string alias 'sum' replaces np.sum: passing the NumPy callable is
    # deprecated by pandas and emits a FutureWarning.
    df_pivot = pd.pivot_table(
        df_count,
        values='Coord',
        index=['Cellule'],
        columns=['Alley_Number'],
        aggfunc='sum',
    ).fillna(0)

    # Normalise by the grand total so the table holds shares rather than counts
    lines_total = df_pivot.sum().sum()
    df_pivot = df_pivot / lines_total
    df_pivot.to_excel(filename)

    return df_pivot

In [None]:

from scipy.cluster.vq import kmeans2, whiten
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import ward, fcluster
from calculate_distance import dist_func
from pre_processing import mono_order

# 1. Clustering + Mapping
def clustering_loc(df, distance_threshold, dist_method, orders_number,
                   wave_start, clust_start, df_type):
    """Cluster the picking locations of `df` and map its order lines to waves.

    Parameters
    ----------
    df : pandas.DataFrame
        Order lines, forwarded to ``cluster_wave`` and ``lines_mapping_clst``.
    distance_threshold : float
        Threshold forwarded to ``cluster_wave``.
    dist_method : str
        Distance method forwarded to ``cluster_wave``. It was previously
        ignored in favour of a hard-coded 'custom'; the caller's choice is
        now honoured.
    orders_number : int
        Forwarded to ``lines_mapping_clst``.
    wave_start : int
        Forwarded to ``lines_mapping_clst``.
    clust_start : int
        Forwarded to ``cluster_wave``.
    df_type : str
        Forwarded to ``cluster_wave``.

    Returns
    -------
    tuple
        ``(dict_map, dict_omap, df, Wave_max, clust_idmax)`` where the first
        four items come from ``lines_mapping_clst`` and ``clust_idmax`` is the
        largest cluster id returned by ``cluster_wave``.
    """
    # 1. Create clusters
    list_coord, list_OrderNumber, clust_id, df = cluster_wave(
        df, distance_threshold, dist_method, clust_start, df_type)
    clust_idmax = max(clust_id)  # Last cluster id

    # 2. Map order lines to clusters and waves
    dict_map, dict_omap, df, Wave_max = lines_mapping_clst(
        df, list_coord, list_OrderNumber, clust_id, orders_number, wave_start)

    return dict_map, dict_omap, df, Wave_max, clust_idmax

# 2. Create Waves using Clusters
def cluster_wave(df, distance_threshold, dist_method, clust_start, df_type):
    """Cluster the (order, location) pairs of `df`.

    For ``df_type == 'df_mono'`` the 'Coord' column is copied into
    'Coord_Cluster' on `df` itself before clustering.

    Returns
    -------
    tuple
        ``(list_coord, list_OrderNumber, clust_id, df)``: the stacked
        coordinate array, the matching order numbers, the cluster ids offset
        by `clust_start` (a list), and the (possibly modified) input frame.
    """
    # Mono-line frames cluster directly on the picking coordinate
    if df_type == 'df_mono':
        df['Coord_Cluster'] = df['Coord']

    # One row per (order, clustering coordinate) pair
    sku_counts = df.groupby(['OrderNumber', 'Coord_Cluster'])['SKU'].count()
    df_map = pd.DataFrame(sku_counts).reset_index()

    # 'Coord_Cluster' holds string literals; parse them into one array
    parsed_coords = df_map.Coord_Cluster.apply(literal_eval).values
    list_coord = np.stack(parsed_coords)
    list_OrderNumber = df_map.OrderNumber.values

    # Cluster the locations, then shift the ids by the starting offset
    raw_ids = cluster_locations(list_coord, distance_threshold, dist_method, clust_start)
    clust_id = [label + clust_start for label in raw_ids]

    return list_coord, list_OrderNumber, clust_id, df

# 3. Create clusters of (x, y)
def cluster_locations(list_coord, distance_threshold, dist_method, clust_start):
    """Group coordinates into flat clusters with Ward hierarchical clustering.

    Parameters
    ----------
    list_coord : sequence of array-like
        Coordinates to cluster, one entry per location.
    distance_threshold : float
        Passed to ``fcluster`` as ``t`` with ``criterion='distance'``.
    dist_method : str
        'euclidian' uses ``pdist``'s default metric; any other value uses
        ``dist_func`` (imported from ``calculate_distance``) as the metric.
    clust_start : int
        Not used in this function; kept so existing callers keep working.

    Returns
    -------
    numpy.ndarray
        Cluster label for each entry of `list_coord`, as produced by
        ``fcluster``.
    """
    # A lone location has no pairwise distance, so ward() would raise on the
    # empty distance vector; it simply forms cluster 1 by itself.
    if len(list_coord) == 1:
        return np.ones(1, dtype=np.int32)

    points = np.stack(list_coord)

    # Condensed pairwise distance vector
    if dist_method == 'euclidian':
        distances = pdist(points)
    else:
        # The original referenced an undefined `distance_func`; the metric
        # imported by this file is `dist_func`.
        # NOTE(review): assumes dist_func takes two coordinate vectors and
        # returns a scalar, as pdist requires of a callable metric - confirm.
        distances = pdist(points, metric=dist_func)

    # Linkage matrix, then cut the tree at the distance threshold
    linkage_matrix = ward(distances)
    return fcluster(linkage_matrix, t=distance_threshold, criterion='distance')

In [None]:
# Map order lines to their clusters and assign wave IDs
def lines_mapping_clst(df, list_coord, list_OrderNumber, clust_id, orders_number, wave_start):
    """Attach cluster, order and wave ids to every order line.

    Parameters
    ----------
    df : pandas.DataFrame
        Order lines with an 'OrderNumber' column. A 'ClusterID' column is
        added to this frame in place before it is sorted into a new frame.
    list_coord : array-like
        Not used in this function; kept so existing callers keep working.
    list_OrderNumber : sequence
        Order numbers, aligned with `clust_id`.
    clust_id : sequence
        Cluster id for each entry of `list_OrderNumber`.
    orders_number : int
        A wave increment is triggered by every order whose OrderID is a
        multiple of this value.
    wave_start : int
        Value of the first WaveID.

    Returns
    -------
    tuple
        ``(dict_map, dict_omap, df, Wave_max)``: order number -> cluster id,
        order number -> sequential order id (starting at 1), the sorted frame
        with 'ClusterID', 'OrderID' and 'WaveID' columns, and the largest
        WaveID.
    """
    # Order number -> cluster id
    dict_map = dict(zip(list_OrderNumber, clust_id))
    df['ClusterID'] = df['OrderNumber'].map(dict_map)

    # Sort so that orders of the same cluster are contiguous
    df = df.sort_values(['ClusterID', 'OrderNumber'], ascending=True)
    list_orders = list(df.OrderNumber.unique())

    # Sequential ids 1..n for ALL orders. The previous range(1, n) was one
    # element short, so zip() dropped the last order and left its OrderID NaN.
    dict_omap = {order: rank for rank, order in enumerate(list_orders, start=1)}
    df['OrderID'] = df['OrderNumber'].map(dict_omap)

    # Waves: increment when reaching orders_number or changing cluster.
    # NOTE(review): the flag is shifted down one row, so the increment lands
    # on the row after the one that raised it, and every line of a flagged
    # order raises it - confirm this is the intended wave boundary.
    batch_full = df.OrderID % orders_number == 0
    cluster_changed = df.ClusterID.diff() != 0
    # fill_value=False keeps the boolean dtype (shift + fillna produced an
    # object column); the resulting values are unchanged.
    new_wave = (batch_full | cluster_changed).shift(1, fill_value=False).astype(int)
    df['WaveID'] = wave_start + new_wave.cumsum()
    Wave_max = df.WaveID.max()

    return dict_map, dict_omap, df, Wave_max

In [None]:
# Centroid function
def centroid(list_in):
    """Return the mean of the first two components of the given points.

    `list_in` is a sequence of indexable points; only components 0 and 1 are
    read. The result is a two-element list ``[x_mean, y_mean]`` with each
    value rounded to 2 decimals.
    """
    n_points = len(list_in)
    return [
        round(sum(point[axis] for point in list_in) / n_points, 2)
        for axis in (0, 1)
    ]

# Mapping Centroids
def centroid_mapping(df_multi):
    """Add a 'Coord_Cluster' column holding each order's centroid.

    'Coord' is parsed from its string form with ``literal_eval``, the
    coordinates are grouped per 'OrderNumber' and reduced with ``centroid``.
    The result is written to 'Coord_Cluster' as a string, and 'Coord' is
    converted back to a string. `df_multi` is modified in place and returned.
    """
    # Parse the string coordinates so they can be aggregated
    df_multi['Coord'] = df_multi['Coord'].apply(literal_eval)

    # All coordinates of one order, then their centroid
    coords_per_order = df_multi.groupby(['OrderNumber'])['Coord'].apply(list)
    centroid_per_order = coords_per_order.apply(centroid)

    # Order number -> centroid lookup
    dict_coord = centroid_per_order.to_dict()

    # Write results back as strings
    df_multi['Coord_Cluster'] = df_multi['OrderNumber'].map(dict_coord).astype(str)
    df_multi['Coord'] = df_multi['Coord'].astype(str)

    return df_multi