In [1]:
# Import necessary libraries. 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Suppressing warnings
import warnings  
warnings.filterwarnings('ignore')

In [2]:
# Load the clustered order data from disk into a DataFrame.
kite = pd.read_csv(filepath_or_buffer='clustered_data.csv')

In [3]:
# View the DataFrame.
print(kite.shape)
kite.head()

(4119, 19)


Unnamed: 0.1,Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,Country,latitude,longitude,Cluster Label
0,0,26.84,2295440,7700,18/04/2023,MFS,375.4,0.0,18/04/2023,34.46,110.0,0.225,0.2,766,DY6,United Kingdom,52.496759,-2.173682,1
1,1,26.84,2277415,67940,28/03/2023,DX Freight,236.61,0.0,28/03/2023,16.45,33.3,0.959,0.473039,766,DY6,United Kingdom,52.496759,-2.173682,1
2,2,26.84,2279524,67940,30/03/2023,DX Freight,70.58,13.2,30/03/2023,11.79,21.8,0.2091,0.222222,766,DY6,United Kingdom,52.496759,-2.173682,1
3,3,26.84,2275231,483789,27/03/2023,DX Express,76.95,7.22,27/03/2023,6.81,17.77,0.0402,0.030342,766,DY6,United Kingdom,52.496759,-2.173682,1
4,4,26.84,2285334,483789,05/04/2023,DX Express,147.3,0.0,05/04/2023,11.46,34.7,0.100552,0.052361,766,DY6,United Kingdom,52.496759,-2.173682,1


In [4]:
# Remove the 'Unnamed: 0' column
# (presumably a row index saved into the CSV by an earlier export - TODO confirm).
kite = kite.drop(columns=['Unnamed: 0'])

In [5]:
# Explore the metadata.
kite.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4119 entries, 0 to 4118
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   distance        4119 non-null   float64
 1   OrderID         4119 non-null   int64  
 2   WebsiteID       4119 non-null   int64  
 3   OrderDate       4119 non-null   object 
 4   Courier         4119 non-null   object 
 5   ProductsExVAT   4119 non-null   float64
 6   DeliveryExVAT   4119 non-null   float64
 7   DateDespatched  4119 non-null   object 
 8   DeliveryCost    4119 non-null   float64
 9   Weight          4119 non-null   float64
 10  Volume (m3)     4119 non-null   float64
 11  Pallets         4119 non-null   float64
 12  id              4119 non-null   int64  
 13  Outward Code    4119 non-null   object 
 14  Country         4119 non-null   object 
 15  latitude        4119 non-null   float64
 16  longitude       4119 non-null   float64
 17  Cluster Label   4119 non-null   i

In [6]:
# Parse the dispatch dates from day/month/year strings into datetime64 values.
kite['DateDespatched'] = kite['DateDespatched'].pipe(pd.to_datetime, format='%d/%m/%Y')

In [7]:
# Explore the metadata.
kite.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4119 entries, 0 to 4118
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   distance        4119 non-null   float64       
 1   OrderID         4119 non-null   int64         
 2   WebsiteID       4119 non-null   int64         
 3   OrderDate       4119 non-null   object        
 4   Courier         4119 non-null   object        
 5   ProductsExVAT   4119 non-null   float64       
 6   DeliveryExVAT   4119 non-null   float64       
 7   DateDespatched  4119 non-null   datetime64[ns]
 8   DeliveryCost    4119 non-null   float64       
 9   Weight          4119 non-null   float64       
 10  Volume (m3)     4119 non-null   float64       
 11  Pallets         4119 non-null   float64       
 12  id              4119 non-null   int64         
 13  Outward Code    4119 non-null   object        
 14  Country         4119 non-null   object        
 15  lati

In [8]:
# Save the cleaned frame to disk. The row index is written as well (pandas default),
# so it will come back as an extra unnamed column if this file is re-read.
kite.to_csv(path_or_buf='kite.csv')

In [9]:
# Tag every order with the weekday of its dispatch date (Monday = 0 ... Sunday = 6)
kite['weekday'] = kite['DateDespatched'].apply(lambda despatched: despatched.weekday())

# Keep only orders dispatched Monday to Friday, i.e. drop Saturday (5) and Sunday (6)
kite_wd = kite.loc[~kite['weekday'].isin([5, 6])]

# Sanity check: only the weekday codes 0-4 should remain
kite_wd['weekday'].unique()

array([1, 3, 0, 2, 4], dtype=int64)

In [10]:
# Remove dispatch dates that should not count as normal working days.
#   2023-04-07  Good Friday. The list previously held '2023-07-04', which lies outside
#               the sample period and looks like a day/month transposition, so it
#               never matched anything - TODO confirm with the data owner.
#   2023-04-10  Easter Monday.
#   2023-04-24  reason not stated in the notebook - TODO confirm.
# The dates are converted to Timestamps so the comparison does not rely on pandas
# implicitly parsing strings against a datetime64 column.
excluded_dates = pd.to_datetime(['2023-04-07', '2023-04-10', '2023-04-24'])
kite_wd = kite_wd[~kite_wd['DateDespatched'].isin(excluded_dates)]

In [11]:
# Viewing the DataFrame
kite_wd.head()

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,Country,latitude,longitude,Cluster Label,weekday
0,26.84,2295440,7700,18/04/2023,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,United Kingdom,52.496759,-2.173682,1,1
1,26.84,2277415,67940,28/03/2023,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,United Kingdom,52.496759,-2.173682,1,1
2,26.84,2279524,67940,30/03/2023,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,United Kingdom,52.496759,-2.173682,1,3
3,26.84,2275231,483789,27/03/2023,DX Express,76.95,7.22,2023-03-27,6.81,17.77,0.0402,0.030342,766,DY6,United Kingdom,52.496759,-2.173682,1,0
4,26.84,2285334,483789,05/04/2023,DX Express,147.3,0.0,2023-04-05,11.46,34.7,0.100552,0.052361,766,DY6,United Kingdom,52.496759,-2.173682,1,2


In [12]:
# View the DataFrame.
print(kite_wd.shape)
kite_wd.head()

(4068, 19)


Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,Country,latitude,longitude,Cluster Label,weekday
0,26.84,2295440,7700,18/04/2023,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,United Kingdom,52.496759,-2.173682,1,1
1,26.84,2277415,67940,28/03/2023,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,United Kingdom,52.496759,-2.173682,1,1
2,26.84,2279524,67940,30/03/2023,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,United Kingdom,52.496759,-2.173682,1,3
3,26.84,2275231,483789,27/03/2023,DX Express,76.95,7.22,2023-03-27,6.81,17.77,0.0402,0.030342,766,DY6,United Kingdom,52.496759,-2.173682,1,0
4,26.84,2285334,483789,05/04/2023,DX Express,147.3,0.0,2023-04-05,11.46,34.7,0.100552,0.052361,766,DY6,United Kingdom,52.496759,-2.173682,1,2


In [13]:
# Keep every order whose courier is anything other than 'DX Express'
kite_nonDX = kite_wd.loc[kite_wd['Courier'].ne('DX Express')]
kite_nonDX

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,Country,latitude,longitude,Cluster Label,weekday
0,26.84,2295440,7700,18/04/2023,MFS,375.40,0.00,2023-04-18,34.46,110.00,0.225000,0.200000,766,DY6,United Kingdom,52.496759,-2.173682,1,1
1,26.84,2277415,67940,28/03/2023,DX Freight,236.61,0.00,2023-03-28,16.45,33.30,0.959000,0.473039,766,DY6,United Kingdom,52.496759,-2.173682,1,1
2,26.84,2279524,67940,30/03/2023,DX Freight,70.58,13.20,2023-03-30,11.79,21.80,0.209100,0.222222,766,DY6,United Kingdom,52.496759,-2.173682,1,3
5,26.84,2292112,778040,14/04/2023,DX Freight,104.00,0.00,2023-04-14,20.55,44.20,0.123500,0.054167,766,DY6,United Kingdom,52.496759,-2.173682,1,4
6,26.84,2298627,846996,21/04/2023,DX Freight,111.36,0.00,2023-04-21,23.25,51.12,0.129600,0.100000,766,DY6,United Kingdom,52.496759,-2.173682,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4105,42.67,2283291,827222,04/04/2023,MFS,772.80,19.99,2023-04-04,42.65,406.56,0.739200,0.466667,1670,MK15,United Kingdom,52.051214,-0.718167,4,1
4107,19.60,2292974,893554,16/04/2023,DX Freight,109.80,0.00,2023-04-17,34.03,80.00,0.132400,0.111111,96,B68,United Kingdom,52.482799,-2.000643,1,0
4116,34.22,2294508,896767,18/04/2023,DX Freight,90.09,32.22,2023-04-18,10.92,18.90,1.411200,0.750000,2937,WV7,United Kingdom,52.633048,-2.270047,1,1
4117,38.17,2294964,901125,18/04/2023,DX Freight,278.64,0.00,2023-04-18,14.77,29.88,4.070664,1.511111,1456,LE13,United Kingdom,52.763672,-0.890501,2,1


In [14]:
kite_nonDX.shape

(2537, 19)

In [15]:
# Total pallets dispatched per cluster (rows) on each dispatch date (columns).
df_pallets_by_day = pd.pivot_table(
    kite_nonDX,
    values='Pallets',
    index='Cluster Label',
    columns='DateDespatched',
    aggfunc='sum',
)

# Show the pivoted table
df_pallets_by_day

DateDespatched,2023-03-27,2023-03-28,2023-03-29,2023-03-30,2023-03-31,2023-04-03,2023-04-04,2023-04-05,2023-04-06,2023-04-11,2023-04-12,2023-04-13,2023-04-14,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21
Cluster Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,4.626825,5.52084,15.878389,4.072352,8.579204,8.750306,11.926882,16.514156,9.632256,6.564483,6.806422,10.289356,10.948243,5.453371,8.497022,14.05528,7.252378,7.704178
1,38.252106,30.097933,25.875409,30.604619,34.74995,52.090649,59.073879,37.945566,35.859555,64.183745,38.207957,61.017147,40.61065,26.565372,60.519807,29.744151,19.234192,34.790947
2,25.430281,26.711982,28.232383,20.63808,59.148193,25.695971,37.088422,27.698291,22.694177,30.436707,30.523396,34.70345,18.132844,31.487958,22.920581,27.114326,35.655519,21.307101
3,11.5282,12.985316,36.069616,30.100167,9.401686,23.829074,21.668728,27.415797,25.247212,20.095868,33.847965,17.274826,12.430367,26.191024,23.686885,16.836916,18.823916,18.991872
4,15.19434,22.966426,18.886072,14.503915,26.996616,15.696868,10.335742,11.650372,10.634615,17.97022,18.110261,13.856994,23.39673,17.558513,25.580183,14.77755,8.970153,16.775009


In [16]:
# Smallest daily pallet total observed for each cluster (reduce across the date columns)
min_pallets_per_day = df_pallets_by_day.min(axis='columns')

min_pallets_per_day

Cluster Label
0     4.072352
1    19.234192
2    18.132844
3     9.401686
4     8.970153
dtype: float64

In [49]:
# Mean daily pallet total for each cluster, held as a one-column DataFrame
avg_pallets_per_day = pd.DataFrame(data=df_pallets_by_day.mean(axis='columns'))

avg_pallets_per_day

Unnamed: 0_level_0,0
Cluster Label,Unnamed: 1_level_1
0,9.059552
1,39.96798
2,29.201092
3,21.46808
4,16.881143


In [50]:
# Median daily pallet total for each cluster, held as a one-column DataFrame
median_pallets_per_day = pd.DataFrame(data=df_pallets_by_day.median(axis='columns'))

median_pallets_per_day

Unnamed: 0_level_0,0
Cluster Label,Unnamed: 1_level_1
0,8.538113
1,36.902561
2,27.406309
3,20.882298
4,16.235939


In [51]:
# Largest daily pallet total observed for each cluster, held as a one-column DataFrame
max_pallets_per_day = pd.DataFrame(data=df_pallets_by_day.max(axis='columns'))

max_pallets_per_day

Unnamed: 0_level_0,0
Cluster Label,Unnamed: 1_level_1
0,16.514156
1,64.183745
2,59.148193
3,36.069616
4,26.996616


In [20]:
# Number of orders per cluster (rows) on each dispatch date (columns).
df_orders_by_day = pd.pivot_table(
    kite_nonDX,
    values='OrderID',
    index='Cluster Label',
    columns='DateDespatched',
    aggfunc='count',
)

# Show the pivoted table
df_orders_by_day

DateDespatched,2023-03-27,2023-03-28,2023-03-29,2023-03-30,2023-03-31,2023-04-03,2023-04-04,2023-04-05,2023-04-06,2023-04-11,2023-04-12,2023-04-13,2023-04-14,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21
Cluster Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,9,9,17,8,15,13,17,14,14,9,12,13,11,13,16,17,13,11
1,32,54,37,40,30,53,49,39,37,51,54,46,36,44,61,54,42,41
2,42,35,26,23,36,37,39,42,26,34,46,40,23,51,38,48,36,25
3,18,18,29,28,17,36,28,30,31,27,37,19,14,42,35,27,25,19
4,21,24,22,23,21,23,18,16,18,32,23,18,13,26,29,20,16,16


In [21]:
# Smallest daily order count observed for each cluster
min_order_per_day = df_orders_by_day.min(axis='columns')

min_order_per_day

Cluster Label
0     8
1    30
2    23
3    14
4    13
dtype: int64

In [22]:
# Mean daily order count for each cluster
avg_order_per_day = df_orders_by_day.mean(axis='columns')

avg_order_per_day

Cluster Label
0    12.833333
1    44.444444
2    35.944444
3    26.666667
4    21.055556
dtype: float64

In [23]:
# Median daily order count for each cluster
median_order_per_day = df_orders_by_day.median(axis='columns')

median_order_per_day

Cluster Label
0    13.0
1    43.0
2    36.5
3    27.5
4    21.0
dtype: float64

In [24]:
# Largest daily order count observed for each cluster
max_order_per_day = df_orders_by_day.max(axis='columns')

max_order_per_day

Cluster Label
0    17
1    61
2    51
3    42
4    32
dtype: int64

### Work out optimal truck capacity for daily minimum of pallets

In [25]:
import math

# Pallet capacity of each vehicle type
pallet_capacity_3_5_tonne_van = 2
pallet_capacity_7_5_tonne_LGV = 14
pallet_capacity_18_tonne_HGV = 24
# NOTE(review): not referenced by any cell visible here - confirm whether it is still needed
fill_capacity_percent = 1

# One row per cluster: the minimum daily order and pallet figures, plus a vehicle-count
# column for every vehicle type. The counts start at zero and are filled in afterwards.
df_trucks_needed_min = pd.DataFrame(
    {
        'Minimum_Orders_Day': min_order_per_day,
        'Minimum_Pallets_Day': min_pallets_per_day,
        '3.5-tonne Van (2)': 0,
        '7.5-tonne LGV (14)': 0,
        '18-tonne HGV (24)': 0,
    },
    index=min_pallets_per_day.index,
)

df_trucks_needed_min

Unnamed: 0_level_0,Minimum_Orders_Day,Minimum_Pallets_Day,3.5-tonne Van (2),7.5-tonne LGV (14),18-tonne HGV (24)
Cluster Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,8,4.072352,0,0,0
1,30,19.234192,0,0,0
2,23,18.132844,0,0,0
3,14,9.401686,0,0,0
4,13,8.970153,0,0,0


In [26]:
# Assign each cluster the vehicle(s) needed to carry its minimum daily pallet load.
# Vehicle types ordered from smallest to largest: (count column, pallet capacity)
vehicle_types = [
    ('3.5-tonne Van (2)', pallet_capacity_3_5_tonne_van),
    ('7.5-tonne LGV (14)', pallet_capacity_7_5_tonne_LGV),
    ('18-tonne HGV (24)', pallet_capacity_18_tonne_HGV),
]

# Only the real cluster rows: a 'Total' row left behind by an earlier run of this cell
# is ignored, so re-running the cell never double-counts it.
cluster_labels = df_trucks_needed_min.drop(index='Total', errors='ignore').index

for cluster in cluster_labels:

    # Pallets that have to be moved for this cluster
    pallets_needed = df_trucks_needed_min.loc[cluster, 'Minimum_Pallets_Day']

    # Start from a clean slate so a re-run cannot leave stale vehicle counts behind
    for column, _ in vehicle_types:
        df_trucks_needed_min.loc[cluster, column] = 0

    # No pallet figure available for this cluster: leave every count at zero
    if pd.isna(pallets_needed):
        continue

    # Pick the smallest single vehicle that can take the whole load
    for column, capacity in vehicle_types:
        if pallets_needed <= capacity:
            df_trucks_needed_min.loc[cluster, column] = 1
            break
    else:
        # The load exceeds the largest vehicle. Previously such a cluster silently
        # received no vehicle at all; now it gets as many of the largest vehicle
        # as are needed to carry the load.
        largest_column, largest_capacity = vehicle_types[-1]
        df_trucks_needed_min.loc[cluster, largest_column] = math.ceil(pallets_needed / largest_capacity)

# Add (or refresh) a total row that sums every column over the cluster rows only
df_trucks_needed_min.loc['Total'] = df_trucks_needed_min.loc[cluster_labels].sum()
# Display the vehicle type and number of vehicles needed for each cluster's daily minimum
df_trucks_needed_min

Unnamed: 0_level_0,Minimum_Orders_Day,Minimum_Pallets_Day,3.5-tonne Van (2),7.5-tonne LGV (14),18-tonne HGV (24)
Cluster Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,8.0,4.072352,0.0,1.0,0.0
1,30.0,19.234192,0.0,0.0,1.0
2,23.0,18.132844,0.0,0.0,1.0
3,14.0,9.401686,0.0,1.0,0.0
4,13.0,8.970153,0.0,1.0,0.0
Total,88.0,59.811228,0.0,3.0,2.0


## Cost analysis based on the daily minimum number of pallets in each cluster

In [27]:
# Create a new DataFrame for the cost calculation.
# .copy() makes it independent of df_trucks_needed_min; a plain assignment would only
# alias it, and the cost columns added below would then also appear on the allocation table.
standing_costs = df_trucks_needed_min.copy()
# Placeholder columns for the standing cost of each vehicle type (filled in later)
standing_costs['3.5_tonne_standing_costs'] = 0
standing_costs['7.5_tonne_standing_costs'] = 0
standing_costs['18_tonne_standing_costs'] = 0
standing_costs

Unnamed: 0_level_0,Minimum_Orders_Day,Minimum_Pallets_Day,3.5-tonne Van (2),7.5-tonne LGV (14),18-tonne HGV (24),3.5_tonne_standing_costs,7.5_tonne_standing_costs,18_tonne_standing_costs
Cluster Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,8.0,4.072352,0.0,1.0,0.0,0,0,0
1,30.0,19.234192,0.0,0.0,1.0,0,0,0
2,23.0,18.132844,0.0,0.0,1.0,0,0,0
3,14.0,9.401686,0.0,1.0,0.0,0,0,0
4,13.0,8.970153,0.0,1.0,0.0,0,0,0
Total,88.0,59.811228,0.0,3.0,2.0,0,0,0


In [28]:
# Standing cost of one vehicle of each type, smallest to largest
# (presumably a monthly figure, given the later 'Total_costs_month' column - TODO confirm)
(
    standing_costs_3_5_tonne_van,
    standing_costs_7_5_tonne_LGV,
    standing_costs_18_tonne_HGV,
) = (600, 1250, 1800)


In [29]:
# Standing cost per row = number of vehicles of a type x standing cost of one such vehicle
standing_costs['3.5_tonne_standing_costs'] = standing_costs['3.5-tonne Van (2)'].mul(standing_costs_3_5_tonne_van)
standing_costs['7.5_tonne_standing_costs'] = standing_costs['7.5-tonne LGV (14)'].mul(standing_costs_7_5_tonne_LGV)
standing_costs['18_tonne_standing_costs'] = standing_costs['18-tonne HGV (24)'].mul(standing_costs_18_tonne_HGV)

# Show the updated table
standing_costs

Unnamed: 0_level_0,Minimum_Orders_Day,Minimum_Pallets_Day,3.5-tonne Van (2),7.5-tonne LGV (14),18-tonne HGV (24),3.5_tonne_standing_costs,7.5_tonne_standing_costs,18_tonne_standing_costs
Cluster Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,8.0,4.072352,0.0,1.0,0.0,0.0,1250.0,0.0
1,30.0,19.234192,0.0,0.0,1.0,0.0,0.0,1800.0
2,23.0,18.132844,0.0,0.0,1.0,0.0,0.0,1800.0
3,14.0,9.401686,0.0,1.0,0.0,0.0,1250.0,0.0
4,13.0,8.970153,0.0,1.0,0.0,0.0,1250.0,0.0
Total,88.0,59.811228,0.0,3.0,2.0,0.0,3750.0,3600.0


In [30]:
# Replicate the standing-cost table and add columns for the running costs per vehicle type.
# .copy() produces a real replica; a plain assignment would only alias standing_costs,
# and the running-cost columns would then silently be added to it as well.
running_costs = standing_costs.copy()
# Placeholder columns for the running cost of each vehicle type (filled in later)
running_costs['3.5_tonne_running_costs'] = 0
running_costs['7.5_tonne_running_costs'] = 0
running_costs['18_tonne_running_costs'] = 0

# View DataFrame
running_costs

Unnamed: 0_level_0,Minimum_Orders_Day,Minimum_Pallets_Day,3.5-tonne Van (2),7.5-tonne LGV (14),18-tonne HGV (24),3.5_tonne_standing_costs,7.5_tonne_standing_costs,18_tonne_standing_costs,3.5_tonne_running_costs,7.5_tonne_running_costs,18_tonne_running_costs
Cluster Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,8.0,4.072352,0.0,1.0,0.0,0.0,1250.0,0.0,0,0,0
1,30.0,19.234192,0.0,0.0,1.0,0.0,0.0,1800.0,0,0,0
2,23.0,18.132844,0.0,0.0,1.0,0.0,0.0,1800.0,0,0,0
3,14.0,9.401686,0.0,1.0,0.0,0.0,1250.0,0.0,0,0,0
4,13.0,8.970153,0.0,1.0,0.0,0.0,1250.0,0.0,0,0,0
Total,88.0,59.811228,0.0,3.0,2.0,0.0,3750.0,3600.0,0,0,0


In [31]:
# Running cost of one vehicle of each type, smallest to largest
# (presumably a monthly figure, given the later 'Total_costs_month' column - TODO confirm)
(
    running_costs_3_5_tonne_van,
    running_costs_7_5_tonne_LGV,
    running_costs_18_tonne_HGV,
) = (2800, 3200, 3600)

In [32]:
# Running cost per row = number of vehicles of a type x running cost of one such vehicle
running_costs['3.5_tonne_running_costs'] = running_costs['3.5-tonne Van (2)'].mul(running_costs_3_5_tonne_van)
running_costs['7.5_tonne_running_costs'] = running_costs['7.5-tonne LGV (14)'].mul(running_costs_7_5_tonne_LGV)
running_costs['18_tonne_running_costs'] = running_costs['18-tonne HGV (24)'].mul(running_costs_18_tonne_HGV)

# Show the updated table
running_costs

Unnamed: 0_level_0,Minimum_Orders_Day,Minimum_Pallets_Day,3.5-tonne Van (2),7.5-tonne LGV (14),18-tonne HGV (24),3.5_tonne_standing_costs,7.5_tonne_standing_costs,18_tonne_standing_costs,3.5_tonne_running_costs,7.5_tonne_running_costs,18_tonne_running_costs
Cluster Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,8.0,4.072352,0.0,1.0,0.0,0.0,1250.0,0.0,0.0,3200.0,0.0
1,30.0,19.234192,0.0,0.0,1.0,0.0,0.0,1800.0,0.0,0.0,3600.0
2,23.0,18.132844,0.0,0.0,1.0,0.0,0.0,1800.0,0.0,0.0,3600.0
3,14.0,9.401686,0.0,1.0,0.0,0.0,1250.0,0.0,0.0,3200.0,0.0
4,13.0,8.970153,0.0,1.0,0.0,0.0,1250.0,0.0,0.0,3200.0,0.0
Total,88.0,59.811228,0.0,3.0,2.0,0.0,3750.0,3600.0,0.0,9600.0,7200.0


In [33]:
running_costs.shape

(6, 11)

In [34]:
# Add a column with the total cost per cluster: standing plus running costs of every
# vehicle type.
# The columns are selected by name. The previous positional slice iloc[:, 6:11] started
# one column too late and left '3.5_tonne_standing_costs' (position 5) out of the total,
# which would understate the cost as soon as any cluster is allocated a 3.5-tonne van.
cost_columns = [
    '3.5_tonne_standing_costs',
    '7.5_tonne_standing_costs',
    '18_tonne_standing_costs',
    '3.5_tonne_running_costs',
    '7.5_tonne_running_costs',
    '18_tonne_running_costs',
]
running_costs['Total_costs_month'] = running_costs[cost_columns].sum(axis=1)
running_costs

Unnamed: 0_level_0,Minimum_Orders_Day,Minimum_Pallets_Day,3.5-tonne Van (2),7.5-tonne LGV (14),18-tonne HGV (24),3.5_tonne_standing_costs,7.5_tonne_standing_costs,18_tonne_standing_costs,3.5_tonne_running_costs,7.5_tonne_running_costs,18_tonne_running_costs,Total_costs_month
Cluster Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,8.0,4.072352,0.0,1.0,0.0,0.0,1250.0,0.0,0.0,3200.0,0.0,4450.0
1,30.0,19.234192,0.0,0.0,1.0,0.0,0.0,1800.0,0.0,0.0,3600.0,5400.0
2,23.0,18.132844,0.0,0.0,1.0,0.0,0.0,1800.0,0.0,0.0,3600.0,5400.0
3,14.0,9.401686,0.0,1.0,0.0,0.0,1250.0,0.0,0.0,3200.0,0.0,4450.0
4,13.0,8.970153,0.0,1.0,0.0,0.0,1250.0,0.0,0.0,3200.0,0.0,4450.0
Total,88.0,59.811228,0.0,3.0,2.0,0.0,3750.0,3600.0,0.0,9600.0,7200.0,24150.0


In [35]:
running_costs.shape

(6, 12)

In [46]:
# Current situation: total pallets delivered and total delivery cost for each cluster
pallet_cost_now = (
    kite_nonDX
    .groupby('Cluster Label')[['Pallets','DeliveryCost']]
    .sum()
    .reset_index()
)
pallet_cost_now

Unnamed: 0,Cluster Label,Pallets,DeliveryCost
0,0,163.071942,7736.97
1,1,719.423636,31053.9
2,2,525.619664,24810.45
3,3,386.425434,16275.18
4,4,303.86058,12971.47


In [37]:
# Pull out the minimum pallets per day per cluster used in the analysis
# (a one-column DataFrame, still including the 'Total' row)
pallet_minimum = running_costs['Minimum_Pallets_Day'].to_frame()
pallet_minimum

Unnamed: 0_level_0,Minimum_Pallets_Day
Cluster Label,Unnamed: 1_level_1
0,4.072352
1,19.234192
2,18.132844
3,9.401686
4,8.970153
Total,59.811228


In [47]:
# Scale the daily minimum up to a month: 20 = number of standard working days in a month
pallet_minimum['Minimum_Pallets_Month'] = pallet_minimum['Minimum_Pallets_Day'].mul(20)
pallet_minimum

Unnamed: 0_level_0,Minimum_Pallets_Day,Minimum_Pallets_Month
Cluster Label,Unnamed: 1_level_1,Unnamed: 2_level_1
0,4.072352,81.447049
1,19.234192,384.683846
2,18.132844,362.656887
3,9.401686,188.033714
4,8.970153,179.403065
Total,59.811228,1196.224561


In [48]:
# Put the monthly minimum pallet volume next to the current totals for each cluster
# (left join on 'Cluster Label', so only clusters present in pallet_cost_now are kept)
comparison = pallet_cost_now.merge(
    pallet_minimum['Minimum_Pallets_Month'],
    how='left',
    on='Cluster Label',
)
comparison

Unnamed: 0,Cluster Label,Pallets,DeliveryCost,Minimum_Pallets_Month
0,0,163.071942,7736.97,81.447049
1,1,719.423636,31053.9,384.683846
2,2,525.619664,24810.45,362.656887
3,3,386.425434,16275.18,188.033714
4,4,303.86058,12971.47,179.403065


In [40]:
# Add the total monthly cost per cluster, based on the vehicles needed to carry the
# minimum daily pallet volume.
# The cost is looked up through the 'Cluster Label' column. A direct assignment aligns on
# comparison's row numbers, which match the cluster labels only by coincidence and would
# attach the wrong costs (or NaN) if the rows were reordered or the labels were not 0..n-1.
comparison['total_costs_minimum'] = comparison['Cluster Label'].map(running_costs['Total_costs_month'])

In [41]:
# Express the minimum-volume scenario as a percentage of the current totals:
# share of pallets covered, and vehicle cost relative to the current delivery cost
comparison['min_pallets_%'] = comparison['Minimum_Pallets_Month'].div(comparison['Pallets']).mul(100)
comparison['min_cost_%'] = comparison['total_costs_minimum'].div(comparison['DeliveryCost']).mul(100)

In [42]:
# View DataFrame
comparison

Unnamed: 0,Cluster Label,Pallets,DeliveryCost,Minimum_Pallets_Month,total_costs_minimum,min_pallets_%,min_cost_%
0,0,163.071942,7736.97,81.447049,4450.0,49.945471,57.516056
1,1,719.423636,31053.9,384.683846,5400.0,53.471116,17.38912
2,2,525.619664,24810.45,362.656887,5400.0,68.996065,21.765022
3,3,386.425434,16275.18,188.033714,4450.0,48.659766,27.342248
4,4,303.86058,12971.47,179.403065,4450.0,59.041244,34.306058
