# Optimal Truck Packing Model

# 

### Importing Necessary Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Suppressing warnings
import warnings  
warnings.filterwarnings('ignore')

# 

### Loading and Inspecting the Data

In [2]:
# Importing the CSV file into a DataFrame
kite = pd.read_csv('clustered_data.csv', index_col = 0)

# Viewing the DataFrame
kite.head()

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,latitude,longitude,Cluster Label
0,26.84,2295440,7700,2023-04-18,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,52.496759,-2.173682,1
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,52.496759,-2.173682,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,52.496759,-2.173682,1
3,26.84,2275231,483789,2023-03-27,DX Express,76.95,7.22,2023-03-27,6.81,17.77,0.0402,0.030342,766,DY6,52.496759,-2.173682,1
4,26.84,2285334,483789,2023-04-05,DX Express,147.3,0.0,2023-04-05,11.46,34.7,0.100552,0.052361,766,DY6,52.496759,-2.173682,1


In [3]:
# Checking the shape of the DataFrame
kite.shape

(4119, 17)

In [4]:
# Exploring the metadata
kite.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4119 entries, 0 to 4118
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   distance        4119 non-null   float64
 1   OrderID         4119 non-null   int64  
 2   WebsiteID       4119 non-null   int64  
 3   OrderDate       4119 non-null   object 
 4   Courier         4119 non-null   object 
 5   ProductsExVAT   4119 non-null   float64
 6   DeliveryExVAT   4119 non-null   float64
 7   DateDespatched  4119 non-null   object 
 8   DeliveryCost    4119 non-null   float64
 9   Weight          4119 non-null   float64
 10  Volume (m3)     4119 non-null   float64
 11  Pallets         4119 non-null   float64
 12  id              4119 non-null   int64  
 13  Outward Code    4119 non-null   object 
 14  latitude        4119 non-null   float64
 15  longitude       4119 non-null   float64
 16  Cluster Label   4119 non-null   int64  
dtypes: float64(9), int64(4), object(4

In [5]:
# Changing the data types of OrderDate and DateDespatched from string to datetime
# The format argument describes how the dates are written in the CSV (yyyy-mm-dd)
kite['OrderDate'] = pd.to_datetime(kite['OrderDate'], format='%Y-%m-%d')
kite['DateDespatched'] = pd.to_datetime(kite['DateDespatched'], format='%Y-%m-%d')

# Checking this has worked
# info() prints its report itself and returns None, so it is not wrapped in print()
kite.info()
kite.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4119 entries, 0 to 4118
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   distance        4119 non-null   float64       
 1   OrderID         4119 non-null   int64         
 2   WebsiteID       4119 non-null   int64         
 3   OrderDate       4119 non-null   datetime64[ns]
 4   Courier         4119 non-null   object        
 5   ProductsExVAT   4119 non-null   float64       
 6   DeliveryExVAT   4119 non-null   float64       
 7   DateDespatched  4119 non-null   datetime64[ns]
 8   DeliveryCost    4119 non-null   float64       
 9   Weight          4119 non-null   float64       
 10  Volume (m3)     4119 non-null   float64       
 11  Pallets         4119 non-null   float64       
 12  id              4119 non-null   int64         
 13  Outward Code    4119 non-null   object        
 14  latitude        4119 non-null   float64       
 15  long

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,latitude,longitude,Cluster Label
0,26.84,2295440,7700,2023-04-18,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,52.496759,-2.173682,1
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,52.496759,-2.173682,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,52.496759,-2.173682,1
3,26.84,2275231,483789,2023-03-27,DX Express,76.95,7.22,2023-03-27,6.81,17.77,0.0402,0.030342,766,DY6,52.496759,-2.173682,1
4,26.84,2285334,483789,2023-04-05,DX Express,147.3,0.0,2023-04-05,11.46,34.7,0.100552,0.052361,766,DY6,52.496759,-2.173682,1


# 

### Only Looking at Working Days

In [6]:
# Creating a column to identify the day of the week that the order was dispatched
# (Monday = 0 ... Sunday = 6); the vectorised .dt accessor avoids a row-by-row apply
kite['weekday'] = kite['DateDespatched'].dt.weekday

# Removing orders from the DataFrame that were dispatched on a Saturday (5) or Sunday (6)
kite_wd = kite.loc[~kite['weekday'].isin([5, 6])]

# Checking this has worked
kite_wd['weekday'].unique()

array([1, 3, 0, 2, 4], dtype=int64)

In [7]:
# Dropping the orders dispatched on the Easter bank holidays (7th and 10th April)
kite_wd = kite_wd.loc[~kite_wd['DateDespatched'].isin([pd.Timestamp('2023-04-07'), pd.Timestamp('2023-04-10')])]

# Dropping the orders dispatched on 24th April, as the volume that day looks unusually low
kite_wd = kite_wd.loc[kite_wd['DateDespatched'] != pd.Timestamp('2023-04-24')]

# Confirming which dispatch dates are left
kite_wd['DateDespatched'].unique()

array(['2023-04-18T00:00:00.000000000', '2023-03-28T00:00:00.000000000',
       '2023-03-30T00:00:00.000000000', '2023-03-27T00:00:00.000000000',
       '2023-04-05T00:00:00.000000000', '2023-04-14T00:00:00.000000000',
       '2023-04-21T00:00:00.000000000', '2023-04-04T00:00:00.000000000',
       '2023-04-13T00:00:00.000000000', '2023-04-19T00:00:00.000000000',
       '2023-04-17T00:00:00.000000000', '2023-03-31T00:00:00.000000000',
       '2023-04-11T00:00:00.000000000', '2023-04-20T00:00:00.000000000',
       '2023-04-03T00:00:00.000000000', '2023-03-29T00:00:00.000000000',
       '2023-04-12T00:00:00.000000000', '2023-04-06T00:00:00.000000000'],
      dtype='datetime64[ns]')

In [8]:
# Viewing the DataFrame
kite_wd.head()

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,latitude,longitude,Cluster Label,weekday
0,26.84,2295440,7700,2023-04-18,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,52.496759,-2.173682,1,1
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,52.496759,-2.173682,1,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,52.496759,-2.173682,1,3
3,26.84,2275231,483789,2023-03-27,DX Express,76.95,7.22,2023-03-27,6.81,17.77,0.0402,0.030342,766,DY6,52.496759,-2.173682,1,0
4,26.84,2285334,483789,2023-04-05,DX Express,147.3,0.0,2023-04-05,11.46,34.7,0.100552,0.052361,766,DY6,52.496759,-2.173682,1,2


In [9]:
# Checking the shape of the DataFrame after removing weekends and the excluded dates
kite_wd.shape

(4068, 18)

# 

### Only Looking to Replace DX Freight, MFS, and Split

In [10]:
# Keeping every order except those sent with the DX Express courier
kite_wd = kite_wd[kite_wd['Courier'] != "DX Express"]

# Confirming which couriers are left
kite_wd['Courier'].unique()

array(['MFS', 'DX Freight', 'Split'], dtype=object)

In [11]:
# Viewing the DataFrame
kite_wd.head()

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,latitude,longitude,Cluster Label,weekday
0,26.84,2295440,7700,2023-04-18,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,52.496759,-2.173682,1,1
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,52.496759,-2.173682,1,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,52.496759,-2.173682,1,3
5,26.84,2292112,778040,2023-04-14,DX Freight,104.0,0.0,2023-04-14,20.55,44.2,0.1235,0.054167,766,DY6,52.496759,-2.173682,1,4
6,26.84,2298627,846996,2023-04-21,DX Freight,111.36,0.0,2023-04-21,23.25,51.12,0.1296,0.1,766,DY6,52.496759,-2.173682,1,4


In [12]:
# Checking the shape of the DataFrame after removing the DX Express orders
kite_wd.shape

(2537, 18)

# 

### OPTIMAL TRUCK PACKING MODEL 

**An example of how this model/function works:**

Say you have a day when there are 40 orders in a cluster and you have one 18-tonne truck assigned to that cluster. This function helps you to decide the 25 orders that should go in the 18-tonne truck so that it is filled optimally and we are left with the minimum number of pallets in the leftover orders. This would be beneficial to Kite as it would mean that they have fewer pallets to hand over to third-party couriers (potential cost saving). It could also be useful to Kite as it could help them to decide what trucks they should acquire next.

**There are limitations to this model/function:**
- To find the true optimum would be a very slow process. This model/function is a sped-up version that looks at only a handful of the possible combinations of 25 orders (each run of 25 consecutive orders in the day's list, wrapping around from the end of the list back to the start) and picks the best one out of them.
- This model/function does not produce a packing when there are 25 or fewer orders but they cannot all fit in the specified truck; it only prints a message.
- Similarly, this model/function is not useful when the number of orders exceeds 25 but they would all fit on the specified truck anyway.

In [13]:
# Function to identify the orders that should be packed onto a truck and the proportion of the truck that they occupy
def what_orders_in_truck(cluster, date, truck):
    """Pick the run of 25 orders that best fills a truck for one cluster on one day.

    The orders dispatched on `date` in `cluster` are read from the notebook-level
    DataFrame `kite_wd` and treated as a circular list. Every window of 25
    consecutive orders (wrapping from the end of the list back to the start) is
    scored by the share of the truck's pallet capacity it uses. The fullest
    window that does not exceed the capacity wins; ties go to the earliest
    starting position.

    Parameters
    ----------
    cluster
        Value compared with the 'Cluster Label' column of `kite_wd`.
    date
        Value compared with the 'DateDespatched' column of `kite_wd`.
    truck
        Truck size: 3.5, 7.5 or 18 (pallet capacities 2, 14 and 24).

    Returns
    -------
    tuple or None
        `(order_ids, proportion)`, where `order_ids` is the list of the 25
        selected 'OrderID' values and `proportion` is their total pallets
        divided by the truck's pallet capacity. A message is printed and None
        is returned when the truck size is not recognised, when there are 25
        or fewer orders, or when no window of 25 orders fits in the truck.
    """

    # Subsetting the data to only look at the specific date and cluster
    # Remembering to reset the index
    kite_sub = kite_wd[(kite_wd['DateDespatched'] == date) & (kite_wd['Cluster Label'] == cluster)].reset_index(drop=True)

    # Defining the maximum number of orders a single truck can take in a day
    max_orders = 25

    # Defining the maximum number of pallets that each truck size can take
    pallet_capacity = {3.5: 2, 7.5: 14, 18: 24}
    if truck not in pallet_capacity:
        print("Truck input is not valid.")
        return
    max_pallets = pallet_capacity[truck]

    n_orders = len(kite_sub)
    pallets = kite_sub['Pallets'].tolist()

    # With 25 or fewer orders there is nothing to choose between: either everything fits or it does not
    if n_orders <= max_orders:
        if sum(pallets) <= max_pallets:
            print("All orders should go in the truck.")
        else:
            print("Either a different sized truck is needed, or less than 25 orders can be placed on the truck.")
        return

    # Positions of the 25 consecutive orders starting at `start`, wrapping around the end of the list
    def window_positions(start):
        return [(start + offset) % n_orders for offset in range(max_orders)]

    # Truck-capacity-proportion of every window of 25 orders.
    # Every window is divided by the capacity of the requested truck.
    pallets_in_truck = [
        sum(pallets[position] for position in window_positions(start)) / max_pallets
        for start in range(n_orders)
    ]

    # Keeping only the windows that do not exceed the capacity of the truck
    feasible = [proportion for proportion in pallets_in_truck if proportion <= 1]
    if not feasible:
        print("According to this model, 25 orders will not fit in this truck.")
        return

    # Finding the best truck-capacity-proportion and the first window that achieves it
    best_proportion = max(feasible)
    best_start = pallets_in_truck.index(best_proportion)

    # Returning the orders that should go in the truck and the proportion of the truck that they occupy
    order_ids = kite_sub['OrderID'].tolist()
    return [order_ids[position] for position in window_positions(best_start)], best_proportion

# 

### TRYING OUT THE MODEL/FUNCTION

In [14]:
# Cluster 1, 30th March 2023, 18-tonne truck
what_orders_in_truck(1, "2023-03-30", 18)

([2279297,
  2279447,
  2279799,
  2280166,
  2279524,
  2279451,
  2279606,
  2279599,
  2279839,
  2279513,
  2279778,
  2279858,
  2279715,
  2279319,
  2279914,
  2279172,
  2280228,
  2279687,
  2279161,
  2278994,
  2279835,
  2279287,
  2279198,
  2280132,
  2279462],
 0.9741935945635979)

In [15]:
# Cluster 1, 27th March 2023, 18-tonne truck
what_orders_in_truck(1, "2023-03-27", 18)

([2276141,
  2275372,
  2276085,
  2275990,
  2275590,
  2275973,
  2276092,
  2275488,
  2275821,
  2275657,
  2275384,
  2275823,
  2275352,
  2275288,
  2275956,
  2275636,
  2275473,
  2275353,
  2275427,
  2275214,
  2275395,
  2275807,
  2276199,
  2275289,
  2275327],
 0.9743602235338497)

# 

### NUMBER OF PALLETS LEFT OVER EACH DAY IN CLUSTER 1

In [16]:
# Counting the orders per cluster (rows) for each dispatch date (columns)
df_orders_by_day = kite_wd.pivot_table(
    values='OrderID',
    index='Cluster Label',
    columns='DateDespatched',
    aggfunc='count',
)

# Showing the resulting table
df_orders_by_day

DateDespatched,2023-03-27,2023-03-28,2023-03-29,2023-03-30,2023-03-31,2023-04-03,2023-04-04,2023-04-05,2023-04-06,2023-04-11,2023-04-12,2023-04-13,2023-04-14,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21
Cluster Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,9,9,17,8,15,13,17,14,14,9,12,13,11,13,16,17,13,11
1,32,54,37,40,30,53,49,39,37,51,54,46,36,44,61,54,42,41
2,42,35,26,23,36,37,39,42,26,34,46,40,23,51,38,48,36,25
3,18,18,29,28,17,36,28,30,31,27,37,19,14,42,35,27,25,19
4,21,24,22,23,21,23,18,16,18,32,23,18,13,26,29,20,16,16


In [17]:
# Creating a zero-filled DataFrame to store the relevant information for each day:
# one column per dispatch date and one row per metric.
# Building it directly from the date columns avoids depending on how many clusters
# (rows) df_orders_by_day has, and the float zeros match the float values stored later.
pallets_capacity = pd.DataFrame(
    0.0,
    index=['Proportion of Truck Capacity', 'Orders Remaining', 'Pallets Remaining'],
    columns=df_orders_by_day.columns,
)
pallets_capacity

DateDespatched,2023-03-27,2023-03-28,2023-03-29,2023-03-30,2023-03-31,2023-04-03,2023-04-04,2023-04-05,2023-04-06,2023-04-11,2023-04-12,2023-04-13,2023-04-14,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21
Proportion of Truck Capacity,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Orders Remaining,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Pallets Remaining,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [18]:
# Populating the DataFrame with the relevant information for cluster 1 and an 18-tonne truck
for despatch_date in kite_wd['DateDespatched'].unique():
    # Running the packing model once per day and reusing its result
    packing = what_orders_in_truck(1, despatch_date, 18)

    # The model returns None when it does not select 25 orders for the day;
    # recording the day as missing rather than failing on the indexing below
    if packing is None:
        pallets_capacity.loc[:, despatch_date] = np.nan
        continue
    orders_in_truck, proportion = packing

    # The orders for this day and cluster that were not selected for the truck
    kite_sub = kite_wd[(kite_wd['DateDespatched'] == despatch_date) & (kite_wd['Cluster Label'] == 1)].reset_index(drop=True)
    remaining_pallets = kite_sub[~kite_sub['OrderID'].isin(orders_in_truck)]

    pallets_capacity.loc['Proportion of Truck Capacity', despatch_date] = proportion
    pallets_capacity.loc['Orders Remaining', despatch_date] = df_orders_by_day.loc[1, despatch_date] - len(orders_in_truck)
    pallets_capacity.loc['Pallets Remaining', despatch_date] = sum(remaining_pallets['Pallets'])

# Viewing the DataFrame
pallets_capacity

DateDespatched,2023-03-27,2023-03-28,2023-03-29,2023-03-30,2023-03-31,2023-04-03,2023-04-04,2023-04-05,2023-04-06,2023-04-11,2023-04-12,2023-04-13,2023-04-14,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21
Proportion of Truck Capacity,0.97436,0.803879,0.872105,0.974194,0.941642,0.978242,0.950034,0.764928,0.975297,0.952076,0.903522,0.916081,0.992985,0.874209,0.682473,0.708283,0.596801,0.995204
Orders Remaining,7.0,29.0,12.0,15.0,5.0,28.0,24.0,14.0,12.0,26.0,29.0,21.0,11.0,19.0,36.0,29.0,17.0,16.0
Pallets Remaining,14.86746,10.80484,4.944891,7.223973,12.150546,28.612832,36.273052,19.587303,12.452431,41.333921,16.523431,39.031192,16.779019,5.584353,44.140453,12.745349,4.910963,10.906062
