# Truck Analysis

# 

### Importing Neccesary Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Suppressing warnings
import warnings  
warnings.filterwarnings('ignore')

# 

### Loading and Inspecting the Data

In [2]:
# Importing the CSV file into a DataFrame
kite = pd.read_csv('clustered_data.csv', index_col = 0)

# Viewing the DataFrame
kite.head()

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,Country,latitude,longitude,Cluster Label
0,26.84,2295440,7700,18/04/2023,MFS,375.4,0.0,18/04/2023,34.46,110.0,0.225,0.2,766,DY6,United Kingdom,52.496759,-2.173682,1
1,26.84,2277415,67940,28/03/2023,DX Freight,236.61,0.0,28/03/2023,16.45,33.3,0.959,0.473039,766,DY6,United Kingdom,52.496759,-2.173682,1
2,26.84,2279524,67940,30/03/2023,DX Freight,70.58,13.2,30/03/2023,11.79,21.8,0.2091,0.222222,766,DY6,United Kingdom,52.496759,-2.173682,1
3,26.84,2275231,483789,27/03/2023,DX Express,76.95,7.22,27/03/2023,6.81,17.77,0.0402,0.030342,766,DY6,United Kingdom,52.496759,-2.173682,1
4,26.84,2285334,483789,05/04/2023,DX Express,147.3,0.0,05/04/2023,11.46,34.7,0.100552,0.052361,766,DY6,United Kingdom,52.496759,-2.173682,1


In [3]:
# Checking the shape of the DataFrame
kite.shape

(4119, 18)

In [4]:
# Exploring the metadata
kite.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4119 entries, 0 to 4118
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   distance        4119 non-null   float64
 1   OrderID         4119 non-null   int64  
 2   WebsiteID       4119 non-null   int64  
 3   OrderDate       4119 non-null   object 
 4   Courier         4119 non-null   object 
 5   ProductsExVAT   4119 non-null   float64
 6   DeliveryExVAT   4119 non-null   float64
 7   DateDespatched  4119 non-null   object 
 8   DeliveryCost    4119 non-null   float64
 9   Weight          4119 non-null   float64
 10  Volume (m3)     4119 non-null   float64
 11  Pallets         4119 non-null   float64
 12  id              4119 non-null   int64  
 13  Outward Code    4119 non-null   object 
 14  Country         4119 non-null   object 
 15  latitude        4119 non-null   float64
 16  longitude       4119 non-null   float64
 17  Cluster Label   4119 non-null   i

In [5]:
# Changing the data types of OrderDate and DateDespatched to datetime
# Formatting the dates so they are yyyy/mm/dd
kite['OrderDate'] = pd.to_datetime(kite['OrderDate'], format = '%d/%m/%Y')
kite['DateDespatched'] = pd.to_datetime(kite['DateDespatched'], format = '%d/%m/%Y')

# Checking this has worked
print(kite.info())
kite.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4119 entries, 0 to 4118
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   distance        4119 non-null   float64       
 1   OrderID         4119 non-null   int64         
 2   WebsiteID       4119 non-null   int64         
 3   OrderDate       4119 non-null   datetime64[ns]
 4   Courier         4119 non-null   object        
 5   ProductsExVAT   4119 non-null   float64       
 6   DeliveryExVAT   4119 non-null   float64       
 7   DateDespatched  4119 non-null   datetime64[ns]
 8   DeliveryCost    4119 non-null   float64       
 9   Weight          4119 non-null   float64       
 10  Volume (m3)     4119 non-null   float64       
 11  Pallets         4119 non-null   float64       
 12  id              4119 non-null   int64         
 13  Outward Code    4119 non-null   object        
 14  Country         4119 non-null   object        
 15  lati

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,Country,latitude,longitude,Cluster Label
0,26.84,2295440,7700,2023-04-18,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,United Kingdom,52.496759,-2.173682,1
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,United Kingdom,52.496759,-2.173682,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,United Kingdom,52.496759,-2.173682,1
3,26.84,2275231,483789,2023-03-27,DX Express,76.95,7.22,2023-03-27,6.81,17.77,0.0402,0.030342,766,DY6,United Kingdom,52.496759,-2.173682,1
4,26.84,2285334,483789,2023-04-05,DX Express,147.3,0.0,2023-04-05,11.46,34.7,0.100552,0.052361,766,DY6,United Kingdom,52.496759,-2.173682,1


# 

### Only Looking at Working Days

In [6]:
# Creating a column to identify the day of the week that the order was dispatched
kite['weekday'] = kite['DateDespatched'].apply(lambda x: x.weekday())

# Removing orders from the DataFrame that were dispatched on a Saturday (5) or Sunday (6)    
kite_wd = kite.loc[~((kite['weekday'] == 5) | (kite['weekday'] == 6))]

# Checking this has worked
kite_wd['weekday'].unique()

array([1, 3, 0, 2, 4], dtype=int64)

In [7]:
# Removing orders that were dispatched on the Easter bank holidays April 7th & 10th
kite_wd = kite_wd[~kite_wd['DateDespatched'].isin([pd.Timestamp('20230407'), pd.Timestamp('20230410')])]

# Checking this has worked
kite_wd['DateDespatched'].unique()

array(['2023-04-18T00:00:00.000000000', '2023-03-28T00:00:00.000000000',
       '2023-03-30T00:00:00.000000000', '2023-03-27T00:00:00.000000000',
       '2023-04-05T00:00:00.000000000', '2023-04-14T00:00:00.000000000',
       '2023-04-21T00:00:00.000000000', '2023-04-04T00:00:00.000000000',
       '2023-04-13T00:00:00.000000000', '2023-04-19T00:00:00.000000000',
       '2023-04-17T00:00:00.000000000', '2023-03-31T00:00:00.000000000',
       '2023-04-11T00:00:00.000000000', '2023-04-20T00:00:00.000000000',
       '2023-04-03T00:00:00.000000000', '2023-04-24T00:00:00.000000000',
       '2023-03-29T00:00:00.000000000', '2023-04-12T00:00:00.000000000',
       '2023-04-06T00:00:00.000000000'], dtype='datetime64[ns]')

In [8]:
# Viewing the DataFrame
kite_wd.head()

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,Country,latitude,longitude,Cluster Label,weekday
0,26.84,2295440,7700,2023-04-18,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,United Kingdom,52.496759,-2.173682,1,1
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,United Kingdom,52.496759,-2.173682,1,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,United Kingdom,52.496759,-2.173682,1,3
3,26.84,2275231,483789,2023-03-27,DX Express,76.95,7.22,2023-03-27,6.81,17.77,0.0402,0.030342,766,DY6,United Kingdom,52.496759,-2.173682,1,0
4,26.84,2285334,483789,2023-04-05,DX Express,147.3,0.0,2023-04-05,11.46,34.7,0.100552,0.052361,766,DY6,United Kingdom,52.496759,-2.173682,1,2


# 

### Only Looking to Replace DX Freight, MFS, and Split

In [9]:
# Removing DX Express from the DataFrame
kite_wd = kite_wd.loc[~(kite_wd['Courier'] == "DX Express")]

# Checking this has worked
kite_wd['Courier'].unique()

array(['MFS', 'DX Freight', 'Split'], dtype=object)

In [10]:
# Viewing the DataFrame
kite_wd.head()

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,Country,latitude,longitude,Cluster Label,weekday
0,26.84,2295440,7700,2023-04-18,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,United Kingdom,52.496759,-2.173682,1,1
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,United Kingdom,52.496759,-2.173682,1,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,United Kingdom,52.496759,-2.173682,1,3
5,26.84,2292112,778040,2023-04-14,DX Freight,104.0,0.0,2023-04-14,20.55,44.2,0.1235,0.054167,766,DY6,United Kingdom,52.496759,-2.173682,1,4
6,26.84,2298627,846996,2023-04-21,DX Freight,111.36,0.0,2023-04-21,23.25,51.12,0.1296,0.1,766,DY6,United Kingdom,52.496759,-2.173682,1,4


In [11]:
complete = kite_wd.groupby(['Cluster Label','DateDespatched'],as_index=False)['Weight', 'Volume (m3)', 
                                                                         'Pallets', 'DeliveryCost'].sum()
complete

Unnamed: 0,Cluster Label,DateDespatched,Weight,Volume (m3),Pallets,DeliveryCost
0,0,2023-03-27,861.03,7.251909,4.626825,342.98
1,0,2023-03-28,453.91,9.952171,5.520840,211.19
2,0,2023-03-29,2345.64,20.547438,15.878389,685.44
3,0,2023-03-30,652.47,7.028912,4.072352,238.54
4,0,2023-03-31,934.83,13.674806,8.579204,390.70
...,...,...,...,...,...,...
89,4,2023-04-17,1993.01,34.181551,17.558513,798.38
90,4,2023-04-18,3990.24,43.853590,25.580183,1211.00
91,4,2023-04-19,1776.91,23.832307,14.777550,684.56
92,4,2023-04-20,854.24,19.378556,8.970153,367.26


In [12]:
complete.groupby('Cluster Label',as_index = False)[['Weight', 'Volume (m3)','Pallets', 'DeliveryCost']].median()

Unnamed: 0,Cluster Label,Weight,Volume (m3),Pallets,DeliveryCost
0,0,1329.11,12.239129,8.497022,385.91
1,1,6085.22,61.112631,35.859555,1556.93
2,2,4569.78,42.644347,27.114326,1313.77
3,3,2770.1,30.000494,20.095868,966.39
4,4,2102.68,25.224294,16.235939,701.73


In [13]:
kite_wd.groupby('Cluster Label',as_index=False)['DeliveryCost'].sum()

Unnamed: 0,Cluster Label,DeliveryCost
0,0,7840.13
1,1,31299.62
2,2,24884.94
3,3,16318.21
4,4,12971.47


# 

### Only Looking at Cluster 1 (Birmingham)

In [14]:
# Removing DX Express from the DataFrame
#kite_wd = kite_wd.loc[kite_wd['Cluster Label'] == 1]

# Checking this has worked
kite_wd['Cluster Label'].unique()

array([1, 3, 4, 2, 0], dtype=int64)

In [15]:
# Viewing the DataFrame
kite_wd.head()

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,Country,latitude,longitude,Cluster Label,weekday
0,26.84,2295440,7700,2023-04-18,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,United Kingdom,52.496759,-2.173682,1,1
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,United Kingdom,52.496759,-2.173682,1,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,United Kingdom,52.496759,-2.173682,1,3
5,26.84,2292112,778040,2023-04-14,DX Freight,104.0,0.0,2023-04-14,20.55,44.2,0.1235,0.054167,766,DY6,United Kingdom,52.496759,-2.173682,1,4
6,26.84,2298627,846996,2023-04-21,DX Freight,111.36,0.0,2023-04-21,23.25,51.12,0.1296,0.1,766,DY6,United Kingdom,52.496759,-2.173682,1,4


# 

### Writing a Function to Compute the Trucks Needed 

In [16]:
# Creating a function to identify trucks needed based on weight
def trucks_weight(x):
    if x.iloc[0] <= 1500:
        return 3.5
    elif 1500 < x.iloc[0] <= 4000:
        return 7.5
    elif 4000 < x.iloc[0] <= 10000:
        return 18
    else:
        return 1000 

In [17]:
# Creating a function to identify trucks needed based on volume
def trucks_volume(x):
    if dxfreight.iloc[1] <= 2.5:
        return 3.5
    elif 2.5 < dxfreight.iloc[1] <= 25:
        return 7.5
    elif 25 < dxfreight.iloc[1] <= 45:
        return 18
    else:
        return 1000

In [18]:
# Creating a function to identify trucks needed based on pallets
def trucks_pallet(x):
    if dxfreight.iloc[2] <= 2:
        return 3.5
    elif 2 < dxfreight.iloc[2] <= 14:
        return 7.5
    elif 14 < dxfreight.iloc[2] <= 24:
        return 18
    else:
        return 1000

In [19]:
# Creating a function to identify the number of trucks needed based on weight, volume, and pallets
def trucks_needed(x):
    truck = max(trucks_weight(dxfreight), trucks_volume(dxfreight), trucks_pallet(dxfreight))     
    if truck < 1000:
        print("One", truck, "- tonne truck is needed.")
    else:
        print("More investigation is needed.")

# 

### Investigating the Number of Trucks Needed (Median)

In [20]:
# Calculating the median weight, volume, number of pallets, and delivery cost
# for orders carried out by each courier
courier1 = kite_wd.groupby(['Courier'])['Weight', 'Volume (m3)', 'Pallets', 'DeliveryCost'].median()
courier1

Unnamed: 0_level_0,Weight,Volume (m3),Pallets,DeliveryCost
Courier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
DX Freight,42.6,0.393361,0.269886,19.78
MFS,163.84,1.104,1.0,39.15
Split,206.785,2.768509,1.750521,53.86


In [21]:
# Calulating the number of orders carried out by each courier for each day
courier2 = kite_wd.pivot_table(index = 'Courier', 
                               columns = 'DateDespatched', 
                               values = 'OrderID', aggfunc = 'count')
courier2

DateDespatched,2023-03-27,2023-03-28,2023-03-29,2023-03-30,2023-03-31,2023-04-03,2023-04-04,2023-04-05,2023-04-06,2023-04-11,2023-04-12,2023-04-13,2023-04-14,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-24
Courier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
DX Freight,71.0,91.0,86.0,73.0,76.0,108.0,88.0,86.0,81.0,95.0,104.0,82.0,66.0,116.0,109.0,108.0,84.0,66.0,15.0
MFS,43.0,48.0,41.0,44.0,40.0,49.0,58.0,46.0,43.0,54.0,60.0,47.0,27.0,53.0,64.0,54.0,48.0,44.0,4.0
Split,8.0,1.0,4.0,5.0,3.0,5.0,5.0,9.0,2.0,4.0,8.0,7.0,4.0,7.0,6.0,4.0,,2.0,


In [22]:
# Calculating the median number of orders on a single day for each courier 
courier3 = courier2.median(axis=1)
courier3

Courier
DX Freight    86.0
MFS           47.0
Split          5.0
dtype: float64

#### To Replace DX Freight

In [23]:
# Based on medians, calculating the total weight, volume, number of pallets, and delivery cost
# of DX Freight orders on a single working day 
dxfreight = courier1.iloc[0]*courier3.iloc[0]
dxfreight

Weight          3663.600000
Volume (m3)       33.829026
Pallets           23.210227
DeliveryCost    1701.080000
Name: DX Freight, dtype: float64

In [24]:
# Computing the trucks needed to replace DX Freight
trucks_needed(dxfreight)

One 18 - tonne truck is needed.


#### To Replace MFS

In [25]:
# Based on medians, calculating the total weight, volume, number of pallets, and delivery cost
# of MFS orders on a single working day 
mfs = courier1.iloc[1]*courier3.iloc[1]
mfs

Weight          7700.480
Volume (m3)       51.888
Pallets           47.000
DeliveryCost    1840.050
Name: MFS, dtype: float64

In [26]:
# Computing the trucks needed to replace MFS
trucks_needed(mfs)

One 18 - tonne truck is needed.


#### To Replace Split

In [27]:
# Based on medians, calculating the total weight, volume, number of pallets, and delivery cost
# of Split orders on a single working day 
split = courier1.iloc[2]*courier3.iloc[2]
split

Weight          1033.925000
Volume (m3)       13.842543
Pallets            8.752604
DeliveryCost     269.300000
Name: Split, dtype: float64

In [28]:
# Computing the trucks needed to replace Split
trucks_needed(split)

One 18 - tonne truck is needed.


# 

### Investigating the Number of Trucks Needed (25th Percentile)

In [29]:
# Calculating the 25th percentile of weight, volume, number of pallets, and delivery cost
# for orders carried out by each courier
courier1 = kite_wd.groupby(['Courier'])['Weight', 'Volume (m3)', 'Pallets', 'DeliveryCost'].quantile(0.25) 
courier1

Unnamed: 0_level_0,Weight,Volume (m3),Pallets,DeliveryCost
Courier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
DX Freight,25.6,0.22074,0.144444,13.23
MFS,117.23,0.531653,0.404439,34.46
Split,146.295,2.118337,1.361935,50.07


In [30]:
# Calulating the number of orders carried out by each courier for each day
courier2 = kite_wd.pivot_table(index = 'Courier', 
                               columns = 'DateDespatched', 
                               values = 'OrderID', aggfunc = 'count')
courier2

DateDespatched,2023-03-27,2023-03-28,2023-03-29,2023-03-30,2023-03-31,2023-04-03,2023-04-04,2023-04-05,2023-04-06,2023-04-11,2023-04-12,2023-04-13,2023-04-14,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-24
Courier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
DX Freight,71.0,91.0,86.0,73.0,76.0,108.0,88.0,86.0,81.0,95.0,104.0,82.0,66.0,116.0,109.0,108.0,84.0,66.0,15.0
MFS,43.0,48.0,41.0,44.0,40.0,49.0,58.0,46.0,43.0,54.0,60.0,47.0,27.0,53.0,64.0,54.0,48.0,44.0,4.0
Split,8.0,1.0,4.0,5.0,3.0,5.0,5.0,9.0,2.0,4.0,8.0,7.0,4.0,7.0,6.0,4.0,,2.0,


In [31]:
# Calculating the 25th percentile number of orders on a single day for each courier 
courier3 = courier2.quantile(0.25, axis=1)
courier3

Courier
DX Freight    74.5
MFS           43.0
Split          4.0
Name: 0.25, dtype: float64

#### To Replace DX Freight

In [32]:
# Based on 25th percentiles, calculating the total weight, volume, number of pallets, and delivery cost
# of DX Freight orders on a single working day 
dxfreight = courier1.iloc[0]*courier3.iloc[0]
dxfreight

Weight          1907.200000
Volume (m3)       16.445130
Pallets           10.761111
DeliveryCost     985.635000
Name: DX Freight, dtype: float64

In [33]:
# Computing the trucks needed to replace DX Freight
trucks_needed(dxfreight)

One 7.5 - tonne truck is needed.


#### To Replace MFS

In [34]:
# Based on medians, calculating the total weight, volume, number of pallets, and delivery cost
# of MFS orders on a single working day 
mfs = courier1.iloc[1]*courier3.iloc[1]
mfs

Weight          5040.890000
Volume (m3)       22.861097
Pallets           17.390898
DeliveryCost    1481.780000
Name: MFS, dtype: float64

In [35]:
# Computing the trucks needed to replace MFS
trucks_needed(mfs)

One 7.5 - tonne truck is needed.


#### To Replace Split

In [36]:
# Based on medians, calculating the total weight, volume, number of pallets, and delivery cost
# of Split orders on a single working day 
split = courier1.iloc[2]*courier3.iloc[2]
split

Weight          585.180000
Volume (m3)       8.473350
Pallets           5.447738
DeliveryCost    200.280000
Name: Split, dtype: float64

In [37]:
# Computing the trucks needed to replace Split
trucks_needed(split)

One 7.5 - tonne truck is needed.


### Christina's inputs

### Investigating the Number of Trucks Needed (Median)

In [38]:
courierA = kite_wd.groupby(['DateDespatched'])['Weight', 'Volume (m3)', 'Pallets', 'DeliveryCost'].sum()
courierA 

Unnamed: 0_level_0,Weight,Volume (m3),Pallets,DeliveryCost
DateDespatched,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-03-27,14932.31,157.162138,95.031752,4531.42
2023-03-28,17635.3,154.964685,98.282496,4529.71
2023-03-29,16915.54,189.228531,124.941869,4927.43
2023-03-30,15671.39,149.460797,99.919133,4628.22
2023-03-31,21469.48,224.557854,138.875649,5255.01
2023-04-03,18471.75,186.868859,126.062868,5629.14
2023-04-04,19292.82,205.122885,140.093652,5985.86
2023-04-05,19310.71,190.691363,121.224182,5219.25
2023-04-06,16545.26,156.06067,104.067816,4658.85
2023-04-11,19226.47,210.090666,139.251024,5567.56


In [39]:
median = courierA[['Weight', 'Volume (m3)', 'Pallets', 'DeliveryCost']].median()
median

Weight          17709.600000
Volume (m3)       177.176680
Pallets           107.256237
DeliveryCost     5046.870000
dtype: float64

In [40]:
trucks_needed(median)

One 7.5 - tonne truck is needed.


In [41]:
# Creating a function to identify trucks needed based on weight
def trucks_weight(y):
    if y.iloc[0] <= 1500:
        return 3.5
    elif 1500 < y.iloc[0] <= 4000:
        return 7.5
    elif 4000 < y.iloc[0] <= 10000:
        return 18
    else:
        return 1000 

In [42]:
# Creating a function to identify trucks needed based on volume
def trucks_volume(y):
    if y.iloc[1] <= 2.5:
        return 3.5
    elif 2.5 < y.iloc[1] <= 25:
        return 7.5
    elif 25 < y.iloc[1] <= 45:
        return 18
    else:
        return 1000

In [43]:
# Creating a function to identify trucks needed based on pallets
def trucks_pallet(y):
    if y.iloc[2] <= 2:
        return 3.5
    elif 2 < y.iloc[2] <= 14:
        return 7.5
    elif 14 < y.iloc[2] <= 24:
        return 18
    else:
        return 1000

In [44]:
# Creating a function to identify the number of trucks needed based on weight, volume, and pallets
def trucks_needed2(y):
    truck = max(trucks_weight(y), trucks_volume(y), trucks_pallet(y))     
    if truck < 1000:
        print("One", truck, "- tonne truck is needed.")
    else:
        print("More investigation is needed.")

In [45]:
trucks_needed2(median)

More investigation is needed.


In [46]:
kite_wd['DeliveryCost'].sum()

93314.37

In [47]:
kite_wd.groupby(['Cluster Label','DateDespatched'],as_index=False)['Weight', 'Volume (m3)', 'Pallets', 'DeliveryCost'].sum()

Unnamed: 0,Cluster Label,DateDespatched,Weight,Volume (m3),Pallets,DeliveryCost
0,0,2023-03-27,861.03,7.251909,4.626825,342.98
1,0,2023-03-28,453.91,9.952171,5.520840,211.19
2,0,2023-03-29,2345.64,20.547438,15.878389,685.44
3,0,2023-03-30,652.47,7.028912,4.072352,238.54
4,0,2023-03-31,934.83,13.674806,8.579204,390.70
...,...,...,...,...,...,...
89,4,2023-04-17,1993.01,34.181551,17.558513,798.38
90,4,2023-04-18,3990.24,43.853590,25.580183,1211.00
91,4,2023-04-19,1776.91,23.832307,14.777550,684.56
92,4,2023-04-20,854.24,19.378556,8.970153,367.26


# LOAD OPTIMIZATION

In [48]:
# Define the lorries
FEET_TO_CM = 30.48
lorries = [
    {
        "code": "3.5-tonne",
        "description": "3.5-tonne van",
        "max.volume": 2.5,
        "max_weight": 1500
    },
    {
        "code": "7.5-tonne",
        "description": "7.5-tonne LGV",
        "max.volume": 25,
        "max_weight": 4000
    },
    {
        "code": "18-tonne",
        "description": "18-tonne HGV",
        "max.volume": 45,
        "max_weight": 1000
    }
]


In [49]:
lorries

[{'code': '3.5-tonne',
  'description': '3.5-tonne van',
  'max.volume': 2.5,
  'max_weight': 1500},
 {'code': '7.5-tonne',
  'description': '7.5-tonne LGV',
  'max.volume': 25,
  'max_weight': 4000},
 {'code': '18-tonne',
  'description': '18-tonne HGV',
  'max.volume': 45,
  'max_weight': 1000}]

In [50]:
pip install ortools

Note: you may need to restart the kernel to use updated packages.


In [51]:
from ortools.linear_solver import pywraplp
import warnings
warnings.filterwarnings('ignore')
# create data model for knapsack problem 
# paramter optimize are data to be packing into the available vehicle in totalLorry
def create_data_model(optimize, totalLorry):
    """Create the data for the example."""
    data = {}
    weights = optimize['Weight'].to_list()
    volumes = optimize['Volume (m3)'].to_list()
    
    data['weights'] = weights
    data['volumes'] = volumes
    
    data['items'] = list(range(len(weights)))
    data['num_items'] = len(weights)
    
    max_volumes = []
    max_weights = []
    truck_types = []
    
    # reserve totalLorry data to be starting from small vehicle first
    totalLorry.reverse()
# resgister max_weight and max_volume for each available vehicle
    for tL in totalLorry:
        for i in range(tL['number']):
            max_volumes.append(tL['max_volume'])
            max_weights.append(tL['max_weight'])
            truck_types.append(tL['code'])
    
    data['max_volume'] = max_volumes 
    data['max_weight'] = max_weights 
    data['truck_types'] = truck_types
    
    data['trucks'] = list(range(len(data['max_volume'])))
    
    return data

In [52]:
# ===============================
# ==== Get Load Optimization ====
# ===============================
totalLorry = [{'code': '3.5-tonne','number': 3,
  'description': '3.5-tonne van',
  'max_volume': 2.5,
  'max_weight': 1500},
 {'code': '7.5-tonne','number': 3,
  'description': '7.5-tonne LGV',
  'max_volume': 25,
  'max_weight': 4000},
 {'code': '18-tonne','number': 3,
  'description': '18-tonne HGV',
  'max_volume': 45,
  'max_weight': 1000}]

In [53]:
kite_wd.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2556 entries, 0 to 4118
Data columns (total 19 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   distance        2556 non-null   float64       
 1   OrderID         2556 non-null   int64         
 2   WebsiteID       2556 non-null   int64         
 3   OrderDate       2556 non-null   datetime64[ns]
 4   Courier         2556 non-null   object        
 5   ProductsExVAT   2556 non-null   float64       
 6   DeliveryExVAT   2556 non-null   float64       
 7   DateDespatched  2556 non-null   datetime64[ns]
 8   DeliveryCost    2556 non-null   float64       
 9   Weight          2556 non-null   float64       
 10  Volume (m3)     2556 non-null   float64       
 11  Pallets         2556 non-null   float64       
 12  id              2556 non-null   int64         
 13  Outward Code    2556 non-null   object        
 14  Country         2556 non-null   object        
 15  lati

In [54]:
dataset = kite_wd[['Weight','Volume (m3)']]
dataset

Unnamed: 0,Weight,Volume (m3)
0,110.00,0.225000
1,33.30,0.959000
2,21.80,0.209100
5,44.20,0.123500
6,51.12,0.129600
...,...,...
4105,406.56,0.739200
4107,80.00,0.132400
4116,18.90,1.411200
4117,29.88,4.070664


In [55]:
cluster1_data = kite_wd[kite_wd['Cluster Label'] == 1]
cluster1_data

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,Country,latitude,longitude,Cluster Label,weekday
0,26.84,2295440,7700,2023-04-18,MFS,375.40,0.00,2023-04-18,34.46,110.00,0.225000,0.200000,766,DY6,United Kingdom,52.496759,-2.173682,1,1
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.00,2023-03-28,16.45,33.30,0.959000,0.473039,766,DY6,United Kingdom,52.496759,-2.173682,1,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.20,2023-03-30,11.79,21.80,0.209100,0.222222,766,DY6,United Kingdom,52.496759,-2.173682,1,3
5,26.84,2292112,778040,2023-04-14,DX Freight,104.00,0.00,2023-04-14,20.55,44.20,0.123500,0.054167,766,DY6,United Kingdom,52.496759,-2.173682,1,4
6,26.84,2298627,846996,2023-04-21,DX Freight,111.36,0.00,2023-04-21,23.25,51.12,0.129600,0.100000,766,DY6,United Kingdom,52.496759,-2.173682,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4096,46.24,2289999,785183,2023-04-12,DX Freight,1299.84,0.00,2023-04-20,48.28,116.77,2.970782,1.416867,2524,ST12,United Kingdom,52.948000,-2.171000,1,3
4098,28.27,2295696,841091,2023-04-18,DX Freight,186.19,0.00,2023-04-19,14.00,27.34,0.279677,0.273069,2933,WV3,United Kingdom,52.579470,-2.156364,1,2
4099,13.42,2299296,810864,2023-04-21,DX Freight,127.86,0.00,2023-04-24,18.63,39.99,0.236965,0.187738,82,B47,United Kingdom,52.392856,-1.878858,1,0
4107,19.60,2292974,893554,2023-04-16,DX Freight,109.80,0.00,2023-04-17,34.03,80.00,0.132400,0.111111,96,B68,United Kingdom,52.482799,-2.000643,1,0


In [56]:
cluster1_data.sort_values(by ='Weight',ascending = False)


Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,Country,latitude,longitude,Cluster Label,weekday
747,13.16,2281006,762149,2023-03-31,MFS,9282.00,0.00,2023-04-18,1039.14,5308.80,55.893600,30.000000,46,B11,United Kingdom,52.458409,-1.852449,1,1
1632,24.53,2291682,590228,2023-04-13,MFS,2331.63,0.00,2023-04-13,194.59,2356.20,8.788500,6.000000,765,DY5,United Kingdom,52.485945,-2.121581,1,3
3517,25.12,2282643,386132,2023-04-03,MFS,3438.60,19.99,2023-04-03,519.57,2324.50,19.902000,15.000000,2927,WV12,United Kingdom,52.605184,-2.042545,1,0
748,13.16,2285347,762149,2023-04-05,MFS,3098.48,19.99,2023-04-05,381.02,1946.56,20.494320,11.000000,46,B11,United Kingdom,52.458409,-1.852449,1,2
745,13.16,2279715,762149,2023-03-30,MFS,3094.00,19.99,2023-03-30,346.38,1769.60,18.631200,10.000000,46,B11,United Kingdom,52.458409,-1.852449,1,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279,49.24,2285758,621438,2023-04-05,DX Freight,120.06,0.00,2023-04-05,10.97,4.50,0.491940,0.166667,2536,ST4,United Kingdom,52.995000,-2.183000,1,2
389,20.91,2293613,610364,2023-04-17,DX Freight,24.10,12.23,2023-04-17,10.92,4.20,0.250000,0.250000,2923,WS9,United Kingdom,52.603195,-1.913883,1,0
2020,44.79,2290370,827901,2023-04-12,DX Freight,39.00,12.23,2023-04-13,10.92,2.70,0.444212,0.275000,2636,TF1,United Kingdom,52.703110,-2.494334,1,3
2452,22.47,2281527,686463,2023-04-01,DX Freight,23.74,12.34,2023-04-03,11.02,2.70,0.303750,0.100000,2910,WS10,United Kingdom,52.560534,-2.015539,1,0


In [57]:
cluster1_databusiest = cluster1_data[cluster1_data['DateDespatched'].dt.strftime('%Y-%m-%d') == '2023-04-18']
cluster1_databusiest

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,Country,latitude,longitude,Cluster Label,weekday
0,26.84,2295440,7700,2023-04-18,MFS,375.40,0.00,2023-04-18,34.46,110.00,0.225000,0.200000,766,DY6,United Kingdom,52.496759,-2.173682,1,1
385,20.91,2295100,557961,2023-04-18,DX Freight,151.69,0.00,2023-04-18,20.17,43.70,0.243640,0.183333,2923,WS9,United Kingdom,52.603195,-1.913883,1,1
415,21.22,2294517,669163,2023-04-18,MFS,439.44,0.00,2023-04-18,34.46,209.64,0.486000,0.355556,97,B69,United Kingdom,52.501095,-2.030596,1,1
562,20.84,2294691,189371,2023-04-18,MFS,1816.20,0.00,2023-04-18,34.46,561.60,0.706500,0.357143,99,B70,United Kingdom,52.525509,-2.003138,1,1
563,20.84,2293634,274082,2023-04-17,MFS,453.90,0.00,2023-04-18,34.46,250.00,0.062720,0.416667,99,B70,United Kingdom,52.525509,-2.003138,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3752,15.74,2295627,544479,2023-04-18,MFS,441.45,0.00,2023-04-18,34.46,123.10,0.191000,0.205263,65,B3,United Kingdom,52.480608,-1.903612,1,1
3896,9.21,2295032,901131,2023-04-18,DX Freight,131.29,0.00,2023-04-18,15.54,31.51,0.255090,0.250000,113,B91,United Kingdom,52.414462,-1.775570,1,1
3932,13.46,2294429,634804,2023-04-18,DX Freight,40.58,12.23,2023-04-18,10.92,7.06,0.234362,0.091419,45,B10,United Kingdom,52.468728,-1.853245,1,1
4048,21.36,2295593,872742,2023-04-18,MFS,165.65,19.99,2023-04-18,35.96,84.90,0.605000,0.833333,93,B65,United Kingdom,52.484840,-2.043520,1,1


In [65]:
cluster1_databusiest.to_csv('dataset.csv')

In [66]:
print(cluster1_databusiest['Weight'].sum())
print(cluster1_databusiest['Volume (m3)'].sum())

13017.809999999998
104.325234665


In [59]:
dataset_cluster1 = cluster1_data[['Weight','Volume (m3)']]
dataset_cluster1

Unnamed: 0,Weight,Volume (m3)
0,110.00,0.225000
1,33.30,0.959000
2,21.80,0.209100
5,44.20,0.123500
6,51.12,0.129600
...,...,...
4096,116.77,2.970782
4098,27.34,0.279677
4099,39.99,0.236965
4107,80.00,0.132400


In [60]:
dataset_cluster1busy = cluster1_databusiest[['Weight','Volume (m3)']]

In [61]:
print(dataset_cluster1busy['Weight'].sum())
print(dataset_cluster1busy['Volume (m3)'].sum())

13017.809999999998
104.325234665


In [62]:
data = create_data_model(dataset_cluster1busy, totalLorry)
# Create the mip solver with the SCIP backend.
solver = pywraplp.Solver.CreateSolver('SCIP')
# Variables
# x[i, j] = 1 if item i is packed in bin j.
x = {}
for i in data['items']:
    for j in data['trucks']:
        x[(i, j)] = solver.IntVar(0, 1, 'x_%i_%i' % (i, j))
# Constraints
# Each item can be in at most one bin.
for i in data['items']:
    solver.Add(sum(x[i, j] for j in data['trucks']) <= 1)
# The amount packed in each bin cannot exceed its max weight.
for j in data['trucks']:
    solver.Add(
        sum(x[(i, j)] * data['weights'][i]
            for i in data['items']) <= data['max_weight'][j])
# The amount packed in each bin cannot exceed its max volume.
for j in data['trucks']:
    solver.Add(
        sum(x[(i, j)] * data['volumes'][i]
            for i in data['items']) <= data['max_volume'][j])
# Add objectives
objective = solver.Objective()
for i in data['items']:
    for j in data['trucks']:
        objective.SetCoefficient(x[(i, j)], data['volumes'][i])
objective.SetMaximization()
status = solver.Solve()

In [63]:
_totalLeftVolume = 0
_totalLeftWeight = 0
if status == pywraplp.Solver.OPTIMAL:
    assign = []
    total_weight = 0
    total_items = 0
    print('Total Lorry: ')
    display(totalLorry)
    print()
    print('Total Items:', len(dataset_cluster1busy))
    print()
    for j in data['trucks']:
        bin_weight = 0
        bin_volume = 0
        print('Truck ', j, '[', data['truck_types'][j] ,'] - max_weight:[', "{:,.2f}".format(data['max_weight'][j]), '] - max volume:[', "{:,.2f}".format(data['max_volume'][j]), ']' )
        for i in data['items']:
            if x[i, j].solution_value() > 0:
                assign.append(i)
                total_items += 1
                print('Item', i, '- weight:', data['weights'][i],
                      ' volumes:', data['volumes'][i])
                bin_weight += data['weights'][i]
                bin_volume += data['volumes'][i]
        print('Packed truck volume:', "{:,.2f}".format(bin_volume))
        print('Packed truck weight:', "{:,.2f}".format(bin_weight))
        print()     

        if (bin_volume > 0) & (bin_weight > 0):
            leftVolume = data['max_volume'][j] - bin_volume
            leftWeight = data['max_weight'][j] - bin_weight
        else:
            leftVolume = 0
            leftWeight = 0
        
        print('Left Volume', "{:,.2f}".format(leftVolume))
        print('Left Weight', "{:,.2f}".format(leftWeight))
        print()
        print()
        
        total_weight += bin_weight
        _totalLeftVolume += leftVolume
        _totalLeftWeight += leftWeight
    print('Total packed weight:', "{:,.2f}".format(total_weight))
    print('Total packed volume:', "{:,.2f}".format(objective.Value()))
    print('Total item assigned:', "{:,.0f}".format(total_items))
    print()
    print("#" * 70)
    print('Total Left Volume', "{:,.2f}".format(_totalLeftVolume))
    print('Total Left Weight', "{:,.2f}".format(_totalLeftWeight))
    print("#" * 70)

Total Lorry: 


[{'code': '18-tonne',
  'number': 3,
  'description': '18-tonne HGV',
  'max_volume': 45,
  'max_weight': 1000},
 {'code': '7.5-tonne',
  'number': 3,
  'description': '7.5-tonne LGV',
  'max_volume': 25,
  'max_weight': 4000},
 {'code': '3.5-tonne',
  'number': 3,
  'description': '3.5-tonne van',
  'max_volume': 2.5,
  'max_weight': 1500}]


Total Items: 61

Truck  0 [ 18-tonne ] - max_weight:[ 1,000.00 ] - max volume:[ 45.00 ]
Item 3 - weight: 561.6  volumes: 0.7065
Item 6 - weight: 123.35  volumes: 0.7863
Item 11 - weight: 21.0  volumes: 0.2272
Item 12 - weight: 25.2  volumes: 0.39426
Item 56 - weight: 123.1  volumes: 0.191
Item 57 - weight: 31.51  volumes: 0.25509
Item 58 - weight: 7.06  volumes: 0.234362
Item 59 - weight: 84.9  volumes: 0.605
Item 60 - weight: 18.9  volumes: 1.4112
Packed truck volume: 4.81
Packed truck weight: 996.62

Left Volume 40.19
Left Weight 3.38


Truck  1 [ 18-tonne ] - max_weight:[ 1,000.00 ] - max volume:[ 45.00 ]
Item 0 - weight: 110.0  volumes: 0.225
Item 7 - weight: 174.6  volumes: 0.61548
Item 8 - weight: 90.25  volumes: 1.01348
Item 10 - weight: 94.0  volumes: 0.326
Item 13 - weight: 10.7  volumes: 0.4368
Item 14 - weight: 189.12  volumes: 0.403448
Item 15 - weight: 44.28  volumes: 0.0972
Item 16 - weight: 128.02  volumes: 0.356375
Item 17 - weight: 23.09  volumes: 0.60673
Item 18 - we