# Truck Analysis

# 

### Importing Neccesary Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Suppressing warnings
import warnings  
warnings.filterwarnings('ignore')

# 

### Loading and Inspecting the Data

In [2]:
# Importing the CSV file into a DataFrame
kite = pd.read_csv('clustered_data.csv', index_col = 0)

# Viewing the DataFrame
kite.head()

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,latitude,longitude,Cluster Label
0,26.84,2295440,7700,2023-04-18,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,52.496759,-2.173682,1
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,52.496759,-2.173682,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,52.496759,-2.173682,1
3,26.84,2275231,483789,2023-03-27,DX Express,76.95,7.22,2023-03-27,6.81,17.77,0.0402,0.030342,766,DY6,52.496759,-2.173682,1
4,26.84,2285334,483789,2023-04-05,DX Express,147.3,0.0,2023-04-05,11.46,34.7,0.100552,0.052361,766,DY6,52.496759,-2.173682,1


In [3]:
# Checking the shape of the DataFrame
kite.shape

(4119, 17)

In [4]:
# Exploring the metadata
kite.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4119 entries, 0 to 4118
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   distance        4119 non-null   float64
 1   OrderID         4119 non-null   int64  
 2   WebsiteID       4119 non-null   int64  
 3   OrderDate       4119 non-null   object 
 4   Courier         4119 non-null   object 
 5   ProductsExVAT   4119 non-null   float64
 6   DeliveryExVAT   4119 non-null   float64
 7   DateDespatched  4119 non-null   object 
 8   DeliveryCost    4119 non-null   float64
 9   Weight          4119 non-null   float64
 10  Volume (m3)     4119 non-null   float64
 11  Pallets         4119 non-null   float64
 12  id              4119 non-null   int64  
 13  Outward Code    4119 non-null   object 
 14  latitude        4119 non-null   float64
 15  longitude       4119 non-null   float64
 16  Cluster Label   4119 non-null   int64  
dtypes: float64(9), int64(4), object(4

In [5]:
# Changing the data types of OrderDate and DateDespatched to datetime
# Formatting the dates so they are yyyy/mm/dd
kite['OrderDate'] = pd.to_datetime(kite['OrderDate'], format = '%Y-%m-%d')
kite['DateDespatched'] = pd.to_datetime(kite['DateDespatched'], format = '%Y-%m-%d')

# Checking this has worked
print(kite.info())
kite.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4119 entries, 0 to 4118
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   distance        4119 non-null   float64       
 1   OrderID         4119 non-null   int64         
 2   WebsiteID       4119 non-null   int64         
 3   OrderDate       4119 non-null   datetime64[ns]
 4   Courier         4119 non-null   object        
 5   ProductsExVAT   4119 non-null   float64       
 6   DeliveryExVAT   4119 non-null   float64       
 7   DateDespatched  4119 non-null   datetime64[ns]
 8   DeliveryCost    4119 non-null   float64       
 9   Weight          4119 non-null   float64       
 10  Volume (m3)     4119 non-null   float64       
 11  Pallets         4119 non-null   float64       
 12  id              4119 non-null   int64         
 13  Outward Code    4119 non-null   object        
 14  latitude        4119 non-null   float64       
 15  long

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,latitude,longitude,Cluster Label
0,26.84,2295440,7700,2023-04-18,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,52.496759,-2.173682,1
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,52.496759,-2.173682,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,52.496759,-2.173682,1
3,26.84,2275231,483789,2023-03-27,DX Express,76.95,7.22,2023-03-27,6.81,17.77,0.0402,0.030342,766,DY6,52.496759,-2.173682,1
4,26.84,2285334,483789,2023-04-05,DX Express,147.3,0.0,2023-04-05,11.46,34.7,0.100552,0.052361,766,DY6,52.496759,-2.173682,1


# 

### Only Looking at Working Days

In [7]:
# Creating a column to identify the day of the week that the order was dispatched
kite['weekday'] = kite['DateDespatched'].apply(lambda x: x.weekday())

# Removing orders from the DataFrame that were dispatched on a Saturday (5) or Sunday (6)    
kite_wd = kite.loc[~((kite['weekday'] == 5) | (kite['weekday'] == 6))]

# Checking this has worked
kite_wd['weekday'].unique()

array([1, 3, 0, 2, 4], dtype=int64)

In [8]:
# Removing orders that were dispatched on the Easter bank holidays April 7th & 10th
kite_wd = kite_wd[~kite_wd['DateDespatched'].isin([pd.Timestamp('20230407'), pd.Timestamp('20230410')])]

# Checking this has worked
kite_wd['DateDespatched'].unique()

array(['2023-04-18T00:00:00.000000000', '2023-03-28T00:00:00.000000000',
       '2023-03-30T00:00:00.000000000', '2023-03-27T00:00:00.000000000',
       '2023-04-05T00:00:00.000000000', '2023-04-14T00:00:00.000000000',
       '2023-04-21T00:00:00.000000000', '2023-04-04T00:00:00.000000000',
       '2023-04-13T00:00:00.000000000', '2023-04-19T00:00:00.000000000',
       '2023-04-17T00:00:00.000000000', '2023-03-31T00:00:00.000000000',
       '2023-04-11T00:00:00.000000000', '2023-04-20T00:00:00.000000000',
       '2023-04-03T00:00:00.000000000', '2023-04-24T00:00:00.000000000',
       '2023-03-29T00:00:00.000000000', '2023-04-12T00:00:00.000000000',
       '2023-04-06T00:00:00.000000000'], dtype='datetime64[ns]')

In [9]:
# Viewing the DataFrame
kite_wd.head()

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,latitude,longitude,Cluster Label,weekday
0,26.84,2295440,7700,2023-04-18,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,52.496759,-2.173682,1,1
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,52.496759,-2.173682,1,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,52.496759,-2.173682,1,3
3,26.84,2275231,483789,2023-03-27,DX Express,76.95,7.22,2023-03-27,6.81,17.77,0.0402,0.030342,766,DY6,52.496759,-2.173682,1,0
4,26.84,2285334,483789,2023-04-05,DX Express,147.3,0.0,2023-04-05,11.46,34.7,0.100552,0.052361,766,DY6,52.496759,-2.173682,1,2


# 

### Only Looking to Replace DX Freight, MFS, and Split

In [11]:
# Removing DX Express from the DataFrame
kite_wd = kite_wd.loc[~(kite_wd['Courier'] == "DX Express")]

# Checking this has worked
kite_wd['Courier'].unique()

array(['MFS', 'DX Freight', 'Split'], dtype=object)

In [12]:
# Viewing the DataFrame
kite_wd.head()

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,latitude,longitude,Cluster Label,weekday
0,26.84,2295440,7700,2023-04-18,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,52.496759,-2.173682,1,1
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,52.496759,-2.173682,1,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,52.496759,-2.173682,1,3
5,26.84,2292112,778040,2023-04-14,DX Freight,104.0,0.0,2023-04-14,20.55,44.2,0.1235,0.054167,766,DY6,52.496759,-2.173682,1,4
6,26.84,2298627,846996,2023-04-21,DX Freight,111.36,0.0,2023-04-21,23.25,51.12,0.1296,0.1,766,DY6,52.496759,-2.173682,1,4


# 

### Only Looking at Cluster 1 (Birmingham)

In [13]:
# Creating a subset of the DataFrame to only look at cluster 1 (Birmingham)
kite_wd = kite_wd.loc[kite_wd['Cluster Label'] == 1]

# Checking this has worked
kite_wd['Cluster Label'].unique()

array([1], dtype=int64)

In [14]:
# Viewing the DataFrame
kite_wd.head()

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,latitude,longitude,Cluster Label,weekday
0,26.84,2295440,7700,2023-04-18,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,52.496759,-2.173682,1,1
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,52.496759,-2.173682,1,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,52.496759,-2.173682,1,3
5,26.84,2292112,778040,2023-04-14,DX Freight,104.0,0.0,2023-04-14,20.55,44.2,0.1235,0.054167,766,DY6,52.496759,-2.173682,1,4
6,26.84,2298627,846996,2023-04-21,DX Freight,111.36,0.0,2023-04-21,23.25,51.12,0.1296,0.1,766,DY6,52.496759,-2.173682,1,4


# 

### Writing a Function to Compute the Trucks Needed 

In [15]:
# Creating a function to identify trucks needed based on weight
def trucks_weight(x):
    if x.iloc[0] <= 1500:
        return 3.5
    elif 1500 < x.iloc[0] <= 4000:
        return 7.5
    elif 4000 < x.iloc[0] <= 10000:
        return 18
    else:
        return 1000 

In [16]:
# Creating a function to identify trucks needed based on volume
def trucks_volume(x):
    if x.iloc[1] <= 2.5:
        return 3.5
    elif 2.5 < x.iloc[1] <= 25:
        return 7.5
    elif 25 < x.iloc[1] <= 45:
        return 18
    else:
        return 1000

In [17]:
# Creating a function to identify trucks needed based on pallets
def trucks_pallet(x):
    if x.iloc[2] <= 2:
        return 3.5
    elif 2 < x.iloc[2] <= 14:
        return 7.5
    elif 14 < x.iloc[2] <= 24:
        return 18
    else:
        return 1000

In [18]:
# Creating a function to identify the number of trucks needed based on weight, volume, and pallets
def trucks_needed(x):
    truck = max(trucks_weight(x), trucks_volume(x), trucks_pallet(x))     
    if truck < 1000:
        print("One", truck, "- tonne truck is needed.")
    else:
        print("More investigation is needed.")

# 

### Investigating the Number of Trucks Needed (Median)

In [19]:
# Calculating the median weight, volume, number of pallets, and delivery cost
# for orders carried out by each courier
courier1 = kite_wd.groupby(['Courier'])['Weight', 'Volume (m3)', 'Pallets', 'DeliveryCost'].median()
courier1

Unnamed: 0_level_0,Weight,Volume (m3),Pallets,DeliveryCost
Courier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
DX Freight,42.63,0.345973,0.25,19.78
MFS,157.5,1.01348,0.786706,34.46
Split,215.03,2.832923,1.779365,53.94


In [20]:
# Calulating the number of orders carried out by each courier for each day
courier2 = kite_wd.pivot_table(index = 'Courier', 
                               columns = 'DateDespatched', 
                               values = 'OrderID', aggfunc = 'count')
courier2

DateDespatched,2023-03-27,2023-03-28,2023-03-29,2023-03-30,2023-03-31,2023-04-03,2023-04-04,2023-04-05,2023-04-06,2023-04-11,2023-04-12,2023-04-13,2023-04-14,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-24
Courier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
DX Freight,15.0,41.0,24.0,21.0,11.0,32.0,23.0,24.0,22.0,29.0,31.0,21.0,21.0,26.0,31.0,33.0,25.0,23.0,7.0
MFS,14.0,13.0,12.0,16.0,19.0,19.0,24.0,14.0,15.0,20.0,20.0,21.0,13.0,16.0,28.0,20.0,17.0,18.0,2.0
Split,3.0,,1.0,3.0,,2.0,2.0,1.0,,2.0,3.0,4.0,2.0,2.0,2.0,1.0,,,


In [21]:
# Calculating the median number of orders on a single day for each courier 
courier3 = courier2.median(axis=1)
courier3

Courier
DX Freight    24.0
MFS           17.0
Split          2.0
dtype: float64

#### To Replace DX Freight

In [22]:
# Based on medians, calculating the total weight, volume, number of pallets, and delivery cost
# of DX Freight orders on a single working day 
dxfreight = courier1.iloc[0]*courier3.iloc[0]
dxfreight

Weight          1023.12000
Volume (m3)        8.30334
Pallets            6.00000
DeliveryCost     474.72000
Name: DX Freight, dtype: float64

In [23]:
# Computing the trucks needed to replace DX Freight
trucks_needed(dxfreight)

One 7.5 - tonne truck is needed.


#### To Replace MFS

In [24]:
# Based on medians, calculating the total weight, volume, number of pallets, and delivery cost
# of MFS orders on a single working day 
mfs = courier1.iloc[1]*courier3.iloc[1]
mfs

Weight          2677.500000
Volume (m3)       17.229160
Pallets           13.374008
DeliveryCost     585.820000
Name: MFS, dtype: float64

In [25]:
# Computing the trucks needed to replace MFS
trucks_needed(mfs)

One 7.5 - tonne truck is needed.


#### To Replace Split

In [26]:
# Based on medians, calculating the total weight, volume, number of pallets, and delivery cost
# of Split orders on a single working day 
split = courier1.iloc[2]*courier3.iloc[2]
split

Weight          430.060000
Volume (m3)       5.665846
Pallets           3.558730
DeliveryCost    107.880000
Name: Split, dtype: float64

In [27]:
# Computing the trucks needed to replace Split
trucks_needed(split)

One 7.5 - tonne truck is needed.


#### To Replace MFS and Split

In [28]:
# Now grouping MFS and Split together to see the number of trucks needed to replace them as a group 
split_mfs = split + mfs
split_mfs

Weight          3107.560000
Volume (m3)       22.895006
Pallets           16.932738
DeliveryCost     693.700000
dtype: float64

In [29]:
# Computing the trucks needed to replace the grouped MFS and Split orders
trucks_needed(split_mfs)

One 18 - tonne truck is needed.


Based on median values: 

It seems that, if we look to replace each courier, we can replace the DX Freight orders with one 7.5-tonne LGV. For DX Freight, the median number of orders in a day is 24. Similarly, the median number of orders in a day for MFS and Split is 17 and 2 respectively. It doesn't make financial sense to replace a courier that only carries out 2 orders a day (Split), so let's group MFS and Split together. Grouping these couriers together, it seems we can replace them with one 18-tonne HGV. 

# 

### Replacing DX Freight with One 7.5-Tonne LGV

In [30]:
# Subsetting the data to only look at DX Freight orders
dxfreight_df = kite_wd[kite_wd['Courier'] == "DX Freight"]

# Checking this has worked
dxfreight_df['Courier'].unique()

array(['DX Freight'], dtype=object)

In [31]:
# Viewing the DataFrame
dxfreight_df.head()

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,latitude,longitude,Cluster Label,weekday
1,26.84,2277415,67940,2023-03-28,DX Freight,236.61,0.0,2023-03-28,16.45,33.3,0.959,0.473039,766,DY6,52.496759,-2.173682,1,1
2,26.84,2279524,67940,2023-03-30,DX Freight,70.58,13.2,2023-03-30,11.79,21.8,0.2091,0.222222,766,DY6,52.496759,-2.173682,1,3
5,26.84,2292112,778040,2023-04-14,DX Freight,104.0,0.0,2023-04-14,20.55,44.2,0.1235,0.054167,766,DY6,52.496759,-2.173682,1,4
6,26.84,2298627,846996,2023-04-21,DX Freight,111.36,0.0,2023-04-21,23.25,51.12,0.1296,0.1,766,DY6,52.496759,-2.173682,1,4
42,29.8,2296550,249205,2023-04-19,DX Freight,215.76,0.0,2023-04-19,20.94,45.9,0.135,0.09375,2915,WS15,52.765303,-1.911238,1,2


In [32]:
# Calculating the total weight, volume, and number of pallets for DX Freight orders each day
replace_dxf = dxfreight_df.groupby(['DateDespatched'])['Weight', 'Volume (m3)', 'Pallets'].sum()
replace_dxf

Unnamed: 0_level_0,Weight,Volume (m3),Pallets
DateDespatched,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-03-27,826.62,9.355092,6.38008
2023-03-28,1872.76,36.521539,19.655789
2023-03-29,1025.74,19.243912,11.310279
2023-03-30,856.01,5.778522,5.025358
2023-03-31,672.77,17.71102,10.321675
2023-04-03,1232.49,32.270351,17.804015
2023-04-04,929.13,9.839613,6.21533
2023-04-05,910.91,23.917254,12.657859
2023-04-06,1092.88,33.981889,20.09046
2023-04-11,1064.68,25.870967,15.690822


In [44]:
# Checking that the weight each day is within the capacity of the 7.5-tonne LGV
replace_dxf['Weight_Capacity'] = replace_dxf['Weight']/4000

# Checking that the volume each day is within the capacity of the 7.5-tonne LGV
replace_dxf['Volume_Capacity'] = replace_dxf['Volume (m3)']/25

# Checking that the volume each day is within the capacity of the 7.5-tonne LGV
replace_dxf['Pallet_Capacity'] = replace_dxf['Pallets']/14

# Viewing these new columns
replace_dxf

Unnamed: 0_level_0,Weight,Volume (m3),Pallets,busyness,Weight_Capacity,Volume_Capacity,Pallet_Capacity
DateDespatched,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-03-27,826.62,9.355092,6.38008,842.355172,0.206655,0.374204,0.45572
2023-03-28,1872.76,36.521539,19.655789,1928.937329,0.46819,1.460862,1.403985
2023-03-29,1025.74,19.243912,11.310279,1056.294191,0.256435,0.769756,0.807877
2023-03-30,856.01,5.778522,5.025358,866.81388,0.214002,0.231141,0.358954
2023-03-31,672.77,17.71102,10.321675,700.802695,0.168192,0.708441,0.737262
2023-04-03,1232.49,32.270351,17.804015,1282.564366,0.308123,1.290814,1.271715
2023-04-04,929.13,9.839613,6.21533,945.184944,0.232283,0.393585,0.443952
2023-04-05,910.91,23.917254,12.657859,947.485114,0.227727,0.95669,0.904133
2023-04-06,1092.88,33.981889,20.09046,1146.952349,0.27322,1.359276,1.435033
2023-04-11,1064.68,25.870967,15.690822,1106.241788,0.26617,1.034839,1.120773


In [47]:
# Identifying days when the orders have run over the capacity of the 7.5-tonne LGV
dxf_overcap = replace_dxf[(replace_dxf['Weight_Capacity'] > 1) | (replace_dxf['Volume_Capacity'] > 1) | (replace_dxf['Pallet_Capacity'] > 1)]
dxf_overcap

Unnamed: 0_level_0,Weight,Volume (m3),Pallets,busyness,Weight_Capacity,Volume_Capacity,Pallet_Capacity
DateDespatched,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-03-28,1872.76,36.521539,19.655789,1928.937329,0.46819,1.460862,1.403985
2023-04-03,1232.49,32.270351,17.804015,1282.564366,0.308123,1.290814,1.271715
2023-04-06,1092.88,33.981889,20.09046,1146.952349,0.27322,1.359276,1.435033
2023-04-11,1064.68,25.870967,15.690822,1106.241788,0.26617,1.034839,1.120773
2023-04-14,902.91,25.841455,14.121481,942.872936,0.225727,1.033658,1.008677


In [49]:
# What percentage of days are running over capacity of the 7.5-tonne LGV
len(dxf_overcap)/len(replace_dxf)

0.2631578947368421

It appears that on 26% of days, the orders exceed the capacity of the 7.5-tonne LGV.

# 

### Replacing MFS and Split with One 18-Tonne HGV

In [50]:
# Subsetting the data to only look at MFS and Split orders
mfs_split_df = kite_wd[(kite_wd['Courier'] == "MFS") | (kite_wd['Courier'] == "Split")]

# Checking this has worked
mfs_split_df['Courier'].unique()

array(['MFS', 'Split'], dtype=object)

In [51]:
# Viewing the DataFrame
mfs_split_df.head()

Unnamed: 0,distance,OrderID,WebsiteID,OrderDate,Courier,ProductsExVAT,DeliveryExVAT,DateDespatched,DeliveryCost,Weight,Volume (m3),Pallets,id,Outward Code,latitude,longitude,Cluster Label,weekday
0,26.84,2295440,7700,2023-04-18,MFS,375.4,0.0,2023-04-18,34.46,110.0,0.225,0.2,766,DY6,52.496759,-2.173682,1,1
43,29.8,2297374,364306,2023-04-20,MFS,439.74,0.0,2023-04-20,34.46,156.9,0.3168,0.298611,2915,WS15,52.765303,-1.911238,1,3
45,29.8,2293512,422587,2023-04-17,MFS,578.4,0.0,2023-04-17,34.46,187.2,0.528,0.357143,2915,WS15,52.765303,-1.911238,1,0
48,29.8,2280483,624097,2023-03-31,MFS,367.64,0.0,2023-03-31,34.46,159.13,1.255236,1.049603,2915,WS15,52.765303,-1.911238,1,4
50,29.8,2279451,639102,2023-03-30,MFS,497.8,0.0,2023-03-30,39.15,96.0,0.8424,0.222222,2915,WS15,52.765303,-1.911238,1,3


In [52]:
# Calculating the total weight, volume, and number of pallets for MFS and Split orders each day
replace_mfs_split = mfs_split_df.groupby(['DateDespatched'])['Weight', 'Volume (m3)', 'Pallets'].sum()
replace_mfs_split

Unnamed: 0_level_0,Weight,Volume (m3),Pallets
DateDespatched,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-03-27,5258.6,53.655274,31.872026
2023-03-28,2783.17,14.399741,10.442143
2023-03-29,2617.67,25.164992,14.56513
2023-03-30,4647.91,44.811851,25.579261
2023-03-31,5872.25,43.401611,24.428275
2023-04-03,7042.39,47.336508,34.286634
2023-04-04,7563.39,73.980913,52.858549
2023-04-05,6299.15,39.016791,25.287707
2023-04-06,2730.63,23.083097,15.769095
2023-04-11,7076.19,74.46244,48.492924


In [55]:
# Checking that the weight each day is within the capacity of the 18-tonne HGV
replace_mfs_split['Weight_Capacity'] = replace_mfs_split['Weight']/10000

# Checking that the volume each day is within the capacity of the 18-tonne HGV
replace_mfs_split['Volume_Capacity'] = replace_mfs_split['Volume (m3)']/45

# Checking that the volume each day is within the capacity of the 18-tonne HGV
replace_mfs_split['Pallet_Capacity'] = replace_mfs_split['Pallets']/24

# Viewing these new columns
replace_mfs_split

Unnamed: 0_level_0,Weight,Volume (m3),Pallets,Weight_Capacity,Volume_Capacity,Pallet_Capacity
DateDespatched,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-03-27,5258.6,53.655274,31.872026,0.52586,1.192339,1.328001
2023-03-28,2783.17,14.399741,10.442143,0.278317,0.319994,0.435089
2023-03-29,2617.67,25.164992,14.56513,0.261767,0.559222,0.60688
2023-03-30,4647.91,44.811851,25.579261,0.464791,0.995819,1.065803
2023-03-31,5872.25,43.401611,24.428275,0.587225,0.96448,1.017845
2023-04-03,7042.39,47.336508,34.286634,0.704239,1.051922,1.42861
2023-04-04,7563.39,73.980913,52.858549,0.756339,1.64402,2.20244
2023-04-05,6299.15,39.016791,25.287707,0.629915,0.86704,1.053654
2023-04-06,2730.63,23.083097,15.769095,0.273063,0.512958,0.657046
2023-04-11,7076.19,74.46244,48.492924,0.707619,1.654721,2.020538


In [56]:
# Identifying days when the orders have run over the capacity of the 18-tonne HGV
mfs_split_overcap = replace_mfs_split[(replace_mfs_split['Weight_Capacity'] > 1) | (replace_mfs_split['Volume_Capacity'] > 1) | (replace_mfs_split['Pallet_Capacity'] > 1)]
mfs_split_overcap

Unnamed: 0_level_0,Weight,Volume (m3),Pallets,Weight_Capacity,Volume_Capacity,Pallet_Capacity
DateDespatched,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-03-27,5258.6,53.655274,31.872026,0.52586,1.192339,1.328001
2023-03-30,4647.91,44.811851,25.579261,0.464791,0.995819,1.065803
2023-03-31,5872.25,43.401611,24.428275,0.587225,0.96448,1.017845
2023-04-03,7042.39,47.336508,34.286634,0.704239,1.051922,1.42861
2023-04-04,7563.39,73.980913,52.858549,0.756339,1.64402,2.20244
2023-04-05,6299.15,39.016791,25.287707,0.629915,0.86704,1.053654
2023-04-11,7076.19,74.46244,48.492924,0.707619,1.654721,2.020538
2023-04-12,4364.52,50.739469,28.119105,0.436452,1.127544,1.171629
2023-04-13,11075.07,75.498317,50.681381,1.107507,1.67774,2.111724
2023-04-14,5263.8,36.384762,26.489169,0.52638,0.80855,1.103715


In [57]:
# What percentage of days are running over capacity of the 18-tonne HGV
len(mfs_split_overcap)/len(replace_mfs_split)

0.5789473684210527

It appears that on 58% of days, the orders exceed the capacity of the 18-tonne HGV.

# 

**CAN IGNORE FROM HERE ONWARDS FOR NOW**

### Investigating the Number of Trucks Needed (25th Percentile)

In [None]:
# Calculating the 25th percentile of weight, volume, number of pallets, and delivery cost
# for orders carried out by each courier
courier1 = kite_wd.groupby(['Courier'])['Weight', 'Volume (m3)', 'Pallets', 'DeliveryCost'].quantile(0.25) 
courier1

In [None]:
# Calulating the number of orders carried out by each courier for each day
courier2 = kite_wd.pivot_table(index = 'Courier', 
                               columns = 'DateDespatched', 
                               values = 'OrderID', aggfunc = 'count')
courier2

In [None]:
# Calculating the 25th percentile number of orders on a single day for each courier 
courier3 = courier2.quantile(0.25, axis=1)
courier3

#### To Replace DX Freight

In [None]:
# Based on 25th percentiles, calculating the total weight, volume, number of pallets, and delivery cost
# of DX Freight orders on a single working day 
dxfreight = courier1.iloc[0]*courier3.iloc[0]
dxfreight

In [None]:
# Computing the trucks needed to replace DX Freight
trucks_needed(dxfreight)

#### To Replace MFS

In [None]:
# Based on medians, calculating the total weight, volume, number of pallets, and delivery cost
# of MFS orders on a single working day 
mfs = courier1.iloc[1]*courier3.iloc[1]
mfs

In [None]:
# Computing the trucks needed to replace MFS
trucks_needed(mfs)

#### To Replace Split

In [None]:
# Based on medians, calculating the total weight, volume, number of pallets, and delivery cost
# of Split orders on a single working day 
split = courier1.iloc[2]*courier3.iloc[2]
split

In [None]:
# Computing the trucks needed to replace Split
trucks_needed(split)

#### To Replace MFS and Split

In [None]:
# Now grouping MFS and Split together to see the number of trucks needed to replace them as a group 
split_mfs = split + mfs
split_mfs

In [None]:
# Computing the trucks needed to replace the grouped MFS and Split orders
trucks_needed(split_mfs)