### Imports and Installations

In [1]:
import pandas as pd
from geopy.geocoders import ArcGIS
from ortools.constraint_solver import pywrapcp
from ortools.constraint_solver import routing_enums_pb2
import sqlite3
import folium

### Connect to Database


In [2]:
database_path = "/app/input/supply_chain.db"

conn = sqlite3.connect(database_path)

query = """
    SELECT * 
    FROM cleaned_order_data
"""
supply_chain_df = pd.read_sql_query(query, conn)

# Ensure 'Order Date' is in datetime format
supply_chain_df['Order Date'] = pd.to_datetime(supply_chain_df['Order Date'], errors='coerce')

### Initialize Values

In [3]:
today_date = '2016-05-03'

start_date = pd.to_datetime(today_date) - pd.DateOffset(years=1)
    
# Filter for orders within the specified year window
supply_chain_filtered = supply_chain_df[
    (supply_chain_df['Order Date'] >= start_date) & 
    (supply_chain_df['Order Date'] < pd.to_datetime(today_date))
]

first_order_date = supply_chain_df['Order Date'].min()
last_order_date = supply_chain_df['Order Date'].max()

print(f"First Order Date: {first_order_date}")
print(f"Last Order Date: {last_order_date}")

First Order Date: 2015-01-01 00:00:00
Last Order Date: 2018-01-31 00:00:00


For simplicity, we assumed every package has the same size. and each order takes up 1 capacity unit. we also assume the snapshot of undelivered orders contain orders from today_date-2 till today_date
The Initialised values can be altered, to test different suppliers, timeframes etc. 

In [4]:
supplier = 'Apparel' # Supplier we decide to focus o
num_vehicles = 4
vehicle_capacity = 18

# Calculate the maximum number of packages that can be sent out
max_packages = num_vehicles * vehicle_capacity
# Calculate buffer, algo works better with slight buffer. 
buffer = int(0.1*max_packages)

## Order prioritization system - Priority Score 
- this score is to help the supplier decide which packages to deliver based on the number of vehicles and their capacity. 
- priority score metrics and weights are pre-determined, but can be changed according to changing business goals, but for now, the goal is to reduce the number of late deliveries / reduce delivery times, so the benefit per order is given lower weightage. 

In [5]:
def calculate_late_delivery_risk(supply_chain_df, today_date):
    """
    Calculate the risk of late delivery based on a specified date and grouped by Product Name and Shipping Mode.
    
    Parameters:
    - supply_chain_df: DataFrame containing the supply chain data.
    - today_date: The date to use as a reference for calculating the one-year window.
    
    Returns:
    - DataFrame summarizing late delivery probabilities for each product and shipping mode.
    """
    
    # Calculate the year window (one year before the given date)
    start_date = pd.to_datetime(today_date) - pd.DateOffset(years=1)
    
    # Filter for orders within the specified year window
    supply_chain_filtered = supply_chain_df[
        (supply_chain_df['Order Date'] >= start_date) & 
        (supply_chain_df['Order Date'] < pd.to_datetime(today_date))
    ]

    # Group by 'Product Name' and 'Shipping Mode', calculate total orders and late deliveries
    late_delivery_summary = supply_chain_filtered.groupby(['Product Name', 'Shipping Mode']).agg(
        total_orders=('Index', 'size'),  # Count all orders per product and shipping mode
        late_deliveries=('Delivery Status', lambda x: (x == 'Late delivery').sum())  # Count late deliveries
    ).reset_index()

    # Calculate % late delivery in past year
    late_delivery_summary['late_delivery_probability'] = (
        late_delivery_summary['late_deliveries'] / late_delivery_summary['total_orders']
    )

    # Select relevant columns for the final summary
    late_delivery_summary = late_delivery_summary[['Product Name', 'Shipping Mode', 'late_delivery_probability']]
    
    return late_delivery_summary


# Test function with the corrected implementation
late_delivery_summary = calculate_late_delivery_risk(supply_chain_df, today_date).drop_duplicates()
print(late_delivery_summary)


## METRIC 2
#Average Shipping time for each product

def calculate_avg_shipping_time(supply_chain_df, today_date):
    """
    Calculate the average shipping time for each product by order region based on a specified date.
    
    Parameters:
    - supply_chain_df: DataFrame containing the supply chain data.
    - today_date: The date to use as a reference for calculating the one-year window.
    
    Returns:
    - DataFrame summarizing average shipping time for each product by order region.
    """
    
    # Calculate the year window (one year before the given date)
    start_date = pd.to_datetime(today_date) - pd.DateOffset(years=1)
    
    # Filter for orders within the specified year window
    supply_chain_filtered = supply_chain_df[
        (supply_chain_df['Order Date'] >= start_date) & 
        (supply_chain_df['Order Date'] < pd.to_datetime(today_date))
    ]

    # Group by 'Product Name' and 'Order Region' to calculate the average 'Days for shipping (real)'
    avg_shipping_days_per_product_region = supply_chain_filtered.groupby(['Product Name', 'Shipping Mode'])['Days for shipment (real)'].mean()
    
    # Reset index and rename the column
    avg_shipping_days_per_product_region_df = avg_shipping_days_per_product_region.reset_index().rename(columns={'Days for shipment (real)': 'Avg Shipping Time'})

    return avg_shipping_days_per_product_region_df


avg_shipping_days_per_product_region_df = calculate_avg_shipping_time(supply_chain_df, today_date).drop_duplicates()
print(avg_shipping_days_per_product_region_df)



                                      Product Name   Shipping Mode  \
0                          Bag Boy Beverage Holder     First Class   
1                          Bag Boy Beverage Holder        Same Day   
2                          Bag Boy Beverage Holder    Second Class   
3                          Bag Boy Beverage Holder  Standard Class   
4    Bridgestone e6 Straight Distance NFL Carolina     First Class   
..                                             ...             ...   
211      adidas Men's Germany Black Crest Away Tee  Standard Class   
212  adidas Youth Germany Black/Red Away Match Soc     First Class   
213  adidas Youth Germany Black/Red Away Match Soc        Same Day   
214  adidas Youth Germany Black/Red Away Match Soc    Second Class   
215  adidas Youth Germany Black/Red Away Match Soc  Standard Class   

     late_delivery_probability  
0                     0.944444  
1                     0.750000  
2                     0.894737  
3                     0.379

In [6]:
# Pre-processing of raw order data 
# Example, filter to a specific time frame where these orders are still undelivered at that time

## Example real-time data 
date = pd.to_datetime(today_date)
start_date = date - pd.Timedelta(days=2)  # Start 2 days before today_date
end_date = date  # End on today_date

temp_df = supply_chain_df[
    (supply_chain_df['Order Date'].between(start_date, end_date)) & 
    (supply_chain_df['Warehouse Name'] == supplier)
]

# Process real-time data to feed into calculate_priority_score
order_df = (
    temp_df.merge(late_delivery_summary[['Product Name', 'Shipping Mode', 'late_delivery_probability']], 
           on=['Product Name', 'Shipping Mode'], 
           how='left')
    .merge(avg_shipping_days_per_product_region_df[['Product Name', 'Shipping Mode', 'Avg Shipping Time']], 
           on=['Product Name', 'Shipping Mode'], 
           how='left')
)

# Fill NaN values in 'Avg Shipping Time' with values from 'Days for shipment (scheduled)'
order_df['Avg Shipping Time'] = order_df['Avg Shipping Time'].fillna(order_df['Days for shipment (scheduled)'])


# Calculate the scheduled delivery date
order_df['Scheduled Delivery Date'] = order_df['Order Date'] + pd.to_timedelta(order_df['Days for shipment (scheduled)'], unit='D')

# Create a new column for Days Till Scheduled Delivery Date
order_df['Days Till Scheduled Delivery'] = (order_df['Scheduled Delivery Date'] - date).dt.days


In [7]:
print(order_df.info())



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 128 entries, 0 to 127
Data columns (total 59 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   Transaction Type               128 non-null    object        
 1   Days for shipment (real)       128 non-null    int64         
 2   Days for shipment (scheduled)  128 non-null    int64         
 3   Sales per customer             128 non-null    float64       
 4   Delivery Status                128 non-null    object        
 5   Late_delivery_risk             128 non-null    object        
 6   Product Category               128 non-null    object        
 7   Customer City                  128 non-null    object        
 8   Customer Country               128 non-null    object        
 9   Customer Fname                 128 non-null    object        
 10  Customer Id                    128 non-null    int64         
 11  Customer Lname     

In [8]:
# Function to calculate priority score 

def calculate_priority_score(df, weights=None):
    """
    Calculate the priority score for each order based on late delivery probability, 
    days till scheduled delivery, and average shipping time, with custom weights.
    
    Parameters:
    - df: DataFrame containing the required columns: Order Id, Product Name, Order Region, 
           Avg Shipping Time, Order Profit, late_delivery_probability, Days Till Scheduled Delivery.
    - weights: Dictionary to customize weights for each metric. If None, default weights are used.
    
    Returns:
    - DataFrame with priority scores.
    """
    
    # Default weights
    default_weights = {
        'late_delivery_probability': 0.5,   # Higher weight for late delivery probability
        'Days Till Scheduled Delivery': 0.4, # Higher weight for days till scheduled delivery
        'Avg Shipping Time': 0.3,           # Moderate weight for average shipping time --> takes into consideration order region 
        'Order Profit': 0.1            # Lowest weight for Order Profit
    }
    
    # Use custom weights if provided
    if weights:
        for key in weights:
            if key in default_weights:
                default_weights[key] = weights[key]

    # Normalize Days Till Scheduled Delivery (lower values should result in higher priority, so use inverse normalization)
    days_till_min = df['Days Till Scheduled Delivery'].min()
    days_till_max = df['Days Till Scheduled Delivery'].max()
    df['Days Till Scheduled Delivery'] = (days_till_max - df['Days Till Scheduled Delivery']) / (days_till_max - days_till_min)

    # Normalize Avg Shipping Time (shorter shipping times should result in higher priority, so use inverse normalization)
    shipping_time_min = df['Avg Shipping Time'].min()
    shipping_time_max = df['Avg Shipping Time'].max()
    df['Avg Shipping Time'] = (shipping_time_max - df['Avg Shipping Time']) / (shipping_time_max - shipping_time_min)

    # Normalize Order Profit (direct normalization as higher Order Profit increases priority)
    benefit_min = df['Order Profit'].min()
    benefit_max = df['Order Profit'].max()
    df['Order Profit'] = (df['Order Profit'] - benefit_min) / (benefit_max - benefit_min)

    # Calculate the priority score using the updated formula
    df['priority_score'] = (
        df['late_delivery_probability'] * default_weights['late_delivery_probability'] +  # Higher probability, higher priority
        df['Days Till Scheduled Delivery'] * default_weights['Days Till Scheduled Delivery'] +  # Lower days till delivery, higher priority
        df['Avg Shipping Time'] * default_weights['Avg Shipping Time'] +  # Shorter shipping time, higher priority
        df['Order Profit'] * default_weights['Order Profit']  # Lower weight for Order Profit
    )
    
    return df



# Example usage:
priority_scores_df = calculate_priority_score(order_df).sort_values(by='priority_score', ascending=False)

# Example custom weights:
# custom_weights = {
#     'late_delivery_probability': 0.5,
#     'Lead Time': 0.3,
#     'Avg Shipping Time': 0.2,
#     'Order Profit': 0.1
# }

# priority_scores_df = calculate_priority_score(order_df, weights=custom_weights)


In [9]:
priority_scores_df

Unnamed: 0,Transaction Type,Days for shipment (real),Days for shipment (scheduled),Sales per customer,Delivery Status,Late_delivery_risk,Product Category,Customer City,Customer Country,Customer Fname,...,Shipping Day of Week,difference in shipment days,Index,Warehouse Latitude,Warehouse Longitude,late_delivery_probability,Avg Shipping Time,Scheduled Delivery Date,Days Till Scheduled Delivery,priority_score
39,PAYMENT,2,1,158.369995,Late delivery,Late,Sporting Goods,Panorama City,EE. UU.,Mary,...,Tuesday,1,28701,39.495914,-98.989983,0.956803,0.561038,2016-05-02,0.833333,1.065261
20,PAYMENT,2,1,128.690002,Late delivery,Late,Cleats,Lutz,EE. UU.,Raymond,...,Tuesday,1,18994,39.495914,-98.989983,0.961029,0.561038,2016-05-02,0.833333,1.065052
19,PAYMENT,2,1,129.990005,Late delivery,Late,Cleats,Lutz,EE. UU.,Raymond,...,Tuesday,1,18987,39.495914,-98.989983,0.961029,0.561038,2016-05-02,0.833333,1.064279
64,PAYMENT,2,1,201.570007,Late delivery,Late,Sporting Goods,Caguas,Puerto Rico,Lauren,...,Tuesday,1,59189,39.495914,-98.989983,0.956803,0.561038,2016-05-02,0.833333,1.060891
21,PAYMENT,2,1,109.180000,Late delivery,Late,Sporting Goods,Lutz,EE. UU.,Raymond,...,Tuesday,1,19221,39.495914,-98.989983,0.956803,0.561038,2016-05-02,0.833333,1.060156
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26,DEBIT,2,4,95.980003,Advance shipping,Not Late,Sporting Goods,Caguas,Puerto Rico,Sharon,...,Thursday,-2,22611,39.495914,-98.989983,0.383632,0.003804,2016-05-07,0.000000,0.256349
28,DEBIT,6,4,98.379997,Late delivery,Late,Sporting Goods,Caguas,Puerto Rico,Michael,...,Monday,2,22676,39.495914,-98.989983,0.383632,0.003804,2016-05-07,0.000000,0.255349
116,TRANSFER,3,4,127.389999,Advance shipping,Not Late,Cleats,Caguas,Puerto Rico,George,...,Friday,-1,160765,39.495914,-98.989983,0.373085,0.010085,2016-05-07,0.000000,0.253738
122,TRANSFER,5,4,129.990005,Late delivery,Late,Cleats,South Richmond Hill,EE. UU.,Jennifer,...,Sunday,1,171073,39.495914,-98.989983,0.373085,0.010085,2016-05-07,0.000000,0.253536


### Filter Orders to be delivered today 

- to be fed into Route optimization

In [10]:
todays_deliveries = priority_scores_df.head(max_packages-buffer)

# Display the result
todays_deliveries.info()

<class 'pandas.core.frame.DataFrame'>
Index: 65 entries, 39 to 70
Data columns (total 60 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   Transaction Type               65 non-null     object        
 1   Days for shipment (real)       65 non-null     int64         
 2   Days for shipment (scheduled)  65 non-null     int64         
 3   Sales per customer             65 non-null     float64       
 4   Delivery Status                65 non-null     object        
 5   Late_delivery_risk             65 non-null     object        
 6   Product Category               65 non-null     object        
 7   Customer City                  65 non-null     object        
 8   Customer Country               65 non-null     object        
 9   Customer Fname                 65 non-null     object        
 10  Customer Id                    65 non-null     int64         
 11  Customer Lname           

## Route Optimization Algorithm
goal: minimize delivery time or delivery distance 

constraints: 
1. number of vehicles each day (edited according to day - mutable)
2. vehicle capacity (put a large amount for simplicity) - fixed at a random number 

Limitations: 
- This algorithm might not help if the timeline is unachievable anyway. for eg. the warehouse is in US but need to deliver to Singapore
- this algo is going into city centriods only, not to specific locations --> if we had more data, could use same algo on actual customer locations for better usability
- to further improve this in the future, we can focus on high value customers only. 



In [11]:
filtered_df = todays_deliveries

In [12]:
# Get location of city Centroids 


# Initialize the ArcGIS geocoder
geolocator = ArcGIS()

# Geocode each unique city along with state and country to get latitude and longitude
city_coords = []
skipped_cities = []  # List to hold skipped cities
unique_cities = filtered_df[['Order City', 'Order State', 'Order Country']].drop_duplicates()

for index, row in unique_cities.iterrows():
    city = row['Order City']
    state = row['Order State']
    country = row['Order Country']
    
    # Construct the full address
    full_address = f"{city}, {state}, {country}"
    
    try:
        # Use ArcGIS to geocode the full address
        location = geolocator.geocode(full_address)
        if location:
            city_coords.append({'City': city, 'State': state, 'Country': country, 
                                'Latitude': location.latitude, 'Longitude': location.longitude})
        else:
            skipped_cities.append(full_address)  # Add to skipped list if location is None
    except Exception as e:
        skipped_cities.append(full_address)  # Add to skipped list on exception

# Convert the list of coordinates into a DataFrame
city_df = pd.DataFrame(city_coords)
print(city_df)


              City               State        Country   Latitude   Longitude
0       Fort Worth               Texas  United States  32.749904  -97.330339
1           Auburn             Alabama  United States  32.609079  -85.481731
2     Johnson City           Tennessee  United States  36.320959  -82.341756
3   San Bernardino          California  United States  34.108317 -117.294096
4     Philadelphia         Pensilvania  United States  39.951060  -75.165620
5            Dover     Nuevo Hampshire  United States  43.190466  -70.877001
6          Chicago            Illinois  United States  41.883229  -87.632398
7    New York City          Nueva York  United States  40.713047  -74.007230
8          Deltona             Florida  United States  28.917512  -81.221278
9       Wilmington  Carolina del Norte  United States  34.236700  -77.946197
10     Little Rock            Arkansas  United States  34.748745  -92.275105
11         Phoenix             Arizona  United States  33.448204 -112.072585

In [13]:
# Merge the coordinates into the filtered DataFrame
filtered_df = filtered_df.merge(city_df, left_on=['Order City', 'Order State', 'Order Country'], 
                                                right_on=['City', 'State', 'Country'], how='left')

# Drop the unnecessary columns if needed
filtered_df.drop(columns=['City', 'State', 'Country'], inplace=True)

# Display the updated DataFrame
print(filtered_df.head())
print(filtered_df.info())

#Display the skipped cities
print("Skipped cities:", skipped_cities)

  Transaction Type  Days for shipment (real)  Days for shipment (scheduled)  \
0          PAYMENT                         2                              1   
1          PAYMENT                         2                              1   
2          PAYMENT                         2                              1   
3          PAYMENT                         2                              1   
4          PAYMENT                         2                              1   

   Sales per customer Delivery Status Late_delivery_risk Product Category  \
0          158.369995   Late delivery               Late   Sporting Goods   
1          128.690002   Late delivery               Late           Cleats   
2          129.990005   Late delivery               Late           Cleats   
3          201.570007   Late delivery               Late   Sporting Goods   
4          109.180000   Late delivery               Late   Sporting Goods   

   Customer City Customer Country Customer Fname  ...  Index  

In [14]:
# Create a distance matrix based on the haversine formula
def haversine(lat1, lon1, lat2, lon2):
    from math import radians, sin, cos, sqrt, atan2
    
    R = 6371  # Earth radius in kilometers
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c  # Distance in kilometers


# Build the distance matrix with symmetrical distances
num_locations = len(filtered_df) + 1  # +1 for the supplier location
distance_matrix = [[0] * num_locations for _ in range(num_locations)]

# Supplier location (index 0)
supplier_lat = filtered_df['Warehouse Latitude'].unique()[0]
supplier_lon = filtered_df['Warehouse Longitude'].unique()[0]

# Fill in the distances
for i in range(num_locations):
    for j in range(i, num_locations):  # Start from i to ensure symmetry
        if i == 0:
            # Distance from supplier to customers (Supplier is at index 0)
            distance = haversine(supplier_lat, supplier_lon,
                                 filtered_df.iloc[j - 1]['Latitude'],
                                 filtered_df.iloc[j - 1]['Longitude']) if j > 0 else 0
        else:
            # Distance between customers
            if i == j:
                distance = 0  # Distance from a location to itself
            else:
                distance = haversine(filtered_df.iloc[i - 1]['Latitude'],
                                     filtered_df.iloc[i - 1]['Longitude'],
                                     filtered_df.iloc[j - 1]['Latitude'],
                                     filtered_df.iloc[j - 1]['Longitude'])
        
        # Update both (i, j) and (j, i) for symmetry
        distance_matrix[i][j] = distance
        distance_matrix[j][i] = distance

# Convert the distance matrix to integers for easy viewing
distance_matrix_int = [[int(round(cell)) for cell in row] for row in distance_matrix]

# Function to print the matrix in a readable format
def print_distance_matrix(matrix):
    print("Distance Matrix:")
    for row in matrix:
        print("\t".join(map(str, row)))

# Print the integer distance matrix
print_distance_matrix(distance_matrix_int)



# Create the routing index manager
manager = pywrapcp.RoutingIndexManager(num_locations, num_vehicles, 0)

# Create Routing Model
routing = pywrapcp.RoutingModel(manager)

# Create and register a transit callback for distance
def distance_callback(from_index, to_index):
    from_node = manager.IndexToNode(from_index)
    to_node = manager.IndexToNode(to_index)
    return int(distance_matrix[from_node][to_node])  # Scale down distances

# Register the distance callback
distance_callback_index = routing.RegisterTransitCallback(distance_callback)
routing.SetArcCostEvaluatorOfAllVehicles(distance_callback_index)

# Add Capacity constraint using AddDimensionWithVehicleCapacity
def demand_callback(from_index):
    # Each order has a demand of 1
    return 1  

# Register the demand callback
demand_callback_index = routing.RegisterUnaryTransitCallback(demand_callback)

# Add Capacity dimension
routing.AddDimensionWithVehicleCapacity(
    demand_callback_index,
    0,  # null capacity slack
    [vehicle_capacity] * num_vehicles,  # List of vehicle maximum capacities
    True,  # start cumul to zero
    "Capacity"
)

# Solve the problem
search_parameters = pywrapcp.DefaultRoutingSearchParameters()
search_parameters.first_solution_strategy = (
    routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)  # Minimize based on distance

solution = routing.SolveWithParameters(search_parameters)

# Function to print the solution
def print_solution(routing, manager, solution):
    total_distance = 0
    for vehicle_id in range(num_vehicles):
        index = routing.Start(vehicle_id)
        route_distance = 0
        route = []
        while not routing.IsEnd(index):
            route.append(manager.IndexToNode(index))
            previous_index = index
            index = solution.Value(routing.NextVar(index))
            route_distance += routing.GetArcCostForVehicle(previous_index, index, vehicle_id)
        route.append(manager.IndexToNode(index))  # Add end of route
        total_distance += route_distance
        print(f'Route for vehicle {vehicle_id + 1}: {route} Distance: {route_distance}')
    print(f'Total Distance: {total_distance}')

# Print solution
if solution:
    print_solution(routing, manager, solution)
else:
    print("No solution found!")
    


Distance Matrix:
0	765	1433	1433	1433	1433	1500	1433	1500	1500	1500	1732	1500	2032	2371	2371	993	2122	2122	2032	2032	2007	1956	2122	795	2007	2032	1347	2003	2032	771	765	1777	1777	2102	1374	1814	1374	2299	1777	2032	1814	2299	2299	1814	2032	2032	2032	2299	2330	2299	739	1374	739	1347	1347	1732	514	514	2075	2032	2075	2003	2032	2075	1764
765	0	1108	1108	1108	1108	1427	1108	1427	1427	1427	1856	1427	2133	2578	2578	1327	2250	2250	2133	2133	1594	1802	2250	517	1594	2133	1374	2260	2133	31	0	1934	1934	2223	1646	1943	1646	2487	1934	2133	1943	2487	2487	1943	2133	2133	2133	2487	2492	2487	1322	1646	1322	1280	1280	1856	1036	1036	2670	2133	2670	2096	2133	2670	1889
1433	1108	0	0	0	0	503	0	503	503	503	2947	503	1232	1735	1735	1049	1361	1361	1232	1232	578	722	1361	672	578	1232	2474	3320	1232	1078	1108	3019	3019	1330	1102	3035	1102	1633	3019	1232	3035	1633	1633	3035	1232	1232	1232	1633	1609	1633	1406	1102	1406	2387	2387	2947	1916	1916	3505	1232	3505	1192	1232	3505	2981
1433	1108	0	0	0	0	503	0	503	503	503	294

## Plot Optimized Route

In [15]:
# Create a base map centered around the supplier's location
supplier_lat = filtered_df['Warehouse Latitude'].unique()[0]
supplier_lon = filtered_df['Warehouse Longitude'].unique()[0]
base_map = folium.Map(location=[supplier_lat, supplier_lon], zoom_start=8)

    
vehicle_colors = ['blue', 'green', 'orange', 'purple', 'black', 'red', 'yellow'] 

# Function to plot routes
def plot_route(route, vehicle_id):
    # Plot each point in the route
    for i in range(len(route) - 1):
        start_node = route[i]
        end_node = route[i + 1]
        
        # Get latitudes and longitudes for the start and end nodes
        if start_node == 0:  # Supplier location
            start_lat, start_lon = supplier_lat, supplier_lon
        else:
            start_lat = filtered_df.iloc[start_node - 1]['Latitude']
            start_lon = filtered_df.iloc[start_node - 1]['Longitude']

        if end_node == 0:  # Supplier location
            end_lat, end_lon = supplier_lat, supplier_lon
        else:
            end_lat = filtered_df.iloc[end_node - 1]['Latitude']
            end_lon = filtered_df.iloc[end_node - 1]['Longitude']

        # Create a line between points
        folium.PolyLine(
            locations=[[start_lat, start_lon], [end_lat, end_lon]],
            color=vehicle_colors[vehicle_id - 1],  # Different colors for different vehicles
            weight=2.5,
            opacity=1
        ).add_to(base_map)

    # Mark the route points
    for node in route:
        if node == 0:  # Supplier location
            folium.Marker(
                location=[supplier_lat, supplier_lon],
                popup='Supplier',
                icon=folium.Icon(color='red')
            ).add_to(base_map)
        else:
            lat = filtered_df.iloc[node - 1]['Latitude']
            lon = filtered_df.iloc[node - 1]['Longitude']
            folium.Marker(
                location=[lat, lon],
                popup=f'Order {node}',
                icon=folium.Icon(color='blue')
            ).add_to(base_map)

# Plot routes for each vehicle
for vehicle_id in range(num_vehicles):
    index = routing.Start(vehicle_id)
    route = []
    while not routing.IsEnd(index):
        route.append(manager.IndexToNode(index))
        index = solution.Value(routing.NextVar(index))
    route.append(manager.IndexToNode(index))  # Add end of route
    plot_route(route, vehicle_id + 1)

# Display the map
base_map



## Justification of Usefulness

Cases for comparison:
* First In First Out - slighlty unrealistic, but used as a baseline worst case scenario 
* Order by region - group orders by region, route of delivery to similar regions together

Comparing distances travelled by these two cases with our optimized case, we see that our optimized route travels a much shorter distance. 
* refer to map for specific routes 
* lower distances in theory --> decreased fuel costs and delivery timings



Order by FIFO

In [16]:
# Initialize routes for each vehicle
vehicle_routes = [[] for _ in range(num_vehicles)]  # List to hold routes for each vehicle
current_capacities = [0] * num_vehicles  # Track current capacities of each vehicle

# FIFO order assignment
for i, order in enumerate(filtered_df.itertuples()):
    assigned = False
    for vehicle_id in range(num_vehicles):
        if current_capacities[vehicle_id] < vehicle_capacity:
            vehicle_routes[vehicle_id].append(i + 1)  # i + 1 to account for supplier at index 0
            current_capacities[vehicle_id] += 1
            assigned = True
            break
    if not assigned:
        print(f"Could not assign order {i + 1}, all vehicles at capacity.")

# Supplier location
supplier_lat = filtered_df['Warehouse Latitude'].unique()[0]
supplier_lon = filtered_df['Warehouse Longitude'].unique()[0]

# Function to calculate total distance for all vehicle routes
def calculate_total_distance(vehicle_routes, filtered_df):
    total_distance = 0.0
    
    for route in vehicle_routes:
        route_distance = 0.0
        # Loop through each pair of points in the route
        for i in range(len(route) - 1):
            start_node = route[i]
            end_node = route[i + 1]
            
            # Get coordinates for the start and end nodes
            if start_node == 0:  # Supplier location
                start_lat, start_lon = supplier_lat, supplier_lon
            else:
                start_lat = filtered_df.iloc[start_node - 1]['Latitude']
                start_lon = filtered_df.iloc[start_node - 1]['Longitude']
                
            if end_node == 0:  # Supplier location
                end_lat, end_lon = supplier_lat, supplier_lon
            else:
                end_lat = filtered_df.iloc[end_node - 1]['Latitude']
                end_lon = filtered_df.iloc[end_node - 1]['Longitude']
                
            # Calculate distance between start and end nodes and add to route distance
            route_distance += haversine(start_lat, start_lon, end_lat, end_lon)
        
        # Add route distance to the total distance for all vehicles
        total_distance += route_distance
    
    return total_distance

# Calculate and print the total distance
total_distance_traveled = calculate_total_distance(vehicle_routes, filtered_df)
print(f"Total distance traveled by all vehicles: {total_distance_traveled:.2f} km")

# Create a base map centered around the supplier's location
base_map = folium.Map(location=[supplier_lat, supplier_lon], zoom_start=8)

# Define vehicle colors
vehicle_colors = ['blue', 'green', 'orange', 'purple' ]  # Adjusted for 4 vehicles

# Function to plot routes
def plot_route(route, vehicle_id):
    # Plot each point in the route
    for i in range(len(route) - 1):
        start_node = route[i]
        end_node = route[i + 1]
        
        # Get latitudes and longitudes for the start and end nodes
        if start_node == 0:  # Supplier location
            start_lat, start_lon = supplier_lat, supplier_lon
        else:
            start_lat = filtered_df.iloc[start_node - 1]['Latitude']
            start_lon = filtered_df.iloc[start_node - 1]['Longitude']

        if end_node == 0:  # Supplier location
            end_lat, end_lon = supplier_lat, supplier_lon
        else:
            end_lat = filtered_df.iloc[end_node - 1]['Latitude']
            end_lon = filtered_df.iloc[end_node - 1]['Longitude']

        # Create a line between points
        folium.PolyLine(
            locations=[[start_lat, start_lon], [end_lat, end_lon]],
            color=vehicle_colors[vehicle_id % len(vehicle_colors)],  # Use modulo for colors
            weight=2.5,
            opacity=1
        ).add_to(base_map)

    # Mark the route points
    for node in route:
        if node == 0:  # Supplier location
            folium.Marker(
                location=[supplier_lat, supplier_lon],
                popup='Supplier',
                icon=folium.Icon(color='red')
            ).add_to(base_map)
        else:
            lat = filtered_df.iloc[node - 1]['Latitude']
            lon = filtered_df.iloc[node - 1]['Longitude']
            folium.Marker(
                location=[lat, lon],
                popup=f'Order {node}',
                icon=folium.Icon(color='blue')
            ).add_to(base_map)

# Plot routes for each vehicle
for vehicle_id in range(num_vehicles):
    route = vehicle_routes[vehicle_id]
    route.insert(0, 0)  # Start from the supplier (index 0)
    route.append(0)      # Return to the supplier at the end
    plot_route(route, vehicle_id)

# Display the map
base_map


Total distance traveled by all vehicles: 91044.15 km


Order by Region, State, City
- according to research, this is a plausible way companies decide on their delivery resource allocation, by grouping orders based on state and city, to ship them together

In [17]:
# Initialize routes for each vehicle
vehicle_routes = [[] for _ in range(num_vehicles)]  # List to hold routes for each vehicle
current_capacities = [0] * num_vehicles  # Track current capacities of each vehicle

# Group orders by Order Region, Order State, and Order City and extract the relevant order indices
grouped_orders = filtered_df.groupby(['Order Region', 'Order State', 'Order City'])['Order Id'].apply(list).to_dict()

# Assign orders to vehicles based on grouped criteria
for location, orders in grouped_orders.items():
    for order in orders:
        assigned = False
        for vehicle_id in range(num_vehicles):
            if current_capacities[vehicle_id] < vehicle_capacity:
                vehicle_routes[vehicle_id].append(order)  # Append Order Id directly
                current_capacities[vehicle_id] += 1
                assigned = True
                break
        if not assigned:
            print(f"Could not assign order {order}, all vehicles at capacity.")

# Create a base map centered around the supplier's location
supplier_lat = filtered_df['Warehouse Latitude'].unique()[0]
supplier_lon = filtered_df['Warehouse Longitude'].unique()[0]
base_map = folium.Map(location=[supplier_lat, supplier_lon], zoom_start=8)

# Define vehicle colors
vehicle_colors = ['blue', 'green', 'orange', 'purple']  # Adjusted for 4 vehicles

# Function to plot routes
def plot_route(route, vehicle_id):
    # Plot each point in the route
    for i in range(len(route) - 1):
        start_node = route[i]
        end_node = route[i + 1]
        
        # Get latitudes and longitudes for the start and end nodes
        if start_node == 0:  # Supplier location
            start_lat, start_lon = supplier_lat, supplier_lon
        else:
            if start_node in filtered_df['Order Id'].values:
                start_lat = filtered_df.loc[filtered_df['Order Id'] == start_node, 'Latitude'].values[0]
                start_lon = filtered_df.loc[filtered_df['Order Id'] == start_node, 'Longitude'].values[0]
            else:
                print(f"Warning: Start node {start_node} not found in filtered_df.")
                continue  # Skip this iteration

        if end_node == 0:  # Supplier location
            end_lat, end_lon = supplier_lat, supplier_lon
        else:
            if end_node in filtered_df['Order Id'].values:
                end_lat = filtered_df.loc[filtered_df['Order Id'] == end_node, 'Latitude'].values[0]
                end_lon = filtered_df.loc[filtered_df['Order Id'] == end_node, 'Longitude'].values[0]
            else:
                print(f"Warning: End node {end_node} not found in filtered_df.")
                continue  # Skip this iteration

        # Create a line between points
        folium.PolyLine(
            locations=[[start_lat, start_lon], [end_lat, end_lon]],
            color=vehicle_colors[vehicle_id % len(vehicle_colors)],  # Use modulo for colors
            weight=2.5,
            opacity=1
        ).add_to(base_map)

    # Mark the route points
    for node in route:
        if node == 0:  # Supplier location
            folium.Marker(
                location=[supplier_lat, supplier_lon],
                popup='Supplier',
                icon=folium.Icon(color='red')
            ).add_to(base_map)
        else:
            if node in filtered_df['Order Id'].values:
                lat = filtered_df.loc[filtered_df['Order Id'] == node, 'Latitude'].values[0]
                lon = filtered_df.loc[filtered_df['Order Id'] == node, 'Longitude'].values[0]
                folium.Marker(
                    location=[lat, lon],
                    popup=f'Order {node}',
                    icon=folium.Icon(color='blue')
                ).add_to(base_map)
            else:
                print(f"Warning: Order {node} not found in filtered_df.")

# Plot routes for each vehicle
for vehicle_id in range(num_vehicles):
    route = vehicle_routes[vehicle_id]
    if route:  # Only plot if there are orders in the route
        route.insert(0, 0)  # Start from the supplier (index 0)
        route.append(0)      # Return to the supplier at the end
        plot_route(route, vehicle_id)

# Display the map
base_map




In [18]:
# Print Distance traveled by Order by City routing method


def calculate_total_distance(vehicle_routes, filtered_df):
    total_distance = 0.0
    # Get supplier location
    supplier_lat = filtered_df['Warehouse Latitude'].unique()[0]
    supplier_lon = filtered_df['Warehouse Longitude'].unique()[0]
    
    for route in vehicle_routes:
        # Iterate through each pair of consecutive points in the route
        for i in range(len(route) - 1):
            start_node = route[i]
            end_node = route[i + 1]
            
            # Get coordinates for the start node
            if start_node == 0:  # Supplier location
                start_lat, start_lon = supplier_lat, supplier_lon
            else:
                start_lat = filtered_df.loc[filtered_df['Order Id'] == start_node, 'Latitude'].values[0]
                start_lon = filtered_df.loc[filtered_df['Order Id'] == start_node, 'Longitude'].values[0]
                
            # Get coordinates for the end node
            if end_node == 0:  # Supplier location
                end_lat, end_lon = supplier_lat, supplier_lon
            else:
                end_lat = filtered_df.loc[filtered_df['Order Id'] == end_node, 'Latitude'].values[0]
                end_lon = filtered_df.loc[filtered_df['Order Id'] == end_node, 'Longitude'].values[0]
            
            # Calculate distance between the two nodes
            distance = haversine(start_lat, start_lon, end_lat, end_lon)
            total_distance += distance

    return total_distance

# Calculate and print the total distance
total_distance_traveled = calculate_total_distance(vehicle_routes, filtered_df)
print(f"Total distance traveled by vehicles (Order by City): {total_distance_traveled:.2f} km")

Total distance traveled by vehicles (Order by City): 31469.07 km


## Conclusion
* We can see that our route optimization algorithm reduces the total distance travelled, compared to other methods of vehicle routing. 
* pipeline can be customize inputs to focus on high value customers, or specific suppliers/shipping modes  


## Additional recommendations to reduce delivery time
* create more warehouse locations, spread out in the different regions 
* manage expectations better -> increase the delivery time for overseas orders, no same day/ next day delivery for overseas orders, 
* increase vehicle capacity and quantity for suppliers that have more orders

## Limitations
* We dont actually know the number of vehicles and their capacities used by this company. Neither do we have their original routes. So we are not able to truly verify improvements. If data was available, would have tried to run a simulation with the optimized route and compared the number of Late deliveries to verify improvement. 
* We were not able to get and use data on traffic conditions as well as specific routing. 

## Future work
* experiment with a more specific routing algorithm that takes real time traffic data as well as seasonal weather issues into consideration
