In [2]:
import pandas as pd
import numpy as np
import random
from collections import defaultdict


In [3]:
# Read the raw shipment records from disk
df = pd.read_csv('shipment-data.csv')

# Parse both timestamp columns into datetime values
for date_col in ('ShipmentDateTime', 'RequiredDeliveryDateTime'):
    df[date_col] = pd.to_datetime(df[date_col])

# Calculate shipment volume from dimensions
def calculate_volume(dimensions):
    """Return the volume described by a 'LxWxH' dimension string.

    Parameters
    ----------
    dimensions : str
        Three numeric edge lengths separated by 'x' (case-insensitive),
        e.g. '10x20x30'. Whitespace around each number is ignored.

    Returns
    -------
    float
        Product of the three edge lengths (cubic centimeters when the
        edge lengths are given in centimeters).

    Raises
    ------
    TypeError
        If ``dimensions`` is not a string (e.g. a missing value).
    ValueError
        If the string does not hold exactly three components, or a
        component cannot be parsed as a number.
    """
    if not isinstance(dimensions, str):
        raise TypeError(
            f"dimensions must be a string like '10x20x30', got {dimensions!r}"
        )

    # Lower-casing lets 'X' act as a separator too; it does not affect float parsing.
    dims = dimensions.lower().split('x')
    if len(dims) != 3:
        # Refuse malformed input instead of silently dropping extra components.
        raise ValueError(
            f"expected three 'x'-separated dimensions, got {dimensions!r}"
        )

    length, width, height = (float(part) for part in dims)
    return length * width * height

# Derive each shipment's volume from its dimension string
df['ShipmentVolume_cm3'] = df['ShipmentDimensions_cm'].apply(calculate_volume)

# Numeric encodings for the categorical columns
priority_mapping = {'Platinum': 3, 'Gold': 2, 'Silver': 1}
service_level_mapping = {'IP': 1, 'IE': 0}

# Keep only the columns used by the RL experiment; work on a copy of `df`
rl_columns = [
    'ShipmentID', 'ShipmentWeight_kg', 'ShipmentVolume_cm3', 'ServiceLevel',
    'CustomerPriorityLevel', 'RequiredDeliveryDateTime', 'ShipmentDateTime',
    'RevenuePerShipment_USD', 'CostPerShipment_USD', 'TransportationMode',
]
rl_data = df[rl_columns].copy()

# Replace category labels with their numeric codes (labels missing from a mapping become NaN)
for column, mapping in (('CustomerPriorityLevel', priority_mapping),
                        ('ServiceLevel', service_level_mapping)):
    rl_data[column] = rl_data[column].map(mapping)

# Hours between the shipment timestamp and the required delivery timestamp
time_window = rl_data['RequiredDeliveryDateTime'] - rl_data['ShipmentDateTime']
rl_data['DeliveryTimeWindow_hours'] = time_window.dt.total_seconds() / 3600


In [4]:
# Transportation modes available as actions
transportation_modes = ['Purple Tail', 'TNT Network', 'Commercial Airlines']

# Weight capacity of each mode in kg (simplified for demonstration)
capacities = {'Purple Tail': 100000, 'TNT Network': 80000, 'Commercial Airlines': 120000}

# Every mode starts with nothing loaded
current_load = dict.fromkeys(transportation_modes, 0)

# Transit time of each mode in hours, written as days * 24
transit_times = {
    'Purple Tail': 3 * 24,
    'TNT Network': 4 * 24,
    'Commercial Airlines': 2 * 24,
}


In [5]:
# Discrete values each state component can take
priority_levels = [1, 2, 3]  # 1 = Silver, 2 = Gold, 3 = Platinum
service_levels = [0, 1]      # 0 = IE, 1 = IP


def _new_action_values():
    """Return a zero-filled Q-value vector with one entry per transportation mode."""
    return np.zeros(len(transportation_modes))


# Q-table: a state is created with all-zero action values the first time it is looked up
Q_table = defaultdict(_new_action_values)


In [6]:
# Q-learning hyperparameters
num_episodes = 1_000  # number of training episodes
alpha = 0.1           # learning rate: step size of each Q-value update
gamma = 0.9           # discount factor applied to the bootstrapped value
epsilon = 0.1         # exploration rate: probability of a random action


In [7]:
# Training
# Fixed seed so that Restart & Run All reproduces the same Q-table and policy.
TRAINING_SEED = 42
explore_rng = random.Random(TRAINING_SEED)          # drives epsilon-greedy exploration
shuffle_rng = np.random.RandomState(TRAINING_SEED)  # drives the per-episode shuffle

# Start from an empty table so re-running this cell retrains from scratch
# instead of continuing from whatever a previous run left behind.
Q_table.clear()

# Only these columns are read during training; selecting them once keeps the
# per-episode shuffle and row iteration cheap.
training_columns = ['CustomerPriorityLevel', 'ServiceLevel', 'ShipmentWeight_kg',
                    'CostPerShipment_USD', 'RevenuePerShipment_USD',
                    'DeliveryTimeWindow_hours']
training_data = rl_data[training_columns]

for episode in range(num_episodes):
    # Shuffle into a local frame; `rl_data` itself keeps its original order
    episode_data = training_data.sample(frac=1, random_state=shuffle_rng)

    # Every episode starts with nothing loaded on any mode
    current_load = {mode: 0 for mode in transportation_modes}

    for row in episode_data.itertuples(index=False):
        # State = (numeric customer priority, numeric service level)
        state = (row.CustomerPriorityLevel, row.ServiceLevel)

        # Epsilon-greedy choice of transportation mode
        if explore_rng.random() < epsilon:
            # Explore: uniformly random mode
            action_index = explore_rng.randrange(len(transportation_modes))
        else:
            # Exploit: mode with the highest current Q-value for this state
            action_index = int(np.argmax(Q_table[state]))

        action = transportation_modes[action_index]

        # Base reward is the shipment's profit; constraint violations forfeit
        # the profit and add a fixed penalty.
        reward = 0
        profit = row.RevenuePerShipment_USD - row.CostPerShipment_USD
        weight = row.ShipmentWeight_kg

        # Capacity constraint
        if current_load[action] + weight <= capacities[action]:
            current_load[action] += weight
        else:
            reward -= 1000  # Penalty for exceeding capacity
            profit = 0      # No profit if shipment can't be allocated
            # The rejected shipment is not loaded, so the load is left as is;
            # marking the mode as full here would wrongly penalise later,
            # lighter shipments that still fit.

        # Delivery time constraint
        if transit_times[action] > row.DeliveryTimeWindow_hours:
            reward -= 500  # Penalty for late delivery
            profit = 0     # Assume customer will not pay if delivery is late

        # Reward is the profit minus any penalties
        reward += profit

        # Q-learning update. The action does not change the state, so the
        # update bootstraps from the same state's best Q-value.
        next_state = state
        best_next_value = np.max(Q_table[next_state])
        td_target = reward + gamma * best_next_value
        td_delta = td_target - Q_table[state][action_index]
        Q_table[state][action_index] += alpha * td_delta


In [8]:
# Greedy policy: for every state in the Q-table, pick the mode with the highest Q-value
policy = {
    visited_state: transportation_modes[np.argmax(action_values)]
    for visited_state, action_values in Q_table.items()
}


In [9]:
# Invert the encodings once (code -> label) so the loop does not rebuild them per state
priority_labels = {code: label for label, code in priority_mapping.items()}
service_labels = {code: label for label, code in service_level_mapping.items()}

print("Optimal Policy Derived from Q-Learning:")
for state, action in policy.items():
    # Fall back to the raw code when a state component has no known label
    # (e.g. NaN from an unmapped category) instead of raising KeyError.
    priority_level = priority_labels.get(state[0], state[0])
    service_level = service_labels.get(state[1], state[1])
    print(f"Customer Priority Level: {priority_level}, Service Level: {service_level} --> Assign to: {action}")


Optimal Policy Derived from Q-Learning:
Customer Priority Level: Gold, Service Level: IE --> Assign to: Purple Tail
Customer Priority Level: Platinum, Service Level: IE --> Assign to: Purple Tail
Customer Priority Level: Platinum, Service Level: IP --> Assign to: Purple Tail
Customer Priority Level: Silver, Service Level: IP --> Assign to: Commercial Airlines
Customer Priority Level: Gold, Service Level: IP --> Assign to: Purple Tail
Customer Priority Level: Silver, Service Level: IE --> Assign to: Purple Tail


In [10]:
# Evaluate the learned policy with a single pass over the shipments.
# Start with nothing loaded on any mode.
current_load = {mode: 0 for mode in transportation_modes}
total_profit = 0
penalties = 0

for index, row in rl_data.iterrows():
    # Look up the mode the policy prescribes for this shipment's state
    state = (row['CustomerPriorityLevel'], row['ServiceLevel'])
    action = policy.get(state, transportation_modes[0])  # Default to first mode if state not in policy

    weight = row['ShipmentWeight_kg']
    profit = row['RevenuePerShipment_USD'] - row['CostPerShipment_USD']

    # Check capacity constraint
    if current_load[action] + weight <= capacities[action]:
        current_load[action] += weight
    else:
        # Capacity exceeded, apply penalty
        penalties += 1000
        profit = 0  # No profit if shipment can't be allocated
        # The rejected shipment is not loaded, so the load is left unchanged;
        # forcing it to full capacity would wrongly reject later, lighter
        # shipments that still fit.

    # Check delivery time constraint
    if transit_times[action] > row['DeliveryTimeWindow_hours']:
        # Delivery will be late, apply penalty
        penalties += 500
        profit = 0  # Assume customer will not pay if delivery is late

    total_profit += profit

print(f"Total Profit: USD {total_profit}")
print(f"Total Penalties: USD {penalties}")
print(f"Net Profit: USD {total_profit - penalties}")


Total Profit: USD 4300
Total Penalties: USD 0
Net Profit: USD 4300
