# Simulated Supply Chain DataFrame

In [2]:
import numpy as np
import pandas as pd
from faker import Faker

In [3]:
# Initialize Faker
fake = Faker()

# Define the number of records you want to simulate
num_records = 10000  # Increased to 10,000 for larger dataset

# Define lists for categorical variables
weather_conditions = ["Clear", "Light Rain", "Heavy Rain", "Snow", "Fog"]
traffic_conditions = ["Light", "Moderate", "Heavy", "Severe"]
experience_levels = ["Junior", "Intermediate", "Senior"]
delivery_windows = ["Morning", "Afternoon", "Evening", "Overnight"]
package_types = ["Standard", "Fragile", "Perishable", "Oversized"]
route_types = ["Interstate", "Urban", "Suburban"]
truck_types = ["Box Truck", "Semi", "Flatbed"]
satisfaction_levels = ["Very Satisfied", "Satisfied", "Neutral", "Dissatisfied", "Very Dissatisfied"]
fuel_types = ["Diesel", "Gasoline"]

# Generate a range of dates
date_range = pd.date_range(start="2023-01-01", end="2023-12-31", freq='D')

# Generate random data
data = {
    "Route ID": [fake.uuid4() for _ in range(num_records)],
    "Delivery Time (hours)": np.random.uniform(1, 10, num_records),
    "Date": np.random.choice(date_range, num_records),
    "Fuel Costs (USD)": np.random.uniform(50, 1000, num_records),  # Adjusted for realism
    "Delivery Start Time": [fake.time(pattern="%H:%M:%S") for _ in range(num_records)],
    "Distance Traveled (miles)": np.random.uniform(50, 3000, num_records),  # Broadened range
    "Estimated Distance (miles)": np.random.uniform(50, 3000, num_records),
    "Weather Conditions": np.random.choice(weather_conditions, num_records),
    "Traffic Conditions": np.random.choice(traffic_conditions, num_records),
    "Driver Ratings": np.random.uniform(1, 5, num_records),
    "Customer Satisfaction": np.random.choice(satisfaction_levels, num_records),
    "Delays (hours)": np.random.uniform(0, 5, num_records),
    "Warehouse Storage Costs (USD)": np.random.uniform(100, 500, num_records),
    "Truck Maintenance Costs (USD)": np.random.uniform(500, 2000, num_records),  # Adjusted for realism
    "Load Type": np.random.choice(package_types, num_records),
    "Load Weight (tons)": np.random.uniform(0.5, 20, num_records),
    "Route Type": np.random.choice(route_types, num_records),
    "Truck Type": np.random.choice(truck_types, num_records),
    "Driver Experience": np.random.choice(experience_levels, num_records),
    "Delivery Window": np.random.choice(delivery_windows, num_records),
    "Truck Condition": np.random.randint(1, 6, num_records),  # Rating from 1 to 5
    "Labor Costs (USD)": np.random.uniform(20, 200, num_records),
    "Fuel Type": np.random.choice(fuel_types, num_records),
    "Toll Costs (USD)": np.random.uniform(0, 50, num_records),
    "Parking Costs (USD)": np.random.uniform(0, 30, num_records),
    "Idle Time (hours)": np.random.uniform(0, 2, num_records)
}

# Additional calculated columns
data["Distance Difference (miles)"] = data["Distance Traveled (miles)"] - data["Estimated Distance (miles)"]
data["Cost per Gallon (USD)"] = np.where(np.array(data["Fuel Type"]) == "Diesel", 3.5, 3.0)
data["Total Fuel Cost (USD)"] = data["Distance Traveled (miles)"] / np.random.uniform(5, 10) * data["Cost per Gallon (USD)"]
data["Insurance Costs (USD)"] = np.where(np.array(data["Load Type"]) == "Hazardous", np.random.uniform(50, 150, num_records), np.random.uniform(20, 100, num_records))
data["Breakdown Repair Costs (USD)"] = np.where(np.array(data["Truck Condition"]) <= 2, np.random.uniform(200, 1000, num_records), 0)
data["Overtime Labor Costs (USD)"] = np.where(data["Delivery Time (hours)"] > 8, (data["Delivery Time (hours)"] - 8) * np.random.uniform(20, 40), 0)
data["Fuel Surcharge (USD)"] = data["Fuel Costs (USD)"] * np.random.uniform(0.05, 0.15)
data["Idle Cost (USD)"] = data["Idle Time (hours)"] * data["Cost per Gallon (USD)"] * 0.5  # Assuming half a gallon per hour idling

# Calculate total operational costs
data["Total Operational Cost (USD)"] = (
    data["Fuel Costs (USD)"] +
    data["Toll Costs (USD)"] +
    data["Insurance Costs (USD)"] +
    data["Parking Costs (USD)"] +
    data["Breakdown Repair Costs (USD)"] +
    data["Overtime Labor Costs (USD)"] +
    data["Fuel Surcharge (USD)"] +
    data["Idle Cost (USD)"]
)

# Additional metrics
data["Fuel Cost per Mile"] = data["Fuel Costs (USD)"] / data["Distance Traveled (miles)"]
data["Delivery Efficiency Score"] = (
    1 / (1 + data["Fuel Cost per Mile"]) *
    (1 / (1 + data["Delays (hours)"])) *
    (1 / (1 + data["Load Weight (tons)"]))
)

# Create a DataFrame
logistics_df = pd.DataFrame(data)

In [4]:
# Save the DataFrame to a CSV file
logistics_df.to_csv("logistics_df.csv", index=False)  # index=False to avoid saving the index as a column

In [5]:
# Save the DataFrame to an Excel file
logistics_df.to_excel("logistics_df.xlsx", index=False)  # index=False to avoid saving the index as a column

In [6]:
# Load the dataset
logistics_df = pd.read_csv("logistics_df.csv")

# Display first few rows
logistics_df.head()

Unnamed: 0,Route ID,Delivery Time (hours),Date,Fuel Costs (USD),Delivery Start Time,Distance Traveled (miles),Estimated Distance (miles),Weather Conditions,Traffic Conditions,Driver Ratings,...,Cost per Gallon (USD),Total Fuel Cost (USD),Insurance Costs (USD),Breakdown Repair Costs (USD),Overtime Labor Costs (USD),Fuel Surcharge (USD),Idle Cost (USD),Total Operational Cost (USD),Fuel Cost per Mile,Delivery Efficiency Score
0,89273afc-bd2f-41ed-a73d-ef0b92913719,6.834667,2023-06-19,860.972014,10:21:08,2600.678278,1347.343378,Light Rain,Severe,2.990486,...,3.5,1697.019217,96.922976,660.421071,0.0,75.199379,0.219246,1729.539837,0.331057,0.040278
1,78800389-d0f5-41e7-9ab3-4401f6c25c32,5.090882,2023-02-24,847.397208,06:44:56,1493.396998,1610.912518,Clear,Moderate,1.243085,...,3.5,974.48555,33.1812,0.0,0.0,74.013722,1.420456,1000.309343,0.567429,0.04107
2,cc5a94e9-d53b-4d84-ba13-19d045c00e21,5.179179,2023-02-12,368.294777,16:45:48,495.929796,585.832494,Clear,Severe,1.334854,...,3.5,323.608806,56.347931,0.0,0.0,32.167757,2.569551,463.841459,0.742635,0.013087
3,36b33d73-1c8e-4782-9775-84def74b3cd0,3.271822,2023-09-23,740.660213,01:47:40,2518.656016,2720.531979,Heavy Rain,Moderate,4.355089,...,3.5,1643.497273,87.362167,0.0,0.0,64.691055,1.144174,960.391265,0.29407,0.054048
4,1139533f-4a9f-4348-b05c-10a55d768e04,7.007333,2023-06-20,323.146483,13:03:09,2466.773297,1489.795422,Clear,Severe,2.61814,...,3.0,1379.693375,26.07332,0.0,0.0,28.224396,0.813373,442.445597,0.131,0.036246
