# Delivery Time Estimation Tool

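This notebook implements a tool for estimating delivery times with a Random Forest regressor (scikit-learn's `RandomForestRegressor`). The input CSV is expected to contain `distance`, `package_size`, `date`, and `delivery_time` columns.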

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

In [2]:
class DeliveryTimeEstimationTool:
    """Estimate delivery times with a Random Forest regressor.

    Intended workflow: ``load_data`` -> ``preprocess_data`` -> ``train_model``
    -> ``predict_delivery_time``.

    Attributes:
        model: ``RandomForestRegressor`` fitted by ``train_model``.
        scaler: ``StandardScaler`` fitted by ``train_model`` on the training
            split only, and reused to scale prediction inputs.
        features: Names of the feature columns fed to the model.
        target: Name of the column being predicted.
        data: The loaded ``DataFrame`` (kept unscaled), or ``None``.
        accuracy_history: One accuracy value per ``train_model`` call.
        feature_names_: Alias of ``features``, kept for backward compatibility.
    """

    def __init__(self):
        self.model = RandomForestRegressor(n_estimators=100, random_state=42)
        self.scaler = StandardScaler()
        self.features = ['distance', 'package_size', 'day_of_week']
        self.target = 'delivery_time'
        self.data = None
        self.accuracy_history = []
        self.feature_names_ = self.features
        # Explicit flag: `self.model` is never None, so it cannot be used to
        # tell whether training has happened.
        self._is_trained = False

    def load_data(self, file_path):
        """Read the CSV at ``file_path`` into ``self.data``.

        Args:
            file_path: Anything accepted by ``pandas.read_csv``.
        """
        self.data = pd.read_csv(file_path)
        print("Data loaded successfully.")

    def preprocess_data(self):
        """Derive the ``day_of_week`` feature on ``self.data``.

        When a ``date`` column is present, ``day_of_week`` is (re)computed
        from it using pandas' encoding (Monday = 0 ... Sunday = 6). When it is
        absent, an already existing ``day_of_week`` column is used as-is.

        The data is intentionally left unscaled: scaling happens inside
        ``train_model`` so the scaler only ever sees training rows, and so
        calling this method repeatedly cannot re-scale scaled values.

        Raises:
            ValueError: If no data has been loaded.
            KeyError: If neither ``date`` nor ``day_of_week`` is available.
        """
        if self.data is None:
            raise ValueError("No data loaded. Please load data first.")

        if 'date' in self.data.columns:
            # Convert the calendar date to a numerical day of week.
            self.data['day_of_week'] = pd.to_datetime(self.data['date']).dt.dayofweek
        elif 'day_of_week' not in self.data.columns:
            raise KeyError(
                "Data must contain a 'date' column (or a numeric 'day_of_week' column)."
            )

        self.feature_names_ = self.features

    def train_model(self):
        """Fit the scaler and the model, then record hold-out accuracy.

        The data is split 80/20 (fixed seed). The scaler is fitted on the
        training split only and then applied to both splits, so no statistic
        of the hold-out rows influences training. Accuracy is reported as
        ``1 - MAE / mean(y_test)`` and appended to ``accuracy_history``.

        Raises:
            ValueError: If no data has been loaded.
            KeyError: If a feature or target column is missing (typically
                because ``preprocess_data`` has not been run).
        """
        if self.data is None:
            raise ValueError("No data loaded. Please load data first.")

        required = list(self.features) + [self.target]
        missing = [col for col in required if col not in self.data.columns]
        if missing:
            raise KeyError(
                f"Missing column(s) {missing}. Run preprocess_data() before training."
            )

        X = self.data[self.features]
        y = self.data[self.target]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Fit the scaler on the training rows only; the hold-out rows are
        # merely transformed with the training statistics.
        X_train_scaled = pd.DataFrame(self.scaler.fit_transform(X_train),
                                      columns=self.features, index=X_train.index)
        X_test_scaled = pd.DataFrame(self.scaler.transform(X_test),
                                     columns=self.features, index=X_test.index)

        self.model.fit(X_train_scaled, y_train)
        self._is_trained = True

        # Calculate accuracy
        y_pred = self.model.predict(X_test_scaled)
        mean_actual = np.mean(y_test)
        if mean_actual == 0:
            # The relative error is undefined when the mean target is zero.
            accuracy = float('nan')
        else:
            accuracy = 1 - mean_absolute_error(y_test, y_pred) / mean_actual
        self.accuracy_history.append(accuracy)

        print(f"Model trained. Current accuracy: {accuracy:.2%}")

    def predict_delivery_time(self, distance, package_size, day_of_week):
        """Predict the delivery time for a single shipment.

        Args:
            distance: Raw (unscaled) distance, in the units of the training data.
            package_size: Raw package-size value, as in the training data.
            day_of_week: Day of week as an integer, Monday = 0 ... Sunday = 6.

        Returns:
            The predicted value, in the units of the training target column.

        Raises:
            ValueError: If ``train_model`` has not completed successfully.
        """
        if not getattr(self, '_is_trained', False):
            raise ValueError("Model not trained. Please train the model first.")

        input_data = pd.DataFrame([[distance, package_size, day_of_week]],
                                  columns=self.features)
        input_data_scaled = pd.DataFrame(self.scaler.transform(input_data),
                                         columns=self.features)
        prediction = self.model.predict(input_data_scaled)[0]

        return prediction

    def adjust_prediction(self, prediction, adjustment_factor):
        """Return ``prediction`` multiplied by ``adjustment_factor``."""
        return prediction * adjustment_factor

    def track_performance(self):
        """Print the accuracy recorded by each ``train_model`` call so far."""
        if not self.accuracy_history:
            print("No performance data available yet.")
        else:
            print("Accuracy history:")
            for i, acc in enumerate(self.accuracy_history):
                print(f"Iteration {i+1}: {acc:.2%}")

## Example Usage

In [3]:
# Create the estimator and point it at the training CSV
# (the file must provide the columns the tool expects).
csv_path = "sample-delivery-data.csv"
tool = DeliveryTimeEstimationTool()
tool.load_data(csv_path)

# Prepare the features, then fit the model
tool.preprocess_data()
tool.train_model()

Data loaded successfully.
Model trained. Current accuracy: 97.94%


In [4]:
# Make a prediction
distance = 28  # km
package_size = 3  # size category
day_of_week = 3  # Thursday (0 = Monday, 6 = Sunday)

estimated_time = tool.predict_delivery_time(distance, package_size, day_of_week)
print(f"Estimated delivery time: {estimated_time:.2f} hours")

# Adjust prediction: scale the estimate by a manual correction factor
adjustment_factor = 1.1  # 10% increase
adjusted_time = tool.adjust_prediction(estimated_time, adjustment_factor)
print(f"Adjusted delivery time: {adjusted_time:.2f} hours")

Estimated delivery time: 4.72 hours
Adjusted delivery time: 5.19 hours


In [5]:
# Track performance: print the accuracy recorded by each train_model() call so far
tool.track_performance()

Accuracy history:
Iteration 1: 96.23%
