# Modeling

---

## 1. Introduction

- Describe the modeling objectives.
- Specify the target variable(s) and the type of models you intend to use.

## 2. Loading and Preparing Data

```python
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the feature-engineered dataset from the processed-data folder
# (path is relative to the directory this code is run from).
df = pd.read_csv('../data/processed/engineered_data.csv')

# Predictor columns and regression target, named once so the selection
# below stays readable.
# NOTE(review): if 'Cost per Mile' was computed from the target column
# during feature engineering, it leaks the target into X -- confirm.
feature_columns = [
    'Distance Traveled (miles)',
    'Cost per Mile',
    'Delivery Efficiency',
]
target_column = 'Total Operational Cost (USD)'

X = df[feature_columns]
y = df[target_column]


```

```python
# Hold out 20% of the rows for evaluation; the fixed seed makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


```

```python
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Initialize and train the model. The fixed seed keeps the forest's
# bootstrap sampling and feature selection reproducible.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict on the held-out rows and evaluate.
y_pred = model.predict(X_test)
# The `squared=False` argument of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6, so passing it raises TypeError on
# current releases. Taking the square root of the MSE gives the same RMSE
# on every scikit-learn version.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print(f"Root Mean Squared Error: {rmse}")


```

```python
import matplotlib.pyplot as plt

# Importance scores from the fitted forest, one per column of X and in
# the same order as X.columns.
feature_importances = model.feature_importances_

# Draw a horizontal bar chart on the current axes: one bar per feature,
# bar length equal to its importance score.
ax = plt.gca()
ax.barh(X.columns, feature_importances)
ax.set_xlabel("Importance")
ax.set_title("Feature Importance")
plt.show()
