In [7]:
%pip install pandas numpy scikit-learn joblib

Note: you may need to restart the kernel to use updated packages.


In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib

In [9]:
# Read the raw delivery records (path is relative to the working directory)
# and preview the first five rows.
data = pd.read_csv(filepath_or_buffer='Food_Delivery_Times.csv')
data.head(n=5)

Unnamed: 0,Order_ID,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min
0,522,7.93,Windy,Low,Afternoon,Scooter,12,1.0,43
1,738,16.42,Clear,Medium,Evening,Bike,20,2.0,84
2,741,9.52,Foggy,Low,Night,Scooter,28,1.0,59
3,661,7.44,Rainy,Medium,Afternoon,Scooter,5,1.0,37
4,412,19.03,Clear,Low,Morning,Bike,16,5.0,68


In [10]:
# Per-column count of missing values before any cleaning.
data.isna().sum()

Order_ID                   0
Distance_km                0
Weather                   30
Traffic_Level             30
Time_of_Day               30
Vehicle_Type               0
Preparation_Time_min       0
Courier_Experience_yrs    30
Delivery_Time_min          0
dtype: int64

In [11]:
# Drop every row that has at least one missing value. The result is rebound
# to `data` instead of mutating the frame in place, so the object displayed
# by earlier cells is left untouched while later cells keep using `data`.
data = data.dropna()
# Confirm that no missing values remain.
data.isnull().sum()

Order_ID                  0
Distance_km               0
Weather                   0
Traffic_Level             0
Time_of_Day               0
Vehicle_Type              0
Preparation_Time_min      0
Courier_Experience_yrs    0
Delivery_Time_min         0
dtype: int64

In [12]:
# Target: the delivery-time column.
y = data['Delivery_Time_min']
# Features: every column except the order identifier and the target.
X = data.drop(['Order_ID', 'Delivery_Time_min'], axis='columns')

# Split into training and test sets: 20% of the rows are held out for testing,
# and the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# Feature columns, grouped by the preprocessing each group receives.
categorical_features = ['Weather', 'Traffic_Level', 'Time_of_Day', 'Vehicle_Type']
numeric_features = ['Distance_km', 'Preparation_Time_min', 'Courier_Experience_yrs']

# Preprocessing: numeric columns are standardised, categorical columns are
# one-hot encoded; categories not seen during fitting are ignored instead of
# raising an error.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
numeric_transformer = StandardScaler()

preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Full model: the preprocessing step followed by a gradient-boosting regressor
# (seeded so that training is reproducible).
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', GradientBoostingRegressor(random_state=42)),
])

In [14]:
# Fit the whole pipeline (preprocessing + regressor) on the training split.
model.fit(X=X_train, y=y_train)

# Save the fitted pipeline to disk; the value returned by joblib.dump is the
# cell's displayed output.
joblib.dump(value=model, filename='delivery_time_model.pkl')

['delivery_time_model.pkl']

In [15]:
# Predict on the held-out test split.
y_pred = model.predict(X_test)

# Error metrics; RMSE is the square root of MSE.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

# One metric per line, rounded to two decimals.
print(
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    sep="\n",
)

Mean Absolute Error (MAE): 6.48
Mean Squared Error (MSE): 87.36
Root Mean Squared Error (RMSE): 9.35


In [16]:
def predict_delivery_time(input_data, model_path='delivery_time_model.pkl'):
    """Predict delivery times for the given orders using a saved model.

    Parameters
    ----------
    input_data : pandas.DataFrame
        Order parameters; passed unchanged to the loaded model's ``predict``.
        Presumably it must contain the feature columns the model was fitted
        on -- verify against the training code.
    model_path : str or path-like, optional
        Location of the serialized model to load. Defaults to
        ``'delivery_time_model.pkl'``, so existing one-argument calls behave
        as before.

    Returns
    -------
    The value returned by the loaded model's ``predict`` for ``input_data``.
    """
    # NOTE: joblib.load unpickles the file, and unpickling can execute
    # arbitrary code -- only point `model_path` at files from a trusted source.
    loaded_model = joblib.load(model_path)
    return loaded_model.predict(input_data)

# Inference example: a single order, given as one record whose keys are the
# feature columns.
sample_data = pd.DataFrame([
    {
        'Distance_km': 5.0,
        'Weather': 'Clear',
        'Traffic_Level': 'Medium',
        'Time_of_Day': 'Afternoon',
        'Vehicle_Type': 'Bike',
        'Preparation_Time_min': 15,
        'Courier_Experience_yrs': 2.0,
    },
])

# One row in, so the first element of the result is the prediction to report.
predicted_time = predict_delivery_time(sample_data)
print(f"Predicted Delivery Time: {predicted_time[0]:.2f} minutes")

Predicted Delivery Time: 35.83 minutes
