In [3]:
import pandas as pd

# Column schema for the toy delivery dataset. The model cell below uses
# "status" as the target and the remaining five columns as features.
columns = ["distance_km", "order_size", "order_time", "traffic", "rider_experience", "status"]

# One delivery per row; values are listed in the same order as `columns`.
data = [
    [2.0, 3, "morning",   "low",    3, "on_time"],
    [5.5, 2, "night",     "high",   1, "late"],
    [1.2, 1, "afternoon", "low",    2, "early"],
    [3.0, 5, "night",     "medium", 4, "on_time"],
    [7.8, 6, "morning",   "high",   1, "late"],
    [2.5, 2, "afternoon", "medium", 5, "on_time"],
    [4.0, 3, "night",     "high",   3, "late"],
    [1.5, 1, "morning",   "low",    2, "early"],
    [6.5, 4, "night",     "medium", 1, "late"],
    [3.2, 2, "afternoon", "medium", 3, "on_time"],
    [2.0, 5, "morning",   "low",    4, "early"],
    [5.8, 6, "night",     "high",   2, "late"],
    [3.5, 3, "afternoon", "medium", 3, "on_time"],
    [1.0, 1, "morning",   "low",    5, "early"],
    [6.0, 4, "night",     "high",   2, "late"],
    [2.8, 2, "afternoon", "medium", 3, "on_time"],
]

# Build the frame and leave it as the cell's last expression so the notebook
# renders it as a rich table.
df = pd.DataFrame(data=data, columns=columns)
df


Unnamed: 0,distance_km,order_size,order_time,traffic,rider_experience,status
0,2.0,3,morning,low,3,on_time
1,5.5,2,night,high,1,late
2,1.2,1,afternoon,low,2,early
3,3.0,5,night,medium,4,on_time
4,7.8,6,morning,high,1,late
5,2.5,2,afternoon,medium,5,on_time
6,4.0,3,night,high,3,late
7,1.5,1,morning,low,2,early
8,6.5,4,night,medium,1,late
9,3.2,2,afternoon,medium,3,on_time


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Split features and labels: "status" is the target, everything else is input.
X = df.drop(columns="status")
y = df["status"]

# Separate numeric and categorical columns
numeric_features = ["distance_km", "order_size", "rider_experience"]
categorical_features = ["order_time", "traffic"]

# ColumnTransformer drops any column it is not told about (remainder="drop"
# is its default), so fail loudly if a feature column was left off both lists.
unlisted_features = set(X.columns) - set(numeric_features) - set(categorical_features)
assert not unlisted_features, (
    f"feature columns missing from the preprocessor: {sorted(unlisted_features)}"
)

# Preprocessing setup: scale numeric columns, one-hot encode categorical ones.
# handle_unknown="ignore" encodes a category unseen during fit as all zeros
# instead of raising at predict time.
numeric_transformer = Pipeline([("scaler", StandardScaler())])
categorical_transformer = Pipeline([("onehot", OneHotEncoder(handle_unknown="ignore"))])

preprocessor = ColumnTransformer([
    ("num", numeric_transformer, numeric_features),
    ("cat", categorical_transformer, categorical_features)
])

# Model pipeline: preprocessing and classifier are fitted together, so the
# scaler/encoder statistics come from the training rows only.
model = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", RandomForestClassifier(n_estimators=100, random_state=42))
])

# Split the data into training and testing. stratify=y keeps every status
# class represented on both sides; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# Train
model.fit(X_train, y_train)

# Evaluate. With the 16-row frame defined above, the hold-out is only 4 rows,
# so treat these numbers as a smoke check rather than a performance estimate.
y_pred = model.predict(X_test)

# Pin the class order explicitly so the report and the matrix always list the
# classes in the same, known order, even if one is absent from the hold-out.
class_labels = model.classes_

# zero_division=0 reports 0.0 for a class that is never predicted, without the
# UndefinedMetricWarning the default ("warn") would emit on such a small split.
print(classification_report(y_test, y_pred, labels=class_labels, zero_division=0))

# Label rows (actual) and columns (predicted) so the matrix is readable
# without knowing scikit-learn's implicit sorted-label ordering.
cm_table = pd.DataFrame(
    confusion_matrix(y_test, y_pred, labels=class_labels),
    index=pd.Index(class_labels, name="actual"),
    columns=pd.Index(class_labels, name="predicted"),
)
print("Confusion Matrix:\n", cm_table, sep="")


              precision    recall  f1-score   support

       early       1.00      1.00      1.00         1
        late       1.00      0.50      0.67         2
     on_time       0.50      1.00      0.67         1

    accuracy                           0.75         4
   macro avg       0.83      0.83      0.78         4
weighted avg       0.88      0.75      0.75         4

Confusion Matrix:
 [[1 0 0]
 [0 1 1]
 [0 0 1]]
