In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [35]:
df = pd.read_csv(r"C:\Projects\food-delivery-regression\data\processed\food_delivery_processed.csv")

In [36]:
threshold = df['Delivery_Time'].median()
df['Delivery_Status'] = (df['Delivery_Time'] > threshold).astype(int)

In [37]:
X = df.drop(columns=['Delivery_Time', 'Delivery_Status'])

In [38]:
Y = df['Delivery_Status']

In [39]:
X.shape, Y.shape

((200, 18), (200,))

In [40]:
x_train, x_test, y_train, y_test = train_test_split (X,Y,test_size=0.2,random_state=42,stratify=Y)

In [41]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((160, 18), (40, 18), (160,), (40,))

In [42]:
num_cols = [
    'Distance',
    'Delivery_Person_Experience',
    'Restaurant_Rating',
    'Customer_Rating',
    'Order_Cost',
    'Tip_Amount'
]

In [43]:
scaler = StandardScaler()

In [44]:
x_train[num_cols] = scaler.fit_transform(x_train[num_cols])
x_test[num_cols] = scaler.transform(x_test[num_cols])

In [45]:
x_train[num_cols].describe().round(2)

Unnamed: 0,Distance,Delivery_Person_Experience,Restaurant_Rating,Customer_Rating,Order_Cost,Tip_Amount
count,160.0,160.0,160.0,160.0,160.0,160.0
mean,0.0,-0.0,-0.0,-0.0,0.0,-0.0
std,1.0,1.0,1.0,1.0,1.0,1.0
min,-1.64,-1.53,-1.79,-1.51,-1.65,-1.5
25%,-0.73,-0.8,-0.8,-0.93,-0.91,-0.94
50%,-0.15,-0.07,0.05,-0.06,0.0,0.04
75%,0.71,1.03,0.9,0.82,0.9,0.82
max,2.02,1.76,1.74,1.98,1.73,1.82


In [46]:
model = LogisticRegression()

In [47]:
model.fit(x_test,y_test)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [49]:
y_pred = model.predict(x_test)

In [50]:
accuracy_score(y_test, y_pred)

0.775

In [51]:
precision_score(y_test, y_pred)

0.7894736842105263

In [52]:
recall_score(y_test, y_pred)

0.75

In [53]:
f1_score(y_test, y_pred)

0.7692307692307693

In [54]:
confusion_matrix(y_test, y_pred)

array([[16,  4],
       [ 5, 15]])

In [None]:
#   The logistic regression model achieves an accuracy of 77.5% with balanced precision (0.79) and recall (0.75). This indicates the model performs
#   reasonably well in distinguishing delayed vs fast deliveries.
#   The confusion matrix shows a comparable number of false positives and false negatives, suggesting no extreme bias toward either class.  
#   Overall, the model serves as a strong baseline for delivery delay classification.