In [37]:
import numpy as np
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, roc_auc_score, accuracy_score



np.random.seed(42)
n = 1000
x = pd.DataFrame({
    "route_distance_km" : np.random.uniform(100,5000,n),
    "carrier_reliability" : np.random.uniform(0.3,0.95,n),
    "port_congestion_index": np.random.uniform(0,1,n),
    "weather_risk_score" : np.random.uniform(0,1,n),
    "avg_speed_last_6h" : np.random.uniform(30,120,n)
})

y = (
    0.002 * x["route_distance_km"] -
    3 * x["carrier_reliability"] + 
    4 * x["port_congestion_index"] +
    6 * x["weather_risk_score"] + 
    np.random.normal(0,1,n)
)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size =0.2, random_state=42)
model = XGBRegressor(n_estimators = 100, max_depth=4,learning_rate =0.05,subsample =0.8, colsample_bytree = 0.8, random_state=42)
model.fit(x_train,y_train)
print("Xtrain:",x_train, "X_test: ", x_test, "Y_train: ",y_train,"y_test: ",y_test)
y_pred = model.predict(x_test)
print("Y prediction: ",y_pred)

mae = mean_absolute_error(y_test,y_pred)
print("MAE(Hours) = ", mae)
importances = model.feature_importances_
for name,imp in zip(x.columns,importances):
    print(f"{name}: {imp:.3f}")


Xtrain:      route_distance_km  carrier_reliability  port_congestion_index  \
29          327.607022             0.838795               0.654175   
535        4729.352076             0.379226               0.834145   
695        3092.038208             0.344312               0.519032   
557         241.035114             0.567169               0.299566   
836        4582.744915             0.383262               0.993255   
..                 ...                  ...                    ...   
106        2110.876323             0.557557               0.857179   
270        4065.869662             0.751738               0.104225   
860        3900.085322             0.804691               0.866770   
435        3534.748744             0.319483               0.389482   
102        1640.344307             0.874461               0.157955   

     weather_risk_score  avg_speed_last_6h  
29             0.632282          62.442705  
535            0.129679          82.919247  
695            0

In [38]:
import lightgbm as lgb
from sklearn.metrics import roc_auc_score, accuracy_score

np.random.seed(42)
n = 1000
x = pd.DataFrame({
    "route_distance_km" : np.random.uniform(100,5000,n),
    "carrier_reliability" : np.random.uniform(0.3,0.95,n),
    "port_congestion_index": np.random.uniform(0,1,n),
    "weather_risk_score" : np.random.uniform(0,1,n),
    "avg_speed_last_6h" : np.random.uniform(30,120,n)
})

SLA_hours = 6.0

y_delay = (y>SLA_hours).astype(int)

X_train, X_test, Y_train, Y_test = train_test_split(x,y_delay,test_size= 0.2,random_state=42)

model2 = lgb.LGBMClassifier(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=6,
    num_leaves=31,
    subsample=0.8,
    colsample_bytree = 0.8,
    random_state=42
)

model2.fit(X_train,Y_train)

y_prob = model2.predict_proba(X_test)[:,1]
y_delay_pred = (y_prob>0.5).astype(int)

auc= roc_auc_score(Y_test,y_prob)
accuracy = accuracy_score(Y_test,y_delay_pred)

print("Delay Risk AUC = ", auc)
print("Delay Risk Accuracy = ",accuracy)

for name, val in zip(x.columns, model2.feature_importances_):
    print(f"{name}: {val}")



[LightGBM] [Info] Number of positive: 559, number of negative: 241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000193 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1275
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.698750 -> initscore=0.841353
[LightGBM] [Info] Start training from score 0.841353
Delay Risk AUC =  0.9488147187168298
Delay Risk Accuracy =  0.9
route_distance_km: 1442
carrier_reliability: 1137
port_congestion_index: 1379
weather_risk_score: 1226
avg_speed_last_6h: 872
