In [1]:
import pandas as pd
import numpy as np
import joblib
import re
from math import radians, sin, cos, sqrt, atan2
from itertools import product
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_absolute_error, r2_score, classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import matplotlib as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
print("=== TRAIN DELIVERY SPEED (DS) ===")
df_ds = pd.read_csv('delivery speed - uds-orders-aug2024.csv')
df_ds = df_ds.dropna(subset = ['deliveredAt', 'createdAt'])
df_ds['deliveredAt'] = pd.to_datetime(df_ds['deliveredAt'], errors = 'coerce')
df_ds['createdAt'] = pd.to_datetime(df_ds['createdAt'], errors = 'coerce')
df_ds = df_ds.dropna(subset=['createdAt', 'deliveredAt'])
df_ds['delivery_duration_hours'] = (df_ds['deliveredAt'] - df_ds['createdAt']).dt.total_seconds() / 3600
df_ds = df_ds[df_ds['delivery_duration_hours'].between(0.1, 72)]
def haversine(lat1, lon1, lat2, lon2):
    R = 6371
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat, dlon = lat2 - lat1, lon2 - lon1
    a = sin(dlat/2)**2 + cos(lat1)*cos(lat2)*sin(dlon/2)**2
    return 2 * R * atan2(sqrt(a), sqrt(1 - a))
def extract_district(address):
    if pd.isna(address):
        return 'unknown'
    try: 
        match = re.search(r"(Quận|Q\.?|Huyện|H\.?)\s*([\w\s\dĐđÂâĂăÊêÔôƠơƯư]+)", str(address))
        if match:
            return match.group(0).strip()
    except re.error:
        return "Unknown"
    return "Unknown"
if 'senderAddress' in df_ds.columns and 'receiverAddress' in df_ds.columns:
    df_ds["sender_district"] = df_ds['senderAddress'].apply(extract_district)
    df_ds['receiver_district'] = df_ds['receiverAddress'].apply(extract_district)
else:
    df_ds['sender_district'] = "Unknown"
    df_ds['receiver_district'] = "Unknown"
df_ds['hour_of_day'] = df_ds['createdAt'].dt.hour
df_ds['day_of_week'] = df_ds['createdAt'].dt.dayofweek
df_ds['is_weekend'] = df_ds['day_of_week'].isin([5,6]).astype(int)
# encoders for district
le_sender = LabelEncoder()
le_receiver = LabelEncoder()
df_ds['sender_district'] = le_sender.fit_transform(df_ds['sender_district'].astype(str))
df_ds['receiver_district'] = le_receiver.fit_transform(df_ds['receiver_district'].astype(str))  # FIXED: use le_receiver here
features_ds = ['shippingDistance', 'hour_of_day', 'day_of_week', 'sender_district', 'receiver_district', 'is_weekend']
X_ds = df_ds[features_ds].copy()
y_ds = df_ds['delivery_duration_hours']
scaler_ds = StandardScaler()
X_ds_scaled = scaler_ds.fit_transform(X_ds)

X_train_ds, X_test_ds, y_train_ds, y_test_ds = train_test_split(X_ds_scaled, y_ds, test_size=0.2, random_state=42)
model_DS = RandomForestRegressor(n_estimators=300, random_state=42, n_jobs=-1)
model_DS.fit(X_train_ds, y_train_ds)
y_pred_ds = model_DS.predict(X_test_ds)
mae = mean_absolute_error(y_test_ds, y_pred_ds)
r2 = r2_score(y_test_ds, y_pred_ds)
print(f"DS trained — MAE: {mae:.3f}h, R2: {r2:.3f}")
# save artifacts for DS
joblib.dump(model_DS, 'model_DS.pkl')
joblib.dump(scaler_ds, 'scaler_DS.pkl')
joblib.dump(le_sender, 'le_sender.pkl')
joblib.dump(le_receiver, 'le_receiver.pkl')
print("Saved DS artifacts: model_DS.pkl, scaler_DS.pkl, le_sender.pkl, le_receiver.pkl\n")


=== TRAIN DELIVERY SPEED (DS) ===
DS trained — MAE: 2.459h, R2: 0.520
Saved DS artifacts: model_DS.pkl, scaler_DS.pkl, le_sender.pkl, le_receiver.pkl



In [3]:
print("=== TRAIN DELIVERY RELIABILITY (DR) ===")
df_dr = pd.read_csv('delivery reliability - DR.csv')
df_dr = df_dr.dropna()
target = 'Reached.on.Time_Y.N'  # binary 0/1 or 'Y'/'N' as in your data
X = df_dr.drop(columns=[target])
y = df_dr[target]

categorical_cols = ['Warehouse_block', 'Mode_of_Shipment', 'Product_importance', 'Gender']
numeric_cols = [col for col in X.columns if col not in categorical_cols]
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(drop='first', sparse_output=False), categorical_cols),
    ('num', StandardScaler(), numeric_cols)
])
X_train_dr, X_test_dr, y_train_dr, y_test_dr = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
log_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(max_iter=1000))
])
log_pipeline.fit(X_train_dr, y_train_dr)
y_pred_dr = log_pipeline.predict(X_test_dr)
y_proba_dr = log_pipeline.predict_proba(X_test_dr)[:,1]
print("DR trained.")
print(classification_report(y_test_dr, y_pred_dr))
# save pipeline (easier to use later)
joblib.dump(log_pipeline, 'model_DR_pipeline.pkl')
# also save preprocessor separately (if needed)
joblib.dump(log_pipeline.named_steps['preprocessor'], 'encoder_DR.pkl')
print("Saved DR artifacts: model_DR_pipeline.pkl, encoder_DR.pkl\n")


=== TRAIN DELIVERY RELIABILITY (DR) ===
DR trained.
              precision    recall  f1-score   support

           0       0.57      0.63      0.60       887
           1       0.73      0.67      0.70      1313

    accuracy                           0.66      2200
   macro avg       0.65      0.65      0.65      2200
weighted avg       0.66      0.66      0.66      2200

Saved DR artifacts: model_DR_pipeline.pkl, encoder_DR.pkl



In [4]:
print("=== TRAIN DELIVERY FLEXIBILITY (DF) ===")
df_df = pd.read_csv('delivery flexibility - amazon_delivery.csv')
df_df.columns = [c.strip() for c in df_df.columns]

for col in ['Vehicle','Traffic','Weather','Area','Category']:
    if col in df_df.columns:
        df_df[col] = df_df[col].astype(str).str.strip().str.lower()
    else:
        df_df[col] = 'unknown'

veh_map = {'scooter': 'bike', 'van': 'truck', 'motorcycle': 'motorcycle'}
df_df['Vehicle'] = df_df['Vehicle'].map(lambda x: veh_map.get(x, x))
area_map = {'metropolitian': 'urban'}
df_df['Area'] = df_df['Area'].map(lambda x: area_map.get(x, x))

def normalize_weather(w):
    w = str(w).lower()
    if 'sun' in w: return 'sunny'
    if 'cloud' in w: return 'cloudy'
    if 'sandstorms' in w: return 'stormy'
    return w
df_df['Weather'] = df_df['Weather'].map(normalize_weather)

def normalize_traffic(t):
    t = str(t).lower()
    if any(x in t for x in ['jam', 'heavy', 'congest']): return 'jam'
    if any(x in t for x in ['low', 'light']): return 'light'
    if any(x in t for x in ['medium', 'mod']): return 'medium'
    return t
df_df['Traffic'] = df_df['Traffic'].map(normalize_traffic)

def normalize_cat(c):
    s = str(c).lower()
    if 'frag' in s or 'glass' in s: return 'fragile'
    if 'bulk' in s or 'large' in s: return 'bulky'
    return 'regular'
df_df['Category'] = df_df['Category'].map(normalize_cat)

def flexibility_score(row):
    score = 0
    if row['Vehicle'] == 'motorcycle': score += 1
    if row['Area'] == 'urban': score += 1
    if row['Weather'] in ['sunny', 'cloudy']: score += 1
    if row['Traffic'] == 'jam': score -= 1
    if row['Category'] in ['fragile','bulky']: score -= 1
    if score >= 2: return 2
    if score == 1: return 1
    return 0

df_df['flexibility_score'] = df_df.apply(flexibility_score, axis=1)
print("Flexibility distribution:\n", df_df['flexibility_score'].value_counts().sort_index())

features_df = ['Vehicle', 'Traffic', 'Area', 'Category', 'Weather']
X_df = df_df[features_df]
y_df = df_df['flexibility_score']

encoder_df = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
X_df_enc = encoder_df.fit_transform(X_df)

X_train_df, X_test_df, y_train_df, y_test_df = train_test_split(X_df_enc, y_df, test_size=0.2, random_state=42, stratify=y_df)
clf_df = RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1)
clf_df.fit(X_train_df, y_train_df)
y_pred_df = clf_df.predict(X_test_df)
print("DF trained. Accuracy:", (y_pred_df == y_test_df).mean())
print(classification_report(y_test_df, y_pred_df))

# save DF artifacts
joblib.dump(clf_df, 'model_DF.pkl')
joblib.dump(encoder_df, 'encoder_DF.pkl')
print("Saved DF artifacts: model_DF.pkl, encoder_DF.pkl\n")


=== TRAIN DELIVERY FLEXIBILITY (DF) ===
Flexibility distribution:
 flexibility_score
0     4380
1    15583
2    23776
Name: count, dtype: int64
DF trained. Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       876
           1       1.00      1.00      1.00      3117
           2       1.00      1.00      1.00      4755

    accuracy                           1.00      8748
   macro avg       1.00      1.00      1.00      8748
weighted avg       1.00      1.00      1.00      8748

Saved DF artifacts: model_DF.pkl, encoder_DF.pkl



In [5]:
print("=== LOAD ARTIFACTS & PREPARE ML² ===")
model_DS = joblib.load('model_DS.pkl')
scaler_DS = joblib.load('scaler_DS.pkl')
le_sender = joblib.load('le_sender.pkl')
le_receiver = joblib.load('le_receiver.pkl')

model_DR_pipeline = joblib.load('model_DR_pipeline.pkl')  # full pipeline (preprocessor + classifier)
encoder_DR = joblib.load('encoder_DR.pkl')               # preprocessor object
model_DF = joblib.load('model_DF.pkl')
encoder_DF = joblib.load('encoder_DF.pkl')

# union of features to randomly synthesize inputs for ML²
ds_features = features_ds
dr_features = X.columns.tolist()        # original DR features (before encoding)
df_features = features_df
union_features = list(dict.fromkeys(ds_features + dr_features + df_features + ['Gender', 'Warehouse_block', 'Mode_of_Shipment', 'Product_importance']))
def build_input_sample_union(context):
    # create sample with union_features, random plausible values
    s = {}
    for col in union_features:
        if col in ds_features:
            # shippingDistance sensible range (0.5 - 50 km)
            if col == 'shippingDistance':
                s[col] = np.round(np.random.uniform(0.5, 50.0), 2)
            elif col == 'hour_of_day':
                s[col] = np.random.randint(0,24)
            elif col == 'day_of_week':
                s[col] = np.random.randint(0,7)
            elif col == 'is_weekend':
                s[col] = int(s.get('day_of_week', 0) in [5,6])
            elif col in ['sender_district', 'receiver_district']:
                # will put string name then transform later
                s[col] = np.random.choice(list(le_sender.classes_))
            else:
                s[col] = np.random.random()
        elif col in df_features:
            # df categorical
            if col == 'Vehicle':
                s[col] = np.random.choice(['bike','truck','motorcycle'])
            elif col == 'Traffic':
                s[col] = np.random.choice(['jam','medium','light'])
            elif col == 'Weather':
                s[col] = np.random.choice(['sunny','fog','stormy','cloudy'])
            elif col == 'Area':
                s[col] = np.random.choice(['urban','suburban','rural'])
            elif col == 'Category':
                s[col] = np.random.choice(['regular','fragile','bulky'])
        else:
            # DR features or other generic fields
            # if categorical known:
            if col == 'Gender':
                s[col] = np.random.choice(['M','F'])
            elif col == 'Warehouse_block':
                s[col] = np.random.choice(['A','B','C','D','F'])
            elif col == 'Mode_of_Shipment':
                s[col] = np.random.choice(['Ship','Flight','Road'])
            elif col == 'Product_importance':
                s[col] = np.random.choice(['low','medium','high'])
            elif col == 'Customer_care_calls':
                s[col] = np.random.randint(0,10)
            elif col == 'Prior_purchases':
                s[col] = np.random.randint(0,10)
            elif col == 'Discount_offered':
                s[col] = np.random.randint(0,100)
            elif col == 'Customer_rating':
                s[col] = np.random.randint(1,6)
            elif col == 'Cost_of_the_Product':
                s[col] = np.random.randint(50,1000)
            elif col == 'Weight_in_gms':
                s[col] = np.random.randint(100,5000)
            elif col == 'ID':
                s[col] = np.random.randint(1000, 9999)
            else:
                s[col] = np.random.random()
    # add context
    s['weather'] = context.get('weather', 'sunny')
    s['traffic'] = context.get('traffic', 'medium')
    s['is_peak_hour'] = int(context.get('is_peak_hour', True))
    #DS
    s['sender_district'] = context.get('sender_district', 'A')
    s['receiver_district'] = context.get('sender_district', 'B')
    s['shipping_distance'] = context.get('shippingDistance', 15.0)
    s['weight'] = context.get('weight', 1200)
    s['serviceType'] = context.get('serviceType', 'standard')
    s['shipper'] = context.get('shipper', 'P1')
    #DR
    s['Agent_Age'] = context.get('Agent_Age', 28)
    s['Agent_Rating'] = context.get('Agent_Rating', 4.7)
    s['Vehicle'] = context.get('Vehicle', 'Motorcycle')
    s['Area'] = context.get('Area', 'Urban')
    s['Order_Date'] = context.get('Order_Date', '2023-01-01')
    s['Order_Time'] = context.get('Order_Time', '10:00:00')
    s['Pickup_Time'] = context.get('Pickup_Time', '10:30:00')
    #DF
    s['Warehouse_block'] = context.get('Warehouse_block', 'A')
    s['Mode_of_Shipment'] = context.get('Mode_of_Shipment', 'Road')
    s['Customer_care_calls'] = context.get('Customer_care_calls', 3)
    s['Customer_rating'] = context.get('Customer_rating', 4)
    s['Cost_of_the_Product'] = context.get('Cost_of_the_Product', 150)
    s['Prior_purchases'] = context.get('Prior_purchases', 2)
    s['Product_importance'] = context.get('Product_importance', 'medium')
    s['Gender'] = context.get('Gender', 'M')
    s['Discount_offered'] = context.get('Discount_offered', 10)
    s['Weight_in_gms'] = context.get('Weight_in_gms', 1200)
    return pd.DataFrame([s])

=== LOAD ARTIFACTS & PREPARE ML² ===


In [10]:
def predict_models_union(context, policy_id, model_DS, scaler_DS, le_sender, le_receiver, model_DR_pipeline, model_DF, encoder_DF):
    sample = build_input_sample_union(context)
    sample_ds = sample[ds_features].copy()
    try:
        sample_ds['sender_district'] = le_sender.transform(sample_ds['sender_district'].astype(str))
    except Exception:
        sample_ds['sender_district'] = 0
    try:
        sample_ds['receiver_district'] = le_receiver.transform(sample_ds['receiver_district'].astype(str))
    except Exception:
        sample_ds['receiver_district'] = 0
    sample_ds_scaled = scaler_DS.transform(sample_ds)
    ds_pred = float(model_DS.predict(sample_ds_scaled)[0])  # hours
     # --- DR input ---
    sample_dr = sample[dr_features].copy()
    # use full pipeline (preprocessor + classifier) to get probability of on-time
    dr_proba = float(model_DR_pipeline.predict_proba(sample_dr)[:,1][0])
    # --- DF input ---
    sample_df = sample[df_features].copy()
    # encoder_DF expects original df_features
    sample_df_enc = encoder_DF.transform(sample_df)
    # get predicted class (0,1,2) or we can use expected value via predict_proba
    probs_df = model_DF.predict_proba(sample_df_enc)[0]  # probabilities for classes sorted by classifier classes_
    # convert to expected numeric score: classes might be [0,1,2]
    classes = model_DF.classes_
    df_expected = float(np.sum(classes * probs_df))
    return {
        'policy': policy_id,
        'DS_total': ds_pred,
        'DR_total': dr_proba,
        'DF_total': df_expected
    }
def generate_allocations(step=0.2):
    vals = np.arange(0, 1 + 1e-9, step)
    allocs = []
    for w1, w2, w3 in product(vals, repeat=3):
        if abs(w1 + w2 + w3 - 1) < 1e-6:
            allocs.append((round(w1,2), round(w2,2), round(w3,2)))
    return allocs
def compute_models_for_allocs(context, allocations, model_DS, scaler_DS, le_sender, le_receiver,
                              model_DR_pipeline, model_DF, encoder_DF):
    policies = ['P1','P2','P3']
    per_policy = {}
    for pid in policies:
        per_policy[pid] = predict_models_union(context, pid, model_DS, scaler_DS, le_sender, le_receiver,
                                               model_DR_pipeline, model_DF, encoder_DF)
    rows = []
    for (w1,w2,w3) in allocations:
        DS_total = w1*per_policy['P1']['DS_total'] + w2*per_policy['P2']['DS_total'] + w3*per_policy['P3']['DS_total']
        DR_total = w1*per_policy['P1']['DR_total'] + w2*per_policy['P2']['DR_total'] + w3*per_policy['P3']['DR_total']
        DF_total = w1*per_policy['P1']['DF_total'] + w2*per_policy['P2']['DF_total'] + w3*per_policy['P3']['DF_total']
        rows.append({'w1':w1,'w2':w2,'w3':w3,'DS_total':DS_total,'DR_total':DR_total,'DF_total':DF_total})
    return pd.DataFrame(rows)
def pareto_front(df, cols):
    mask = np.ones(len(df), dtype=bool)
    for i, row in df.iterrows():
        dominated = ((df[cols] >= row[cols]).all(axis=1)) & ((df[cols] > row[cols]).any(axis=1))
        if dominated.any():
            mask[i] = False
    return mask
def synthesize_ml2_data(model_DS, scaler_DS, le_sender, le_receiver, model_DR_pipeline, model_DF, encoder_DF,
                        n_samples=500, step=0.1):
    rng = np.random.RandomState(42)
    records = []
    allocs = generate_allocations(step)
    for _ in range(n_samples):
        context = {
            'weather': rng.choice(['sunny','fog','stormy','cloudy']),
            'traffic': rng.choice(['jam','medium','light']),
            'is_peak_hour': bool(rng.choice([0,1], p=[0.7,0.3])),
            #DS
            'sender_district': rng.choice(['Quận 1', 'Quận 5', 'Bình Thạnh', 'Tân Bình']),
            'receiver_district': rng.choice(['Quận 1', 'Quận 5', 'Bình Thạnh', 'Tân Bình', 'Quận 10']),
            'shipping_distance': rng.uniform(5,50),
            'weight': rng.uniform(500, 5000),
            'serviceType': rng.choice(['standard', 'express']),
            #DR
            'Agent_Age': rng.randint(20,60),
            'Agent_Rating': rng.uniform(3.0, 5.0),
            'Vehicle': rng.choice(['Bike', 'Car', 'Truck']),
            'Area': rng.choice(['Urban', 'Rural']),
            'Order_Date': '2024-01-01',
            'Order_Time': rng.choice(['08:00:00', '12:00:00', '18:00:00']),
            'Pickup_Time': rng.choice(['08:30:00', '12:30:00', '18:30:00']),
            #DF
            'Warehouse_block': rng.choice(['A', 'B', 'C', 'D']),
            'Mode_of_Shipment': rng.choice(['Ship', 'Flight', 'Road']),
            'Customer_care_calls': rng.randint(1, 7),
            'Customer_rating': rng.randint(1, 5),
    'Cost_of_the_Product': rng.randint(100, 500),
    'Prior_purchases': rng.randint(0, 5),
    'Product_importance': rng.choice(['low', 'medium', 'high']),
    'Gender': rng.choice(['M', 'F']),
    'Discount_offered': rng.randint(0, 30),
    'Weight_in_gms': rng.uniform(300, 4000)
        }
        df_alloc = compute_models_for_allocs(context, allocs, model_DS, scaler_DS, le_sender, le_receiver,
                                             model_DR_pipeline, model_DF, encoder_DF)
        mask = pareto_front(df_alloc, ['DS_total','DR_total','DF_total'])
        pareto = df_alloc[mask].reset_index(drop=True)
        # choose based on context heuristics
        if context['weather'] in ('stormy','cloudy') or context['traffic']=='jam':
            chosen = pareto.loc[pareto['DR_total'].idxmax()]
        elif context['is_peak_hour']:
            chosen = pareto.loc[pareto['DS_total'].idxmax()]
        else:
            chosen = pareto.loc[pareto['DF_total'].idxmax()]
        rec = {
            **context,
            'w1': chosen['w1'], 'w2': chosen['w2'], 'w3': chosen['w3'],
            'DS_total': chosen['DS_total'], 'DR_total': chosen['DR_total'], 'DF_total': chosen['DF_total']
        }
        records.append(rec)
    return pd.DataFrame(records)

def train_ml2(model_DS, scaler_DS, le_sender, le_receiver, model_DR_pipeline, model_DF, encoder_DF):
    df_train = synthesize_ml2_data(model_DS, scaler_DS, le_sender, le_receiver, model_DR_pipeline, model_DF, encoder_DF,
                                   n_samples=1000, step=0.1)
    X = df_train[['weather','traffic','is_peak_hour','DS_total','DR_total','DF_total','w1','w2','w3']].copy()
    y = df_train[['w1','w2','w3']].apply(lambda r: f"{r.w1}_{r.w2}_{r.w3}", axis=1)
    X = pd.get_dummies(X, columns=['weather','traffic'], drop_first=True)
    X_train_m2, X_test_m2, y_train_m2, y_test_m2 = train_test_split(X, y, test_size=0.2, random_state=42)
    clf_m2 = RandomForestClassifier(n_estimators=200, random_state=42)
    clf_m2.fit(X_train_m2, y_train_m2)
    print("ML² trained. Accuracy:", clf_m2.score(X_test_m2, y_test_m2))
    return clf_m2, X.columns.tolist()
print("=== TRAIN ML² ===")
ml2_model, ml2_features = train_ml2(model_DS, scaler_DS, le_sender, le_receiver, model_DR_pipeline, model_DF, encoder_DF)
joblib.dump(ml2_model, 'ml2_model.pkl')
joblib.dump(ml2_features, 'ml2_features.pkl')
print("Saved ML² model (ml2_model.pkl) and feature list (ml2_features.pkl)\n")

ctx = {'weather':'sunny','traffic':'jam','is_peak_hour':True}
allocs = generate_allocations(0.1)
df_alloc_test = compute_models_for_allocs(ctx, allocs, model_DS, scaler_DS, le_sender, le_receiver, model_DR_pipeline, model_DF, encoder_DF)
mask = pareto_front(df_alloc_test, ['DS_total','DR_total','DF_total'])
pareto_df = df_alloc_test[mask].reset_index(drop = True)

print("=== Pareto Frontier for Context ===")
print(ctx)
print(pareto_df.round(3))

best_speed = pareto_df.loc[pareto_df['DS_total'].idxmin()]
best_reliab = pareto_df.loc[pareto_df['DR_total'].idxmax()]
best_feedback = pareto_df.loc[pareto_df['DF_total'].idxmax()]

print("\n=== 🏆 LỰA CHỌN TỐI ƯU THEO TỪNG TIÊU CHÍ ===")
print(f"- Nhanh nhất (DS thấp nhất): {dict(best_speed)}")
print(f"- Đúng hạn nhất (DR cao nhất): {dict(best_reliab)}")
print(f"- Hài lòng nhất (DF cao nhất): {dict(best_feedback)}")


# === TÍNH TRADE-OFF GIỮA CÁC LỰA CHỌN ===
def diff(a, b):
    return {k: round(b[k] - a[k], 3) for k in ['DS_total', 'DR_total', 'DF_total']}

print("\n=== ⚖️ TRADE-OFF GIỮA NHANH NHẤT vs ĐÚNG HẠN NHẤT ===")
print(diff(best_speed, best_reliab))
print("\n=== ⚖️ TRADE-OFF GIỮA NHANH NHẤT vs HÀI LÒNG NHẤT ===")
print(diff(best_speed, best_feedback))


print("\n=== ⚖️ PHƯƠNG ÁN CÂN BẰNG NHẤT (Theo Khoảng Cách Tới Điểm Lý Tưởng) ===")

# Chuẩn hóa các tiêu chí (đưa về cùng thang đo)
df_norm = pareto_df.copy()
df_norm['DS_norm'] = (df_norm['DS_total'] - df_norm['DS_total'].min()) / (df_norm['DS_total'].max() - df_norm['DS_total'].min())
df_norm['DR_norm'] = (df_norm['DR_total'] - df_norm['DR_total'].min()) / (df_norm['DR_total'].max() - df_norm['DR_total'].min())
df_norm['DF_norm'] = (df_norm['DF_total'] - df_norm['DF_total'].min()) / (df_norm['DF_total'].max() - df_norm['DF_total'].min())

# Xác định hướng tối ưu:
# - DS: càng thấp càng tốt  → giá trị lý tưởng là 0
# - DR, DF: càng cao càng tốt → giá trị lý tưởng là 1
ideal_point = np.array([0, 1, 1])
# Tính khoảng cách Euclidean tới điểm lý tưởng
df_norm['distance_to_ideal'] = np.sqrt(
    (df_norm['DS_norm'] - ideal_point[0])**2 +
    (df_norm['DR_norm'] - ideal_point[1])**2 +
    (df_norm['DF_norm'] - ideal_point[2])**2
)

# Chọn phương án có khoảng cách nhỏ nhất (gần điểm lý tưởng nhất)
best_compromise_idx = df_norm['distance_to_ideal'].idxmin()
best_compromise = pareto_df.loc[best_compromise_idx]
print("Phương án cân bằng nhất:")
print(best_compromise)


=== TRAIN ML² ===
ML² trained. Accuracy: 0.985
Saved ML² model (ml2_model.pkl) and feature list (ml2_features.pkl)

=== Pareto Frontier for Context ===
{'weather': 'sunny', 'traffic': 'jam', 'is_peak_hour': True}
     w1   w2   w3  DS_total  DR_total  DF_total
0   0.0  0.0  1.0     9.057     0.801     0.547
1   0.0  0.1  0.9     9.988     0.785     0.561
2   0.0  0.2  0.8    10.920     0.769     0.576
3   0.0  0.3  0.7    11.851     0.752     0.591
4   0.0  0.4  0.6    12.783     0.736     0.605
..  ...  ...  ...       ...       ...       ...
61  0.8  0.1  0.1    13.880     0.763     0.561
62  0.8  0.2  0.0    14.811     0.747     0.576
63  0.9  0.0  0.1    13.435     0.776     0.547
64  0.9  0.1  0.0    14.366     0.760     0.561
65  1.0  0.0  0.0    13.921     0.774     0.547

[66 rows x 6 columns]

=== 🏆 LỰA CHỌN TỐI ƯU THEO TỪNG TIÊU CHÍ ===
- Nhanh nhất (DS thấp nhất): {'w1': np.float64(0.0), 'w2': np.float64(0.0), 'w3': np.float64(1.0), 'DS_total': np.float64(9.05711417608023), '