In [10]:
#DEMO: Load models + Predict

import os
import joblib
import pandas as pd
from IPython.display import display

# Location of the serialized model bundle.
# The BEST_MODELS_PATH environment variable, when set to a non-empty value,
# overrides the default so the notebook can run on another machine without
# editing this cell; otherwise the original absolute path is used.
MODEL_PATH = os.environ.get("BEST_MODELS_PATH") or (
    r'C:\Users\Admin\AdvancedPython\12423030_12423TN_HoangThiMinhTam\demo\best_models.pkl'
)

# Fail fast with a readable message when the bundle file is missing, instead of
# letting the loader fail later with a less obvious error.
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f"Không tìm thấy file: {MODEL_PATH}. Hãy đặt file model cùng thư mục với notebook demo hoặc sửa path.")


In [8]:
# Deserialize the saved bundle from MODEL_PATH.
# NOTE(review): joblib.load is pickle-based, so loading a file can execute
# arbitrary code — only load bundles that come from a trusted source.
best_bundle = joblib.load(MODEL_PATH)

# Last expression of the cell: displays the container type of the loaded object.
type(best_bundle)

dict

In [9]:
# --- Unpack the bundle: the winning estimator and its name for each task ---
# Regression entry -> predicts Demand
best_reg_est, best_reg_name = (
    best_bundle["best_reg"][field] for field in ("model", "model_name")
)
# Classification entry -> predicts Stockout
best_clf_est, best_clf_name = (
    best_bundle["best_clf"][field] for field in ("model", "model_name")
)

# Fitted transformers shared by both models
scaler, ohe = best_bundle["scaler"], best_bundle["ohe"]

# Feature metadata: the three column groups plus the stored one-hot column names
num_features, bin_features, cat_features, cat_names = (
    best_bundle[field]
    for field in ("num_features", "bin_features", "cat_features", "cat_names")
)

print("Loaded:", MODEL_PATH)
print("Best reg:", best_reg_name)
print("Best clf:", best_clf_name)

Loaded: C:\Users\Admin\AdvancedPython\12423030_12423TN_HoangThiMinhTam\demo\best_models.pkl
Best reg: XGBRegressor
Best clf: XGBClassifier


In [12]:
def build_X_final(input_df: pd.DataFrame, bundle: dict) -> pd.DataFrame:
    """Turn raw input rows into the feature matrix used by the saved models.

    Numeric columns are passed through the bundle's scaler, binary columns are
    copied unchanged, and categorical columns are passed through the bundle's
    one-hot encoder. The three parts are concatenated column-wise in the order
    numeric -> binary -> one-hot, keeping the index of ``input_df``.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Raw rows to transform. It is never modified; the function works on a copy.
    bundle : dict
        Must provide ``"num_features"``, ``"bin_features"``, ``"cat_features"``
        (sequences of column names), ``"scaler"`` and ``"ohe"`` (objects with a
        ``transform`` method). ``"cat_names"`` is optional: when absent, the
        one-hot column names come from ``ohe.get_feature_names_out``.

    Returns
    -------
    pandas.DataFrame
        Columns are ``num_features + bin_features + cat_names``, in that order.

    Raises
    ------
    KeyError
        If a required key is missing from ``bundle``.
    """
    # Normalize to plain lists: `+` on a pandas Index or numpy array is
    # element-wise, and a tuple would be treated by pandas as a single key.
    num_features = list(bundle["num_features"])
    bin_features = list(bundle["bin_features"])
    cat_features = list(bundle["cat_features"])

    scaler = bundle["scaler"]
    ohe = bundle["ohe"]

    # One-hot output names stored alongside the models (may be absent).
    cat_names = bundle.get("cat_names", None)

    X_in = input_df.copy()

    # Any expected column missing from the input is created with the value 0.
    # NOTE(review): for categorical columns 0 is a placeholder, not a real
    # category; how the encoder treats it depends on how it was fitted — confirm.
    for col in num_features + bin_features + cat_features:
        if col not in X_in.columns:
            X_in[col] = 0

    # Numeric part: scaled.
    X_num_df = pd.DataFrame(
        scaler.transform(X_in[num_features]),
        columns=num_features,
        index=X_in.index,
    )

    # Binary part: passed through as-is.
    X_bin_df = X_in[bin_features].copy()

    # Categorical part: one-hot encoded; densify if the encoder returned a
    # sparse matrix.
    X_cat = ohe.transform(X_in[cat_features])
    if hasattr(X_cat, "toarray"):
        X_cat = X_cat.toarray()

    if cat_names is None:
        cat_names = ohe.get_feature_names_out(cat_features)
    # The names only label the encoder output; they do not reorder it, so they
    # must already be in the encoder's output order.
    X_cat_df = pd.DataFrame(X_cat, columns=list(cat_names), index=X_in.index)

    # The concatenation order already is numeric -> binary -> one-hot, so no
    # further column selection is needed.
    return pd.concat([X_num_df, X_bin_df, X_cat_df], axis=1)


In [23]:
# One hand-written sample row used to demonstrate the prediction flow.
# NOTE(review): the keys presumably match the feature column names stored in
# the bundle — confirm; build_X_final fills any expected column that is missing
# here with 0, so a misspelled key would go unnoticed.
input_data = {
    # Numeric / Binary
    "Inventory Level": 30,
    "Units Sold": 90,
    "Units Ordered": 80,
    "Price": 70,
    "Effective_Price": 70.0,
    "Discount": 0,
    "Has_Discount": 0,
    "Promotion": 0,
    "Competitor Pricing": 72.0,
    "Epidemic": 0,
    "Promo_Epidemic": 0,
    "Month": 6,
    "DayOfWeek": 2,
    "IsWeekend": 0,

    # Categorical
    "Category": "Electronics",
    "Region": "North",
    "Weather Condition": "Sunny",
    "Seasonality": "Summer"
}

# Wrapping the dict in a list yields a frame with exactly one row.
input_df = pd.DataFrame([input_data])

# Show the row that will be fed to the models.
print("\nDữ liệu đầu vào:")
display(input_df)



Dữ liệu đầu vào:


Unnamed: 0,Inventory Level,Units Sold,Units Ordered,Price,Effective_Price,Discount,Has_Discount,Promotion,Competitor Pricing,Epidemic,Promo_Epidemic,Month,DayOfWeek,IsWeekend,Category,Region,Weather Condition,Seasonality
0,30,90,80,70,70.0,0,0,0,72.0,0,0,6,2,0,Electronics,North,Sunny,Summer


In [24]:
# Both models consume the same preprocessed matrix, so the preprocessing is run
# once. build_X_final works on its own copy of the input, so input_df does not
# need to be copied here.
X_final_reg = build_X_final(input_df, best_bundle)
# Kept under its original name as an independent frame, so code that uses or
# modifies X_final_clf cannot affect X_final_reg.
X_final_clf = X_final_reg.copy()

# Demand (regression): first (and only) row of the prediction
pred_demand = best_reg_est.predict(X_final_reg)[0]
print("Demand dự đoán:", float(pred_demand))

# Stockout (classification): hard label, then the second predict_proba column
# (the probability of label 1 for a 0/1 target)
pred_stockout = best_clf_est.predict(X_final_clf)[0]
proba_stockout = best_clf_est.predict_proba(X_final_clf)[:, 1][0]

print("Stockout dự đoán (0/1):", int(pred_stockout))
print("Xác suất Stockout:", float(proba_stockout))

Demand dự đoán: 92.27799987792969
Stockout dự đoán (0/1): 1
Xác suất Stockout: 0.9999706745147705
