In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import warnings
warnings.filterwarnings("ignore")

# 1. Load the data
df = pd.read_csv("cleaned_data_sorted.csv")

# 2. Select a single car line to evaluate (here: Toyota Zace)
brand = "Toyota"
model = "zace"

# Case-insensitive match on brand and model; rows missing any feature or the
# target are dropped before modelling.
required_cols = ["manufacture_date", "mileage_v2", "price"]
df_filtered = df[
    (df["brand"].str.lower() == brand.lower()) &
    (df["model"].str.lower() == model.lower())
].dropna(subset=required_cols)

# Fail early with a readable message instead of letting train_test_split
# raise its own ValueError about an empty sample set.
if df_filtered.empty:
    raise ValueError(f"No usable rows found for {brand} {model}")

# 3. Build the feature matrix X and the target y
X = df_filtered[["manufacture_date", "mileage_v2"]]
y = df_filtered["price"]

# 4. Train/test split (80/20, fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Train the candidate models
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "XGBoost": XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
}

results = []

# The loop variable is deliberately NOT named `model`: a Python `for` target
# rebinds the name in the enclosing (module) scope, which would overwrite the
# car-model string above and make the report header print an estimator repr.
for name, estimator in models.items():
    estimator.fit(X_train, y_train)
    y_pred = estimator.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    results.append({
        "Model": name,
        "MAE": round(mae, 0),
        "R² Score": round(r2, 3)
    })

# 6. Show the comparison table
results_df = pd.DataFrame(results)
print("🔍 So sánh mô hình dự đoán giá cho", brand, model)
print(results_df)


🔍 So sánh mô hình dự đoán giá cho Toyota XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.1, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=3,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=100,
             n_jobs=None, num_parallel_tree=None, ...)
               Model         MAE  R² Score
0  Linear Regression  26070676.0     0.281
1      Random Forest   4747137.0     0.943
2            XGBoost  12102383.0     0.865
