In [36]:
import numpy as np
import pandas as pd
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from sklearn.model_selection import train_test_split
import os
from tqdm import tqdm

# -----------------------------
# 1. 路径配置
# -----------------------------
model_dir = '../models'
feature_dir = '../2025_A2/train/Features'
metadata_path = '../2025_A2/train/train_metadata.csv'
image_dir = '../2025_A2/train'  # 图像路径


In [37]:

# -----------------------------
# 2. 加载特征并准备验证集
# -----------------------------
color_df = pd.read_csv(os.path.join(feature_dir, 'color_histogram.csv'))
hog_df = pd.read_csv(os.path.join(feature_dir, 'hog_pca.csv'))
additional_df = pd.read_csv(os.path.join(feature_dir, 'additional_features.csv'))

def merge(meta):
    return (meta
            .merge(additional_df,  on="image_path")
            .merge(color_df, on="image_path")
            .merge(hog_df,    on="image_path"))

metadata_df = pd.read_csv(metadata_path)

X = merge(metadata_df).drop(columns=["ClassId", "image_path"])
y = metadata_df['ClassId']
y_cat = to_categorical(y, num_classes=43)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_val, y_train, y_val = train_test_split(X_scaled, y_cat, test_size=0.1, random_state=42)


In [38]:
print(X_scaled.shape)

(5488, 121)


In [39]:

# -----------------------------
# 3. 加载三个基础模型
# -----------------------------
rf_model = joblib.load(os.path.join(model_dir, 'random_forest.pkl'))
nn_model = load_model(os.path.join(model_dir, 'neural_model.h5'))
cnn_model = load_model(os.path.join(model_dir, 'cnn_model.h5'))


In [40]:

# -----------------------------
# 4. CNN输入：准备验证图像张量
# -----------------------------
image_paths = pd.read_csv(os.path.join(feature_dir, 'color_histogram.csv'))['image_path']  # 读取顺序
image_paths_val = image_paths.iloc[X_val.shape[0]*-1:].values  # 只取验证集对应图像路径

cnn_val_images = []
for fname in tqdm(image_paths_val, desc="加载验证图像"):
    img = load_img(os.path.join(image_dir, fname), target_size=(64, 64))
    img_array = img_to_array(img) / 255.0
    cnn_val_images.append(img_array)
cnn_val_images = np.array(cnn_val_images)


加载验证图像: 100%|██████████| 549/549 [00:00<00:00, 4551.66it/s]


In [41]:
print(X_val.shape)

(549, 121)


In [42]:

# -----------------------------
# 5. 各模型输出概率
# -----------------------------
proba_rf = rf_model.predict_proba(X_val)
proba_nn = nn_model.predict(X_val)
proba_cnn = cnn_model.predict(cnn_val_images)






In [43]:

# -----------------------------
# 6. 构造 stacking 输入 + 训练 meta learner
# -----------------------------
X_meta = np.concatenate([proba_rf, proba_nn, proba_cnn], axis=1)
y_meta = y_val.argmax(axis=1)

meta_learner = LogisticRegression(max_iter=1000)
meta_learner.fit(X_meta, y_meta)

y_pred = meta_learner.predict(X_meta)
acc = accuracy_score(y_meta, y_pred)
print(f"✅ Stacking Validation Accuracy: {acc:.4f}")


✅ Stacking Validation Accuracy: 0.8761
