In [63]:
import numpy as np
import pandas as pd
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from sklearn.model_selection import train_test_split
import os
from tqdm import tqdm

# -----------------------------
# 1. 路径配置
# -----------------------------
model_dir = '../models'
feature_dir = '../2025_A2/train/Features'
metadata_path = '../2025_A2/train/train_metadata.csv'
image_dir = '../2025_A2/train'  # 图像路径


In [64]:

# -----------------------------
# 2. 加载特征并准备验证集
# -----------------------------
color_df = pd.read_csv(os.path.join(feature_dir, 'color_histogram.csv'))
hog_df = pd.read_csv(os.path.join(feature_dir, 'hog_pca.csv'))
additional_df = pd.read_csv(os.path.join(feature_dir, 'additional_features.csv'))

def merge(meta):
    return (meta
            .merge(additional_df,  on="image_path")
            .merge(color_df, on="image_path")
            .merge(hog_df,    on="image_path"))

metadata_df = pd.read_csv(metadata_path)

X = merge(metadata_df).drop(columns=["ClassId", "image_path"])
y = metadata_df['ClassId']


In [65]:
print(X.shape)

(5488, 121)


In [66]:

# -----------------------------
# 3. 加载三个基础模型
# -----------------------------
rf_model = joblib.load(os.path.join(model_dir, 'random_forest.pkl'))
nn_model = load_model(os.path.join(model_dir, 'neural_model.h5'))
cnn_model = load_model(os.path.join(model_dir, 'cnn_model.h5'))


In [67]:

# -----------------------------
# 4. CNN输入：准备验证图像张量
# -----------------------------
image_paths_val = pd.read_csv(metadata_path)['image_path']  # 读取顺序

cnn_val_images = []
for fname in tqdm(image_paths_val, desc="加载验证图像"):
    img = load_img(os.path.join(image_dir, fname), target_size=(64, 64))
    img_array = img_to_array(img) / 255.0
    cnn_val_images.append(img_array)
cnn_val_images = np.array(cnn_val_images)


加载验证图像: 100%|██████████| 5488/5488 [00:02<00:00, 2526.98it/s]


In [68]:
print(X_val.shape)

(549, 121)


In [69]:

# -----------------------------
# 5. 各模型输出概率
# -----------------------------
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

proba_rf = rf_model.predict_proba(X)
proba_nn = nn_model.predict(X_scaled)
proba_cnn = cnn_model.predict(cnn_val_images)




In [76]:

# -----------------------------
# 6. 构造 stacking 输入 + 训练 meta learner
# -----------------------------
X_meta = np.concatenate([proba_rf, proba_nn, proba_cnn], axis=1)
y_meta = y

meta_learner = LogisticRegression(max_iter=1000)
meta_learner.fit(X_meta, y_meta)

y_pred = meta_learner.predict(X_meta)
acc = accuracy_score(y_meta, y_pred)
print(f"✅ Stacking Validation Accuracy: {acc:.4f}")


✅ Stacking Validation Accuracy: 0.9956


In [78]:
feature_dir_test = '../2025_A2/test/Features'
meta_path_test = '../2025_A2/test/test_metadata.csv'

color_df_test = pd.read_csv(f"{feature_dir_test}/color_histogram.csv")
hog_df_test = pd.read_csv(f"{feature_dir_test}/hog_pca.csv")
additional_df_test = pd.read_csv(f"{feature_dir_test}/additional_features.csv")
def merge(meta):
    return (meta
            .merge(additional_df_test,  on="image_path")
            .merge(color_df_test, on="image_path")
            .merge(hog_df_test,    on="image_path"))

metadata_df_test = pd.read_csv(meta_path_test)

X_test = merge(metadata_df_test).drop(columns=["ClassId", "image_path"])
proba_rf_test = rf_model.predict_proba(X_test)

X_scaled_test = scaler.fit_transform(X_test)
proba_nn_test = nn_model.predict(X_scaled_test)

image_dir_test = '../2025_A2/test'

image_paths_val_test = pd.read_csv(meta_path_test)['image_path']  # 读取顺序
cnn_val_images_test = []
for fname in tqdm(image_paths_val_test, desc="加载验证图像"):
    img = load_img(os.path.join(image_dir_test, fname), target_size=(64, 64))
    img_array = img_to_array(img) / 255.0
    cnn_val_images_test.append(img_array)
cnn_val_images_test = np.array(cnn_val_images_test)
proba_cnn_test = cnn_model.predict(cnn_val_images_test)


X_meta_test = np.concatenate([proba_rf_test, proba_nn_test, proba_cnn_test], axis=1)
y_pred_test = meta_learner.predict(X_meta_test)
test_ids = pd.read_csv(meta_path_test)['id']  # eg: 1, 3105, 136
submission = pd.DataFrame({
    'id': test_ids,
    'ClassId': y_pred_test
})
submission.to_csv('../stacking_model_result.csv', index=False)



加载验证图像: 100%|██████████| 2353/2353 [00:00<00:00, 4830.06it/s]

 1/74 [..............................] - ETA: 2s




