In [13]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

# 设置路径
train_feature_dir = '../2025_A2/train/Features'
train_metadata_path = '../2025_A2/train/train_metadata.csv'

In [14]:

# 读取特征数据
color_df = pd.read_csv(os.path.join(train_feature_dir, 'color_histogram.csv'))
hog_df = pd.read_csv(os.path.join(train_feature_dir, 'hog_pca.csv'))
additional_df = pd.read_csv(os.path.join(train_feature_dir, 'additional_features.csv'))

def merge(meta):
    return (meta
            .merge(additional_df,  on="image_path")
            .merge(color_df, on="image_path")
            .merge(hog_df,    on="image_path"))

metadata_df = pd.read_csv(train_metadata_path)

# 合并特征
X = merge(metadata_df).drop(columns=["ClassId", "image_path"])

print(X.dtypes)  # 确保所有列都是 float 或 int 类型

id                int64
edge_density    float64
mean_b          float64
mean_g          float64
mean_r          float64
                 ...   
hog_pca_15      float64
hog_pca_16      float64
hog_pca_17      float64
hog_pca_18      float64
hog_pca_19      float64
Length: 121, dtype: object


In [15]:
# 读取标签
metadata_df = pd.read_csv(train_metadata_path)
y = metadata_df['ClassId']
y_encoded = to_categorical(y, num_classes=43)

# 特征标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 划分训练集与验证集
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y_encoded, test_size=0.1, random_state=42)

In [16]:
# 搭建神经网络模型
model = Sequential([
    Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.4),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(43, activation='softmax')  # 43类
])

# 编译模型
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 训练模型
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=25, batch_size=64)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x29b3a5ce650>

In [17]:
# 评估验证集准确率
val_loss, val_acc = model.evaluate(X_val, y_val)
print(f"\nValidation Accuracy: {val_acc:.4f}")


Validation Accuracy: 0.8397


In [18]:
# 保存模型（可选）
model.save('../models/neural_model.h5')