In [1]:
import tensorflow as tf
from tensorflow.keras import layers

IMG_HEIGHT = 224
IMG_WIDTH = 224
# 数据增强
data_augmentation_pipeline = tf.keras.Sequential([
    layers.RandomFlip("horizontal", input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)), 
    layers.RandomRotation(0.2),  
    layers.RandomZoom(0.2),   
    layers.RandomContrast(0.1), 
    layers.RandomBrightness(0.1) 
    
], name="data_augmentation")

  super().__init__(**kwargs)


In [None]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
from tensorflow.keras.models import Model
import os 

# 迁移学习模型构建

IMG_HEIGHT = 224
IMG_WIDTH = 224

# --- 动态获取类别数量 ---
TRAIN_DIR = r"C:\Users\Lenovo\Desktop\final_split_dataset\train"  

try:
    # 获取 TRAIN_DIR 下的子文件夹数量作为类别数
    class_names = [name for name in os.listdir(TRAIN_DIR) if os.path.isdir(os.path.join(TRAIN_DIR, name))]
    NUM_CLASSES = len(class_names)
    print(f"从 '{TRAIN_DIR}' 检测到 {NUM_CLASSES} 个类别: {class_names}")
except FileNotFoundError:
    raise FileNotFoundError(f"训练目录 '{TRAIN_DIR}' 未找到。请确保路径正确并已完成数据集划分。")
except Exception as e:
    print(f"获取类别数量时出错: {e}")


print("\n开始构建模型...")


input_tensor = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3), name="input_image")

# 加载预训练的基础模型 
base_model = MobileNetV2(
    input_tensor=input_tensor,
    weights='imagenet',
    include_top=False,
)

# 冻结基础模型的权重
base_model.trainable = False



x = base_model.output

#     添加全局平均池化层 (Global Average Pooling)
x = GlobalAveragePooling2D(name="global_average_pooling")(x)

#     添加 Dropout 层
x = Dropout(0.5, name="dropout_layer")(x)


#     添加最终的输出层 (Prediction Layer)
output_tensor = Dense(NUM_CLASSES, activation='softmax', name="output_predictions")(x)

model = Model(inputs=input_tensor, outputs=output_tensor, name="GarbageClassifier_MobileNetV2")


print("\n模型选择与构建完成。")

从 'C:\Users\Lenovo\Desktop\final_split_dataset\train' 检测到 4 个类别: ['glass', 'metal', 'paper', 'plastic']

开始构建模型...


  base_model = MobileNetV2(



模型选择与构建完成。


In [None]:
import tensorflow as tf




# 图像尺寸
IMG_HEIGHT = 224
IMG_WIDTH = 224

TRAIN_DIR = r"C:\Users\Lenovo\Desktop\final_split_dataset\train"     
VALIDATION_DIR = r"C:\Users\Lenovo\Desktop\final_split_dataset\validation" 

BATCH_SIZE = 32


from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as model_specific_preprocess_input



print("\n开始准备数据加载器...")

#  定义一个通用的数据预处理函数

def preprocess_data(image, label, is_training=False):
    image = tf.cast(image, tf.float32)
    if is_training:
        # 对训练数据应用数据增强管道
        image = data_augmentation_pipeline(image, training=True)
    image = model_specific_preprocess_input(image)
    
    return image, label

#  创建训练数据集加载器
print(f"  从 '{TRAIN_DIR}' 创建训练数据集加载器...")
train_dataset = tf.keras.utils.image_dataset_from_directory(
    TRAIN_DIR,
    labels='inferred',          
    label_mode='categorical',  
                                
    image_size=(IMG_HEIGHT, IMG_WIDTH), # 将所有图片调整到指定尺寸
    interpolation='nearest',    
    batch_size=BATCH_SIZE,      
    shuffle=True                
)

#  创建验证数据集加载器
print(f"  从 '{VALIDATION_DIR}' 创建验证数据集加载器...")
validation_dataset = tf.keras.utils.image_dataset_from_directory(
    VALIDATION_DIR,
    labels='inferred',
    label_mode='categorical',
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    interpolation='nearest',
    batch_size=BATCH_SIZE,
    shuffle=False               # 验证集和测试集通常不需要打乱
)

# 将预处理函数应用到数据管道中，并进行性能优化
AUTOTUNE = tf.data.AUTOTUNE 

# 对训练数据集应用预处理（包含数据增强）
train_dataset = train_dataset.map(lambda x, y: preprocess_data(x, y, is_training=True), 
                                  num_parallel_calls=AUTOTUNE)
# 对验证数据集应用预处理（不包含数据增强）
validation_dataset = validation_dataset.map(lambda x, y: preprocess_data(x, y, is_training=False), 
                                            num_parallel_calls=AUTOTUNE)

#  使用 .cache() 和 .prefetch() 优化数据加载性能

train_dataset = train_dataset.cache().prefetch(buffer_size=AUTOTUNE)
validation_dataset = validation_dataset.cache().prefetch(buffer_size=AUTOTUNE)

print("\n训练集和验证集的数据加载器已准备完毕并进行了性能优化。")
print(f"  - 批次大小 (Batch Size): {BATCH_SIZE}")
print(f"  - 训练集将应用数据增强和模型预处理。")
print(f"  - 验证集将只应用模型预处理。")

# (可选) 检查一下数据管道输出的形状
for image_batch, label_batch in train_dataset.take(1):
    print(f"\n检查一批数据的形状:")
    print(f"  - 图片批次形状: {image_batch.shape}")
    print(f"  - 标签批次形状: {label_batch.shape}")


开始准备数据加载器...
  从 'C:\Users\Lenovo\Desktop\final_split_dataset\train' 创建训练数据集加载器...
Found 9291 files belonging to 4 classes.
  从 'C:\Users\Lenovo\Desktop\final_split_dataset\validation' 创建验证数据集加载器...
Found 1991 files belonging to 4 classes.

训练集和验证集的数据加载器已准备完毕并进行了性能优化。
  - 批次大小 (Batch Size): 32
  - 训练集将应用数据增强和模型预处理。
  - 验证集将只应用模型预处理。

检查一批数据的形状:
  - 图片批次形状: (32, 224, 224, 3)
  - 标签批次形状: (32, 4)


In [None]:
from tensorflow.keras.optimizers import Adam

    # 编译模型
model.compile(
    optimizer=Adam(learning_rate=0.001), 
    loss='categorical_crossentropy',     
    metrics=['accuracy']                
)
print("\n模型编译完成。")


模型编译完成。


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau


print("\n准备回调函数...")

#  ModelCheckpoint: 保存训练过程中表现最好的模型

checkpoint_cb = ModelCheckpoint(
    'saved_models/best_model.keras', # 使用 .keras 格式
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
    verbose=1 # 打印保存信息
)

#  EarlyStopping: 在验证集性能不再提升时提前停止训练，防止过拟合

early_stopping_cb = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

#  ReduceLROnPlateau: 在学习停滞时降低学习率

reduce_lr_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=1e-6, # 学习率最小不低于 1e-6
    verbose=1
)

# 将所有回调函数放入一个列表
callbacks_list = [checkpoint_cb, early_stopping_cb, reduce_lr_cb]


EPOCHS = 100

# --- 开始训练！ ---

print(f"\n即将开始训练，共计 {EPOCHS} 个 epochs...")

history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    callbacks=callbacks_list
)

print("\n模型训练完成！")


准备回调函数...

即将开始训练，共计 100 个 epochs...
Epoch 1/100
[1m291/291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 266ms/step - accuracy: 0.5816 - loss: 1.0667
Epoch 1: val_accuracy improved from -inf to 0.84229, saving model to saved_models/best_model.keras
[1m291/291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 318ms/step - accuracy: 0.5820 - loss: 1.0658 - val_accuracy: 0.8423 - val_loss: 0.4456 - learning_rate: 0.0010
Epoch 2/100
[1m291/291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 233ms/step - accuracy: 0.7898 - loss: 0.5500
Epoch 2: val_accuracy improved from 0.84229 to 0.86339, saving model to saved_models/best_model.keras
[1m291/291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 281ms/step - accuracy: 0.7898 - loss: 0.5499 - val_accuracy: 0.8634 - val_loss: 0.3921 - learning_rate: 0.0010
Epoch 3/100
[1m291/291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 207ms/step - accuracy: 0.8248 - loss: 0.4830
Epoch 3: val_accuracy improved from 

In [None]:
import tensorflow as tf
from tensorflow.keras.models import load_model

#  加载最佳模型 

best_model_path = 'saved_models/best_model.keras'

print(f"正在从 '{best_model_path}' 加载表现最佳的模型...")
try:
    best_model = load_model(best_model_path)
    print("模型加载成功！")
except Exception as e:
    print(f"加载模型失败，请检查路径是否正确: {e}")

正在从 'saved_models/best_model.keras' 加载表现最佳的模型...
模型加载成功！


In [8]:
IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32
TEST_DIR = r"C:\Users\Lenovo\Desktop\final_split_dataset\test"  

# 导入并使用你训练时用的同一个模型预处理函数
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as model_specific_preprocess_input

def preprocess_for_eval(image, label):
    image = tf.cast(image, tf.float32)
    image = model_specific_preprocess_input(image)
    return image, label

print(f"\n从 '{TEST_DIR}' 加载测试数据...")
test_dataset = tf.keras.utils.image_dataset_from_directory(
    TEST_DIR,
    labels='inferred',
    label_mode='categorical',
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    interpolation='nearest',
    batch_size=BATCH_SIZE,
    shuffle=False # 测试时不需要打乱数据
)

test_dataset = test_dataset.map(preprocess_for_eval, num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = test_dataset.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

print("\n在测试集上评估最终模型性能...")
test_loss, test_accuracy = best_model.evaluate(test_dataset)

print(f"\n测试集损失 (Test Loss): {test_loss:.4f}")
print(f"测试集准确率 (Test Accuracy): {test_accuracy:.4f}")


从 'C:\Users\Lenovo\Desktop\final_split_dataset\test' 加载测试数据...
Found 1992 files belonging to 4 classes.

在测试集上评估最终模型性能...
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 224ms/step - accuracy: 0.8663 - loss: 0.3756

测试集损失 (Test Loss): 0.3046
测试集准确率 (Test Accuracy): 0.8976


In [None]:
import os
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt

# 配置参数 (请根据你的项目情况进行修改)

INITIAL_MODEL_PATH = 'saved_models/best_model.keras'

FINETUNED_MODEL_SAVE_PATH = 'saved_models/best_model_finetuned.keras'

# 数据集目录路径
TRAIN_DIR = r"C:\Users\Lenovo\Desktop\final_split_dataset\train"
VALIDATION_DIR = r"C:\Users\Lenovo\Desktop\final_split_dataset\validation"
TEST_DIR = r"C:\Users\Lenovo\Desktop\final_split_dataset\test"

IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32


# 意味着前100层保持冻结，微调顶部的约55层。
FINE_TUNE_AT_LAYER = 100

LOW_LEARNING_RATE = 1e-5 

FINE_TUNE_EPOCHS = 30 

# 加载第一阶段训练好的模型

print("--- 微调阶段开始 ---")
print(f"\n[步骤 1/7] 正在从 '{INITIAL_MODEL_PATH}' 加载模型...")

if not os.path.exists(INITIAL_MODEL_PATH):
    print(f"错误: 模型文件未找到: {INITIAL_MODEL_PATH}")
    print("请先完成第一阶段的训练，并确保最佳模型已保存。")
    exit()


model = load_model(INITIAL_MODEL_PATH)
print("模型加载成功。")

# 准备数据加载器/数据管道 (与训练时类似)

print(f"\n[步骤 2/7] 正在准备数据加载器...")

# 导入并使用你训练时用的同一个模型预处理函数 (例如MobileNetV2的)
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as model_specific_preprocess_input
from tensorflow.keras import layers

# 定义数据增强管道 (与之前一致)
data_augmentation_pipeline = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.2),
], name="data_augmentation")

def preprocess_data(image, label, is_training=False):
    image = tf.cast(image, tf.float32) # 确保数据类型正确
    if is_training:
        image = data_augmentation_pipeline(image, training=True)
    image = model_specific_preprocess_input(image)
    return image, label

AUTOTUNE = tf.data.AUTOTUNE

# 创建训练和验证数据集
train_dataset = tf.keras.utils.image_dataset_from_directory(
    TRAIN_DIR, labels='inferred', label_mode='categorical',
    image_size=(IMG_HEIGHT, IMG_WIDTH), batch_size=BATCH_SIZE, shuffle=True
).map(lambda x, y: preprocess_data(x, y, is_training=True), num_parallel_calls=AUTOTUNE).cache().prefetch(buffer_size=AUTOTUNE)

validation_dataset = tf.keras.utils.image_dataset_from_directory(
    VALIDATION_DIR, labels='inferred', label_mode='categorical',
    image_size=(IMG_HEIGHT, IMG_WIDTH), batch_size=BATCH_SIZE, shuffle=False
).map(lambda x, y: preprocess_data(x, y, is_training=False), num_parallel_calls=AUTOTUNE).cache().prefetch(buffer_size=AUTOTUNE)

print("数据加载器准备完毕。")

# 解冻基础模型并设置要微调的层

print(f"\n[步骤 3/7] 正在解冻模型层以便微调...")

model.trainable = True
for layer in model.layers[:FINE_TUNE_AT_LAYER]:
    layer.trainable = False

print(f"模型前 {FINE_TUNE_AT_LAYER} 层已冻结，后续层将参与微调。")

# 使用低学习率重新编译模型

print(f"\n[步骤 4/7] 正在使用低学习率 ({LOW_LEARNING_RATE}) 重新编译模型...")

model.compile(
    optimizer=Adam(learning_rate=LOW_LEARNING_RATE),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
print("模型重新编译完成。")



#  定义回调函数并开始微调

print(f"\n[步骤 5/7] 正在设置回调函数并准备开始微调...")

# 定义新的ModelCheckpoint，将微调后的最佳模型保存到新文件
finetune_checkpoint_cb = ModelCheckpoint(
    FINETUNED_MODEL_SAVE_PATH,
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
    verbose=1
)

# EarlyStopping 和 ReduceLROnPlateau 可以继续使用之前的定义
early_stopping_cb = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
reduce_lr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-7, verbose=1)

callbacks_list = [finetune_checkpoint_cb, early_stopping_cb, reduce_lr_cb]

# 开始微调训练
print(f"\n即将开始微调训练，最多进行 {FINE_TUNE_EPOCHS} 个 epochs...")

history_fine_tune = model.fit(
    train_dataset,
    epochs=FINE_TUNE_EPOCHS,
    validation_data=validation_dataset,
    callbacks=callbacks_list
)

print("\n模型微调完成！")


print(f"\n[步骤 6/7] 正在评估微调后的最佳模型...")

# 加载微调后保存的最佳模型
try:
    finetuned_model = load_model(FINETUNED_MODEL_SAVE_PATH)

    # 创建测试集加载器
    test_dataset = tf.keras.utils.image_dataset_from_directory(
        TEST_DIR, labels='inferred', label_mode='categorical',
        image_size=(IMG_HEIGHT, IMG_WIDTH), batch_size=BATCH_SIZE, shuffle=False
    ).map(lambda x, y: preprocess_data(x, y, is_training=False), num_parallel_calls=AUTOTUNE).prefetch(buffer_size=AUTOTUNE)

    # 评估
    print("\n在测试集上评估微调后的模型性能:")
    test_loss, test_accuracy = finetuned_model.evaluate(test_dataset)
    print(f"\n微调后 - 测试集损失: {test_loss:.4f}")
    print(f"微调后 - 测试集准确率: {test_accuracy:.4f}")

except Exception as e:
    print(f"评估微调后的模型时出错: {e}")



  

--- 微调阶段开始 ---

[步骤 1/7] 正在从 'saved_models/best_model.keras' 加载模型...
模型加载成功。

[步骤 2/7] 正在准备数据加载器...
Found 9291 files belonging to 4 classes.
Found 1991 files belonging to 4 classes.
数据加载器准备完毕。

[步骤 3/7] 正在解冻模型层以便微调...
模型前 100 层已冻结，后续层将参与微调。

[步骤 4/7] 正在使用低学习率 (1e-05) 重新编译模型...
模型重新编译完成。

[步骤 5/7] 正在设置回调函数并准备开始微调...

即将开始微调训练，最多进行 30 个 epochs...
Epoch 1/30
[1m291/291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 226ms/step - accuracy: 0.8418 - loss: 0.4595
Epoch 1: val_accuracy improved from -inf to 0.88599, saving model to saved_models/best_model_finetuned.keras
[1m291/291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 275ms/step - accuracy: 0.8418 - loss: 0.4595 - val_accuracy: 0.8860 - val_loss: 0.3371 - learning_rate: 1.0000e-05
Epoch 2/30
[1m291/291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step - accuracy: 0.8325 - loss: 0.4531
Epoch 2: val_accuracy did not improve from 0.88599
[1m291/291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 