In [30]:
import tensorflow as tf  
import numpy as np       
import pandas as pd        
from matplotlib import pyplot as plt  
import seaborn as sns  
%matplotlib inline     
import time              
from tensorflow.keras.callbacks import History        
from tensorflow.keras.callbacks import ReduceLROnPlateau, TensorBoard       
from tensorflow.keras import metrics          
from sklearn.metrics import confusion_matrix          
from keras.callbacks import TensorBoard     
import pickle

In [31]:
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [32]:
# Used to set the default drawing style of the Seaborn library
sns.set()

In [33]:
# Used to get the version number of the TensorFlow library
tf.__version__

'2.15.0'

In [34]:
# Import the necessary libraries
import platform
import tensorflow as tf

# Print Python version
print("Python version: ", platform.python_version())

# Print the TensorFlow version
print("TensorFlow version: ", tf.__version__)

# Print the current working directory
import os
print("Current working directory: ", os.getcwd())


Python version:  3.11.7
TensorFlow version:  2.15.0
Current working directory:  /root/.jupyter/张彤/模型


In [35]:
# View Linux system information This command is used in Linux to obtain the system kernel and other information. Executing this command returns detailed information about the operating system, including the kernel version, system architecture, and so on.
!uname -a

Linux w3q2ulc9.vm 5.15.0-60-generic #66-Ubuntu SMP Fri Jan 20 14:29:49 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux


In [36]:
# This command is used to view the status of the NVIDIA GPU in the current system. Executing this command will display various information about the GPU, such as model, driver version, temperature, memory usage, and so on. If an NVIDIA GPU is present on the system and the appropriate drivers and packages have been installed, this command will usually return the details of the GPU.
!nvidia-smi
# Provides some basic information about the NVIDIA GPU in the system, including the GPU name, driver version, CUDA version, and the current GPU status.

Sat Oct 26 18:55:35 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4090        On  | 00000000:09:00.0 Off |                  Off |
|  0%   25C    P8              17W / 450W |  22683MiB / 24564MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce RTX 4090        On  | 00000000:0B:00.0 Off |  

In [37]:
EPOCHS = 50        # Training rounds
IMAGE_SIZE = (224,224)    # Input image size
IMAGE_PATH ="../data"
LEARNING_RATE = 1e-4          # Learning rate
BATCH_SIZE = 64            # Each time the model is trained, 64 samples are used for calculation.

In [38]:
train_ds = tf.keras.utils.image_dataset_from_directory(
    IMAGE_PATH,
    validation_split=0.2,   # The data set is divided into a training set and a validation set, with 80% used for training and 20% for validation.
    subset="training",    
    seed=123,                # Random seeds are set to ensure that the results of each partition of the data set are deterministic, so as to ensure the repeatability of the experimental results.
    image_size=IMAGE_SIZE,     # Set the image size to (224, 224)
    batch_size=BATCH_SIZE      # Set the batch size to 64
)

Found 14080 files belonging to 100 classes.
Using 11264 files for training.


In [39]:
vaild_ds = tf.keras.utils.image_dataset_from_directory(
    IMAGE_PATH,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE
)

Found 14080 files belonging to 100 classes.
Using 2816 files for validation.


In [40]:
class_names = train_ds.class_names
class_names

['1',
 '10',
 '100',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '2',
 '20',
 '21',
 '22',
 '23',
 '24',
 '25',
 '26',
 '27',
 '28',
 '29',
 '3',
 '30',
 '31',
 '32',
 '33',
 '34',
 '35',
 '36',
 '37',
 '38',
 '39',
 '4',
 '40',
 '41',
 '42',
 '43',
 '44',
 '45',
 '46',
 '47',
 '48',
 '49',
 '5',
 '50',
 '51',
 '52',
 '53',
 '54',
 '55',
 '56',
 '57',
 '58',
 '59',
 '6',
 '60',
 '61',
 '62',
 '63',
 '64',
 '65',
 '66',
 '67',
 '68',
 '69',
 '7',
 '70',
 '71',
 '72',
 '73',
 '74',
 '75',
 '76',
 '77',
 '78',
 '79',
 '8',
 '80',
 '81',
 '82',
 '83',
 '84',
 '85',
 '86',
 '87',
 '88',
 '89',
 '9',
 '90',
 '91',
 '92',
 '93',
 '94',
 '95',
 '96',
 '97',
 '98',
 '99']

In [41]:
# Define the mean and standard deviation
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Convert the mean and standard deviation to TensorFlow tensors
mean_tensor = tf.constant(mean, dtype=tf.float32)
std_tensor = tf.constant(std, dtype=tf.float32)

# Define a function to perform a standardized operation
def normalize_image(image):
    return (image - mean_tensor) / std_tensor


In [42]:
# Image enhancement definition
train_image_augment = tf.keras.Sequential(
    [
        tf.keras.layers.Rescaling(1 / 255.0), # normalization
        tf.keras.layers.RandomRotation(factor=0.2), # Random rotation
        tf.keras.layers.RandomFlip(), # Random flip
    ]
)

valid_image_augment = tf.keras.Sequential(
    [
        tf.keras.layers.Rescaling(1 / 255.0), # normalization
    ]
)

In [43]:
# Input processing method
def process_train_input(images, labels):
    return train_image_augment(images), labels

def process_valid_input(images, labels):
    return valid_image_augment(images), labels

In [44]:
# one-hot encoding and type conversion
def convert_types_and_encode(x, y):
    y = tf.cast(y, tf.int32)  # Convert the label type to an integer
    y_one_hot = tf.one_hot(y, 100)  # Apply one-hot encoding for 100 classes (replace 100 with actual class number)
    return x, y_one_hot  


In [45]:
# Apply data set preprocessing
train_ds = train_ds.map(convert_types_and_encode) # Convert data types and encodings
train_ds = train_ds.map(process_train_input, num_parallel_calls=tf.data.AUTOTUNE) # Application enhancement
train_ds = train_ds.prefetch(tf.data.AUTOTUNE) # Optimized loading

vaild_ds = vaild_ds.map(convert_types_and_encode) # Convert data types and encodings
vaild_ds = vaild_ds.map(process_valid_input, num_parallel_calls=tf.data.AUTOTUNE) # Application enhancement
vaild_ds = vaild_ds.prefetch(tf.data.AUTOTUNE) # Optimized loading

In [46]:
base_model = tf.keras.applications.VGG19(include_top=False,weights='imagenet',input_shape=(*IMAGE_SIZE,3))

In [47]:
for layer in base_model.layers:
  print(layer.name)

input_2
block1_conv1
block1_conv2
block1_pool
block2_conv1
block2_conv2
block2_pool
block3_conv1
block3_conv2
block3_conv3
block3_conv4
block3_pool
block4_conv1
block4_conv2
block4_conv3
block4_conv4
block4_pool
block5_conv1
block5_conv2
block5_conv3
block5_conv4
block5_pool


In [48]:
trainable = False
for layer in base_model.layers:
    if layer.name == 'block5_conv1':
        trainable = True
    layer.trainable = trainable

In [49]:
model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1024, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(100, activation='softmax'),
])

In [50]:
# tf.keras.utils.plot_model(model)

In [51]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg19 (Functional)          (None, 7, 7, 512)         20024384  
                                                                 
 flatten_1 (Flatten)         (None, 25088)             0         
                                                                 
 dense_2 (Dense)             (None, 1024)              25691136  
                                                                 
 batch_normalization_1 (Bat  (None, 1024)              4096      
 chNormalization)                                                
                                                                 
 dense_3 (Dense)             (None, 100)               102500    
                                                                 
Total params: 45822116 (174.80 MB)
Trainable params: 35234916 (134.41 MB)
Non-trainable params: 10587200 (40.39 MB)
____

In [52]:
# Definition optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

# Defined loss function
loss_fn = tf.keras.losses.CategoricalCrossentropy()

# Define evaluation indicators
metrics = [
    tf.keras.metrics.CategoricalAccuracy(),
    tf.keras.metrics.Precision(),
    tf.keras.metrics.Recall(),
]

# Compilation model
model.compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)

In [53]:
log_dir = "../Running result/VGG19/VGG19"

In [54]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [55]:
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.5, patience=2, min_lr=1e-8)

In [56]:
history = model.fit(train_ds,epochs=EPOCHS,validation_data=vaild_ds,callbacks=[lr_scheduler,tensorboard_callback])

Epoch 1/50


2024-10-26 18:55:57.098761: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
2024-10-26 18:55:57.239522: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-10-26 18:55:59.111027: I external/local_xla/xla/service/service.cc:168] XLA service 0x7f1e40a12850 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-10-26 18:55:59.111133: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 4090, Compute Capability 8.9
2024-10-26 18:55:59.111150: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (1): NVIDIA GeForce RTX 4090, Compute Capability 8.9
2024-10-26 18:55:59.147064: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1729940159.405287   43800 device_comp

Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [59]:
import pandas as pd  # 导入 Pandas 库
import os

# 假设模型已经训练完成，并且 history 是训练过程的历史数据

# 定义保存路径
model_dir = "../Running result/VGG19"
excel_file_path = os.path.join(model_dir, "VGG19.xlsx")  # Excel 文件保存路径
model_file_path = os.path.join(model_dir, "VGG19.h5")  # 模型保存路径

# 检查保存目录是否存在，如果不存在则创建
if not os.path.exists(model_dir):
    os.makedirs(model_dir)

# 保存训练历史到 Excel 文件
history_df = pd.DataFrame(history.history)  
history_df.to_excel(excel_file_path, index=False)
print(f"Training history saved to: {excel_file_path}")

# 保存模型到指定路径
model.save(model_file_path)
print(f"Model saved to: {model_file_path}")


Training history saved to: ../Running result/VGG19/VGG19.xlsx
Model saved to: ../Running result/VGG19/VGG19.h5


In [1]:
import os
import time
import numpy as np
import tensorflow as tf
import random
from PIL import Image
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Input
from tensorflow.keras.applications import VGG19

# 设置 GPU 动态内存增长
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)

# ------------------ 创建预训练模型（VGG19） ------------------ #
def create_model():
    inputs = Input(shape=(224, 224, 3))

    # 使用 VGG19 预训练模型，不包括顶层
    base_model = VGG19(input_shape=(224, 224, 3), include_top=False, weights='imagenet', input_tensor=inputs)
    
    # 添加全局平均池化层和全连接层
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    outputs = Dense(100, activation='softmax')(x)  # 假设分类为100类
    
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

# ------------------ 获取图片文件 ------------------ #
def get_image_files(dataset_dir):
    image_files = []
    for root, dirs, files in os.walk(dataset_dir):
        for file in files:
            if file.endswith(('.png', '.jpg', '.jpeg')):
                image_files.append(os.path.join(root, file))
    return image_files

# ------------------ 处理图片 ------------------ #
def load_and_preprocess_image(image_path):
    img = Image.open(image_path)
    img = img.resize((224, 224))
    img_array = np.array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = img_array / 255.0
    return img_array

# ------------------ GPU 同步与缓存清理 ------------------ #
def gpu_synchronize_and_clear():
    # 强制 GPU 同步，确保所有任务完成后再计时
    tf.config.experimental.set_synchronous_execution(True)
    # 清理 GPU 缓存
    tf.keras.backend.clear_session()

# ------------------ 数据预加载 ------------------ #
def preload_all_images_to_memory(image_files):
    """预加载所有图片到内存中，避免推理时频繁进行磁盘读取。"""
    preloaded_images = {}
    for image_file in image_files:
        img_array = load_and_preprocess_image(image_file)
        preloaded_images[image_file] = img_array
    return preloaded_images

# ------------------ 推理函数 ------------------ #
def inference_with_separate_timing(model, images, num_images):
    gpu_synchronize_and_clear()  # 清理缓存并同步 GPU

    # 执行推理并计时
    t1 = time.time()
    model.predict(images)
    gpu_synchronize_and_clear()  # 再次同步 GPU

    t2 = time.time()

    inference_time = t2 - t1  # 记录推理时间
    return inference_time

# ------------------ 单张图片推理 ------------------ #
def single_image_inference(model, image_files):
    random_image_file = random.choice(image_files)
    
    # 记录图片加载和预处理时间
    image_data = load_and_preprocess_image(random_image_file)
    
    # 执行推理并记录推理时间
    inference_time = inference_with_separate_timing(model, image_data, 1)
    return inference_time, random_image_file

# ------------------ 批量图片推理 ------------------ #
def batch_image_inference_and_async_loading(model, preloaded_images, image_files, batch_size):
    #batch_images = np.vstack([preloaded_images[img] for img in random.sample(image_files, batch_size)])
    t1 = time.time()
    for img in random.sample(image_files, batch_size):
        img = preloaded_images[img]
        model.predict(img)
    # 推理过程
    #inference_time = inference_with_separate_timing(model, batch_images, batch_size)
    t2 = time.time()
    inference_time = t2 - t1
    average_time_per_image = inference_time / batch_size
    return inference_time, average_time_per_image

# ------------------ 主程序入口 ------------------ #
if __name__ == "__main__":
    dataset_dir = "../data"  # 设置数据集目录
    model = create_model()

    # 模型预热
    model(np.zeros((1, 224, 224, 3)))

    # 获取图片文件
    image_files = get_image_files(dataset_dir)

    # 预加载所有图片到内存
    preloaded_images = preload_all_images_to_memory(image_files)

    # 只计算1张、500张和1000张的推理时间
    for batch_size in [1, 500, 1000]:
        if batch_size == 1:
            inference_time, image_used = single_image_inference(model, image_files)
            print(f"Single image inference time: {inference_time * 1000:.4f} ms")
        else:
            inference_time, avg_time_per_image = batch_image_inference_and_async_loading(model, preloaded_images, image_files, batch_size)
            print(f"Batch of {batch_size} images inference time: {inference_time * 1000:.4f} ms")
            print(f"Average time per image in batch of {batch_size}: {avg_time_per_image * 1000:.4f} ms")


2024-10-26 19:38:09.337204: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-10-26 19:38:09.337459: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-10-26 19:38:09.452531: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

Single image inference time: 448.2975 ms
Batch of 500 images inference time: 47123.4374 ms
Average time per image in batch of 500: 94.2469 ms
Batch of 1000 images inference time: 95258.8298 ms
Average time per image in batch of 1000: 95.2588 ms
