In [1]:
from tl_tools import *
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Cap how much memory TensorFlow may claim on the first visible GPU.
# `memory_limit` is expressed in MB, so the value is kept in one named constant
# and the log message is derived from it (the two cannot drift apart).
GPU_MEMORY_LIMIT_MB = 12 * 1024

# The stable `tf.config.*` entry points replace the `tf.config.experimental.*`
# aliases (set_virtual_device_configuration / VirtualDeviceConfiguration).
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.set_logical_device_configuration(
            gpus[0],
            [tf.config.LogicalDeviceConfiguration(memory_limit=GPU_MEMORY_LIMIT_MB)],
        )
        print(f"GPU memory limit set to {GPU_MEMORY_LIMIT_MB // 1024}GB")
    except RuntimeError as e:
        # Per the TensorFlow docs this is raised when the runtime has already
        # been initialised; report it and continue with the default allocation.
        print(f"Error setting GPU memory limit: {e}")
    # NOTE(review): the Metal plugin log later in this notebook reports the device
    # as created "with 0 MB memory" - confirm the limit has any effect on Apple silicon.

GPU memory limit set to 12GB


In [2]:
# Helper that is not defined in this notebook (presumably provided by the
# wildcard `tl_tools` import); it prints the mixed-precision policy it sets.
setup_mixed_precision()

train_dir = '/Users/pimpijnenburg/Desktop/Thesis/USTC_SmokeRS_dataset/data/USTC_SmokeRS/processed/train'

# Multiply pixel values by 1/255 and augment with random horizontal and
# vertical flips.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    vertical_flip=True,
)

# Stream shuffled 256x256 RGB batches of 16 images from the class sub-folders;
# the fixed seed keeps the shuffle order repeatable.
train = train_datagen.flow_from_directory(
    train_dir,
    target_size=(256, 256),
    color_mode='rgb',
    batch_size=16,
    shuffle=True,
    seed=1,
)

Mixed precision policy set to: mixed_float16

Found 4980 images belonging to 6 classes.
Found 1245 images belonging to 6 classes.


In [3]:
# Split the generator's batches into training and validation sets, holding out
# 30% for validation. `train_val_split` is not defined in this notebook
# (presumably from the wildcard `tl_tools` import).
# NOTE(review): `train` applies random flips, so the validation images are
# presumably augmented as well - confirm inside train_val_split.
X_train, y_train, X_val, y_val = train_val_split(
    train,
    val_split=0.3,
)

Number of batches in the training data: 312
Batch size of a single batch 16
Number of samples in the training dataset 4980

Number of training data batches with val split of 0.3: 219
Number of validation data batches: 93



2024-10-20 16:00:07.562120: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2024-10-20 16:00:07.562150: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2024-10-20 16:00:07.562155: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
2024-10-20 16:00:07.562169: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-10-20 16:00:07.562179: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Shape of image training set: (3504, 256, 256, 3)
Shape of image validation set: (1476, 256, 256, 3)

Shape of label training set: (3504, 6)
Shape of label validation set: (1476, 6)


In [5]:
import tensorflow as tf
from tensorflow.keras import layers, Model

def create_resnet34_like_model(input_shape=(256, 256, 3), num_classes=6):
    """Build a truncated ResNet50V2 topped with a 512-filter residual stage and a small classifier.

    The backbone is ``tf.keras.applications.ResNet50V2`` created with
    ``weights=None`` (no pretrained weights are loaded) and cut at the layer
    named ``'conv3_block3_out'``. On top of that output the function stacks:

    * a stride-2 3x3 convolution with 512 filters, batch norm and ReLU;
    * three residual blocks, each two 3x3 convolutions with 512 filters and an
      identity shortcut;
    * global average pooling;
    * two Dense -> BatchNormalization -> Dropout(0.3) stages (64 and 32 units);
    * a softmax output layer pinned to ``float32``.

    Args:
        input_shape: Shape forwarded to ResNet50V2 as ``input_shape``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras ``Model`` named ``'ResNet34_like_with_custom_output'``
        whose input is the ResNet50V2 input tensor.
    """
    stage_filters = 512

    # Only the layers up to 'conv3_block3_out' end up in the returned model;
    # the rest of the ResNet50V2 graph is built but not connected to the output.
    backbone = tf.keras.applications.ResNet50V2(
        weights=None,
        include_top=False,
        input_shape=input_shape,
    )
    features = backbone.get_layer('conv3_block3_out').output

    # Down-sampling transition into the final stage.
    features = layers.Conv2D(
        filters=stage_filters, kernel_size=3, strides=2, padding='same'
    )(features)
    features = layers.BatchNormalization()(features)
    features = layers.Activation('relu')(features)

    # Three identity-shortcut residual blocks. The shortcut can be added
    # directly because every convolution keeps 512 filters and, with stride 1
    # and 'same' padding, the spatial size.
    for _ in range(3):
        residual = features
        features = layers.Conv2D(
            filters=stage_filters, kernel_size=3, padding='same'
        )(features)
        features = layers.BatchNormalization()(features)
        features = layers.Activation('relu')(features)
        features = layers.Conv2D(
            filters=stage_filters, kernel_size=3, padding='same'
        )(features)
        features = layers.BatchNormalization()(features)
        features = layers.Add()([residual, features])
        features = layers.Activation('relu')(features)

    features = layers.GlobalAveragePooling2D()(features)

    # Classifier head: two fully connected stages of decreasing width.
    for units in (64, 32):
        features = layers.Dense(units, activation='relu')(features)
        features = layers.BatchNormalization()(features)
        features = layers.Dropout(0.3)(features)

    # The output layer is explicitly float32, independent of the global dtype policy.
    predictions = layers.Dense(
        num_classes, activation='softmax', dtype='float32'
    )(features)

    return Model(
        inputs=backbone.input,
        outputs=predictions,
        name='ResNet34_like_with_custom_output',
    )

# Instantiate the network for 256x256 RGB inputs and the 6 dataset classes
# (these are the function's default arguments, spelled out for readability).
resnet34_like = create_resnet34_like_model(
    input_shape=(256, 256, 3),
    num_classes=6,
)

In [6]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
# Compile for one-hot multi-class classification, tracking accuracy and F1.
resnet34_like.compile(
    optimizer=Adam(learning_rate=0.01),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'F1Score'],
)

# Stop once val_loss has not improved for 15 epochs and restore the best
# weights; monitoring only starts from epoch 50.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
    start_from_epoch=50,
)

# Multiply the learning rate by 0.2 after 10 epochs without val_loss
# improvement, never going below 1e-6.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=10,
    min_lr=1e-6,
)

In [7]:
import math

from tensorflow.keras.callbacks import ModelCheckpoint

# fit() is called on in-memory arrays without `batch_size`, so Keras uses its
# default of 32 samples per step.
FIT_BATCH_SIZE = 32
CHECKPOINT_EVERY_N_EPOCHS = 10

# An integer `save_freq` is counted in batches, not epochs. Deriving the number
# of steps per epoch from the data (instead of hard-coding 110) keeps the saves
# aligned with epoch boundaries if the train/validation split ever changes.
steps_per_epoch = math.ceil(len(X_train) / FIT_BATCH_SIZE)
save_freq = CHECKPOINT_EVERY_N_EPOCHS * steps_per_epoch

checkpoint_dir = '/Users/pimpijnenburg/Desktop/Thesis/USTC_SmokeRS_dataset/code/transfer_learning/checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

# Save the full model (not just the weights) regardless of validation score.
# Saving is limited to every 10th epoch; the original note mentions memory
# issues (presumably with more frequent saving).
checkpoint_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'model_epoch_{epoch:03d}.keras'),
    save_weights_only=False,
    save_best_only=False,
    save_freq=save_freq,
    verbose=1
)


#history =  resnet34_like.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, verbose=1, 
#                             callbacks= [early_stopping, reduce_lr, checkpoint_callback])

# Continuing from checkpoint 

In [8]:
# Resume from the full-model checkpoint written at epoch 40.
latest_checkpoint = '/Users/pimpijnenburg/Desktop/Thesis/USTC_SmokeRS_dataset/code/transfer_learning/checkpoints/model_epoch_040.keras'
resnet34_epoch40 = tf.keras.models.load_model(latest_checkpoint)

# Read the learning rate stored with the loaded optimizer so training can
# continue at the same rate.
epoch_40_lr = float(
    tf.keras.backend.get_value(resnet34_epoch40.optimizer.learning_rate)
)
print(f'Current learning rate: {epoch_40_lr}')

# Re-compile with a new Adam instance at that learning rate.
# NOTE(review): a fresh optimizer presumably discards the Adam state saved in
# the checkpoint - confirm this is intended.
resnet34_epoch40.compile(
    optimizer=Adam(learning_rate=epoch_40_lr),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'F1Score'],
)

Current learning rate: 0.001999999862164259


In [9]:
#history = resnet34_epoch40.fit(X_train, y_train, validation_data= (X_val, y_val), epochs = 100, initial_epoch= 40, 
#                               callbacks = [checkpoint_callback, reduce_lr, early_stopping])

In [10]:
# Resume from the full-model checkpoint written at epoch 80.
latest_checkpoint = '/Users/pimpijnenburg/Desktop/Thesis/USTC_SmokeRS_dataset/code/transfer_learning/checkpoints/model_epoch_080.keras'
resnet34_epoch80 = tf.keras.models.load_model(latest_checkpoint)

# Read the learning rate stored with the loaded optimizer so training can
# continue at the same rate.
epoch_80_lr = float(
    tf.keras.backend.get_value(resnet34_epoch80.optimizer.learning_rate)
)
print(f'Current learning rate: {epoch_80_lr}')

# Re-compile with a new Adam instance at that learning rate.
# NOTE(review): a fresh optimizer presumably discards the Adam state saved in
# the checkpoint - confirm this is intended.
resnet34_epoch80.compile(
    optimizer=Adam(learning_rate=epoch_80_lr),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'F1Score'],
)

# Callbacks for this final stretch use shorter patience than the earlier
# ones (11 instead of 15 for stopping, 8 instead of 10 for LR reduction).
early_stopping_e80 = EarlyStopping(
    monitor='val_loss',
    patience=11,
    restore_best_weights=True,
    start_from_epoch=50,
)
reduce_lr_e80 = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=8,
    min_lr=1e-6,
)

Current learning rate: 0.0003999999607913196


In [11]:
# Continue training from epoch 80 up to epoch 120 with the checkpoint callback
# defined earlier and the callbacks created for this stretch. `history`
# therefore only covers the epochs run by this call.
history = resnet34_epoch80.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=120,
    initial_epoch=80,
    callbacks=[checkpoint_callback, reduce_lr_e80, early_stopping_e80],
)

Epoch 81/120


2024-10-20 16:00:15.442106: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m118s[0m 965ms/step - F1Score: 0.9287 - accuracy: 0.9310 - loss: 0.2031 - val_F1Score: 0.8676 - val_accuracy: 0.8699 - val_loss: 0.3786 - learning_rate: 4.0000e-04
Epoch 82/120
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 905ms/step - F1Score: 0.9269 - accuracy: 0.9282 - loss: 0.2142 - val_F1Score: 0.8709 - val_accuracy: 0.8733 - val_loss: 0.3614 - learning_rate: 4.0000e-04
Epoch 83/120
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 896ms/step - F1Score: 0.9387 - accuracy: 0.9395 - loss: 0.1830 - val_F1Score: 0.8703 - val_accuracy: 0.8733 - val_loss: 0.3796 - learning_rate: 4.0000e-04
Epoch 84/120
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 901ms/step - F1Score: 0.9325 - accuracy: 0.9332 - loss: 0.1931 - val_F1Score: 0.8680 - val_accuracy: 0.8699 - val_loss: 0.3671 - learning_rate: 4.0000e-04
Epoch 85/120
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [12]:
# Persist the model in its state after the final fit() call to the current
# working directory, in the native .keras format.
resnet34_epoch80.save(filepath='resnet34v2.keras')