In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
import matplotlib.pyplot as plt
import json

# Define a custom callback to print messages and store history
class PrintLogger(Callback):
    def on_epoch_begin(self, epoch, logs=None):
        print(f"\nEpoch {epoch + 1} started.")
    
    def on_epoch_end(self, epoch, logs=None):
        print(f"\nEpoch {epoch + 1} ended.")
        print(f"Accuracy: {logs['accuracy']:.4f} - Loss: {logs['loss']:.4f} - Val Accuracy: {logs['val_accuracy']:.4f} - Val Loss: {logs['val_loss']:.4f}")

# Define directories
save_dir = './split_datav5_full_compressed'

# Load the split data
train_data = np.load(os.path.join(save_dir, 'train_data.npz'))
val_data = np.load(os.path.join(save_dir, 'val_data.npz'))
test_data = np.load(os.path.join(save_dir, 'test_data.npz'))

X_rp_train = train_data['X_rp']
X_gaf_train = train_data['X_gaf']
X_mtf_train = train_data['X_mtf']
y_train = train_data['y']

X_rp_val = val_data['X_rp']
X_gaf_val = val_data['X_gaf']
X_mtf_val = val_data['X_mtf']
y_val = val_data['y']

X_rp_test = test_data['X_rp']
X_gaf_test = test_data['X_gaf']
X_mtf_test = test_data['X_mtf']
y_test = test_data['y']

# Resize images to (224, 224, 3) and keep only RGB channels
def resize_images(images):
    resized_images = []
    for img in images:
        img = tf.image.resize(img, [224, 224])
        img = img[:, :, :3]  # Keep only RGB channels
        resized_images.append(img)
    return np.array(resized_images)

X_rp_train = resize_images(X_rp_train)
X_gaf_train = resize_images(X_gaf_train)
X_mtf_train = resize_images(X_mtf_train)
X_rp_val = resize_images(X_rp_val)
X_gaf_val = resize_images(X_gaf_val)
X_mtf_val = resize_images(X_mtf_val)
X_rp_test = resize_images(X_rp_test)
X_gaf_test = resize_images(X_gaf_test)
X_mtf_test = resize_images(X_mtf_test)

# Define a function to build the ResNet50 model
def build_resnet50_model(input_shape, num_classes):
    inputs_rp = Input(shape=input_shape, name='input_rp')
    inputs_gaf = Input(shape=input_shape, name='input_gaf')
    inputs_mtf = Input(shape=input_shape, name='input_mtf')
    
    base_model = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)
    
    # Create separate models for each input with unique names
    base_rp = Model(inputs=inputs_rp, outputs=base_model(inputs_rp), name='resnet50_rp')
    base_gaf = Model(inputs=inputs_gaf, outputs=base_model(inputs_gaf), name='resnet50_gaf')
    base_mtf = Model(inputs=inputs_mtf, outputs=base_model(inputs_mtf), name='resnet50_mtf')
    
    x_rp = base_rp.output
    x_rp = GlobalAveragePooling2D()(x_rp)
    
    x_gaf = base_gaf.output
    x_gaf = GlobalAveragePooling2D()(x_gaf)
    
    x_mtf = base_mtf.output
    x_mtf = GlobalAveragePooling2D()(x_mtf)
    
    merged = Concatenate()([x_rp, x_gaf, x_mtf])
    
    x = Dense(512, activation='relu', name='dense_512')(merged)
    x = Dropout(0.5, name='dropout_512')(x)
    x = Dense(128, activation='relu', name='dense_128')(x)
    x = Dropout(0.5, name='dropout_128')(x)
    outputs = Dense(num_classes, activation='softmax', name='output')(x)
    
    model = Model(inputs=[inputs_rp, inputs_gaf, inputs_mtf], outputs=outputs)
    return model

# Define input shape and number of classes
input_shape = (224, 224, 3)
num_classes = 5

# Build ResNet50 model
resnet50_model = build_resnet50_model(input_shape, num_classes)
resnet50_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Define early stopping and model checkpoint callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
resnet50_checkpoint = ModelCheckpoint(filepath=os.path.join(save_dir, 'resnet50_best_model.keras'), monitor='val_loss', save_best_only=True)

# Train ResNet50 model
resnet50_history = resnet50_model.fit(
    [X_rp_train, X_gaf_train, X_mtf_train], y_train,
    epochs=10, batch_size=32, validation_data=([X_rp_val, X_gaf_val, X_mtf_val], y_val),
    callbacks=[early_stopping, resnet50_checkpoint, PrintLogger()],
    verbose=1
)

# Evaluate ResNet50 model on the test set
resnet50_model.load_weights(os.path.join(save_dir, 'resnet50_best_model.keras'))
resnet50_test_loss, resnet50_test_accuracy = resnet50_model.evaluate([X_rp_test, X_gaf_test, X_mtf_test], y_test)
print(f'ResNet50 Test loss: {resnet50_test_loss:.4f}')
print(f'ResNet50 Test accuracy: {resnet50_test_accuracy:.4f}')

# Function to plot learning curves
def plot_learning_curve(history, model_name):
    plt.figure(figsize=(12, 8))

    # Plot training & validation accuracy values
    plt.subplot(2, 1, 1)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title(f'Model accuracy - {model_name}')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    # Plot training & validation loss values
    plt.subplot(2, 1, 2)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title(f'Model loss - {model_name}')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    plt.tight_layout()
    plt.show()

# Plot learning curve for ResNet50
plot_learning_curve(resnet50_history, 'ResNet50')

# Save the final ResNet50 model
resnet50_model.save(os.path.join(save_dir, 'resnet50_model.keras'))

# Save the training history
with open(os.path.join(save_dir, 'resnet50_history.json'), 'w') as f:
    json.dump(resnet50_history.history, f)

print(f"ResNet50 model and history saved in directory: {save_dir}")


2024-07-02 17:32:34.165881: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 8245 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:1a:00.0, compute capability: 8.6
2024-07-02 17:32:34.167479: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 8243 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:68:00.0, compute capability: 8.6
2024-07-02 17:33:08.356813: W external/local_tsl/tsl/framework/bfc_allocator.cc:485] Allocator (GPU_0_bfc) ran out of memory trying to allocate 588.0KiB (rounded to 602112)requested by op StridedSlice
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2024-07-02 17:33:08.357158: I external/local_tsl/tsl/framework/bfc_al

ResourceExhaustedError: {{function_node __wrapped__StridedSlice_device_/job:localhost/replica:0/task:0/device:GPU:0}} OOM when allocating tensor with shape[224,224,3] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:StridedSlice] name: strided_slice/

In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
import matplotlib.pyplot as plt
import json

# Define a custom callback to print messages and store history
class PrintLogger(Callback):
    def on_epoch_begin(self, epoch, logs=None):
        print(f"\nEpoch {epoch + 1} started.")
    
    def on_epoch_end(self, epoch, logs=None):
        print(f"\nEpoch {epoch + 1} ended.")
        print(f"Accuracy: {logs['accuracy']:.4f} - Loss: {logs['loss']:.4f} - Val Accuracy: {logs['val_accuracy']:.4f} - Val Loss: {logs['val_loss']:.4f}")

# Define directories
save_dir = './split_datav5_full_compressed'

# Load the split data
print("Loading data...")
train_data = np.load(os.path.join(save_dir, 'train_data.npz'))
val_data = np.load(os.path.join(save_dir, 'val_data.npz'))
test_data = np.load(os.path.join(save_dir, 'test_data.npz'))

X_rp_train = train_data['X_rp']
X_gaf_train = train_data['X_gaf']
X_mtf_train = train_data['X_mtf']
y_train = train_data['y']

X_rp_val = val_data['X_rp']
X_gaf_val = val_data['X_gaf']
X_mtf_val = val_data['X_mtf']
y_val = val_data['y']

X_rp_test = test_data['X_rp']
X_gaf_test = test_data['X_gaf']
X_mtf_test = test_data['X_mtf']
y_test = test_data['y']
print("Data loaded.")

# Function to preprocess images
def preprocess_images(images):
    def _preprocess_image(img):
        img = tf.image.resize(img, [224, 224])
        img = img[:, :, :3]  # Keep only RGB channels
        return img

    return np.array([_preprocess_image(img) for img in images])

print("Preprocessing images...")
X_rp_train = preprocess_images(X_rp_train)
X_gaf_train = preprocess_images(X_gaf_train)
X_mtf_train = preprocess_images(X_mtf_train)
X_rp_val = preprocess_images(X_rp_val)
X_gaf_val = preprocess_images(X_gaf_val)
X_mtf_val = preprocess_images(X_mtf_val)
X_rp_test = preprocess_images(X_rp_test)
X_gaf_test = preprocess_images(X_gaf_test)
X_mtf_test = preprocess_images(X_mtf_test)
print("Images preprocessed.")

# Use a smaller batch size
batch_size = 8

# Create TensorFlow datasets
print("Creating TensorFlow datasets...")
train_dataset = tf.data.Dataset.from_tensor_slices(((X_rp_train, X_gaf_train, X_mtf_train), y_train))
val_dataset = tf.data.Dataset.from_tensor_slices(((X_rp_val, X_gaf_val, X_mtf_val), y_val))
test_dataset = tf.data.Dataset.from_tensor_slices(((X_rp_test, X_gaf_test, X_mtf_test), y_test))

# Preprocess the datasets
train_dataset = train_dataset.map(lambda x, y: ((x[0], x[1], x[2]), y))
val_dataset = val_dataset.map(lambda x, y: ((x[0], x[1], x[2]), y))
test_dataset = test_dataset.map(lambda x, y: ((x[0], x[1], x[2]), y))

# Batch the datasets
train_dataset = train_dataset.batch(batch_size)
val_dataset = val_dataset.batch(batch_size)
test_dataset = test_dataset.batch(batch_size)
print("Datasets created.")

# Define a function to build the ResNet50 model
def build_resnet50_model(input_shape, num_classes):
    inputs_rp = Input(shape=input_shape, name='input_rp')
    inputs_gaf = Input(shape=input_shape, name='input_gaf')
    inputs_mtf = Input(shape=input_shape, name='input_mtf')
    
    base_model = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)
    
    # Create separate models for each input with unique names
    base_rp = Model(inputs=inputs_rp, outputs=base_model(inputs_rp), name='resnet50_rp')
    base_gaf = Model(inputs=inputs_gaf, outputs=base_model(inputs_gaf), name='resnet50_gaf')
    base_mtf = Model(inputs=inputs_mtf, outputs=base_model(inputs_mtf), name='resnet50_mtf')
    
    x_rp = base_rp.output
    x_rp = GlobalAveragePooling2D()(x_rp)
    
    x_gaf = base_gaf.output
    x_gaf = GlobalAveragePooling2D()(x_gaf)
    
    x_mtf = base_mtf.output
    x_mtf = GlobalAveragePooling2D()(x_mtf)
    
    merged = Concatenate()([x_rp, x_gaf, x_mtf])
    
    x = Dense(512, activation='relu', name='dense_512')(merged)
    x = Dropout(0.5, name='dropout_512')(x)
    x = Dense(128, activation='relu', name='dense_128')(x)
    x = Dropout(0.5, name='dropout_128')(x)
    outputs = Dense(num_classes, activation='softmax', name='output')(x)
    
    model = Model(inputs=[inputs_rp, inputs_gaf, inputs_mtf], outputs=outputs)
    return model

# Define input shape and number of classes
input_shape = (224, 224, 3)
num_classes = 5

# Build ResNet50 model
resnet50_model = build_resnet50_model(input_shape, num_classes)
resnet50_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Define early stopping and model checkpoint callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
resnet50_checkpoint = ModelCheckpoint(filepath=os.path.join(save_dir, 'resnet50_best_model.keras'), monitor='val_loss', save_best_only=True)

# Train ResNet50 model
print("Starting training...")
resnet50_history = resnet50_model.fit(
    train_dataset,
    epochs=10, 
    validation_data=val_dataset,
    callbacks=[early_stopping, resnet50_checkpoint, PrintLogger()],
    verbose=1
)

# Evaluate ResNet50 model on the test set
resnet50_model.load_weights(os.path.join(save_dir, 'resnet50_best_model.keras'))
resnet50_test_loss, resnet50_test_accuracy = resnet50_model.evaluate(test_dataset)
print(f'ResNet50 Test loss: {resnet50_test_loss:.4f}')
print(f'ResNet50 Test accuracy: {resnet50_test_accuracy:.4f}')

# Function to plot learning curves
def plot_learning_curve(history, model_name):
    plt.figure(figsize=(12, 8))

    # Plot training & validation accuracy values
    plt.subplot(2, 1, 1)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title(f'Model accuracy - {model_name}')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    # Plot training & validation loss values
    plt.subplot(2, 1, 2)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title(f'Model loss - {model_name}')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    plt.tight_layout()
    plt.show()

# Plot learning curve for ResNet50
plot_learning_curve(resnet50_history, 'ResNet50')

# Save the final ResNet50 model
resnet50_model.save(os.path.join(save_dir, 'resnet50_model.keras'))

# Save the training history
with open(os.path.join(save_dir, 'resnet50_history.json'), 'w') as f:
    json.dump(resnet50_history.history, f)

print(f"ResNet50 model and history saved in directory: {save_dir}")


2024-07-02 17:56:37.171374: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-02 17:56:37.200960: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-02 17:56:37.200982: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-02 17:56:37.201655: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-02 17:56:37.206358: I tensorflow/core/platform/cpu_feature_guar

Loading data...
Data loaded.
Preprocessing images...


2024-07-02 17:57:46.952864: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 8245 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:1a:00.0, compute capability: 8.6
2024-07-02 17:57:46.953325: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 8243 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:68:00.0, compute capability: 8.6
2024-07-02 17:58:21.445798: W external/local_tsl/tsl/framework/bfc_allocator.cc:485] Allocator (GPU_0_bfc) ran out of memory trying to allocate 588.0KiB (rounded to 602112)requested by op StridedSlice
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2024-07-02 17:58:21.446157: I external/local_tsl/tsl/framework/bfc_al

ResourceExhaustedError: {{function_node __wrapped__StridedSlice_device_/job:localhost/replica:0/task:0/device:GPU:0}} OOM when allocating tensor with shape[224,224,3] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:StridedSlice] name: strided_slice/

In [2]:
#resize images in smaller batches
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
import matplotlib.pyplot as plt
import json

# Define a custom callback to print messages and store history
class PrintLogger(Callback):
    def on_epoch_begin(self, epoch, logs=None):
        print(f"\nEpoch {epoch + 1} started.")
    
    def on_epoch_end(self, epoch, logs=None):
        print(f"\nEpoch {epoch + 1} ended.")
        print(f"Accuracy: {logs['accuracy']:.4f} - Loss: {logs['loss']:.4f} - Val Accuracy: {logs['val_accuracy']:.4f} - Val Loss: {logs['val_loss']:.4f}")

# Define directories
save_dir = './split_datav5_full_compressed'

# Load the split data
print("Loading data...")
train_data = np.load(os.path.join(save_dir, 'train_data.npz'))
val_data = np.load(os.path.join(save_dir, 'val_data.npz'))
test_data = np.load(os.path.join(save_dir, 'test_data.npz'))

X_rp_train = train_data['X_rp']
X_gaf_train = train_data['X_gaf']
X_mtf_train = train_data['X_mtf']
y_train = train_data['y']

X_rp_val = val_data['X_rp']
X_gaf_val = val_data['X_gaf']
X_mtf_val = val_data['X_mtf']
y_val = val_data['y']

X_rp_test = test_data['X_rp']
X_gaf_test = test_data['X_gaf']
X_mtf_test = test_data['X_mtf']
y_test = test_data['y']
print("Data loaded.")

# Function to preprocess images in batches
def preprocess_images(images, batch_size=100):
    resized_images = []
    for i in range(0, len(images), batch_size):
        batch = images[i:i+batch_size]
        batch_resized = tf.image.resize(batch, [224, 224]).numpy()
        batch_resized = batch_resized[:, :, :, :3]  # Keep only RGB channels
        resized_images.extend(batch_resized)
    return np.array(resized_images)

print("Preprocessing images...")
X_rp_train = preprocess_images(X_rp_train)
X_gaf_train = preprocess_images(X_gaf_train)
X_mtf_train = preprocess_images(X_mtf_train)
X_rp_val = preprocess_images(X_rp_val)
X_gaf_val = preprocess_images(X_gaf_val)
X_mtf_val = preprocess_images(X_mtf_val)
X_rp_test = preprocess_images(X_rp_test)
X_gaf_test = preprocess_images(X_gaf_test)
X_mtf_test = preprocess_images(X_mtf_test)
print("Images preprocessed.")

# Use a smaller batch size
batch_size = 4

# Create TensorFlow datasets
print("Creating TensorFlow datasets...")
train_dataset = tf.data.Dataset.from_tensor_slices(((X_rp_train, X_gaf_train, X_mtf_train), y_train))
val_dataset = tf.data.Dataset.from_tensor_slices(((X_rp_val, X_gaf_val, X_mtf_val), y_val))
test_dataset = tf.data.Dataset.from_tensor_slices(((X_rp_test, X_gaf_test, X_mtf_test), y_test))

# Preprocess the datasets
train_dataset = train_dataset.map(lambda x, y: ((x[0], x[1], x[2]), y))
val_dataset = val_dataset.map(lambda x, y: ((x[0], x[1], x[2]), y))
test_dataset = test_dataset.map(lambda x, y: ((x[0], x[1], x[2]), y))

# Batch the datasets
train_dataset = train_dataset.batch(batch_size)
val_dataset = val_dataset.batch(batch_size)
test_dataset = test_dataset.batch(batch_size)
print("Datasets created.")

# Define a function to build the ResNet50 model
def build_resnet50_model(input_shape, num_classes):
    inputs_rp = Input(shape=input_shape, name='input_rp')
    inputs_gaf = Input(shape=input_shape, name='input_gaf')
    inputs_mtf = Input(shape=input_shape, name='input_mtf')
    
    base_model = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)
    
    # Create separate models for each input with unique names
    base_rp = Model(inputs=inputs_rp, outputs=base_model(inputs_rp), name='resnet50_rp')
    base_gaf = Model(inputs=inputs_gaf, outputs=base_model(inputs_gaf), name='resnet50_gaf')
    base_mtf = Model(inputs=inputs_mtf, outputs=base_model(inputs_mtf), name='resnet50_mtf')
    
    x_rp = base_rp.output
    x_rp = GlobalAveragePooling2D()(x_rp)
    
    x_gaf = base_gaf.output
    x_gaf = GlobalAveragePooling2D()(x_gaf)
    
    x_mtf = base_mtf.output
    x_mtf = GlobalAveragePooling2D()(x_mtf)
    
    merged = Concatenate()([x_rp, x_gaf, x_mtf])
    
    x = Dense(512, activation='relu', name='dense_512')(merged)
    x = Dropout(0.5, name='dropout_512')(x)
    x = Dense(128, activation='relu', name='dense_128')(x)
    x = Dropout(0.5, name='dropout_128')(x)
    outputs = Dense(num_classes, activation='softmax', name='output')(x)
    
    model = Model(inputs=[inputs_rp, inputs_gaf, inputs_mtf], outputs=outputs)
    return model

# Define input shape and number of classes
input_shape = (224, 224, 3)
num_classes = 5

# Build ResNet50 model
resnet50_model = build_resnet50_model(input_shape, num_classes)
resnet50_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Define early stopping and model checkpoint callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
resnet50_checkpoint = ModelCheckpoint(filepath=os.path.join(save_dir, 'resnet50_best_model.keras'), monitor='val_loss', save_best_only=True)

# Train ResNet50 model
print("Starting training...")
resnet50_history = resnet50_model.fit(
    train_dataset,
    epochs=10, 
    validation_data=val_dataset,
    callbacks=[early_stopping, resnet50_checkpoint, PrintLogger()],
    verbose=1
)

# Evaluate ResNet50 model on the test set
resnet50_model.load_weights(os.path.join(save_dir, 'resnet50_best_model.keras'))
resnet50_test_loss, resnet50_test_accuracy = resnet50_model.evaluate(test_dataset)
print(f'ResNet50 Test loss: {resnet50_test_loss:.4f}')
print(f'ResNet50 Test accuracy: {resnet50_test_accuracy:.4f}')

# Function to plot learning curves
def plot_learning_curve(history, model_name):
    plt.figure(figsize=(12, 8))

    # Plot training & validation accuracy values
    plt.subplot(2, 1, 1)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title(f'Model accuracy - {model_name}')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    # Plot training & validation loss values
    plt.subplot(2, 1, 2)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title(f'Model loss - {model_name}')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    plt.tight_layout()
    plt.show()

# Plot learning curve for ResNet50
plot_learning_curve(resnet50_history, 'ResNet50')

# Save the final ResNet50 model
resnet50_model.save(os.path.join(save_dir, 'resnet50_model.keras'))

# Save the training history
with open(os.path.join(save_dir, 'resnet50_history.json'), 'w') as f:
    json.dump(resnet50_history.history, f)

print(f"ResNet50 model and history saved in directory: {save_dir}")



Loading data...
Data loaded.
Preprocessing images...


2024-07-03 14:19:57.649522: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1092 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:1a:00.0, compute capability: 8.6
2024-07-03 14:19:57.649643: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 8026 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:68:00.0, compute capability: 8.6


: 

In [1]:
# with image batcher 
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
import matplotlib.pyplot as plt
import json
import cv2
import logging
import time

LOG_FORMAT = "[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s"
logging.basicConfig(format=LOG_FORMAT)
logging.getLogger("IMG").setLevel(logging.DEBUG)
log = logging.getLogger("IMG")

class PrintLogger(Callback):
    def on_epoch_begin(self, epoch, logs=None):
        print(f"\nEpoch {epoch + 1} started.")
    
    def on_epoch_end(self, epoch, logs=None):
        print(f"\nEpoch {epoch + 1} ended.")
        print(f"Accuracy: {logs['accuracy']:.4f} - Loss: {logs['loss']:.4f} - Val Accuracy: {logs['val_accuracy']:.4f} - Val Loss: {logs['val_loss']:.4f}")

class ImageBatcher:
    def __init__(self, images, batch_size: int, image_size: int, shuffle: bool, drop_last: bool, max_batch: int = 0) -> None:
        log.info("Initializing image batcher")
        self.images = images
        self.batch_size = batch_size
        self.image_size = image_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.max_batch = max_batch

        self.num_images = self.images.shape[0]
        self.shape = (batch_size, self.images.shape[1], image_size, image_size)
        self.size = int(np.prod(self.shape))
        self.dtype = self.images.dtype
        self.nbytes = self.dtype.itemsize * self.size

        log.debug(f"Image Fields... \
            \n\tNumber of images: {self.num_images} \
            \n\tType: {self.dtype} \
            \n\tBatch Shape: {self.shape} \
            \n\tBatch Size: {self.size} \
            \n\tBatch Bytes: {self.nbytes}")

        self.indices = None
        self.current_index = 0
        self.current_batch = 0
        self.epoch = 0
        self._initialize_indices()

        log.info("Successfully created image batcher")

    def _initialize_indices(self) -> None:
        log.debug(f"Initializing indices. Shuffle: {self.shuffle}")
        self.indices = np.arange(self.num_images)

        if self.shuffle:
            np.random.shuffle(self.indices)

    def __iter__(self):
        return self

    def __next__(self) -> np.ndarray:
        self.current_batch += 1

        if self.current_index >= self.num_images:
            self._batch_completed()
            raise StopIteration
        
        if self.max_batch != 0 and self.current_batch > self.max_batch:
            self._batch_completed()
            raise StopIteration
        
        start_idx = self.current_index
        end_idx = min(self.current_index + self.batch_size, self.num_images)
        batch_indices = self.indices[start_idx: end_idx]

        if self.drop_last and end_idx - start_idx != self.batch_size:
            log.debug("Skipped last batch")
            self._batch_completed()
            raise StopIteration
        
        batch_images = self._get_batch_from_dataset(batch_indices)

        self.current_index = end_idx
        return batch_images

    def _batch_completed(self) -> None:
        log.info("Batch completed")
        self.epoch += 1
        self.current_index = 0
        self.current_batch = 0

        if self.shuffle:
            np.random.shuffle(self.indices)

    def _get_batch_from_dataset(self, indices: list[int]) -> np.ndarray:
        if self.shuffle:
            batch_images = []
            for i in indices:
                batch_images.append(self.images[i])
            
            batch_images = np.stack(batch_images, axis=0)
            return self._preprocess_images(batch_images)
        else:
            batch_images = self.images[indices]
            return self._preprocess_images(batch_images)

    def _preprocess_images(self, images: np.ndarray) -> np.ndarray:
        output_size = (self.image_size, self.image_size)
        resized_images = np.empty((images.shape[0], *output_size, 3))
        resized_images = resized_images.astype(self.dtype)

        for i, image in enumerate(images):
            if image.shape[2] == 4:  # If image has 4 channels (RGBA), convert to 3 channels (RGB)
                image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
            resized_images[i] = cv2.resize(image, output_size, interpolation=cv2.INTER_LINEAR)
        
        return resized_images

# Function to resize images using the ImageBatcher
def resize_images_with_batcher(images, batch_size, image_size, shuffle=False):
    batcher = ImageBatcher(images, batch_size=batch_size, image_size=image_size, shuffle=shuffle, drop_last=False)
    resized_images = []

    for batch in batcher:
        resized_images.append(batch)

    return np.vstack(resized_images)

# Define directories
save_dir = './split_datav5_full_compressed'

# Load the split data
print("Loading data...")
train_data = np.load(os.path.join(save_dir, 'train_data.npz'))
val_data = np.load(os.path.join(save_dir, 'val_data.npz'))
test_data = np.load(os.path.join(save_dir, 'test_data.npz'))

X_rp_train = train_data['X_rp']
X_gaf_train = train_data['X_gaf']
X_mtf_train = train_data['X_mtf']
y_train = train_data['y']

X_rp_val = val_data['X_rp']
X_gaf_val = val_data['X_gaf']
X_mtf_val = val_data['X_mtf']
y_val = val_data['y']

X_rp_test = test_data['X_rp']
X_gaf_test = test_data['X_gaf']
X_mtf_test = test_data['X_mtf']
y_test = test_data['y']
print("Data loaded.")

print("Preprocessing images...")
X_rp_train = resize_images_with_batcher(X_rp_train, batch_size=32, image_size=224)
X_gaf_train = resize_images_with_batcher(X_gaf_train, batch_size=32, image_size=224)
X_mtf_train = resize_images_with_batcher(X_mtf_train, batch_size=32, image_size=224)
X_rp_val = resize_images_with_batcher(X_rp_val, batch_size=32, image_size=224)
X_gaf_val = resize_images_with_batcher(X_gaf_val, batch_size=32, image_size=224)
X_mtf_val = resize_images_with_batcher(X_mtf_val, batch_size=32, image_size=224)
X_rp_test = resize_images_with_batcher(X_rp_test, batch_size=32, image_size=224)
X_gaf_test = resize_images_with_batcher(X_gaf_test, batch_size=32, image_size=224)
X_mtf_test = resize_images_with_batcher(X_mtf_test, batch_size=32, image_size=224)
print("Images preprocessed.")

# Use a smaller batch size
batch_size = 4

# Create TensorFlow datasets
print("Creating TensorFlow datasets...")
train_dataset = tf.data.Dataset.from_tensor_slices(((X_rp_train, X_gaf_train, X_mtf_train), y_train))
val_dataset = tf.data.Dataset.from_tensor_slices(((X_rp_val, X_gaf_val, X_mtf_val), y_val))
test_dataset = tf.data.Dataset.from_tensor_slices(((X_rp_test, X_gaf_test, X_mtf_test), y_test))

# Preprocess the datasets
train_dataset = train_dataset.map(lambda x, y: ((x[0], x[1], x[2]), y))
val_dataset = val_dataset.map(lambda x, y: ((x[0], x[1], x[2]), y))
test_dataset = test_dataset.map(lambda x, y: ((x[0], x[1], x[2]), y))

# Batch the datasets
train_dataset = train_dataset.batch(batch_size)
val_dataset = val_dataset.batch(batch_size)
test_dataset = test_dataset.batch(batch_size)
print("Datasets created.")

# Define a function to build the ResNet50 model
def build_resnet50_model(input_shape, num_classes):
    inputs_rp = Input(shape=input_shape, name='input_rp')
    inputs_gaf = Input(shape=input_shape, name='input_gaf')
    inputs_mtf = Input(shape=input_shape, name='input_mtf')
    
    base_model = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)
    
    # Create separate models for each input with unique names
    base_rp = Model(inputs=inputs_rp, outputs=base_model(inputs_rp), name='resnet50_rp')
    base_gaf = Model(inputs=inputs_gaf, outputs=base_model(inputs_gaf), name='resnet50_gaf')
    base_mtf = Model(inputs=inputs_mtf, outputs=base_model(inputs_mtf), name='resnet50_mtf')
    
    x_rp = base_rp.output
    x_rp = GlobalAveragePooling2D()(x_rp)
    
    x_gaf = base_gaf.output
    x_gaf = GlobalAveragePooling2D()(x_gaf)
    
    x_mtf = base_mtf.output
    x_mtf = GlobalAveragePooling2D()(x_mtf)
    
    merged = Concatenate()([x_rp, x_gaf, x_mtf])
    
    x = Dense(512, activation='relu', name='dense_512')(merged)
    x = Dropout(0.5, name='dropout_512')(x)
    x = Dense(128, activation='relu', name='dense_128')(x)
    x = Dropout(0.5, name='dropout_128')(x)
    outputs = Dense(num_classes, activation='softmax', name='output')(x)
    
    model = Model(inputs=[inputs_rp, inputs_gaf, inputs_mtf], outputs=outputs)
    return model

# Define input shape and number of classes
input_shape = (224, 224, 3)
num_classes = 5

# Build ResNet50 model
resnet50_model = build_resnet50_model(input_shape, num_classes)
resnet50_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Define early stopping and model checkpoint callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
resnet50_checkpoint = ModelCheckpoint(filepath=os.path.join(save_dir, 'resnet50_best_model.keras'), monitor='val_loss', save_best_only=True)

# Train ResNet50 model
print("Starting training...")
resnet50_history = resnet50_model.fit(
    train_dataset,
    epochs=10, 
    validation_data=val_dataset,
    callbacks=[early_stopping, resnet50_checkpoint, PrintLogger()],
    verbose=1
)

# Evaluate ResNet50 model on the test set
resnet50_model.load_weights(os.path.join(save_dir, 'resnet50_best_model.keras'))
resnet50_test_loss, resnet50_test_accuracy = resnet50_model.evaluate(test_dataset)
print(f'ResNet50 Test loss: {resnet50_test_loss:.4f}')
print(f'ResNet50 Test accuracy: {resnet50_test_accuracy:.4f}')

# Function to plot learning curves
def plot_learning_curve(history, model_name):
    plt.figure(figsize=(12, 8))

    # Plot training & validation accuracy values
    plt.subplot(2, 1, 1)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title(f'Model accuracy - {model_name}')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    # Plot training & validation loss values
    plt.subplot(2, 1, 2)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title(f'Model loss - {model_name}')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    plt.tight_layout()
    plt.show()

# Plot learning curve for ResNet50
plot_learning_curve(resnet50_history, 'ResNet50')

# Save the final ResNet50 model
resnet50_model.save(os.path.join(save_dir, 'resnet50_model.keras'))

# Save the training history
with open(os.path.join(save_dir, 'resnet50_history.json'), 'w') as f:
    json.dump(resnet50_history.history, f)

print(f"ResNet50 model and history saved in directory: {save_dir}")


2024-07-07 15:45:24.547909: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-07 15:45:24.577066: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-07 15:45:24.577089: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-07 15:45:24.577688: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-07 15:45:24.582331: I tensorflow/core/platform/cpu_feature_guar

Loading data...


[2024-07-07 15:46:34,250] [IMG] [INFO] Initializing image batcher
[2024-07-07 15:46:34,252] [IMG] [DEBUG] Image Fields...             
	Number of images: 70480             
	Type: uint8             
	Batch Shape: (32, 201, 224, 224)             
	Batch Size: 322732032             
	Batch Bytes: 322732032
[2024-07-07 15:46:34,252] [IMG] [DEBUG] Initializing indices. Shuffle: False
[2024-07-07 15:46:34,253] [IMG] [INFO] Successfully created image batcher


Data loaded.
Preprocessing images...


[2024-07-07 15:46:48,342] [IMG] [INFO] Batch completed
[2024-07-07 15:46:51,408] [IMG] [INFO] Initializing image batcher
[2024-07-07 15:46:51,409] [IMG] [DEBUG] Image Fields...             
	Number of images: 70480             
	Type: uint8             
	Batch Shape: (32, 201, 224, 224)             
	Batch Size: 322732032             
	Batch Bytes: 322732032
[2024-07-07 15:46:51,409] [IMG] [DEBUG] Initializing indices. Shuffle: False
[2024-07-07 15:46:51,410] [IMG] [INFO] Successfully created image batcher
  resized_images = resized_images.astype(self.dtype)
[2024-07-07 15:47:05,742] [IMG] [INFO] Batch completed
[2024-07-07 15:47:08,941] [IMG] [INFO] Initializing image batcher
[2024-07-07 15:47:08,942] [IMG] [DEBUG] Image Fields...             
	Number of images: 70480             
	Type: uint8             
	Batch Shape: (32, 201, 224, 224)             
	Batch Size: 322732032             
	Batch Bytes: 322732032
[2024-07-07 15:47:08,943] [IMG] [DEBUG] Initializing indices. Shuffle: Fal

Images preprocessed.
Creating TensorFlow datasets...


2024-07-07 15:47:51.628504: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1063 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:1a:00.0, compute capability: 8.6
2024-07-07 15:47:51.628993: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 7883 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:68:00.0, compute capability: 8.6
2024-07-07 15:48:06.171224: W external/local_tsl/tsl/framework/bfc_allocator.cc:485] Allocator (GPU_0_bfc) ran out of memory trying to allocate 9.88GiB (rounded to 10609213440)requested by op _EagerConst
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2024-07-07 15:48:06.171271: I external/local_tsl/tsl/framework/bfc

InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
import matplotlib.pyplot as plt
import json
import cv2
import logging
import time

LOG_FORMAT = "[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s"
logging.basicConfig(format=LOG_FORMAT)
logging.getLogger("IMG").setLevel(logging.DEBUG)
log = logging.getLogger("IMG")

class PrintLogger(Callback):
    def on_epoch_begin(self, epoch, logs=None):
        print(f"\nEpoch {epoch + 1} started.")
    
    def on_epoch_end(self, epoch, logs=None):
        print(f"\nEpoch {epoch + 1} ended.")
        print(f"Accuracy: {logs['accuracy']:.4f} - Loss: {logs['loss']:.4f} - Val Accuracy: {logs['val_accuracy']:.4f} - Val Loss: {logs['val_loss']:.4f}")

class ImageBatcher:
    def __init__(self, images, batch_size: int, image_size: int, shuffle: bool, drop_last: bool, max_batch: int = 0) -> None:
        log.info("Initializing image batcher")
        self.images = images
        self.batch_size = batch_size
        self.image_size = image_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.max_batch = max_batch

        self.num_images = self.images.shape[0]
        self.shape = (batch_size, self.images.shape[1], image_size, image_size)
        self.size = int(np.prod(self.shape))
        self.dtype = self.images.dtype
        self.nbytes = self.dtype.itemsize * self.size

        log.debug(f"Image Fields... \
            \n\tNumber of images: {self.num_images} \
            \n\tType: {self.dtype} \
            \n\tBatch Shape: {self.shape} \
            \n\tBatch Size: {self.size} \
            \n\tBatch Bytes: {self.nbytes}")

        self.indices = None
        self.current_index = 0
        self.current_batch = 0
        self.epoch = 0
        self._initialize_indices()

        log.info("Successfully created image batcher")

    def _initialize_indices(self) -> None:
        log.debug(f"Initializing indices. Shuffle: {self.shuffle}")
        self.indices = np.arange(self.num_images)

        if self.shuffle:
            np.random.shuffle(self.indices)

    def __iter__(self):
        return self

    def __next__(self) -> np.ndarray:
        self.current_batch += 1

        if self.current_index >= self.num_images:
            self._batch_completed()
            raise StopIteration
        
        if self.max_batch != 0 and self.current_batch > self.max_batch:
            self._batch_completed()
            raise StopIteration
        
        start_idx = self.current_index
        end_idx = min(self.current_index + self.batch_size, self.num_images)
        batch_indices = self.indices[start_idx: end_idx]

        if self.drop_last and end_idx - start_idx != self.batch_size:
            log.debug("Skipped last batch")
            self._batch_completed()
            raise StopIteration
        
        batch_images = self._get_batch_from_dataset(batch_indices)

        self.current_index = end_idx
        return batch_images

    def _batch_completed(self) -> None:
        log.info("Batch completed")
        self.epoch += 1
        self.current_index = 0
        self.current_batch = 0

        if self.shuffle:
            np.random.shuffle(self.indices)

    def _get_batch_from_dataset(self, indices: list[int]) -> np.ndarray:
        if self.shuffle:
            batch_images = []
            for i in indices:
                batch_images.append(self.images[i])
            
            batch_images = np.stack(batch_images, axis=0)
            return self._preprocess_images(batch_images)
        else:
            batch_images = self.images[indices]
            return self._preprocess_images(batch_images)

    def _preprocess_images(self, images: np.ndarray) -> np.ndarray:
        output_size = (self.image_size, self.image_size)
        resized_images = np.empty((images.shape[0], *output_size, 3))
        resized_images = resized_images.astype(self.dtype)

        for i, image in enumerate(images):
            if image.shape[2] == 4:  # If image has 4 channels (RGBA), convert to 3 channels (RGB)
                image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
            resized_images[i] = cv2.resize(image, output_size, interpolation=cv2.INTER_LINEAR)
        
        return resized_images

# Function to resize images using the ImageBatcher
def resize_images_with_batcher(images, batch_size, image_size, shuffle=False):
    batcher = ImageBatcher(images, batch_size=batch_size, image_size=image_size, shuffle=shuffle, drop_last=False)
    resized_images = []

    for batch in batcher:
        resized_images.append(batch)

    return np.vstack(resized_images)

# Define directories
save_dir = './split_datav5_full_compressed'

# Load the split data
print("Loading data...")
train_data = np.load(os.path.join(save_dir, 'train_data.npz'))
val_data = np.load(os.path.join(save_dir, 'val_data.npz'))
test_data = np.load(os.path.join(save_dir, 'test_data.npz'))

X_rp_train = train_data['X_rp']
X_gaf_train = train_data['X_gaf']
X_mtf_train = train_data['X_mtf']
y_train = train_data['y']

X_rp_val = val_data['X_rp']
X_gaf_val = val_data['X_gaf']
X_mtf_val = val_data['X_mtf']
y_val = val_data['y']

X_rp_test = test_data['X_rp']
X_gaf_test = test_data['X_gaf']
X_mtf_test = test_data['X_mtf']
y_test = test_data['y']
print("Data loaded.")

print("Preprocessing images...")
X_rp_train = resize_images_with_batcher(X_rp_train, batch_size=32, image_size=224)
X_gaf_train = resize_images_with_batcher(X_gaf_train, batch_size=32, image_size=224)
X_mtf_train = resize_images_with_batcher(X_mtf_train, batch_size=32, image_size=224)
X_rp_val = resize_images_with_batcher(X_rp_val, batch_size=32, image_size=224)
X_gaf_val = resize_images_with_batcher(X_gaf_val, batch_size=32, image_size=224)
X_mtf_val = resize_images_with_batcher(X_mtf_val, batch_size=32, image_size=224)
X_rp_test = resize_images_with_batcher(X_rp_test, batch_size=32, image_size=224)
X_gaf_test = resize_images_with_batcher(X_gaf_test, batch_size=32, image_size=224)
X_mtf_test = resize_images_with_batcher(X_mtf_test, batch_size=32, image_size=224)
print("Images preprocessed.")

# Use a smaller batch size
batch_size = 4

# Generator to yield batches of data
def data_generator(X_rp, X_gaf, X_mtf, y, batch_size):
    num_samples = len(y)
    while True:
        for offset in range(0, num_samples, batch_size):
            X_rp_batch = X_rp[offset:offset+batch_size]
            X_gaf_batch = X_gaf[offset:offset+batch_size]
            X_mtf_batch = X_mtf[offset:offset+batch_size]
            y_batch = y[offset:offset+batch_size]
            yield ([X_rp_batch, X_gaf_batch, X_mtf_batch], y_batch)

train_gen = data_generator(X_rp_train, X_gaf_train, X_mtf_train, y_train, batch_size)
val_gen = data_generator(X_rp_val, X_gaf_val, X_mtf_val, y_val, batch_size)
test_gen = data_generator(X_rp_test, X_gaf_test, X_mtf_test, y_test, batch_size)

# Define a function to build the ResNet50 model
def build_resnet50_model(input_shape, num_classes):
    inputs_rp = Input(shape=input_shape, name='input_rp')
    inputs_gaf = Input(shape=input_shape, name='input_gaf')
    inputs_mtf = Input(shape=input_shape, name='input_mtf')
    
    base_model = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)
    
    # Create separate models for each input with unique names
    base_rp = Model(inputs=inputs_rp, outputs=base_model(inputs_rp), name='resnet50_rp')
    base_gaf = Model(inputs=inputs_gaf, outputs=base_model(inputs_gaf), name='resnet50_gaf')
    base_mtf = Model(inputs=inputs_mtf, outputs=base_model(inputs_mtf), name='resnet50_mtf')
    
    x_rp = base_rp.output
    x_rp = GlobalAveragePooling2D()(x_rp)
    
    x_gaf = base_gaf.output
    x_gaf = GlobalAveragePooling2D()(x_gaf)
    
    x_mtf = base_mtf.output
    x_mtf = GlobalAveragePooling2D()(x_mtf)
    
    merged = Concatenate()([x_rp, x_gaf, x_mtf])
    
    x = Dense(512, activation='relu', name='dense_512')(merged)
    x = Dropout(0.5, name='dropout_512')(x)
    x = Dense(128, activation='relu', name='dense_128')(x)
    x = Dropout(0.5, name='dropout_128')(x)
    outputs = Dense(num_classes, activation='softmax', name='output')(x)
    
    model = Model(inputs=[inputs_rp, inputs_gaf, inputs_mtf], outputs=outputs)
    return model

# Define input shape and number of classes
input_shape = (224, 224, 3)
num_classes = 5

# Build ResNet50 model
resnet50_model = build_resnet50_model(input_shape, num_classes)
resnet50_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Define early stopping and model checkpoint callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
resnet50_checkpoint = ModelCheckpoint(filepath=os.path.join(save_dir, 'resnet50_best_model.keras'), monitor='val_loss', save_best_only=True)

# Train ResNet50 model
print("Starting training...")
steps_per_epoch = len(y_train) // batch_size
validation_steps = len(y_val) // batch_size
resnet50_history = resnet50_model.fit(
    train_gen,
    steps_per_epoch=steps_per_epoch,
    epochs=10,
    validation_data=val_gen,
    validation_steps=validation_steps,
    callbacks=[early_stopping, resnet50_checkpoint, PrintLogger()],
    verbose=1
)

# Evaluate ResNet50 model on the test set
print("Evaluating on test set...")
test_steps = len(y_test) // batch_size
resnet50_test_loss, resnet50_test_accuracy = resnet50_model.evaluate(test_gen, steps=test_steps)
print(f'ResNet50 Test loss: {resnet50_test_loss:.4f}')
print(f'ResNet50 Test accuracy: {resnet50_test_accuracy:.4f}')

# Function to plot learning curves
def plot_learning_curve(history, model_name):
    plt.figure(figsize=(12, 8))

    # Plot training & validation accuracy values
    plt.subplot(2, 1, 1)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title(f'Model accuracy - {model_name}')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    # Plot training & validation loss values
    plt.subplot(2, 1, 2)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title(f'Model loss - {model_name}')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    plt.tight_layout()
    plt.show()

# Plot learning curve for ResNet50
plot_learning_curve(resnet50_history, 'ResNet50')

# Save the final ResNet50 model
resnet50_model.save(os.path.join(save_dir, 'resnet50_model.keras'))

# Save the training history
with open(os.path.join(save_dir, 'resnet50_history.json'), 'w') as f:
    json.dump(resnet50_history.history, f)

print(f"ResNet50 model and history saved in directory: {save_dir}")


Loading data...


[2024-07-07 17:47:38,685] [IMG] [INFO] Initializing image batcher
[2024-07-07 17:47:38,686] [IMG] [DEBUG] Image Fields...             
	Number of images: 70480             
	Type: uint8             
	Batch Shape: (32, 201, 224, 224)             
	Batch Size: 322732032             
	Batch Bytes: 322732032
[2024-07-07 17:47:38,687] [IMG] [DEBUG] Initializing indices. Shuffle: False
[2024-07-07 17:47:38,688] [IMG] [INFO] Successfully created image batcher


Data loaded.
Preprocessing images...


[2024-07-07 17:47:53,282] [IMG] [INFO] Batch completed
[2024-07-07 17:47:56,221] [IMG] [INFO] Initializing image batcher
[2024-07-07 17:47:56,222] [IMG] [DEBUG] Image Fields...             
	Number of images: 70480             
	Type: uint8             
	Batch Shape: (32, 201, 224, 224)             
	Batch Size: 322732032             
	Batch Bytes: 322732032
[2024-07-07 17:47:56,222] [IMG] [DEBUG] Initializing indices. Shuffle: False
[2024-07-07 17:47:56,223] [IMG] [INFO] Successfully created image batcher
  resized_images = resized_images.astype(self.dtype)
[2024-07-07 17:48:13,305] [IMG] [INFO] Batch completed
[2024-07-07 17:48:16,231] [IMG] [INFO] Initializing image batcher
[2024-07-07 17:48:16,232] [IMG] [DEBUG] Image Fields...             
	Number of images: 70480             
	Type: uint8             
	Batch Shape: (32, 201, 224, 224)             
	Batch Size: 322732032             
	Batch Bytes: 322732032
[2024-07-07 17:48:16,232] [IMG] [DEBUG] Initializing indices. Shuffle: Fal

Images preprocessed.


2024-07-07 17:49:00.236110: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1063 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:1a:00.0, compute capability: 8.6
2024-07-07 17:49:00.236297: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 7883 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:68:00.0, compute capability: 8.6


Starting training...

Epoch 1 started.
Epoch 1/10


2024-07-07 17:49:16.797822: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8902
2024-07-07 17:49:18.268996: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 19.06MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-07-07 17:49:18.269024: W tensorflow/core/kernels/gpu_utils.cc:54] Failed to allocate memory for convolution redzone checking; skipping this check. This is benign and only means that we won't check cudnn for out-of-bounds reads and writes. This message will only be printed once.
2024-07-07 17:49:18.278361: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 24.12MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if mor

NotFoundError: Graph execution error:

Detected at node model/resnet50/conv4_block1_1_conv/Conv2D defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/asyncio/base_events.py", line 608, in run_forever

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/asyncio/events.py", line 84, in _run

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 359, in execute_request

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 446, in do_execute

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3075, in run_cell

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3130, in _run_cell

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3334, in run_cell_async

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3517, in run_ast_nodes

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code

  File "/tmp/ipykernel_127831/2241950071.py", line 240, in <module>

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/engine/training.py", line 1807, in fit

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/engine/training.py", line 1401, in train_function

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/engine/training.py", line 1384, in step_function

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/engine/training.py", line 1373, in run_step

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/engine/training.py", line 1150, in train_step

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/engine/training.py", line 590, in __call__

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/engine/functional.py", line 515, in call

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/engine/functional.py", line 672, in _run_internal_graph

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/engine/training.py", line 590, in __call__

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/engine/functional.py", line 515, in call

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/engine/functional.py", line 672, in _run_internal_graph

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/layers/convolutional/base_conv.py", line 290, in call

  File "/home/researchgroup/miniconda3/envs/newtest/lib/python3.11/site-packages/keras/src/layers/convolutional/base_conv.py", line 262, in convolution_op

No algorithm worked!  Error messages:
  Profiling failure on CUDNN engine eng1{}: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 16779264 bytes.
  Profiling failure on CUDNN engine eng28{}: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 16777216 bytes.
  Profiling failure on CUDNN engine eng0{}: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 16777216 bytes.
	 [[{{node model/resnet50/conv4_block1_1_conv/Conv2D}}]] [Op:__inference_train_function_35728]

In [4]:
#again debug
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
import matplotlib.pyplot as plt
import json
import cv2
import logging
import time

# Enable mixed precision training
tf.keras.mixed_precision.set_global_policy('mixed_float16')

LOG_FORMAT = "[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s"
logging.basicConfig(format=LOG_FORMAT)
logging.getLogger("IMG").setLevel(logging.DEBUG)
log = logging.getLogger("IMG")

class PrintLogger(Callback):
    def on_epoch_begin(self, epoch, logs=None):
        print(f"\nEpoch {epoch + 1} started.")
    
    def on_epoch_end(self, epoch, logs=None):
        print(f"\nEpoch {epoch + 1} ended.")
        print(f"Accuracy: {logs['accuracy']:.4f} - Loss: {logs['loss']:.4f} - Val Accuracy: {logs['val_accuracy']:.4f} - Val Loss: {logs['val_loss']:.4f}")

class ImageBatcher:
    def __init__(self, images, batch_size: int, image_size: int, shuffle: bool, drop_last: bool, max_batch: int = 0) -> None:
        log.info("Initializing image batcher")
        self.images = images
        self.batch_size = batch_size
        self.image_size = image_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.max_batch = max_batch

        self.num_images = self.images.shape[0]
        self.shape = (batch_size, self.images.shape[1], image_size, image_size)
        self.size = int(np.prod(self.shape))
        self.dtype = self.images.dtype
        self.nbytes = self.dtype.itemsize * self.size

        log.debug(f"Image Fields... \
            \n\tNumber of images: {self.num_images} \
            \n\tType: {self.dtype} \
            \n\tBatch Shape: {self.shape} \
            \n\tBatch Size: {self.size} \
            \n\tBatch Bytes: {self.nbytes}")

        self.indices = None
        self.current_index = 0
        self.current_batch = 0
        self.epoch = 0
        self._initialize_indices()

        log.info("Successfully created image batcher")

    def _initialize_indices(self) -> None:
        log.debug(f"Initializing indices. Shuffle: {self.shuffle}")
        self.indices = np.arange(self.num_images)

        if self.shuffle:
            np.random.shuffle(self.indices)

    def __iter__(self):
        return self

    def __next__(self) -> np.ndarray:
        self.current_batch += 1

        if self.current_index >= self.num_images:
            self._batch_completed()
            raise StopIteration
        
        if self.max_batch != 0 and self.current_batch > self.max_batch:
            self._batch_completed()
            raise StopIteration
        
        start_idx = self.current_index
        end_idx = min(self.current_index + self.batch_size, self.num_images)
        batch_indices = self.indices[start_idx: end_idx]

        if self.drop_last and end_idx - start_idx != self.batch_size:
            log.debug("Skipped last batch")
            self._batch_completed()
            raise StopIteration
        
        batch_images = self._get_batch_from_dataset(batch_indices)

        self.current_index = end_idx
        return batch_images

    def _batch_completed(self) -> None:
        log.info("Batch completed")
        self.epoch += 1
        self.current_index = 0
        self.current_batch = 0

        if self.shuffle:
            np.random.shuffle(self.indices)

    def _get_batch_from_dataset(self, indices: list[int]) -> np.ndarray:
        if self.shuffle:
            batch_images = []
            for i in indices:
                batch_images.append(self.images[i])
            
            batch_images = np.stack(batch_images, axis=0)
            return self._preprocess_images(batch_images)
        else:
            batch_images = self.images[indices]
            return self._preprocess_images(batch_images)

    def _preprocess_images(self, images: np.ndarray) -> np.ndarray:
        output_size = (self.image_size, self.image_size)
        resized_images = np.empty((images.shape[0], *output_size, 3))
        resized_images = resized_images.astype(self.dtype)

        for i, image in enumerate(images):
            if image.shape[2] == 4:  # If image has 4 channels (RGBA), convert to 3 channels (RGB)
                image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
            resized_images[i] = cv2.resize(image, output_size, interpolation=cv2.INTER_LINEAR)
        
        return resized_images

# Function to resize images using the ImageBatcher
def resize_images_with_batcher(images, batch_size, image_size, shuffle=False):
    batcher = ImageBatcher(images, batch_size=batch_size, image_size=image_size, shuffle=shuffle, drop_last=False)
    resized_images = []

    for batch in batcher:
        resized_images.append(batch)

    return np.vstack(resized_images)

# Define directories
save_dir = './split_datav5_full_compressed'

# Load the split data
print("Loading data...")
train_data = np.load(os.path.join(save_dir, 'train_data.npz'))
val_data = np.load(os.path.join(save_dir, 'val_data.npz'))
test_data = np.load(os.path.join(save_dir, 'test_data.npz'))

X_rp_train = train_data['X_rp']
X_gaf_train = train_data['X_gaf']
X_mtf_train = train_data['X_mtf']
y_train = train_data['y']

X_rp_val = val_data['X_rp']
X_gaf_val = val_data['X_gaf']
X_mtf_val = val_data['X_mtf']
y_val = val_data['y']

X_rp_test = test_data['X_rp']
X_gaf_test = test_data['X_gaf']
X_mtf_test = test_data['X_mtf']
y_test = test_data['y']
print("Data loaded.")

print("Preprocessing images...")
X_rp_train = resize_images_with_batcher(X_rp_train, batch_size=32, image_size=224)
X_gaf_train = resize_images_with_batcher(X_gaf_train, batch_size=32, image_size=224)
X_mtf_train = resize_images_with_batcher(X_mtf_train, batch_size=32, image_size=224)
X_rp_val = resize_images_with_batcher(X_rp_val, batch_size=32, image_size=224)
X_gaf_val = resize_images_with_batcher(X_gaf_val, batch_size=32, image_size=224)
X_mtf_val = resize_images_with_batcher(X_mtf_val, batch_size=32, image_size=224)
X_rp_test = resize_images_with_batcher(X_rp_test, batch_size=32, image_size=224)
X_gaf_test = resize_images_with_batcher(X_gaf_test, batch_size=32, image_size=224)
X_mtf_test = resize_images_with_batcher(X_mtf_test, batch_size=32, image_size=224)
print("Images preprocessed.")

# Use a smaller batch size
batch_size = 1

# Generator to yield batches of data
def data_generator(X_rp, X_gaf, X_mtf, y, batch_size):
    num_samples = len(y)
    while True:
        for offset in range(0, num_samples, batch_size):
            X_rp_batch = X_rp[offset:offset+batch_size]
            X_gaf_batch = X_gaf[offset:offset+batch_size]
            X_mtf_batch = X_mtf[offset:offset+batch_size]
            y_batch = y[offset:offset+batch_size]
            yield ([X_rp_batch, X_gaf_batch, X_mtf_batch], y_batch)

train_gen = data_generator(X_rp_train, X_gaf_train, X_mtf_train, y_train, batch_size)
val_gen = data_generator(X_rp_val, X_gaf_val, X_mtf_val, y_val, batch_size)
test_gen = data_generator(X_rp_test, X_gaf_test, X_mtf_test, y_test, batch_size)

# Define a function to build the ResNet50 model
def build_resnet50_model(input_shape, num_classes):
    inputs_rp = Input(shape=input_shape, name='input_rp')
    inputs_gaf = Input(shape=input_shape, name='input_gaf')
    inputs_mtf = Input(shape=input_shape, name='input_mtf')
    
    base_model = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)
    
    # Create separate models for each input with unique names
    base_rp = Model(inputs=inputs_rp, outputs=base_model(inputs_rp), name='resnet50_rp')
    base_gaf = Model(inputs=inputs_gaf, outputs=base_model(inputs_gaf), name='resnet50_gaf')
    base_mtf = Model(inputs=inputs_mtf, outputs=base_model(inputs_mtf), name='resnet50_mtf')
    
    x_rp = base_rp.output
    x_rp = GlobalAveragePooling2D()(x_rp)
    
    x_gaf = base_gaf.output
    x_gaf = GlobalAveragePooling2D()(x_gaf)
    
    x_mtf = base_mtf.output
    x_mtf = GlobalAveragePooling2D()(x_mtf)
    
    merged = Concatenate()([x_rp, x_gaf, x_mtf])
    
    x = Dense(512, activation='relu', name='dense_512')(merged)
    x = Dropout(0.5, name='dropout_512')(x)
    x = Dense(128, activation='relu', name='dense_128')(x)
    x = Dropout(0.5, name='dropout_128')(x)
    outputs = Dense(num_classes, activation='softmax', name='output')(x)
    
    model = Model(inputs=[inputs_rp, inputs_gaf, inputs_mtf], outputs=outputs)
    return model

# Define input shape and number of classes
input_shape = (224, 224, 3)
num_classes = 5

# Build ResNet50 model
resnet50_model = build_resnet50_model(input_shape, num_classes)
resnet50_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Define early stopping and model checkpoint callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
resnet50_checkpoint = ModelCheckpoint(filepath=os.path.join(save_dir, 'resnet50_best_model.keras'), monitor='val_loss', save_best_only=True)

# Clear any existing TensorFlow sessions
tf.keras.backend.clear_session()

# Train ResNet50 model
print("Starting training...")
steps_per_epoch = len(y_train) // batch_size
validation_steps = len(y_val) // batch_size
resnet50_history = resnet50_model.fit(
    train_gen,
    steps_per_epoch=steps_per_epoch,
    epochs=10,
    validation_data=val_gen,
    validation_steps=validation_steps,
    callbacks=[early_stopping, resnet50_checkpoint, PrintLogger()],
    verbose=1
)

# Evaluate ResNet50 model on the test set
print("Evaluating on test set...")
test_steps = len(y_test) // batch_size
resnet50_test_loss, resnet50_test_accuracy = resnet50_model.evaluate(test_gen, steps=test_steps)
print(f'ResNet50 Test loss: {resnet50_test_loss:.4f}')
print(f'ResNet50 Test accuracy: {resnet50_test_accuracy:.4f}')

# Function to plot learning curves
def plot_learning_curve(history, model_name):
    plt.figure(figsize=(12, 8))

    # Plot training & validation accuracy values
    plt.subplot(2, 1, 1)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title(f'Model accuracy - {model_name}')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    # Plot training & validation loss values
    plt.subplot(2, 1, 2)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title(f'Model loss - {model_name}')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    plt.tight_layout()
    plt.show()

# Plot learning curve for ResNet50
plot_learning_curve(resnet50_history, 'ResNet50')

# Save the final ResNet50 model
resnet50_model.save(os.path.join(save_dir, 'resnet50_model.keras'))

# Save the training history
with open(os.path.join(save_dir, 'resnet50_history.json'), 'w') as f:
    json.dump(resnet50_history.history, f)

print(f"ResNet50 model and history saved in directory: {save_dir}")


INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPUs will likely run quickly with dtype policy mixed_float16 as they all have compute capability of at least 7.0


[2024-07-07 18:03:41,879] [tensorflow] [INFO] Mixed precision compatibility check (mixed_float16): OK
Your GPUs will likely run quickly with dtype policy mixed_float16 as they all have compute capability of at least 7.0


Loading data...


[2024-07-07 18:04:50,562] [IMG] [INFO] Initializing image batcher
[2024-07-07 18:04:50,564] [IMG] [DEBUG] Image Fields...             
	Number of images: 70480             
	Type: uint8             
	Batch Shape: (32, 201, 224, 224)             
	Batch Size: 322732032             
	Batch Bytes: 322732032
[2024-07-07 18:04:50,564] [IMG] [DEBUG] Initializing indices. Shuffle: False
[2024-07-07 18:04:50,565] [IMG] [INFO] Successfully created image batcher


Data loaded.
Preprocessing images...


  resized_images = resized_images.astype(self.dtype)
[2024-07-07 18:05:08,019] [IMG] [INFO] Batch completed
[2024-07-07 18:05:11,658] [IMG] [INFO] Initializing image batcher
[2024-07-07 18:05:11,659] [IMG] [DEBUG] Image Fields...             
	Number of images: 70480             
	Type: uint8             
	Batch Shape: (32, 201, 224, 224)             
	Batch Size: 322732032             
	Batch Bytes: 322732032
[2024-07-07 18:05:11,659] [IMG] [DEBUG] Initializing indices. Shuffle: False
[2024-07-07 18:05:11,660] [IMG] [INFO] Successfully created image batcher
[2024-07-07 18:05:28,063] [IMG] [INFO] Batch completed
[2024-07-07 18:05:30,975] [IMG] [INFO] Initializing image batcher
[2024-07-07 18:05:30,975] [IMG] [DEBUG] Image Fields...             
	Number of images: 70480             
	Type: uint8             
	Batch Shape: (32, 201, 224, 224)             
	Batch Size: 322732032             
	Batch Bytes: 322732032
[2024-07-07 18:05:30,976] [IMG] [DEBUG] Initializing indices. Shuffle: Fal

Images preprocessed.
Starting training...

Epoch 1 started.
Epoch 1/10
    3/70480 [..............................] - ETA: 1:08:27 - loss: 2.6208 - accuracy: 0.0000e+00 

2024-07-07 18:06:40.869989: I external/local_xla/xla/service/service.cc:168] XLA service 0x7fe3800060a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-07-07 18:06:40.870028: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3080, Compute Capability 8.6
2024-07-07 18:06:40.870034: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (1): NVIDIA GeForce RTX 3080, Compute Capability 8.6
2024-07-07 18:06:40.896064: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1720400801.086682  128080 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 1 ended.
Accuracy: 0.8941 - Loss: 0.4847 - Val Accuracy: 0.8947 - Val Loss: 0.4291

Epoch 2 started.
Epoch 2/10
Epoch 2 ended.
Accuracy: 0.8947 - Loss: 0.4333 - Val Accuracy: 0.8947 - Val Loss: 0.4265

Epoch 3 started.
Epoch 3/10
Epoch 3 ended.
Accuracy: 0.8947 - Loss: 0.4270 - Val Accuracy: 0.8947 - Val Loss: 0.4265

Epoch 4 started.
Epoch 4/10
Epoch 4 ended.
Accuracy: 0.8947 - Loss: 0.4260 - Val Accuracy: 0.8947 - Val Loss: 0.4264

Epoch 5 started.
Epoch 5/10

: 

In [1]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))




2024-07-02 17:31:14.249405: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-02 17:31:14.392130: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-02 17:31:14.392167: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-02 17:31:14.419977: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-02 17:31:14.462066: I tensorflow/core/platform/cpu_feature_guar

Num GPUs Available:  2


In [3]:
import gc
import tensorflow as tf
from tensorflow.keras import backend as K

# Trigger garbage collection
gc.collect()

# Clear TensorFlow session
K.clear_session()

# Reset the default graph (for TensorFlow 1.x)
tf.compat.v1.reset_default_graph()

# Free up memory in TensorFlow
tf.compat.v1.Session().close()

# Alternatively, use the following if you are using TensorFlow 2.x:
tf.keras.backend.clear_session()


2024-07-07 18:02:26.863247: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1063 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:1a:00.0, compute capability: 8.6
2024-07-07 18:02:26.863372: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 7883 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:68:00.0, compute capability: 8.6
