In [1]:
import tensorflow as tf
import segmentation_models as sm
import h5py
import numpy as np
import matplotlib.pyplot as plt

import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam

from glob import glob

from skimage.filters import median
from sklearn import metrics
from sklearn.utils import shuffle
from sklearn.model_selection import KFold



Segmentation Models: using `keras` framework.


In [2]:
# Input/output locations. Both must end with '/' because file names are
# appended to them by plain string concatenation.
# To run this notebook elsewhere, replace the two paths below with your own
# directories, e.g.
#   data_dir  = '/your_training_data_directory/'
#   model_dir = '/your_model_directory/'
data_dir  = '/global/homes/m/mgcho/cobalt oxide/data/training/'
model_dir = '/pscratch/sd/m/mgcho/cobalt oxide/model training/seg models/'

In [3]:
# Read the training image and its label mask fully into memory. The context
# managers close each HDF5 file as soon as its dataset has been copied out,
# so no file handle is left open for the lifetime of the kernel.
with h5py.File(data_dir + 'training_image_4k.h5', 'r') as image_file:
    image = image_file['images'][...]
with h5py.File(data_dir + 'training_mask_4k.h5', 'r') as mask_file:
    mask = mask_file['masks'][...]

# Add a leading axis so the arrays are indexed as [frame, row, col].
# NOTE(review): assumes each dataset stores a single 2-D frame - confirm.
image = np.expand_dims(image, axis=0)
mask = np.expand_dims(mask, axis=0)

In [4]:
# Cut the top-left 4096 x 4096 region of every frame into an 8 x 8 grid of
# 512 x 512 tiles. Tiles are stored frame by frame and row-major within a
# frame, i.e. tile index = 64*frame + 8*grid_row + grid_col.
num_images = image.shape[0]


def _tile_frames(frames):
    """Return the (64*num_images, 512, 512) float64 tile stack of `frames`."""
    grid_view = frames[:, :4096, :4096].reshape(num_images, 8, 512, 8, 512)
    # Bring the two grid axes together: (frame, grid_row, grid_col, y, x).
    tiles = grid_view.transpose(0, 1, 3, 2, 4)
    return tiles.reshape(64*num_images, 512, 512).astype(np.float64)


image512 = _tile_frames(image)
mask512 = _tile_frames(mask)

# From here on `image` and `mask` refer to the tile stacks.
image = image512
mask = mask512

In [5]:
# Image standardization: shift and scale every tile independently so that it
# has zero mean and unit standard deviation.
image_stnd = np.empty([image.shape[0],512,512])

for i in range(0,image.shape[0]):
    img_mean = np.mean(image[i])
    img_std = np.std(image[i])
    # A constant tile has zero standard deviation; dividing by it would fill
    # the tile with NaN. Use a unit scale instead so the tile becomes all zeros.
    if img_std == 0:
        img_std = 1.0
    image_stnd[i] = (image[i] - img_mean)/img_std

# Mask channels: labels 1 and 2 are merged into one boolean foreground mask;
# the background mask is its complement, stored as integers 0/1.
mask = np.isin(mask, [1, 2])
background_mask = 1 - mask

In [6]:
# Network input: every standardized tile is replicated into three identical
# channels (broadcast over the new trailing axis).
train_x = np.empty(image_stnd.shape + (3,))
train_x[...] = image_stnd[..., np.newaxis]

# Network target: channel 0 is the background mask, channel 1 the foreground mask.
train_y = np.empty(mask.shape + (2,))
for channel, labels in enumerate((background_mask, mask)):
    train_y[..., channel] = labels

# Aliases used by the cross-validation code
trainX = train_x
trainY = train_y

In [7]:
# Cross-validation / training settings
fold = 0          # zero-based index of the split trained in this run
k_folds = 8       # number of cross-validation splits
batch_size = 8
epochs = 20 #100  # NOTE(review): trailing 100 looks like an alternative setting - confirm

# Random seed passed to the K-fold split and the data generators
seed = 42

# Learning-rate decay: start at `initial_learning_rate` and multiply by
# `decay_rate` once per `decay_steps` epochs.
initial_learning_rate = 0.04
decay_rate = 0.5
decay_steps = 20


def lr_scheduler(epoch):
    """Return the exponentially decayed learning rate for `epoch`.

    The exponent is fractional, so the rate decreases smoothly from one epoch
    to the next and equals `initial_learning_rate` at epoch 0.
    """
    decay_exponent = epoch / decay_steps
    return initial_learning_rate * decay_rate ** decay_exponent

# Create callback for learning rate scheduler
lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_scheduler)

# Split the data into K folds and pick the split trained in this run
kf = KFold(n_splits=k_folds, shuffle=True, random_state=seed)
folds = list(kf.split(trainX))
train_index, val_index = folds[fold]

# Get the training and validation data for this fold
x_train_fold, y_train_fold = trainX[train_index], trainY[train_index]
x_val_fold, y_val_fold = trainX[val_index], trainY[val_index]

# Training augmentation: random horizontal/vertical flips. Images and masks get
# separate generators with identical settings; giving both flows the same seed
# keeps each image batch aligned with its mask batch.
data_gen_args = dict(rotation_range=0, fill_mode='constant', horizontal_flip=True, vertical_flip=True)
image_datagen = ImageDataGenerator(**data_gen_args)
mask_datagen = ImageDataGenerator(**data_gen_args)

# ImageDataGenerator.fit() is intentionally not called: it only computes
# statistics for featurewise_center, featurewise_std_normalization and
# zca_whitening, none of which is enabled here.

# Validation data is passed through unchanged and in a fixed order so that
# val_loss (used to select the best checkpoint) is measured on the same images
# every epoch.
# NOTE(review): the validation set is identical across epochs only when
# len(x_val_fold) is a multiple of batch_size - confirm if those settings change.
val_image_datagen = ImageDataGenerator()
val_mask_datagen = ImageDataGenerator()

# Create generators for the training and validation data for this fold
image_generator_train = image_datagen.flow(x_train_fold, batch_size=batch_size, seed=seed)
mask_generator_train = mask_datagen.flow(y_train_fold, batch_size=batch_size, seed=seed)
image_generator_val = val_image_datagen.flow(x_val_fold, batch_size=batch_size, shuffle=False)
mask_generator_val = val_mask_datagen.flow(y_val_fold, batch_size=batch_size, shuffle=False)

# Combine the generators so each step yields an (image_batch, mask_batch) pair
train_generator = zip(image_generator_train, mask_generator_train)
val_generator = zip(image_generator_val, mask_generator_val)



In [8]:
# Setup model: U-Net with a ResNet-18 encoder, no pretrained encoder weights,
# and a two-class softmax output.
unet_options = dict(encoder_weights=None, classes=2, activation='softmax')
model = sm.Unet('resnet18', **unet_options)

# Optimise the combined categorical cross-entropy + Dice loss with Adam and
# report IoU and F1 during training.
optimizer = Adam(learning_rate=initial_learning_rate)
tracked_metrics = [sm.metrics.iou_score, sm.metrics.f1_score]
model.compile(optimizer=optimizer, loss=sm.losses.cce_dice_loss, metrics=tracked_metrics)

2024-09-17 15:55:41.121906: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-09-17 15:55:43.099399: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38218 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:03:00.0, compute capability: 8.0
2024-09-17 15:55:43.101080: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 38218 MB memory:  -> device: 1, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:41:00.0, compute capability: 8.0
2024-09-17 15:55:43.102584: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/devi

In [9]:
# Print the layer-by-layer summary of the model (layer types, output shapes,
# parameter counts and connections).
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 data (InputLayer)              [(None, None, None,  0           []                               
                                 3)]                                                              
                                                                                                  
 bn_data (BatchNormalization)   (None, None, None,   9           ['data[0][0]']                   
                                3)                                                                
                                                                                                  
 zero_padding2d (ZeroPadding2D)  (None, None, None,   0          ['bn_data[0][0]']                
                                3)                                                          

In [10]:
# Output files for this fold (fold numbers in the file names are 1-based)
save_weights = model_dir + f'unet_noPretrain_fold{fold+1}_weights.h5'
save_history = model_dir + f'unet_noPretrain_fold{fold+1}_history.h5'

# Checkpointing: write the weights only, and only when the validation loss
# improves on the lowest value seen so far.
checkpoint_options = dict(
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=0,
)
model_checkpoint = ModelCheckpoint(save_weights, **checkpoint_options)

# Callbacks handed to model.fit
callbacks_list = [model_checkpoint, lr_callback]

In [11]:
# Number of whole batches per epoch; a remainder smaller than batch_size is
# not counted.
train_steps = len(x_train_fold) // batch_size
val_steps = len(x_val_fold) // batch_size

# Train on the fold's generators without per-epoch console output
history = model.fit(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=epochs,
    validation_data=val_generator,
    validation_steps=val_steps,
    callbacks=callbacks_list,
    verbose=0,
)

2024-09-17 15:55:47.276276: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8302


In [12]:
# Save the training history, one HDF5 dataset per recorded quantity (named by
# its key in history.history). The context manager closes the file even if a
# write fails, so a partially written file is never left open.
with h5py.File(save_history, 'w') as h:
    for k, values in history.history.items():
        h.create_dataset(k, data=values)