config.py:



In [None]:
import os

# Run mode switch. The second assignment overrides the first, so the
# effective value is 'test model'; swap the two lines to change the mode.
MODE = 'train model'
MODE = 'test model'

# --- model / training hyper-parameters ---
# Upper bound on the number of images kept per distinct 'ship_count' value
# (a per-group cap, not the total dataset size).
NUM_SAMPLES = 3000
# Fraction of the balanced dataset held out for validation (0.0 .. 1.0).
VALIDATION_SET_SIZE = 0.1
# Value handed to the GaussianNoise layer of the network.
GAUSSIAN_NOISE = 0.1
NB_EPOCHS = 40
BATCH_SIZE = 64
# Side length (pixels) of the square images fed to the network.
PATCH_SIZE = 256
INPUT_DATA_DIM = (PATCH_SIZE, PATCH_SIZE, 3)

# --- dataset locations ---
BASE_DIR = '/kaggle/input/airbus-ship-detection'
TEST_IMG_DIR = os.path.join(BASE_DIR, 'test_v2')
TRAIN_IMG_DIR = os.path.join(BASE_DIR, 'train_v2')
TRAIN_DATASET_CSV = os.path.join(BASE_DIR, 'train_ship_segmentations_v2.csv')

# Disabled weights-only checkpoint settings, kept for reference:
# WEIGHTS_DIR = 'weights'
# WEIGHTS_FILE = 'model.{epoch:02d}-{val_loss:.2f}.weights.h5'
# WEIGHTS_PATH = os.path.join(WEIGHTS_DIR, WEIGHTS_FILE)

# --- checkpoint output ---
# MODEL_FILE is a template: the {epoch} / {val_loss} placeholders are left
# unformatted here and filled in by whoever consumes MODEL_PATH.
MODEL_DIR = 'model'
MODEL_FILE = 'model.{epoch:02d}-{val_loss:.2f}.keras'
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILE)

# --- model testing ---
MODEL_TO_TEST = '/kaggle/working/model/model.05-1.08.keras'
# Disabled alternative that resolves the file relative to MODEL_DIR:
# MODEL_TO_TEST_PATH = os.path.join(MODEL_DIR, MODEL_TO_TEST)
# Single-argument join: the path is used exactly as given above.
MODEL_TO_TEST_PATH = os.path.join(MODEL_TO_TEST)

utils.py:

In [None]:
import os
import numpy as np
import tensorflow as tf
from PIL import Image
from skimage.transform import resize
import matplotlib.pyplot as plt

#utility functions
def rle_to_mask(starts, lengths, height, width):
    """Decode run-length pairs into a 2-D binary mask.

    Runs are laid out column-major: offsets walk down the first column
    (top to bottom), then continue at the top of the next column.

    Args:
        starts: Iterable of zero-based offsets into the flattened image.
        lengths: Iterable of run lengths, paired element-wise with ``starts``.
        height: Number of rows of the decoded mask.
        width: Number of columns of the decoded mask.

    Returns:
        ``np.uint8`` array of shape ``(height, width)`` holding 1 inside the
        runs and 0 elsewhere.
    """
    flat = np.zeros(height * width, dtype=np.uint8)

    # Mark every run in the flattened (column-major) image.
    for start, length in zip(starts, lengths):
        flat[start:start + length] = 1

    # Each consecutive chunk of `height` values is one image column, so
    # reshape to (width, height) and transpose to get (height, width).
    # For square images this equals the previous reshape((height, width)).T.
    return flat.reshape((width, height)).T


def create_mask(mask_array, width=768, height=768):
    """Build a mask image from a run-length-encoded string.

    Args:
        mask_array: Space-separated ``start length start length ...`` string
            using one-based, column-major pixel numbers (the Airbus Ship
            Detection RLE convention). Any non-string value (e.g. NaN for an
            image without ships) produces an all-zero mask.
        width: Mask width in pixels.
        height: Mask height in pixels.

    Returns:
        ``np.int16`` array of shape ``(height, width)`` with 1 on encoded
        pixels and 0 elsewhere.
    """
    masks = np.zeros((height, width), dtype=np.int16)
    if isinstance(mask_array, str):
        tokens = mask_array.split()
        starts, lengths = [np.array(x, dtype=int) for x in (tokens[::2], tokens[1::2])]
        # RLE pixel numbers start at 1; the decoder slices with 0-based
        # offsets, so shift the starts to avoid a one-pixel displacement.
        masks += rle_to_mask(starts - 1, lengths, height, width)
    return masks

def generate_prediction(model, img_dir, img_name):
    """Run ``model`` on a single image file.

    The image is loaded from ``img_dir/img_name``, resized to
    ``PATCH_SIZE x PATCH_SIZE`` and wrapped in a batch of one before being
    passed to ``model.predict``.

    Args:
        model: Object exposing ``predict``; presumably a Keras model.
        img_dir: Directory containing the image.
        img_name: File name of the image inside ``img_dir``.

    Returns:
        Tuple ``(pred, batch)``: the raw output of ``model.predict`` and the
        one-element input batch that was fed to it.
    """
    pixels = np.array(Image.open(os.path.join(img_dir, img_name)))
    resized = resize(pixels, (PATCH_SIZE, PATCH_SIZE), anti_aliasing=True)
    # The model is called on a batch, so add a leading axis of size 1.
    batch = tf.expand_dims(resized, axis=0)
    pred = model.predict(batch)
    print(f"prediction shape - {pred.shape}")
    return pred, batch

def visualise_prediction(model, img_dir, img_name):
    """Plot an image next to the mask ``model`` predicts for it.

    Args:
        model: Model forwarded to ``generate_prediction``.
        img_dir: Directory containing the image.
        img_name: File name of the image inside ``img_dir``.
    """
    pred, img = generate_prediction(model, img_dir, img_name)

    # Both results are batches of one; show element 0 of each side by side.
    panels = ((img[0], "Original Image"), (pred[0], "Predicted Mask"))

    plt.figure(figsize=(10, 10))
    for position, (picture, caption) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(picture)
        plt.title(caption)
        plt.axis("off")
    plt.show()

generators.py:

In [None]:
import numpy as np

from PIL import Image
from skimage.transform import resize
from tensorflow.keras.preprocessing.image import ImageDataGenerator


def img_gen(input_df, batch_size = BATCH_SIZE, patch_size = PATCH_SIZE):
    """Endlessly yield batches of image/mask patches that contain ships.

    The frame is shuffled once (fixed seed 42) and then iterated over and
    over. Each image is read from ``TRAIN_IMG_DIR``, scaled to 0..1 and
    resized to ``patch_size x patch_size``; its mask is decoded with
    ``create_mask``, resized the same way and binarised. Only patches whose
    mask has at least one non-zero pixel are collected.

    Args:
        input_df: DataFrame with ``ImageId`` and ``AllEncodedPixels`` columns.
        batch_size: Number of patches per yielded batch.
        patch_size: Side length of the square patches.

    Yields:
        Tuple ``(images, masks)``: ``images`` stacks ``batch_size`` RGB
        patches along axis 0; ``masks`` stacks the matching masks (with a
        trailing channel axis of size 1) as ``float32``.

    Raises:
        ValueError: If a complete pass over ``input_df`` produces no patch
            (empty frame, or no mask with a non-zero pixel). Without this
            check the generator would loop forever without ever yielding.
    """
    # shuffle the dataset
    input_df = input_df.sample(frac=1, random_state=42).reset_index(drop=True)
    out_rgb = []
    out_mask = []
    while True:
        patches_this_pass = 0
        for _, row in input_df.iterrows():
            rgb_path = os.path.join(TRAIN_IMG_DIR, row.ImageId)
            # Context manager releases the file handle as soon as the
            # pixels have been copied into the array.
            with Image.open(rgb_path) as rgb_file:
                rgb = np.array(rgb_file)/255.0
            rgb = resize(rgb, (patch_size, patch_size), anti_aliasing=True)
            mask = create_mask(row.AllEncodedPixels)
            mask = resize(mask, (patch_size, patch_size), anti_aliasing=True)
            # Workaround: after resizing, the mask values range from 0.0 to
            # about 3.05e-05 (per the original author's note), so every
            # strictly positive value is mapped back to 1.
            mask = np.where(mask > 0, 1, 0)
            mask = np.expand_dims(mask, -1)

            # The image was just resized to (patch_size, patch_size), so this
            # grid walk visits a single patch per image.
            for i in range(0, rgb.shape[0], patch_size):
                for j in range(0, rgb.shape[1], patch_size):
                    single_mask_patch = mask[i:i+patch_size, j:j+patch_size]
                    if single_mask_patch.max():
                        out_rgb.append(rgb[i:i+patch_size, j:j+patch_size])
                        out_mask.append(single_mask_patch)
                        patches_this_pass += 1
                    if len(out_rgb) >= batch_size:
                        yield np.stack(out_rgb, 0), np.stack(out_mask, 0).astype(np.float32)
                        out_rgb, out_mask = [], []

        # Leftover patches are carried into the next pass, but a pass that
        # added nothing will never add anything: fail loudly instead of
        # spinning forever.
        if not patches_this_pass:
            raise ValueError(
                "img_gen: no image in input_df produced a patch containing a ship"
            )

# Arguments shared by the image and the mask augmentation generators, so
# both are configured with the same set of random transforms.
data_gen_args = {
    'rotation_range': 90,
    'horizontal_flip': True,
    'vertical_flip': True,
    'data_format': 'channels_last',
}

# One generator for the RGB images, one for the masks.
image_datagen = ImageDataGenerator(**data_gen_args)
mask_datagen = ImageDataGenerator(**data_gen_args)

# data augmentation generator
def augmentation_generator(input_gen, seed=None):
    """Yield randomly augmented versions of the batches from ``input_gen``.

    Every batch is passed through ``image_datagen`` / ``mask_datagen``. The
    two flows of one batch share a seed so the image and its mask receive
    the same random transform, while consecutive batches use different
    seeds so the augmentation actually varies over time.

    Args:
        input_gen: Iterable yielding ``(images, masks)`` array pairs whose
            first axis is the batch axis.
        seed: Optional integer base seed. When given, batch ``k`` uses
            ``seed + k``, which makes the augmentation reproducible. When
            ``None`` a random base seed is drawn once.

    Yields:
        Tuple ``(augmented_images, augmented_masks)`` for each input batch.
    """
    # Previously `seed` was ignored and one random seed was reused for every
    # batch, so each batch got the same sequence of random transforms.
    base_seed = np.random.randint(0, 10000) if seed is None else seed

    for batch_index, (input_x, input_y) in enumerate(input_gen):
        batch_seed = base_seed + batch_index

        # Images are scaled to 0..255 for the generator and back to 0..1
        # afterwards, exactly as before.
        augmented_x = image_datagen.flow(
            input_x*255,
            batch_size=input_x.shape[0],
            seed=batch_seed
        )

        augmented_y = mask_datagen.flow(
            input_y,
            batch_size=input_y.shape[0],
            seed=batch_seed
        )

        yield next(augmented_x)/255.0, next(augmented_y)


losses.py:


In [None]:
import tensorflow.keras.backend as K

# loss functions
def dice_score(y_true, y_pred, smooth=1e-6):
    """Return the batch-averaged Dice coefficient of ``y_pred`` vs ``y_true``.

    Sums are taken over axes 1..3, so both tensors are expected to be 4-D
    (presumably batch, height, width, channels - confirm against the model).

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor of the same shape.
        smooth: Constant added to numerator and denominator; keeps the ratio
            defined when both tensors are all zeros.

    Returns:
        Mean over the batch axis of ``(2*overlap + smooth) / (total + smooth)``.
    """
    per_sample_axes = [1, 2, 3]
    overlap = K.sum(y_true * y_pred, axis=per_sample_axes)
    # Sum of both "areas" (not a set union, despite the usual naming).
    total = K.sum(y_true, axis=per_sample_axes) + K.sum(y_pred, axis=per_sample_axes)
    per_sample_score = (2. * overlap + smooth) / (total + smooth)
    return K.mean(per_sample_score, axis=0)


def dice_loss(y_true, y_pred):
    """Return the Dice loss: one minus ``dice_score(y_true, y_pred)``."""
    score = dice_score(y_true, y_pred)
    return 1 - score

def bce_loss(y_true, y_pred):
    """Return the binary cross-entropy averaged over axes 1..3.

    The batch axis (axis 0) is not reduced, so one value per sample is
    returned.
    """
    elementwise = K.binary_crossentropy(y_true, y_pred)
    return K.mean(elementwise, axis=[1, 2, 3])

def dice_bce_loss(y_true, y_pred):
    """Return the sum of ``dice_loss`` and ``bce_loss`` for the given tensors.

    Both terms are added with equal weight.
    """
    return dice_loss(y_true, y_pred) + bce_loss(y_true, y_pred)

model.py:

In [None]:
from tensorflow.keras import layers, Model, Input


def _double_conv(tensor, filters):
    """Apply Conv2D(3x3, relu) -> Dropout(0.1) -> Conv2D(3x3, relu)."""
    conv_kwargs = dict(activation='relu', kernel_initializer='he_normal', padding='same')
    tensor = layers.Conv2D(filters, (3, 3), **conv_kwargs)(tensor)
    tensor = layers.Dropout(0.1)(tensor)
    return layers.Conv2D(filters, (3, 3), **conv_kwargs)(tensor)


def unet(input_shape, optimizer, loss, metrics):
    """Build and compile a U-Net with a single-channel sigmoid output.

    Layout: GaussianNoise + BatchNormalization on the input, four encoder
    stages (16/32/64/128 filters, each followed by 2x2 max pooling), a
    256-filter bottleneck, and four decoder stages that upsample with a
    stride-2 transposed convolution and concatenate the matching encoder
    output.

    Args:
        input_shape: Shape of one input sample (without the batch axis).
            Height and width presumably need to be divisible by 16 so the
            four pool/upsample pairs line up with the skip connections.
        optimizer: Optimizer passed to ``model.compile``.
        loss: Loss passed to ``model.compile``.
        metrics: Metrics passed to ``model.compile``.

    Returns:
        The compiled ``Model``.
    """
    # Keep the Input tensor under its own name. The previous code rebound
    # `inputs` to the BatchNormalization output and then handed that tensor
    # to Model(inputs=...), so the model was not built from the real input
    # and the noise / normalisation layers were not part of it as intended.
    inputs = Input(input_shape)
    x = layers.GaussianNoise(GAUSSIAN_NOISE)(inputs)
    x = layers.BatchNormalization()(x)

    # Encoder: remember each stage's output for the skip connections.
    skips = []
    for filters in (16, 32, 64, 128):
        x = _double_conv(x, filters)
        skips.append(x)
        x = layers.MaxPooling2D((2, 2))(x)

    # Bottleneck
    x = _double_conv(x, 256)

    # Decoder: upsample, merge with the encoder output of the same
    # resolution (deepest first), then convolve.
    for filters, skip in zip((128, 64, 32, 16), reversed(skips)):
        x = layers.Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
        x = layers.concatenate([x, skip])
        x = _double_conv(x, filters)

    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(x)

    model = Model(inputs=[inputs], outputs=[outputs])

    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    return model

train-model.py:

In [None]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, ReduceLROnPlateau


# Read dataset (one row per encoded ship; images without ships have a
# single row whose EncodedPixels is NaN)
df = pd.read_csv(TRAIN_DATASET_CSV)

# Add info about whether a ship is present in the image
df['has_ship'] = df['EncodedPixels'].notna().astype(int)

# Add info about the total number of ships in the image
# ('count' ignores NaN, so images without ships get 0)
df['ship_count'] = df.groupby('ImageId')['EncodedPixels'].transform('count')

# Concatenate all EncodedPixels of an image into AllEncodedPixels
# (NaN is kept for images without any ship)
df['AllEncodedPixels'] = df.groupby('ImageId')['EncodedPixels'].transform(
    lambda x: np.nan if x.isna().all() else ' '.join(filter(None, x))
)

# Keep one row per image, drop the per-ship column and renumber the rows
df = df.drop_duplicates(subset='ImageId', keep='first')
df = df.drop(columns=['EncodedPixels'])
df = df.reset_index(drop=True)

# Create a balanced dataset: at most NUM_SAMPLES images per ship_count value.
# The subsets are collected in a list and concatenated once, instead of
# repeatedly concatenating onto an initially empty DataFrame (which copies
# the data every iteration and relies on deprecated empty-frame handling).
value_counts = df['ship_count'].value_counts()
resampled_subsets = []
for value in value_counts.index:
    subset = df[df['ship_count'] == value]
    number_samples = min(NUM_SAMPLES, len(subset))
    resampled_subsets.append(
        resample(subset, replace=False, n_samples=number_samples, random_state=42)
    )
balanced_df = pd.concat(resampled_subsets)

# Drop images without ships
balanced_df = balanced_df[balanced_df['ship_count'] > 0]

# Split the balanced dataset into train and validation sets, stratified by
# ship_count. The seed is pinned like the other random steps of the pipeline
# so the split (and therefore the validation metrics) is reproducible.
train_ids, validation_ids = train_test_split(
    balanced_df,
    test_size=VALIDATION_SET_SIZE,
    stratify=balanced_df['ship_count'],
    random_state=42,
)

# The split already returns complete rows, so merging them back into
# balanced_df is unnecessary; only the index is renumbered.
train_df = train_ids.reset_index(drop=True)
validation_df = validation_ids.reset_index(drop=True)

print(f"train_df:\n {train_df.sample(5)}")
print(f"validation_df:\n {validation_df.sample(5)}")

# Generator over the training data (not referenced again in the code
# visible below, which builds its own generator for fitting)
train_gen = img_gen(train_df)

# Make sure the directory that receives the model checkpoints exists
if not os.path.exists(MODEL_DIR):
    os.makedirs(MODEL_DIR)

# Every metric-driven callback watches the validation Dice score and treats
# larger values as better
_MONITOR_KWARGS = {'monitor': 'val_dice_score', 'mode': 'max'}

# Write training logs to the 'logs' directory
tensorboard = TensorBoard(log_dir='logs')

# Stop after 15 epochs without improvement of the monitored metric
earlystopping = EarlyStopping(patience=15, **_MONITOR_KWARGS)

# Save the full model (not only its weights) to the MODEL_PATH template
model_path = MODEL_PATH
checkpoint = ModelCheckpoint(
    filepath=model_path,
    verbose=1,
    save_weights_only=False,
    **_MONITOR_KWARGS,
)

# Multiply the learning rate by 0.2 after 3 epochs without improvement,
# never going below 1e-6
reduceLR = ReduceLROnPlateau(
    factor=0.2,
    patience=3,
    verbose=1,
    min_delta=0.0001,
    cooldown=2,
    min_lr=1e-6,
    **_MONITOR_KWARGS,
)

callbacks = [tensorboard, earlystopping, checkpoint, reduceLR]

# Calculate the number of steps per epoch (full batches in the training set)
STEP_COUNT = train_df.shape[0] // BATCH_SIZE

# Create an augmented generator for model fitting
model_fit_gen = augmentation_generator(img_gen(train_df, BATCH_SIZE, PATCH_SIZE))

# Create a validation set: request one batch as large as the validation
# frame so all of it is materialised as a single pair of arrays.
# The size is taken from validation_df itself rather than derived from the
# sizes of two other frames.
validation_test_size = validation_df.shape[0]
validation_x, validation_y = next(img_gen(validation_df, validation_test_size, PATCH_SIZE))

print(f"The size of the training set: {train_df.shape[0]}")
# Report the number of validation images; the previous message printed the
# VALIDATION_SET_SIZE fraction instead.
print(f"The size of the validation set: {validation_test_size}")
print(f"Steps/Epoch: {STEP_COUNT}")

# Create a MirroredStrategy for distributed training
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

# Open a strategy scope
with strategy.scope():
    # Create the model using the unet function; the metric function's name
    # (dice_score) is what the callbacks monitor as 'val_dice_score'
    model = unet(INPUT_DATA_DIM, optimizer='adam', loss=dice_bce_loss, metrics=[dice_score])
    model.summary()

    # Train the model on all available devices
    loss_history = [model.fit(
                    model_fit_gen,
                    steps_per_epoch=STEP_COUNT,
                    epochs=NB_EPOCHS,
                    validation_data=(validation_x, validation_y),
                    callbacks=callbacks)]
