In [36]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

from utilss import load_masks, get_2d_mask_from_rle

from constants import IMAGE_PATH


In [37]:
# Mask annotation table with the columns ImageId and EncodedPixels.
masks_df = load_masks()

# An empty-string EncodedPixels value is normalised to NaN so that it is
# treated as missing by the notna()/isinstance checks used further down.
masks_df['EncodedPixels'] = masks_df['EncodedPixels'].replace({'': np.nan})

In [38]:
# One row per image: label is 1 when the image has at least one non-null
# EncodedPixels entry, otherwise 0.
df = (
    masks_df.groupby('ImageId')['EncodedPixels']
    .apply(lambda rles: int(rles.notna().any()))
    .reset_index()
    .rename(columns={'EncodedPixels': 'label'})
)

df_ship = df.loc[df['label'] == 1]
df_empty = df.loc[df['label'] == 0]

# Upper bound on the number of images drawn from each class.
N = 5000

# Draw at most N images per class (fewer if a class is smaller than N).
df_ship_sampled = df_ship.sample(n=min(N, len(df_ship)), random_state=42)
df_empty_sampled = df_empty.sample(n=min(N, len(df_empty)), random_state=42)

df_balanced = pd.concat([df_ship_sampled, df_empty_sampled], ignore_index=True)

# 80 % train; the remaining 20 % is halved below into validation and test.
train_df, temp_df = train_test_split(
    df_balanced,
    stratify=df_balanced['label'],
    test_size=0.2,
    random_state=42,
)

# Split the held-out part 50/50, again stratified on the label.
val_df, test_df = train_test_split(
    temp_df,
    stratify=temp_df['label'],
    test_size=0.5,
    random_state=42,
)


In [39]:
# Lookup table: ImageId -> list of that image's EncodedPixels values
# (the list can contain NaN for rows that have no RLE string).
mask_dict = {
    image_id: list(encoded_rows)
    for image_id, encoded_rows in masks_df.groupby("ImageId")["EncodedPixels"]
}

In [40]:
# Inspect the entries stored for one sample image; each entry is an RLE
# string, or NaN for a row without a mask string.
mask_dict["000194a2d.jpg"]

['360486 1 361252 4 362019 5 362785 8 363552 10 364321 10 365090 9 365858 10 366627 10 367396 9 368165 9 368933 10 369702 10 370471 9 371240 9 372009 9 372777 10 373546 9 374315 9 375084 7 375852 6 376621 3 377390 1',
 '51834 9 52602 9 53370 9 54138 9 54906 9 55674 7 56442 7 57210 7 57978 7 58746 7 59514 7 60282 7 61050 9 61818 9 62586 9 63354 9 64122 9 64890 9',
 '198320 10 199088 10 199856 10 200624 10 201392 10 202160 10 202928 10 203696 10 204464 10 205232 10 206000 10 206768 10 207536 10 208304 10 209072 10 209840 10 210608 10 211376 10 212144 10 212912 10 213680 10 214448 10 215216 10 215984 10 216751 10 217519 10 218287 10 219055 10 219823 10 220591 10 221359 10 222127 10 222895 10 223663 10 224431 10 225199 10 225967 10 226735 10 227503 10 228271 10 229039 10 229807 10 230575 10 231343 10 232111 10 232879 10 233647 10 234415 10',
 nan,
 '254389 9 255157 17 255925 17 256693 17 257461 17 258229 17 258997 17 259765 17 260533 17 261301 17 262068 18 262836 17 263604 17 264372 17 265

In [41]:
def load_image_and_mask(img_id):
    """Load one image and build its combined binary mask.

    Args:
        img_id: Tensor exposing ``.numpy()`` that yields the UTF-8 encoded
            file name of the image (relative to ``IMAGE_PATH``).

    Returns:
        A ``(img, mask)`` pair: ``img`` is the JPEG decoded to 3 channels,
        resized to 256x256 and divided by 255; ``mask`` is a float32 tensor of
        shape 256x256x1 holding 1.0 where the resized mask is positive and
        0.0 elsewhere.
    """
    image_name = img_id.numpy().decode("utf-8")

    # Image branch: read, decode, resize, then scale by 1/255.
    raw_bytes = tf.io.read_file(os.path.join(IMAGE_PATH, image_name))
    img = tf.image.decode_jpeg(raw_bytes, channels=3)
    img = tf.image.resize(img, [256, 256]) / 255.0

    # Mask branch: accumulate every decoded RLE onto a 768x768 canvas.
    # NOTE(review): assumes get_2d_mask_from_rle returns a 768x768 array that
    # can be added in place to a uint8 array - confirm against the helper.
    full_mask = np.zeros((768, 768), dtype=np.uint8)
    string_rles = (
        entry for entry in mask_dict.get(image_name, []) if isinstance(entry, str)
    )  # non-string entries (e.g. NaN) are skipped
    for encoded in string_rles:
        full_mask += get_2d_mask_from_rle(encoded)

    mask = tf.convert_to_tensor(full_mask[..., None], dtype=tf.float32)
    mask = tf.image.resize(mask, [256, 256])

    # Any positive value after resizing counts as foreground.
    return img, tf.cast(mask > 0, tf.float32)

In [42]:
def tf_load_image_and_mask(img_id):
    """Wrap ``load_image_and_mask`` in ``tf.py_function`` for use in ``Dataset.map``.

    Args:
        img_id: Image id tensor forwarded unchanged to ``load_image_and_mask``.

    Returns:
        ``(img, mask)`` float32 tensors with static shapes set to
        ``[256, 256, 3]`` and ``[256, 256, 1]`` respectively.
    """
    img, mask = tf.py_function(
        func=load_image_and_mask,
        inp=[img_id],
        Tout=[tf.float32, tf.float32],
    )

    # Static shapes are assigned explicitly on the py_function outputs.
    for tensor, static_shape in ((img, [256, 256, 3]), (mask, [256, 256, 1])):
        tensor.set_shape(static_shape)

    return img, mask

In [43]:
def make_dataset(df, batch_size=8, shuffle=True):
    """Build a batched, prefetched ``tf.data`` pipeline of (image, mask) pairs.

    Args:
        df: DataFrame with an ``ImageId`` column naming the images to load.
        batch_size: Number of samples per batch.
        shuffle: When true, the image ids are shuffled before loading.

    Returns:
        A ``tf.data.Dataset`` yielding batches produced by
        ``tf_load_image_and_mask``.
    """
    image_ids = df["ImageId"].values
    dataset = tf.data.Dataset.from_tensor_slices(image_ids)

    if shuffle:
        # Shuffle the id strings *before* the expensive map step. Shuffling
        # after it would keep a buffer of decoded image/mask tensors in memory
        # and delay the first batch until that buffer has been filled. Ids are
        # cheap, so the buffer can cover the whole dataset, which gives a
        # uniform shuffle that is redrawn every epoch.
        dataset = dataset.shuffle(
            buffer_size=max(len(image_ids), 1),
            reshuffle_each_iteration=True,
        )

    dataset = dataset.map(tf_load_image_and_mask, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [44]:
# Only the training pipeline is shuffled; validation and test are not.
train_ds = make_dataset(train_df, shuffle=True)
val_ds = make_dataset(val_df, shuffle=False)
test_ds = make_dataset(test_df, shuffle=False)

In [50]:
import tensorflow as tf
from tensorflow.keras import layers, Model

def dice_loss(y_true, y_pred, smooth=1e-6):
    """Soft Dice loss computed over all elements of both tensors at once.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Prediction tensor; it is flattened alongside ``y_true`` and
            multiplied with it element-wise.
        smooth: Constant added to numerator and denominator.

    Returns:
        Scalar ``1 - (2 * sum(t * p) + smooth) / (sum(t) + sum(p) + smooth)``.
    """
    truth = tf.reshape(y_true, [-1])
    pred = tf.reshape(y_pred, [-1])

    overlap = tf.reduce_sum(truth * pred)
    total = tf.reduce_sum(truth) + tf.reduce_sum(pred)

    dice = (2 * overlap + smooth) / (total + smooth)
    return 1 - dice

# ---- Equal-weight blend of binary cross-entropy and Dice ----
bce = tf.keras.losses.BinaryCrossentropy()
def combined_loss(y_true, y_pred):
    """Return 0.5 * binary cross-entropy + 0.5 * Dice loss for the given tensors."""
    bce_term = bce(y_true, y_pred)
    dice_term = dice_loss(y_true, y_pred)
    return 0.5 * bce_term + 0.5 * dice_term

# ---- IoU metric ----
def iou_metric(y_true, y_pred, smooth=1e-6):
    """Intersection-over-union of the thresholded prediction against ``y_true``.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Prediction tensor; values above 0.5 become 1.0, all others 0.0.
        smooth: Constant added to numerator and denominator.

    Returns:
        Scalar ``(intersection + smooth) / (union + smooth)`` summed over every
        element of the tensors passed in.
    """
    hard_pred = tf.cast(y_pred > 0.5, tf.float32)
    overlap = tf.reduce_sum(y_true * hard_pred)
    union = tf.reduce_sum(y_true) + tf.reduce_sum(hard_pred) - overlap
    return (overlap + smooth) / (union + smooth)

def tiny_unet(input_shape=(256, 256, 3)):
    """Build a small U-Net-style Keras model with two down/up-sampling stages.

    Args:
        input_shape: Shape of a single input sample (without batch dimension).

    Returns:
        An uncompiled ``Model`` whose output is a 1-channel sigmoid map
        produced by a 1x1 convolution.
    """
    def conv_pair(tensor, filters):
        # Two stacked 3x3 same-padded ReLU convolutions with `filters` channels.
        for _ in range(2):
            tensor = layers.Conv2D(filters, 3, activation='relu', padding='same')(tensor)
        return tensor

    inputs = layers.Input(shape=input_shape)

    # Encoder: 16 -> 32 filters, max-pooling after each stage.
    enc1 = conv_pair(inputs, 16)
    pooled1 = layers.MaxPool2D()(enc1)

    enc2 = conv_pair(pooled1, 32)
    pooled2 = layers.MaxPool2D()(enc2)

    # Bottleneck at 64 filters.
    bottleneck = conv_pair(pooled2, 64)

    # Decoder: upsample, concatenate the matching encoder output, convolve.
    up2 = layers.UpSampling2D()(bottleneck)
    up2 = layers.Concatenate()([up2, enc2])
    dec2 = conv_pair(up2, 32)

    up1 = layers.UpSampling2D()(dec2)
    up1 = layers.Concatenate()([up1, enc1])
    dec1 = conv_pair(up1, 16)

    outputs = layers.Conv2D(1, 1, activation='sigmoid')(dec1)
    return Model(inputs, outputs)

# Instantiate the network with its default input shape and attach the
# optimizer, loss and metric used for training.
model = tiny_unet()
model.compile(
    loss=combined_loss,
    metrics=[iou_metric],
    optimizer="adam",
)


In [51]:
# Train for 20 epochs, evaluating on the validation set after each epoch;
# the returned History object is kept for later inspection.
history = model.fit(x=train_ds, epochs=20, validation_data=val_ds)

Epoch 1/20
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m783s[0m 780ms/step - iou_metric: 0.1590 - loss: 0.4094 - val_iou_metric: 0.2273 - val_loss: 0.3534
Epoch 2/20
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m795s[0m 792ms/step - iou_metric: 0.2817 - loss: 0.3167 - val_iou_metric: 0.3418 - val_loss: 0.2769
Epoch 3/20
[1m 616/1000[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m4:38[0m 724ms/step - iou_metric: 0.2802 - loss: 0.3150

KeyboardInterrupt: 