In [None]:
# wandb is used to track the training process
!pip install wandb
!wandb login Login_Token

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2 as cv
import os
import wandb
from wandb.keras import WandbMetricsLogger, WandbModelCheckpoint


## Preparing and Reading Data

In [None]:
# Hyperparameters and run metadata recorded alongside the W&B run
hyperparameters = dict(
    optimizer="adam",
    loss="binary_crossentropy",
    metric=("accuracy", "MeanIoU"),
    epoch=5,
    batch_size=32,
)

# Open the run in the project it should be logged under
wandb.init(project="Training U_Net", config=hyperparameters)

# Handle through which later cells read the tracked hyperparameters
config = wandb.config

In [None]:
# Path to the CSV file that indexes the training data (placeholder - set it before running).
database_path = "path to database"
# Later cells read the `name`, `img_path` and `mask_path` columns of this table.
database = pd.read_csv(database_path)

In [None]:
# Extract one zip archive per unique value of the `name` column in `database`.
# Archives are looked up as {dir_path}/{name}.zip and unpacked into {new_dir}.
# Missing or corrupt archives are reported and skipped so the remaining ones still extract.
# Expected layout after extraction:
# {new_dir}
#└── {WSI_Name}
#    ├── {WSI_Name}_img
#    │   └── img_patches
#    └── {WSI_Name}_mask
#        └── mask_patches
#------------------------------------------------------#

import zipfile

names = database.name.unique()
dir_path = "path of the directory the ziped data"

new_dir = "path of the directory where the zipped data will be extracted "
for name in names:
    # os.path.join supplies the separator, so dir_path works with or without a trailing slash;
    # the f-string also tolerates non-string values in the "name" column.
    zip_file_path = os.path.join(dir_path, f"{name}.zip")

    if not os.path.isfile(zip_file_path):
        print(f"Zip file {zip_file_path} does not exist")
        continue

    try:
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(new_dir)
    except zipfile.BadZipFile:
        # A single damaged archive should not abort the whole extraction run.
        print(f"Zip file {zip_file_path} is not a valid zip archive and was skipped")
        continue

    print(f"Successfully extracted {zip_file_path}")

In [14]:
# Shuffle all rows with a fixed seed so the split is reproducible across runs.
shuffled_data = database.sample(frac=1., random_state=42)
# Hold out 10% of the rows for validation; the remaining 90% are used for training.
train_data, val_data = train_test_split(shuffled_data, test_size=.1, random_state=42)

## U-Net Architecture

In [None]:
def double_conv_block(x, n_filters):
    """Apply two consecutive 3x3 "same"-padded ReLU convolutions.

    Args:
        x: Input tensor of the block.
        n_filters: Number of filters used by each of the two convolutions.

    Returns:
        The tensor produced by the second convolution.
    """
    for _ in range(2):
        conv = layers.Conv2D(
            filters=n_filters,
            kernel_size=3,
            padding="same",
            activation="relu",
            kernel_initializer="he_normal",
        )
        x = conv(x)
    return x

In [None]:
def downsample_block(x, n_filters):
    """Run one encoder stage: double convolution, 2x2 max-pooling, then dropout.

    Args:
        x: Input tensor of the stage.
        n_filters: Number of filters for the double convolution.

    Returns:
        A ``(features, pooled)`` tuple: the convolution output taken before
        pooling, and the pooled tensor after dropout with rate 0.3.
    """
    features = double_conv_block(x, n_filters)
    pooled = layers.Dropout(rate=0.3)(layers.MaxPool2D(pool_size=2)(features))
    return features, pooled

In [None]:
def upsample_block(x, conv_features, n_filters):
    """Run one decoder stage: transposed convolution, skip concatenation, dropout, double convolution.

    Args:
        x: Tensor coming from the previous (coarser) stage.
        conv_features: Encoder features concatenated with the upsampled tensor.
        n_filters: Number of filters for the transposed and the regular convolutions.

    Returns:
        The tensor produced by the final double convolution.
    """
    # Stride-2 transposed convolution with "same" padding
    upsampled = layers.Conv2DTranspose(
        filters=n_filters, kernel_size=3, strides=2, padding="same"
    )(x)
    # Join the upsampled tensor with the encoder features
    merged = layers.concatenate([upsampled, conv_features])
    # Dropout with rate 0.3 before the convolutions
    regularised = layers.Dropout(rate=0.3)(merged)
    return double_conv_block(regularised, n_filters)

In [None]:
def build_unet_model():
    """Assemble the U-Net as a Keras functional model.

    The network takes 256x256x3 inputs, runs four encoder stages
    (64, 128, 256, 512 filters), a 1024-filter bottleneck and four decoder
    stages that mirror the encoder, and ends with a 1x1 sigmoid convolution
    producing a single output channel.

    Returns:
        A ``tf.keras.Model`` named "U-Net".
    """
    encoder_filters = (64, 128, 256, 512)

    inputs = layers.Input(shape=(256, 256, 3))

    # Contracting path: remember each stage's pre-pooling features for the skip connections
    skip_features = []
    x = inputs
    for n_filters in encoder_filters:
        features, x = downsample_block(x, n_filters)
        skip_features.append(features)

    # Bottleneck between the two paths
    x = double_conv_block(x, 1024)

    # Expanding path: walk the encoder stages in reverse order
    for n_filters, features in zip(reversed(encoder_filters), reversed(skip_features)):
        x = upsample_block(x, features, n_filters)

    # Single-channel sigmoid output
    outputs = layers.Conv2D(1, 1, padding="same", activation="sigmoid")(x)

    return tf.keras.Model(inputs, outputs, name="U-Net")

In [None]:
def mean_iou_manual_tf(y_true, y_pred):
    """Mean intersection-over-union for binary segmentation.

    Predictions are rounded to 0/1 and both tensors are flattened. The IoU is
    computed separately for the labels 0 and 1 and averaged over the labels
    that actually occur in ``y_true`` or the rounded ``y_pred``. A label that
    is absent from both has an empty union; it is left out of the average
    instead of producing 0/0 = NaN.

    Args:
        y_true: Ground-truth mask tensor; only values equal to 0 or 1 are counted.
        y_pred: Predicted probabilities, rounded with ``tf.round`` before comparison.

    Returns:
        Scalar float32 tensor with the mean IoU, or 0 if neither label occurs.
    """
    y_true = tf.reshape(tf.cast(y_true, tf.float32), [-1])
    y_pred = tf.reshape(tf.round(tf.cast(y_pred, tf.float32)), [-1])

    iou_sum = 0.0
    n_valid = 0.0
    for val in (0.0, 1.0):  # Assuming binary segmentation
        y_true_binary = tf.cast(tf.equal(y_true, val), tf.float32)
        y_pred_binary = tf.cast(tf.equal(y_pred, val), tf.float32)

        intersection = tf.reduce_sum(y_true_binary * y_pred_binary)
        union = tf.reduce_sum(y_true_binary) + tf.reduce_sum(y_pred_binary) - intersection

        # divide_no_nan returns 0 for an empty union; such labels are not counted as valid
        iou_sum += tf.math.divide_no_nan(intersection, union)
        n_valid += tf.cast(union > 0, tf.float32)

    return tf.math.divide_no_nan(iou_sum, n_valid)

In [None]:
# Build the network and compile it with the optimizer and loss tracked in the W&B config.
unet_model = build_unet_model()
# NOTE: the metrics are listed here explicitly; the "metric" entry of the W&B config is not read.
unet_model.compile(optimizer=config.optimizer, loss=config.loss, metrics=["accuracy", mean_iou_manual_tf])

In [None]:
# Print the layer-by-layer overview of the compiled model.
unet_model.summary()

## Model Training

In [None]:
# Generator that loops over the listed patches forever and yields (image, mask) batches
# for Keras `fit`; it is used for both the training and the validation split.
#------------------------------------------------------#

dir_path = "path to the extracted patches"

def train_generator(data, batch_size):
    """Endlessly yield ``(images, masks)`` batches read from disk.

    The rows are reshuffled at the start of every pass. Rows whose image or
    mask cannot be read are reported and skipped, and a batch in which nothing
    could be read is not yielded.

    Args:
        data: DataFrame with ``img_path`` and ``mask_path`` columns; each value
            is appended to the module-level ``dir_path`` to form the file path.
        batch_size: Maximum number of rows per batch (the last batch of a pass
            may be smaller).

    Yields:
        A tuple ``(x, y)`` of NumPy arrays: ``x`` holds the images as float32
        scaled by 1/255, ``y`` holds the masks as float32 with every non-zero
        pixel set to 1 (the sigmoid output, the binary cross-entropy loss and
        the IoU metric all expect targets in {0, 1}).

    Raises:
        ValueError: If ``data`` has no rows.
        RuntimeError: If a complete pass over ``data`` loads no sample at all.
    """
    if len(data) == 0:
        # Without this guard the loop below would spin forever without yielding.
        raise ValueError("train_generator received an empty DataFrame")

    while True:
        data = data.sample(frac=1.)  # Shuffle the data
        yielded_any = False
        for start in range(0, len(data), batch_size):
            x_batch = []
            y_batch = []
            # .iloc clips the end of the slice, so the last batch may be shorter
            df_batch = data.iloc[start:start + batch_size]
            for _, row in df_batch.iterrows():
                img = cv.imread(dir_path + row['img_path'])
                if img is None:
                    # Report the column that was actually read so the message itself cannot fail
                    print(f"Image not loaded: '{row['img_path']}'")
                    continue

                mask = cv.imread(dir_path + row['mask_path'], cv.IMREAD_GRAYSCALE)
                if mask is None:
                    print(f"Mask not loaded: {row['mask_path']}")
                    continue

                # Normalize image; float32 halves the memory of the default float64
                x_batch.append(img.astype(np.float32) / 255.)
                # Binarize the mask: identical for 0/1 masks, maps 0/255 masks to 0/1
                y_batch.append((mask > 0).astype(np.float32))

            if not x_batch:
                # Every file of this batch failed to load; do not hand Keras an empty batch
                continue

            yielded_any = True
            yield np.array(x_batch), np.array(y_batch)

        if not yielded_any:
            # Fail loudly (e.g. wrong dir_path) instead of looping forever
            raise RuntimeError("train_generator could not load any image/mask pair; check dir_path")
          

# Get generators (the same generator function feeds training and validation)
batch_size = config.batch_size
train_gen = train_generator(train_data, batch_size=batch_size)
val_gen = train_generator(val_data, batch_size=batch_size)

# Set up model checkpoint (only the best model so far is kept)
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint('/kaggle/working/model_checkpoint.h5', save_best_only=True)

# Ceiling division: the generator also yields the final, smaller batch of each pass,
# and a split with fewer rows than batch_size must still get one step instead of zero.
train_steps = -(-len(train_data) // batch_size)
val_steps = -(-len(val_data) // batch_size)

# Train the model
history = unet_model.fit(
    train_gen,
    validation_data=val_gen,
    steps_per_epoch=train_steps,
    epochs=config.epoch,
    validation_steps=val_steps,
    callbacks=[
        checkpoint_cb,
        WandbMetricsLogger(log_freq=5),
        WandbModelCheckpoint("models"),
    ],
)
# Per-epoch metric values recorded by Keras during training
new_history = history.history


# Save the final model and close the W&B run
unet_model.save('model_name.h5')
wandb.finish()
