In [None]:
import os
import sys
from pathlib import Path

In [None]:
# Directory the notebook kernel was started in (abspath('') resolves to the current working directory).
current_file_path = os.path.abspath('')
# Its parent is treated as the project root; later cells build data/model/log paths from it.
root_file_path = str(Path(current_file_path).parent)
# Make the project root importable. The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate entry to sys.path each time.
if root_file_path not in sys.path:
    sys.path.append(root_file_path)

In [None]:
from deep_globe_seg.helpers import list_subfolders_and_files, CFG, visualize, round_clip_0_1, denormalize, Dataset, get_preprocessing, get_training_augmentation, Dataloader

In [None]:
# Dataset-level overrides: spatial size of the images and the matching input tensor shape.
custom_dataset_params = dict(
    img_size=(512, 512),
    input_shape=(512, 512, 3),
)

# Training hyper-parameter overrides.
custom_hyper_params = dict(
    batch_size=16,
    learning_rate=0.0001,
    num_classes=1,
    epochs=40,
)

# Build the shared configuration object; every folder lives directly under the project root.
configs = CFG(
    image_folder=os.path.join(root_file_path, 'data'),
    saved_model_folder=os.path.join(root_file_path, 'saved_models'),
    tensorboard_logs_path=os.path.join(root_file_path, 'logs'),
    model_type='unet',
    hyper_params=custom_hyper_params,
    dataset_params=custom_dataset_params,
)

In [None]:
# Create the checkpoint and TensorBoard log folders up front; existing folders are left untouched.
for output_dir in (configs.Path.saved_model_folder, configs.Path.tensorboard_logs_path):
    Path(output_dir).mkdir(parents=True, exist_ok=True)

In [None]:
# Inspect the contents of the configured image folder (configs.Path.image_folder)
list_subfolders_and_files(configs.Path.image_folder)

In [None]:
# Import CV, array, table and plotting packages
# (Path is imported again here; the first cell already imports it)
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from pathlib import Path
import albumentations as A

# Import tensorflow, keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Import image segmentation package.
# Keep this order: SM_FRAMEWORK is set before segmentation_models is imported so the
# library uses the tf.keras backend (per the segmentation_models documentation).
import os
os.environ['SM_FRAMEWORK'] = "tf.keras"
import segmentation_models as sm

# Ignore warnings: this silences every warning category for the rest of the session,
# including deprecation notices from the libraries above.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the metadata table and keep only the rows marked split == 'train'.
# .copy() gives an independent frame, so the column assignments below never write
# into a slice of the raw table (avoids pandas' chained-assignment hazard).
metadata_df = pd.read_csv(configs.Path.metadata)
metadata_df = metadata_df.loc[
    metadata_df['split'] == 'train',
    ['image_id', 'sat_image_path', 'mask_path'],
].copy()

# Turn the relative image / mask paths into paths under the configured image folder.
metadata_df['sat_image_path'] = metadata_df['sat_image_path'].apply(lambda img_pth: os.path.join(configs.Path.image_folder, img_pth))
metadata_df['mask_path'] = metadata_df['mask_path'].apply(lambda mask_pth: os.path.join(configs.Path.image_folder, mask_pth))

# Shuffle DataFrame (fixed seed so the partition is reproducible across runs)
metadata_df = metadata_df.sample(frac=1, random_state=9).reset_index(drop=True)

# Split into train, validation, and test sets (80/10/10 split).
# The validation set is drawn with the same row count as the test set. Sampling
# frac=0.1 of the *remaining* 90% would yield only 9% of the data (an 81/9/10 split).
test_df = metadata_df.sample(frac=0.1, random_state=9)
remaining_df = metadata_df.drop(test_df.index)
valid_df = remaining_df.sample(n=len(test_df), random_state=9)
train_df = remaining_df.drop(valid_df.index)

# Sanity checks: the three subsets are pairwise disjoint and together cover every row.
assert len(train_df) + len(valid_df) + len(test_df) == len(metadata_df), "split sizes do not add up"
assert train_df.index.intersection(valid_df.index).empty, "train/validation overlap"
assert train_df.index.intersection(test_df.index).empty, "train/test overlap"
assert valid_df.index.intersection(test_df.index).empty, "validation/test overlap"

# Print informative message about dataset split
print("Dataset successfully loaded and split into training, validation, and test sets:")
print(f"Number of samples in training set: {len(train_df)}")
print(f"Number of samples in validation set: {len(valid_df)}")
print(f"Number of samples in test set: {len(test_df)}")

In [None]:
# Preview the first rows of the shuffled metadata table
metadata_df.head(3)

In [None]:
# Preview the first rows of the training subset
train_df.head(3)

In [None]:
# Preview the first rows of the validation subset
valid_df.head(3)

In [None]:
# Preview the first rows of the held-out test subset
test_df.head(3)

In [None]:
# Load the class dictionary table: one row per class, with its name and r/g/b colour columns.
class_dict = pd.read_csv(configs.Path.class_dict)

# Class names, in file order
class_names = class_dict['name'].to_list()

# One [r, g, b] triple per class, in the same order as class_names
rgb_columns = ['r', 'g', 'b']
class_rgb_values = class_dict[rgb_columns].to_numpy().tolist()

print('All dataset classes and their corresponding RGB values in labels:')
print('Class Names: ', class_names)
print('Class RGB values: ', class_rgb_values)

In [None]:
# Encoder backbone name, used for both the preprocessing function and the model.
BACKBONE = 'resnet50'
# Target classes to segment; a single entry selects the binary-segmentation path below.
CLASSES = ['road']

# Training hyper-parameters come from the shared configuration object.
hyper_cfg = configs.HyperParameter
BATCH_SIZE = hyper_cfg.batch_size
LR = hyper_cfg.learning_rate
EPOCHS = hyper_cfg.epochs

In [None]:
# Input preprocessing function that segmentation_models provides for the chosen backbone
preprocess_input = sm.get_preprocessing(BACKBONE)

In [None]:
# Network head parameters.
# One target class  -> binary segmentation: a single output channel.
# Any other count   -> multiclass: one channel per class plus one extra channel.
class_count = len(CLASSES)
if class_count == 1:
    n_classes = 1
else:
    n_classes = class_count + 1

# Sigmoid for the single-channel case, softmax across channels otherwise.
activation = 'softmax' if n_classes != 1 else 'sigmoid'

# Build the U-Net on top of the selected backbone
model = sm.Unet(BACKBONE, classes=n_classes, activation=activation)

In [None]:
# Optimizer: Adam with the configured learning rate
optim = keras.optimizers.Adam(LR)

# Loss: Dice plus focal. segmentation_models losses can be added with '+' and scaled
# by an int/float factor; the focal variant follows the binary / multiclass setup.
dice_loss = sm.losses.DiceLoss()
if n_classes == 1:
    focal_loss = sm.losses.BinaryFocalLoss()
else:
    focal_loss = sm.losses.CategoricalFocalLoss()
total_loss = dice_loss + (1 * focal_loss)

# Evaluation metrics: IoU and F-score, both computed on predictions thresholded at 0.5
metrics = [
    sm.metrics.IOUScore(threshold=0.5),
    sm.metrics.FScore(threshold=0.5),
]

# Compile the Keras model with the optimizer, combined loss and metrics defined above
model.compile(optimizer=optim, loss=total_loss, metrics=metrics)

In [None]:
# Arguments shared by the training and validation datasets
shared_dataset_kwargs = dict(shape=configs.Dataset.img_size, classes=CLASSES)

# Training dataset: backbone preprocessing plus the training augmentation pipeline
train_dataset = Dataset(
    train_df,
    preprocessing=get_preprocessing(preprocess_input),
    augmentation=get_training_augmentation(configs.Dataset.img_size),
    **shared_dataset_kwargs,
)

# Validation dataset: backbone preprocessing only, no augmentation
valid_dataset = Dataset(
    valid_df,
    preprocessing=get_preprocessing(preprocess_input),
    **shared_dataset_kwargs,
)

# Training batches are shuffled and use the configured batch size;
# validation is served one sample at a time in a fixed order.
train_dataloader = Dataloader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_dataloader = Dataloader(valid_dataset, batch_size=1, shuffle=False)

In [None]:
# Fetch the first sample of each dataset for a quick look.
# Despite the names, each value is indexed below as a pair: [0] is the image, [1] the mask.
first_train_image = train_dataset[0]
first_valid_image = valid_dataset[0]

In [None]:
# Shape of the first element of the training sample (shown as `first_image` in the visualisation below)
first_train_image[0].shape

In [None]:
# Shape of the second element of the training sample (shown as `second_image` in the visualisation below)
first_train_image[1].shape

In [None]:
# Show both elements of the first training sample with the project's visualize helper
visualize(first_image=first_train_image[0], second_image=first_train_image[1])

In [None]:
# Check the shapes of the first training batch before starting a long training run.
# The batch is fetched once and reused: indexing the dataloader twice would load and
# augment the same batch a second time just to read its second element.
first_batch = train_dataloader[0]
expected_image_shape = (BATCH_SIZE, *configs.Dataset.img_size, 3)
expected_mask_shape = (BATCH_SIZE, *configs.Dataset.img_size, n_classes)

assert first_batch[0].shape == expected_image_shape, (
    f"image batch shape {first_batch[0].shape} != expected {expected_image_shape}"
)
assert first_batch[1].shape == expected_mask_shape, (
    f"mask batch shape {first_batch[1].shape} != expected {expected_mask_shape}"
)

In [None]:
# Callbacks: best-checkpoint saving, learning-rate scheduling and early stopping.

# Write weights only, and only when the monitored value improves (lower is better).
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath=configs.Path.model_save_path,
    save_weights_only=True,
    save_best_only=True,
    mode='min',
)

# Learning-rate reduction on plateau, left entirely on the library defaults.
# NOTE(review): the default patience is presumably much larger than the early-stopping
# patience of 3 below, so this may rarely trigger — confirm the intended settings.
reduce_lr_cb = keras.callbacks.ReduceLROnPlateau()

# Stop once val_loss has not improved for 3 epochs and roll back to the best weights seen.
early_stopping_cb = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    verbose=1,
    mode='min',
    restore_best_weights=True,
)

callbacks = [checkpoint_cb, reduce_lr_cb, early_stopping_cb]

In [None]:
# Train the model.
# If a checkpoint already exists at the configured path, resume from those weights;
# otherwise start from the freshly built model. The fit call is the same in both
# cases, so it is written once instead of being duplicated in each branch.
if os.path.isfile(configs.Path.model_save_path):
    print("INFO ===========Running the Partially Trained Model===============")
    model.load_weights(configs.Path.model_save_path)
else:
    print("INFO ===========Running the Training of Model from Scratch===============")

history = model.fit(
    train_dataloader,
    steps_per_epoch=len(train_dataloader),
    epochs=EPOCHS,
    callbacks=callbacks,
    validation_data=valid_dataloader,
    validation_steps=len(valid_dataloader),
)

In [None]:
# Plot the training and validation curve of every tracked quantity, one figure each.
# Each entry is (history key, figure title, y-axis label); the validation series is
# stored under the same key prefixed with 'val_'.
# The f1-score figure was previously titled 'Model loss' (copy-paste slip); it now
# carries its own title.
curve_specs = [
    ('iou_score', 'Model iou_score', 'iou_score'),
    ('loss', 'Model loss', 'Loss'),
    ('f1-score', 'Model f1-score', 'f1-score'),
]

for history_key, figure_title, y_label in curve_specs:
    plt.figure(figsize=(30, 5))
    plt.plot(history.history[history_key])
    plt.plot(history.history['val_' + history_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Valid'], loc='upper left')
    plt.show()

In [None]:
# Load the best weights written by the checkpoint callback.
# The configuration object in this notebook is named `configs`; the previous `cfg`
# was never defined and raised a NameError on a fresh kernel.
model.load_weights(configs.Path.model_save_path)

In [None]:
# Score the model on the training dataloader (the shuffled, augmented one used for fitting).
# Model.evaluate replaces the deprecated Model.evaluate_generator and takes the same
# dataloader objects that model.fit is given; verbose=0 keeps the output silent as before.
# The first returned value is the loss, followed by one value per compiled metric.
scores = model.evaluate(train_dataloader, verbose=0)  # test_dataloader

print("Training loss: {:.5}".format(scores[0]))
for metric, value in zip(metrics, scores[1:]):
    print("mean training {}: {:.5}".format(metric.__name__, value))

In [None]:
# Score the model on the validation dataloader.
# Model.evaluate replaces the deprecated Model.evaluate_generator and takes the same
# dataloader objects that model.fit is given; verbose=0 keeps the output silent as before.
# The first returned value is the loss, followed by one value per compiled metric.
scores = model.evaluate(valid_dataloader, verbose=0)  # test_dataloader

print("Validation loss: {:.5}".format(scores[0]))
for metric, value in zip(metrics, scores[1:]):
    print("mean validation {}: {:.5}".format(metric.__name__, value))


In [None]:
# Number of validation samples to preview
n = 3

# Pick distinct random samples. replace=False prevents the same image being shown
# twice, and the size is capped so a validation set smaller than n still works.
# test_dataset
sample_count = min(n, len(valid_dataset))
ids = np.random.choice(np.arange(len(valid_dataset)), size=sample_count, replace=False)

for i in ids:
    image, gt_mask = valid_dataset[i]  # test_dataset

    # The model expects a batch axis; rounding turns the predicted scores into a 0/1 mask.
    image = np.expand_dims(image, axis=0)
    pr_mask = model.predict(image).round()

    # Show the de-normalised input next to channel 0 of the ground-truth and predicted masks.
    visualize(
        image=denormalize(image.squeeze()),
        gt_mask=gt_mask[..., 0].squeeze(),
        pr_mask=pr_mask[..., 0].squeeze(),
    )