# In [None]:
# Trains a U-Net model for semantic segmentation of forested vs. non-forested areas in satellite images.

# Importing necessary packages
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, UpSampling2D, Input, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import rasterio
import cv2
import glob
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dropout
from tensorflow.keras.regularizers import l2

# Defining U-Net Model and adding regularization
def unet_model(input_size=(256, 256, 4)):
    """Build and compile a three-level U-Net for binary segmentation.

    The encoder runs three stages (64, 128, 256 filters); each stage is two
    3x3 ReLU convolutions followed by 2x2 max pooling and 30% dropout. The
    bottleneck uses 512 filters with 50% dropout. The decoder mirrors the
    encoder: 2x upsampling, concatenation with the matching encoder feature
    map, then two 3x3 convolutions. Every 3x3 convolution carries an L2
    kernel penalty of 1e-4. A final 1x1 sigmoid convolution produces a
    single-channel output map.

    Args:
        input_size: Shape of one input sample as (height, width, channels).
            Because of the three pool/upsample levels, height and width
            should be divisible by 8 so the skip connections line up.

    Returns:
        A Keras ``Model`` compiled with Adam (learning rate 1e-4),
        binary cross-entropy loss and the accuracy metric.
    """

    def double_conv(tensor, filters):
        # Two stacked 3x3 same-padded ReLU convolutions with L2 weight decay.
        for _ in range(2):
            tensor = Conv2D(
                filters,
                (3, 3),
                activation='relu',
                padding='same',
                kernel_regularizer=l2(1e-4),
            )(tensor)
        return tensor

    def downsample(tensor, drop_rate):
        # Halve the spatial resolution, then apply dropout.
        pooled = MaxPooling2D(pool_size=(2, 2))(tensor)
        return Dropout(drop_rate)(pooled)

    def upsample_and_merge(tensor, skip):
        # Double the spatial resolution and join with the encoder feature map.
        upsampled = UpSampling2D(size=(2, 2))(tensor)
        return concatenate([skip, upsampled], axis=3)

    inputs = Input(input_size)

    # Encoding path: remember each stage's pre-pooling output for the skips.
    skip_connections = []
    features = inputs
    for filters in (64, 128, 256):
        features = double_conv(features, filters)
        skip_connections.append(features)
        features = downsample(features, 0.3)

    # Bottleneck
    features = double_conv(features, 512)
    features = Dropout(0.5)(features)

    # Decoding path: consume the skips from deepest to shallowest.
    for filters in (256, 128, 64):
        features = upsample_and_merge(features, skip_connections.pop())
        features = double_conv(features, filters)

    # Per-pixel probability map.
    outputs = Conv2D(1, (1, 1), activation='sigmoid')(features)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    return model

# Loading data and resizing images
def load_data(image_paths, mask_paths, target_size=(256, 256)):
    """Load image/mask pairs, resize them and stack them into NumPy arrays.

    Images are read with rasterio (all bands), reordered to channels-last,
    resized with area interpolation and min-max scaled to [0, 1] as float32.
    The scaling is computed per image over all of its bands together.
    Masks are read as 8-bit grayscale with OpenCV, resized with
    nearest-neighbour interpolation and binarised to {0, 1}.

    Args:
        image_paths: Paths of the raster images, in the same order as
            ``mask_paths``.
        mask_paths: Paths of the mask files, one per image.
        target_size: Output size handed to ``cv2.resize`` as ``dsize``, which
            OpenCV interprets as ``(width, height)``.

    Returns:
        A tuple ``(images, masks)`` of NumPy arrays. ``masks`` has a trailing
        channel axis of length 1. For multi-band rasters ``images`` is
        expected to be (N, height, width, bands).

    Raises:
        ValueError: If the number of images and masks differs.
        OSError: If a mask file cannot be read by OpenCV.
    """
    image_paths = list(image_paths)
    mask_paths = list(mask_paths)

    # zip() would silently drop the unmatched tail, so fail loudly instead.
    if len(image_paths) != len(mask_paths):
        raise ValueError(
            f"Number of images ({len(image_paths)}) does not match "
            f"number of masks ({len(mask_paths)})"
        )

    images = []
    masks = []

    for img_path, mask_path in zip(image_paths, mask_paths):
        # Loading GeoTIFF images; read() copies the data into memory, so the
        # dataset can be closed before any further processing.
        with rasterio.open(img_path) as src:
            image = src.read()

        image = np.transpose(image, (1, 2, 0))  # Reorder dimensions to (height, width, channels)
        image = cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)  # Resize image
        image = cv2.normalize(image, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

        # Loading mask; cv2.imread returns None instead of raising on failure.
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise OSError(f"Could not read mask file: {mask_path}")

        # Nearest-neighbour keeps the mask values discrete while resizing.
        mask = cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)  # Resize mask
        # Pixels strictly greater than 127 become 1, everything else 0.
        # NOTE(review): this assumes masks are stored as 0/255; a mask stored
        # as 0/1 would come out all zeros - confirm the mask encoding.
        mask = cv2.threshold(mask, 127, 1, cv2.THRESH_BINARY)[1]  # Binarize mask

        images.append(image)
        masks.append(mask)

    images = np.array(images)
    masks = np.array(masks)
    masks = np.expand_dims(masks, axis=-1)  # Add channel dimension to masks

    return images, masks

# Paths to datasets
train_image_folder = "/content/drive/MyDrive/folder/AMAZON-1/Training/image/"
train_mask_folder = "/content/drive/MyDrive/folder/AMAZON-1/Training/label/"

validation_image_folder = "/content/drive/MyDrive/folder/AMAZON-1/Validation/images/"
validation_mask_folder = "/content/drive/MyDrive/folder/AMAZON-1/Validation/masks/"

# Getting all image and mask paths. Both lists are sorted so that images and
# masks pair up by position; this relies on matching file names in both folders.
train_image_paths = sorted(glob.glob(os.path.join(train_image_folder, "*.tif")))
train_mask_paths = sorted(glob.glob(os.path.join(train_mask_folder, "*.tif")))

validation_image_paths = sorted(glob.glob(os.path.join(validation_image_folder, "*.tif")))
validation_mask_paths = sorted(glob.glob(os.path.join(validation_mask_folder, "*.tif")))

# Fail early with a clear message instead of training on an empty or
# misaligned dataset (e.g. when the Drive folder is not mounted).
for split_name, split_images, split_masks in (
    ("training", train_image_paths, train_mask_paths),
    ("validation", validation_image_paths, validation_mask_paths),
):
    if not split_images:
        raise FileNotFoundError(f"No .tif images found for the {split_name} split")
    if len(split_images) != len(split_masks):
        raise ValueError(
            f"The {split_name} split has {len(split_images)} images "
            f"but {len(split_masks)} masks"
        )

# Load training and validation data
X_train, y_train = load_data(train_image_paths, train_mask_paths, target_size=(256, 256))
X_val, y_val = load_data(validation_image_paths, validation_mask_paths, target_size=(256, 256))

# Augmenting data for training to make the dataset less simple
data_gen_args = dict(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

batch_size = 8
augmentation_seed = 42

# flow(X, y) only transforms X and passes y through untouched, which would
# leave rotated/shifted images paired with un-transformed masks. Use one
# generator for images and one for masks, driven by the same seed, so both
# receive the same shuffling and the same random transform per sample.
train_datagen = ImageDataGenerator(**data_gen_args)
mask_datagen = ImageDataGenerator(**data_gen_args)

image_flow = train_datagen.flow(X_train, batch_size=batch_size, seed=augmentation_seed)
mask_flow = mask_datagen.flow(y_train, batch_size=batch_size, seed=augmentation_seed)

# Create the training generator. The geometric transforms interpolate mask
# values, so masks are re-binarized to keep the labels in {0, 1}.
train_generator = (
    (image_batch, (mask_batch > 0.5).astype("float32"))
    for image_batch, mask_batch in zip(image_flow, mask_flow)
)

# The paired generator never ends and has no length, so tell Keras how many
# batches make up one pass over the training set.
steps_per_epoch = len(image_flow)

# Training the model with augmentation
model = unet_model(input_size=(256, 256, 4))
model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    validation_data=(X_val, y_val),
    epochs=5,
    verbose=1,
)

# Saving the model
model.save('forest_segmentation_amazon.h5')
