In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Input, UpSampling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tqdm import tqdm
from skimage.io import imread
from skimage.color import rgb2gray
from skimage.transform import resize


In [None]:
# Dataset layout: the train_val and test splits each hold an `image` and a `label` folder.
DATASET_ROOT = '/kaggle/input/fire-v11/New_Dataset_v9'
image_train_val_dir = f'{DATASET_ROOT}/train_val/image'
label_train_val_dir = f'{DATASET_ROOT}/train_val/label'
image_test_dir = f'{DATASET_ROOT}/test/image'
label_test_dir = f'{DATASET_ROOT}/test/label'

In [None]:
# Network input size: images and masks are resized to IMG_HEIGHT x IMG_WIDTH,
# and images keep their first IMG_CHANNELS channels.
IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS = 288, 288, 3

In [None]:
# List the train/val images and label masks (sorted so the order is deterministic).
image_files = sorted(f for f in os.listdir(image_train_val_dir) if f.endswith('.jpg'))
label_files = sorted(f for f in os.listdir(label_train_val_dir) if f.endswith('.png'))

# Fail fast: the loading cells look up `<name>_label.png` for every `<name>.jpg`,
# so report missing masks here instead of part-way through a multi-minute load.
_available_labels = set(label_files)
_missing_labels = [
    f for f in image_files
    if f.replace('.jpg', '_label.png') not in _available_labels
]
if _missing_labels:
    raise FileNotFoundError(
        f"{len(_missing_labels)} image(s) in {image_train_val_dir} have no matching "
        f"label in {label_train_val_dir}, e.g. {_missing_labels[:5]}"
    )

# Hold out 22.5% of the train_val images for validation (fixed random_state for a
# reproducible split). The test images come from their own directory.
train_ids, val_ids = train_test_split(image_files, test_size=0.225, random_state=11)
test_ids = sorted(f for f in os.listdir(image_test_dir) if f.endswith('.jpg'))

In [None]:
# Number of images held out for validation.
len(val_ids)

In [None]:
# Pre-allocate the training arrays: uint8 RGB images and boolean masks.
X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
Y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)

# Load every training image and its mask.
for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
    # Image: keep the first IMG_CHANNELS channels and resize to the network input size.
    img = imread(os.path.join(image_train_val_dir, id_))[:, :, :IMG_CHANNELS]
    img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
    X_train[n] = img

    # Mask: stored next to the image as `<name>_label.png`.
    mask_file = id_.replace('.jpg', '_label.png')
    mask = imread(os.path.join(label_train_val_dir, mask_file))

    # If the mask has colour channels, collapse it to a single channel.
    if len(mask.shape) == 3:
        mask = rgb2gray(mask)

    # Resize with nearest-neighbour and no anti-aliasing so that no interpolated
    # values appear around the mask border. With the default interpolation every
    # small non-zero edge value would become foreground after the bool cast.
    mask = resize(mask, (IMG_HEIGHT, IMG_WIDTH), order=0, mode='constant',
                  preserve_range=True, anti_aliasing=False)
    Y_train[n] = np.expand_dims(mask > 0, axis=-1)

# Scale to [0, 1] as float32 (dividing the uint8 array by 255.0 would allocate float64).
X_train = X_train.astype(np.float32) / 255.0

100%|██████████| 1317/1317 [04:05<00:00,  5.36it/s]


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Preview samples: original image, binary mask, and the mask overlaid on the image.
# This cell runs right after the training arrays are built, so it previews
# X_train / Y_train. X_val / Y_val are only created by a later cell, and
# referencing them here raises NameError on a fresh "Restart & Run All".
PREVIEW_START, PREVIEW_STOP = 1, 100  # same index range as before: 1..99
for n in range(PREVIEW_START, min(PREVIEW_STOP, len(X_train))):
    fig = plt.figure(figsize=(10, 5))

    # Original image
    plt.subplot(1, 3, 1)
    plt.imshow(X_train[n])
    plt.title('Original Image')

    # Binary segmentation mask
    plt.subplot(1, 3, 2)
    plt.imshow(np.squeeze(Y_train[n]), cmap='gray')
    plt.title('Mask (Binary Segmentation)')

    # Mask overlaid on the original image
    plt.subplot(1, 3, 3)
    plt.imshow(X_train[n])
    plt.imshow(np.squeeze(Y_train[n]), cmap='jet', alpha=0.5)
    plt.title('Image with Mask Overlay')

    plt.show()
    # Release the figure; dozens of open figures otherwise accumulate in memory.
    plt.close(fig)


In [None]:
# Pre-allocate the validation arrays: uint8 RGB images and boolean ground-truth masks.
X_val = np.zeros((len(val_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
Y_val = np.zeros((len(val_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)

# Load every validation image and its mask.
for n, id_ in tqdm(enumerate(val_ids), total=len(val_ids)):
    # Image: keep the first IMG_CHANNELS channels and resize to the network input size.
    img = imread(os.path.join(image_train_val_dir, id_))[:, :, :IMG_CHANNELS]
    img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
    X_val[n] = img

    # Mask: stored next to the image as `<name>_label.png`.
    mask_file = id_.replace('.jpg', '_label.png')
    mask = imread(os.path.join(label_train_val_dir, mask_file))

    # If the mask has colour channels, collapse it to a single channel.
    if len(mask.shape) == 3:
        mask = rgb2gray(mask)

    # Resize with nearest-neighbour and no anti-aliasing so that no interpolated
    # values appear around the mask border. With the default interpolation every
    # small non-zero edge value would become foreground after the bool cast.
    mask = resize(mask, (IMG_HEIGHT, IMG_WIDTH), order=0, mode='constant',
                  preserve_range=True, anti_aliasing=False)
    Y_val[n] = np.expand_dims(mask > 0, axis=-1)

# Scale to [0, 1] as float32 (dividing the uint8 array by 255.0 would allocate float64).
X_val = X_val.astype(np.float32) / 255.0


100%|██████████| 383/383 [01:24<00:00,  4.54it/s]


In [None]:
# Pre-allocate the test arrays: uint8 RGB images and boolean ground-truth masks.
X_test = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
Y_test = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)

# Load every test image and its mask.
for n, id_ in tqdm(enumerate(test_ids), total=len(test_ids)):
    # Image: keep the first IMG_CHANNELS channels and resize to the network input size.
    img = imread(os.path.join(image_test_dir, id_))[:, :, :IMG_CHANNELS]
    img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
    X_test[n] = img

    # Mask: stored next to the image as `<name>_label.png`.
    mask_file = id_.replace('.jpg', '_label.png')
    mask = imread(os.path.join(label_test_dir, mask_file))

    # If the mask has colour channels, collapse it to a single channel.
    if len(mask.shape) == 3:
        mask = rgb2gray(mask)

    # Resize with nearest-neighbour and no anti-aliasing so that no interpolated
    # values appear around the mask border. With the default interpolation every
    # small non-zero edge value would become foreground after the bool cast.
    mask = resize(mask, (IMG_HEIGHT, IMG_WIDTH), order=0, mode='constant',
                  preserve_range=True, anti_aliasing=False)
    Y_test[n] = np.expand_dims(mask > 0, axis=-1)

# Scale to [0, 1] as float32 (dividing the uint8 array by 255.0 would allocate float64).
X_test = X_test.astype(np.float32) / 255.0

100%|██████████| 226/226 [00:37<00:00,  6.00it/s]


#MODEL

##UNET

In [None]:
def unet_model(input_shape):
    """
    Assemble a four-level U-Net for binary image segmentation.

    Input: input_shape (tuple): (height, width, channels) of the input image.

    Output: an uncompiled Keras Model whose output is a single-channel sigmoid
    map with the same spatial size as the input.
    """
    conv_kwargs = dict(activation='relu', padding='same')

    def double_conv(tensor, filters):
        # Two stacked 3x3 ReLU convolutions with `filters` channels each.
        for _ in range(2):
            tensor = layers.Conv2D(filters, (3, 3), **conv_kwargs)(tensor)
        return tensor

    inputs = layers.Input(shape=input_shape)

    # Encoder: keep each level's features for the skip connections, then downsample.
    skips = []
    x = inputs
    for filters in (32, 64, 128, 256):
        x = double_conv(x, filters)
        skips.append(x)
        x = layers.MaxPooling2D((2, 2))(x)

    # Bottleneck
    x = double_conv(x, 512)

    # Decoder: upsample, concatenate with the matching encoder level, refine.
    for filters in (256, 128, 64, 32):
        x = layers.Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
        x = layers.concatenate([x, skips.pop()])
        x = double_conv(x, filters)

    # Dropout sits on the last decoder features, just before the output layer.
    x = layers.Dropout(0.5)(x)

    # 1x1 convolution to one sigmoid channel.
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(x)

    return models.Model(inputs=[inputs], outputs=[outputs])

# Build the plain U-Net for 288x288 RGB inputs and print its layer summary.
# Note: this binds the global `model`; the later model cells rebind the same name,
# and the training cells compile whichever model was assigned last.
input_shape = (288, 288, 3)
model = unet_model(input_shape)
model.summary()


##SWIN_UNET

In [None]:
import tensorflow as tf
from tensorflow.keras import layers

# Patch Partition: split the image into non-overlapping patches
class PatchPartition(layers.Layer):
    """Cut an image batch into non-overlapping `patch_size` x `patch_size` patches.

    The output has one position per patch and `patch_size * patch_size * channels`
    features per position.
    """

    def __init__(self, patch_size):
        super().__init__()
        self.patch_size = patch_size

    def call(self, inputs):
        size = self.patch_size
        dynamic_shape = tf.shape(inputs)
        channels = inputs.shape[-1]

        # Window and stride are identical, so the extracted patches do not overlap.
        window = [1, size, size, 1]
        patches = tf.image.extract_patches(
            images=inputs,
            sizes=window,
            strides=window,
            rates=[1, 1, 1, 1],
            padding='VALID'
        )

        # Reshape to (batch, height // size, width // size, size * size * channels).
        grid_shape = (
            dynamic_shape[0],
            dynamic_shape[1] // size,
            dynamic_shape[2] // size,
            size * size * channels,
        )
        return tf.reshape(patches, grid_shape)

# Linear Embedding: project each patch into the embedding space
class LinearEmbedding(layers.Layer):
    """Dense projection of the last axis to `embed_dim` features."""

    def __init__(self, embed_dim):
        super().__init__()
        self.embed_dim = embed_dim
        # A Dense layer acts on the last axis only, so the patch grid layout is kept.
        self.proj = layers.Dense(embed_dim)

    def call(self, inputs):
        embedded = self.proj(inputs)
        return embedded

class SwinTransformerBlock(layers.Layer):
    """Transformer-style block: self-attention and an MLP, each with a residual path.

    Args:
        embed_dim: Output channel count; also used as the attention `key_dim`.
        num_heads: Number of attention heads.
        window_size: Stored on the layer.
            NOTE(review): it is never used in `call`, so no window partitioning
            or window shifting takes place in this block.
        mlp_ratio: Width multiplier of the hidden MLP layer.
        qkv_bias: Whether the attention projections use bias terms.
        dropout: Dropout rate applied inside the attention layer.

    `qkv_bias` and `dropout` are now forwarded to `MultiHeadAttention`. Their
    defaults (True, 0.) equal that layer's own defaults, so existing calls
    build exactly the same layer as before.
    """

    def __init__(self, embed_dim, num_heads, window_size, mlp_ratio=4, qkv_bias=True, dropout=0.):
        super(SwinTransformerBlock, self).__init__()
        self.embed_dim = embed_dim
        self.window_size = window_size
        self.mlp_ratio = mlp_ratio

        self.norm1 = layers.LayerNormalization()
        self.attn = layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=embed_dim,
            use_bias=qkv_bias,
            dropout=dropout,
        )
        self.norm2 = layers.LayerNormalization()

        # The MLP always ends with `embed_dim` channels.
        self.mlp = tf.keras.Sequential([
            layers.Dense(embed_dim * mlp_ratio, activation='relu'),  # hidden layer
            layers.Dense(embed_dim)  # output width
        ])

        # 1x1 convolution that brings the second shortcut to `embed_dim` channels
        # so it can be added to the MLP output.
        self.proj = layers.Conv2D(embed_dim, kernel_size=1)

    def call(self, x):
        # Attention sub-block with an identity skip connection.
        shortcut = x
        x = self.norm1(x)
        x = self.attn(x, x)  # self-attention: query and value are the same tensor
        x = x + shortcut

        # MLP sub-block; the shortcut is projected so the channel counts match.
        shortcut = x
        x = self.norm2(x)
        mlp_output = self.mlp(x)

        shortcut = self.proj(shortcut)
        x = mlp_output + shortcut

        return x

# Patch Merging: reduce the spatial size
class PatchMerging(layers.Layer):
    """Downsample with a 2x2, stride-2 convolution that outputs `output_dim` channels."""

    def __init__(self, output_dim):
        super().__init__()
        self.conv = layers.Conv2D(output_dim, kernel_size=2, strides=2)

    def call(self, x):
        merged = self.conv(x)
        return merged

# Patch Expanding: increase the spatial size
class PatchExpanding(layers.Layer):
    """Upsample by `scale_factor`, then map to `output_dim` channels with a 1x1 convolution.

    Args:
        output_dim: Number of output channels.
        scale_factor: Spatial upsampling factor, applied to both height and width.
    """

    def __init__(self, output_dim, scale_factor=2):
        super(PatchExpanding, self).__init__()
        self.conv = layers.Conv2D(output_dim, kernel_size=1)
        self.scale_factor = scale_factor
        # Build the upsampling layer once here instead of creating a new
        # layer object on every forward pass.
        self.upsample = layers.UpSampling2D(size=(scale_factor, scale_factor))

    def call(self, x):
        x = self.upsample(x)
        return self.conv(x)

# Linear Projection: map to the desired number of channels
class LinearProjection(layers.Layer):
    """1x1 convolution that changes the channel count to `output_dim`."""

    def __init__(self, output_dim):
        super().__init__()
        self.conv = layers.Conv2D(output_dim, kernel_size=1)

    def call(self, x):
        projected = self.conv(x)
        return projected


In [None]:
# Build the Swin U-Net model with the adjusted channel widths C
def swin_unet(input_shape, num_classes):
    """Assemble the encoder/decoder from the patch and transformer layers defined above.

    Args:
        input_shape: (height, width, channels) of the input image.
        num_classes: Number of output channels of the final sigmoid 1x1 convolution.

    Returns:
        An uncompiled `tf.keras.Model`.
    """
    window = 4

    def swin_pair(tensor, dim, heads):
        # Two consecutive transformer blocks with the same configuration.
        for _ in range(2):
            tensor = SwinTransformerBlock(embed_dim=dim, num_heads=heads, window_size=window)(tensor)
        return tensor

    inputs = layers.Input(shape=input_shape)

    # Patch partition and linear embedding (C = 3)
    x = PatchPartition(patch_size=4)(inputs)
    x = LinearEmbedding(embed_dim=3)(x)

    # Encoder: C = 3 -> 6 -> 12 -> 24, halving the resolution before each new stage.
    x = swin_pair(x, 3, 1)
    skips = [x]
    for dim, heads in ((6, 2), (12, 4), (24, 8)):
        x = PatchMerging(output_dim=dim)(x)
        x = swin_pair(x, dim, heads)
        skips.append(x)
    skips.pop()  # the deepest stage is the bottleneck itself, not a skip connection

    # Decoder: C = 12 -> 6 -> 3, each stage concatenated with its encoder counterpart.
    for dim, heads in ((12, 4), (6, 2), (3, 1)):
        x = PatchExpanding(output_dim=dim)(x)
        x = layers.Concatenate()([x, skips.pop()])
        x = swin_pair(x, dim, heads)

    # Undo the initial 4x patch partition: back to W x H x 3.
    x = PatchExpanding(output_dim=3, scale_factor=4)(x)

    outputs = layers.Conv2D(num_classes, kernel_size=(1, 1), activation='sigmoid')(x)

    return tf.keras.Model(inputs=inputs, outputs=outputs)
# Build the Swin-style U-Net for 288x288 RGB inputs with one output channel and
# print its summary. Note: this rebinds the global `model`.
model = swin_unet(input_shape=(288, 288, 3), num_classes=1)
model.summary()

##U_KAN

In [None]:
from tensorflow.keras import layers, models
import tensorflow as tf

class KANLayer(layers.Layer):
    """
    Trainable projection used inside the tokenized KAN block.

    As implemented, this layer holds a single trainable weight matrix and applies
    it to the last axis of the input with `tf.tensordot`. There is no bias and no
    non-linearity inside the layer.

    Input:
        - inputs: Tensor whose last axis has size input_dim

    Output:
        - Tensor with the same leading axes and a last axis of size output_dim
    """
    def __init__(self, input_dim, output_dim):
        super(KANLayer, self).__init__()
        # `tensordot(..., axes=1)` contracts the input's last axis with the weight's
        # first axis, so the weight must be (input_dim, output_dim). The previous
        # (output_dim, input_dim) layout only worked when the two dims were equal.
        self.activation_funcs = self.add_weight(
            shape=(input_dim, output_dim),
            initializer="he_normal",  # He Normal initializer for better convergence
            trainable=True,
            name="activation_funcs"
        )

    def call(self, inputs):
        # (..., input_dim) x (input_dim, output_dim) -> (..., output_dim)
        return tf.tensordot(inputs, self.activation_funcs, axes=1)

def tokenized_kan_block(inputs, token_dim, kan_layers=2):
    """
    Tok-KAN block: flatten the feature map into tokens, apply residual KAN layers,
    and reshape the tokens back into a feature map.

    Input:
        - inputs: Feature map of shape (batch, height, width, channels)
        - token_dim: Token embedding size
        - kan_layers: Number of residual KANLayer + LayerNormalization steps

    Output:
        - Tensor of shape (batch, height, width, token_dim)
    """
    # Tokenization: one token per spatial position, projected to token_dim.
    tokens = layers.Reshape((-1, inputs.shape[-1]))(inputs)
    tokens = layers.Dense(token_dim, activation='relu')(tokens)

    # Residual KAN layers
    for _ in range(kan_layers):
        processed_tokens = KANLayer(token_dim, token_dim)(tokens)
        processed_tokens = layers.LayerNormalization()(processed_tokens)
        tokens = layers.Add()([processed_tokens, tokens])

    # The tokens carry token_dim features, so the restored map must use token_dim
    # channels. Reshaping to the input's channel count fails when they differ.
    height, width = inputs.shape[1], inputs.shape[2]
    return layers.Reshape((height, width, token_dim))(tokens)

def unet_kan(input_shape, kan_dim=128, num_kan_layers=2):
    """
    Assemble the U-KAN model: a convolutional encoder/decoder whose bottleneck is
    the tokenized KAN block.

    Input:
        - input_shape: (height, width, channels) of the input image
        - kan_dim: Token size passed to the bottleneck block
        - num_kan_layers: Number of KAN layers passed to the bottleneck block

    Output:
        - Uncompiled Keras Model with a single-channel sigmoid output
    """
    def conv3x3(tensor, filters):
        # 3x3 ReLU convolution with 'same' padding.
        return layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)

    inputs = layers.Input(shape=input_shape)

    # Contracting path: two convolutions per level, then 2x2 max pooling.
    skips = []
    x = inputs
    for filters in (32, 64, 128, 256):
        x = conv3x3(conv3x3(x, filters), filters)
        skips.append(x)
        x = layers.MaxPooling2D((2, 2))(x)

    # Bottleneck: Tokenized KAN Block
    x = tokenized_kan_block(x, kan_dim, num_kan_layers)

    # Expanding path: upsample, concatenate the skip, one convolution per level.
    for filters in (256, 128, 64, 32):
        x = layers.Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
        x = layers.concatenate([x, skips.pop()])
        x = conv3x3(x, filters)

    x = layers.Dropout(0.5)(x)

    # Output layer
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(x)

    return models.Model(inputs=[inputs], outputs=[outputs])

# Instantiate the U-KAN model for 288x288 RGB inputs and print its summary.
# Note: this rebinds the global `model`.
input_shape = (288, 288, 3)
model = unet_kan(input_shape, kan_dim=256, num_kan_layers=2)
model.summary()

##U_KAN_LSTM

In [None]:
from tensorflow.keras import layers, models
import tensorflow as tf

class KANLayer(layers.Layer):
    """
    Trainable projection used inside the tokenized KAN block.

    As implemented, this layer holds a single trainable weight matrix and applies
    it to the last axis of the input with `tf.tensordot`. There is no bias and no
    non-linearity inside the layer.

    Note: a class with this name is defined in several cells of this notebook;
    the definition executed last is the one in effect.
    """
    def __init__(self, input_dim, output_dim):
        super(KANLayer, self).__init__()
        # `tensordot(..., axes=1)` contracts the input's last axis with the weight's
        # first axis, so the weight must be (input_dim, output_dim). The previous
        # (output_dim, input_dim) layout only worked when the two dims were equal.
        self.activation_funcs = self.add_weight(
            shape=(input_dim, output_dim),
            initializer="he_normal",
            trainable=True,
            name="activation_funcs"
        )

    def call(self, inputs):
        # (..., input_dim) x (input_dim, output_dim) -> (..., output_dim)
        return tf.tensordot(inputs, self.activation_funcs, axes=1)

def tokenized_kan_block(inputs, token_dim, kan_layers=2):
    """
    Tokenized KAN Block with a ConvLSTM branch for feature transformation.

    Input:
        - inputs: Feature map of shape (batch, height, width, channels)
        - token_dim: Token embedding size; also the output channel count
        - kan_layers: Accepted for interface compatibility.
          NOTE(review): not used here; the block always applies two KAN layers.

    Output:
        - Tensor of shape (batch, height, width, token_dim)
    """
    # Token branch: one token per spatial position, projected to token_dim.
    tokens = layers.Reshape((-1, inputs.shape[-1]))(inputs)
    tokens = layers.Dense(token_dim, activation='relu')(tokens)

    # First KAN layer
    processed_tokens = KANLayer(token_dim, token_dim)(tokens)
    processed_tokens = layers.LayerNormalization()(processed_tokens)

    # ConvLSTM branch: project to token_dim channels and add a time axis of length 1.
    projected_inputs = layers.Conv2D(token_dim, (1, 1), activation='relu', padding='same')(inputs)
    lstm_input = layers.Reshape((1, inputs.shape[1], inputs.shape[2], token_dim))(projected_inputs)

    # The filter count must equal token_dim: the output is reshaped to
    # (-1, token_dim) and added to the tokens, and a hard-coded 256 only lines up
    # when token_dim is 256.
    lstm_output = layers.ConvLSTM2D(token_dim, (3, 3), activation='relu', padding='same', return_sequences=False)(lstm_input)
    lstm_output = layers.Reshape((-1, token_dim))(lstm_output)

    # Merge both branches, then apply the second KAN layer.
    tokens = layers.Add()([processed_tokens, lstm_output])
    processed_tokens = KANLayer(token_dim, token_dim)(tokens)
    processed_tokens = layers.LayerNormalization()(processed_tokens)

    # Restore the spatial layout of the input feature map.
    reshaped_output = layers.Reshape((inputs.shape[1], inputs.shape[2], token_dim))(processed_tokens)
    return reshaped_output

def u_kan_lstm(input_shape, kan_dim=128, num_kan_layers=2):
    """
    Assemble the U-KAN model whose bottleneck is the tokenized KAN block.

    Input:
        - input_shape: (height, width, channels) of the input image
        - kan_dim: Token size passed to the bottleneck block
        - num_kan_layers: Number of KAN layers passed to the bottleneck block

    Output:
        - Uncompiled Keras Model with a single-channel sigmoid output
    """
    def conv3x3(tensor, filters):
        # 3x3 ReLU convolution with 'same' padding.
        return layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)

    inputs = layers.Input(shape=input_shape)

    # Contracting path: two convolutions per level, then 2x2 max pooling.
    skips = []
    x = inputs
    for filters in (32, 64, 128, 256):
        x = conv3x3(conv3x3(x, filters), filters)
        skips.append(x)
        x = layers.MaxPooling2D((2, 2))(x)

    # Bottleneck
    x = tokenized_kan_block(x, kan_dim, num_kan_layers)

    # Expanding path: upsample, concatenate the skip, one convolution per level.
    for filters in (256, 128, 64, 32):
        x = layers.Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
        x = layers.concatenate([x, skips.pop()])
        x = conv3x3(x, filters)
    x = layers.Dropout(0.5)(x)

    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(x)

    return models.Model(inputs=[inputs], outputs=[outputs])

# Instantiate the U-KAN + ConvLSTM model for 288x288 RGB inputs and print its summary.
# Note: this rebinds the global `model`.
input_shape = (288, 288, 3)
model = u_kan_lstm(input_shape, kan_dim=256, num_kan_layers=2)
model.summary()


##K_UNET_LSTM_Mobilenetv2

In [None]:
from tensorflow.keras import layers, models, regularizers
import tensorflow as tf

class KANLayer(layers.Layer):
    """
    Trainable projection used inside the tokenized KAN block.

    As implemented, this layer holds a single trainable weight matrix and applies
    it to the last axis of the input with `tf.tensordot`. There is no bias and no
    non-linearity inside the layer.

    Note: a class with this name is defined in several cells of this notebook;
    the definition executed last is the one in effect.

    Input:
        - inputs: Tensor whose last axis has size input_dim

    Output:
        - Tensor with the same leading axes and a last axis of size output_dim
    """
    def __init__(self, input_dim, output_dim):
        super(KANLayer, self).__init__()
        # `tensordot(..., axes=1)` contracts the input's last axis with the weight's
        # first axis, so the weight must be (input_dim, output_dim). The previous
        # (output_dim, input_dim) layout only worked when the two dims were equal.
        self.activation_funcs = self.add_weight(
            shape=(input_dim, output_dim),
            initializer="he_normal",  # He Normal initializer for better convergence
            trainable=True,
            name="activation_funcs"
        )

    def call(self, inputs):
        # (..., input_dim) x (input_dim, output_dim) -> (..., output_dim)
        return tf.tensordot(inputs, self.activation_funcs, axes=1)

def tokenized_kan_block(inputs, token_dim, kan_layers=2):
    """
    Tokenized KAN Block with a ConvLSTM branch for feature transformation.

    Input:
        - inputs: Tensor of shape (batch_size, height, width, channels)
        - token_dim: Dimension of token embeddings; also the output channel count
        - kan_layers: Accepted for interface compatibility.
          NOTE(review): not used here; the block always applies two KAN layers.

    Output:
        - Tensor of shape (batch_size, height, width, token_dim)
    """
    # Token branch: one token per spatial position, linearly projected to token_dim.
    tokens = layers.Reshape((-1, inputs.shape[-1]))(inputs)
    tokens = layers.Dense(token_dim, activation='relu')(tokens)

    # First KANLayer
    processed_tokens = KANLayer(token_dim, token_dim)(tokens)
    processed_tokens = layers.LayerNormalization()(processed_tokens)

    # ConvLSTM branch: project to token_dim channels and add a time axis of length 1.
    projected_inputs = layers.Conv2D(token_dim, (1, 1), activation='relu', padding='same')(inputs)
    lstm_input = layers.Reshape((1, inputs.shape[1], inputs.shape[2], token_dim))(projected_inputs)

    # The filter count must equal token_dim: the output is reshaped to
    # (-1, token_dim) and added to the tokens, and a hard-coded 256 only lines up
    # when token_dim is 256.
    lstm_output = layers.ConvLSTM2D(token_dim, (3, 3), activation='relu', padding='same', return_sequences=False)(lstm_input)
    lstm_output = layers.Reshape((-1, token_dim))(lstm_output)  # back to token layout

    # Combine the outputs of the first KANLayer and the ConvLSTM branch.
    tokens = layers.Add()([processed_tokens, lstm_output])

    # Second KANLayer
    processed_tokens = KANLayer(token_dim, token_dim)(tokens)
    processed_tokens = layers.LayerNormalization()(processed_tokens)

    # Restore the spatial layout of the input feature map for the decoder.
    reshaped_output = layers.Reshape((inputs.shape[1], inputs.shape[2], token_dim))(processed_tokens)

    return reshaped_output

def u_kan_lstm_mobilenetv2(input_shape, kan_dim=128, num_kan_layers=2):
    """
    U-Net with a KAN + ConvLSTM bottleneck and a frozen MobileNetV2 encoder.

    The model expects images scaled to [0, 1], which is how this notebook prepares
    X_train / X_val / X_test. The ImageNet MobileNetV2 weights were trained on
    inputs in [-1, 1], so the model rescales its input before the encoder.

    Input:
        - input_shape: Tuple with the input image dimensions (height, width, channels)
        - kan_dim: Dimension of KAN block embeddings
        - num_kan_layers: Number of KAN layers passed to the bottleneck block

    Output:
        - Uncompiled Keras Model producing a (batch_size, height, width, 1)
          sigmoid segmentation mask
    """
    # Pre-trained backbone for the contracting path.
    base_model = tf.keras.applications.MobileNetV2(
        input_shape=input_shape,
        include_top=False,
        weights="imagenet"
    )

    # Expose the four intermediate activations as one frozen feature extractor.
    skip_layer_names = (
        'block_1_expand_relu',
        'block_3_expand_relu',
        'block_6_expand_relu',
        'block_13_expand_relu',
    )
    encoder = models.Model(
        inputs=base_model.input,
        outputs=[base_model.get_layer(name).output for name in skip_layer_names],
        name='mobilenetv2_encoder'
    )
    encoder.trainable = False  # Freeze base model layers

    inputs = layers.Input(shape=input_shape)
    # [0, 1] -> [-1, 1], the input range MobileNetV2 was trained with.
    scaled_inputs = layers.Rescaling(scale=2.0, offset=-1.0)(inputs)
    c1, c2, c3, c4 = encoder(scaled_inputs)

    # Bottleneck: Tokenized KAN Block with LSTM
    bottleneck = tokenized_kan_block(c4, kan_dim, num_kan_layers)

    # Expanding path. Each encoder feature map is upsampled 2x with its own
    # transposed convolution so it matches the decoder tensor it is concatenated with.
    u1 = layers.Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(bottleneck)
    c4_resized = layers.Conv2DTranspose(576, (2, 2), strides=(2, 2), padding='same')(c4)
    u1 = layers.concatenate([u1, c4_resized])
    u1 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(u1)

    u2 = layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(u1)
    c3_resized = layers.Conv2DTranspose(192, (2, 2), strides=(2, 2), padding='same')(c3)
    u2 = layers.concatenate([u2, c3_resized])
    u2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(u2)

    u3 = layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(u2)
    c2_resized = layers.Conv2DTranspose(96, (2, 2), strides=(2, 2), padding='same')(c2)
    u3 = layers.concatenate([u3, c2_resized])
    u3 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(u3)

    u4 = layers.Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(u3)
    c1_resized = layers.Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(c1)
    u4 = layers.concatenate([u4, c1_resized])
    u4 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(u4)

    u4 = layers.Dropout(0.5)(u4)

    # Output Layer
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(u4)

    # Assemble the model
    model = models.Model(inputs=inputs, outputs=outputs)
    return model

# Instantiate the MobileNetV2-backed model and print its summary.
# Note: this rebinds the global `model`; the training cells below compile and fit
# whichever model was assigned last.
input_shape = (288, 288, 3)  # RGB image of size 288x288
model = u_kan_lstm_mobilenetv2(input_shape, kan_dim=256, num_kan_layers=2)
model.summary()


#TRAIN MODEL

##LOSS (BINARY CROSS-ENTROPY) + ACCURACY

In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Set seed for reproducibility
SEED = 50
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Define parameters
INITIAL_LEARNING_RATE = 0.001
MIN_LEARNING_RATE = 0.0001
BETA_1 = 0.9
BETA_2 = 0.999
BATCH_SIZE = 8
DROPOUT_RATE = 0.1  # NOTE(review): not referenced in this cell; the models hard-code their dropout rate
MAX_EPOCHS = 100
EARLY_STOPPING_PATIENCE = 15

# Adam optimizer with explicit hyper-parameters
adam_optimizer = Adam(learning_rate=INITIAL_LEARNING_RATE, beta_1=BETA_1, beta_2=BETA_2)

# Compile the most recently built `model` with binary cross-entropy and pixel accuracy
model.compile(optimizer=adam_optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks: halve the learning rate on a val_loss plateau, and stop early while
# restoring the best weights.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=10,
    min_lr=MIN_LEARNING_RATE,
    verbose=1
)

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=EARLY_STOPPING_PATIENCE,
    restore_best_weights=True,
    verbose=1
)

# Training pipeline. The shuffle buffer spans the whole training set so every
# epoch is a uniform permutation; a fixed buffer smaller than the dataset only
# shuffles within a sliding window.
dataset_train = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
dataset_train = (
    dataset_train
    .shuffle(buffer_size=len(X_train), seed=SEED)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

dataset_val = tf.data.Dataset.from_tensor_slices((X_val, Y_val))
dataset_val = dataset_val.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Fit the model
results = model.fit(
    dataset_train,
    validation_data=dataset_val,
    epochs=MAX_EPOCHS,
    callbacks=[reduce_lr, early_stopping]
)


##LOSS (BINARY CROSS-ENTROPY + DICE) + IOU CLASS 1

In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import BinaryIoU
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
import tensorflow.keras.backend as K

# Seed every random number generator used here (Python, NumPy, TensorFlow).
SEED = 11
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(SEED)

# Optimizer settings
INITIAL_LEARNING_RATE = 0.001
MIN_LEARNING_RATE = 1e-6
BETA_1, BETA_2 = 0.9, 0.999

# Training schedule
BATCH_SIZE = 32
MAX_EPOCHS = 100
EARLY_STOPPING_PATIENCE = 15

num_classes = 2  # not referenced by the rest of this cell

# Define custom loss: BCE + Dice
def dice_loss(y_true, y_pred, smooth=1e-6):
    """Soft Dice loss computed over all elements of the batch.

    Args:
        y_true: Ground-truth mask (bool or numeric), same shape as y_pred.
        y_pred: Predicted probabilities.
        smooth: Constant added to numerator and denominator so the ratio stays
            defined when both masks are empty.

    Returns:
        Scalar tensor: 1 - (2 * intersection + smooth) / (sum(y_true) + sum(y_pred) + smooth).
    """
    # The masks in this notebook are boolean. Cast them to the prediction dtype so
    # the element-wise product below is valid when this function is called directly.
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return 1 - (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def bce_dice_loss(y_true, y_pred):
    """Combined loss: binary cross-entropy plus the Dice loss of the same batch."""
    return tf.keras.losses.binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)

# Compile the most recently built `model` with Adam, the BCE + Dice loss and a
# thresholded IoU metric.
# NOTE(review): BinaryIoU is created without `target_class_ids`, while the section
# title mentions class-1 IoU. Confirm against the Keras documentation whether the
# default covers the intended class only, and pass `target_class_ids=[1]` if not.
adam_optimizer = Adam(learning_rate=INITIAL_LEARNING_RATE, beta_1=BETA_1, beta_2=BETA_2)
iou_metric = BinaryIoU(threshold=0.5, name='Binary_IoU')
model.compile(optimizer=adam_optimizer, loss=bce_dice_loss, metrics=[iou_metric])

# Callbacks: both watch the validation loss and treat lower as better.
_monitor_kwargs = dict(monitor='val_loss', mode='min', verbose=1)

# Halve the learning rate after 10 epochs without improvement, down to MIN_LEARNING_RATE.
reduce_lr = ReduceLROnPlateau(
    factor=0.5,
    patience=10,
    min_lr=MIN_LEARNING_RATE,
    **_monitor_kwargs
)

# Stop after EARLY_STOPPING_PATIENCE stale epochs and roll back to the best weights.
early_stopping = EarlyStopping(
    patience=EARLY_STOPPING_PATIENCE,
    restore_best_weights=True,
    **_monitor_kwargs
)

# Evaluate and print metrics after each epoch
class EvaluateAndPrintMetrics(tf.keras.callbacks.Callback):
    """Print the validation loss and validation Binary IoU after every epoch.

    The values are read from the ``logs`` dict passed to ``on_epoch_end``;
    the callback does not run any evaluation itself.

    Args:
        validation_data: Stored on the instance as ``validation_data``.
            It is not read anywhere in this class.
    """

    def __init__(self, validation_data):
        super().__init__()
        self.validation_data = validation_data

    @staticmethod
    def _format_metric(value):
        """Return ``value`` with 4 decimals, or ``'n/a'`` when it was not logged."""
        return "n/a" if value is None else f"{value:.4f}"

    def on_epoch_end(self, epoch, logs=None):
        """Print the ``val_loss`` and ``val_Binary_IoU`` entries of ``logs``.

        Args:
            epoch: Zero-based epoch index; printed as ``epoch + 1``.
            logs: Metric dict for the epoch; may be ``None`` or lack the
                validation keys (e.g. when no validation data was given).
        """
        logs = logs or {}
        val_loss = logs.get('val_loss')
        val_binary_iou = logs.get('val_Binary_IoU')
        print(f"Epoch {epoch+1}:")
        # A missing metric is None, and formatting None with ':.4f' raises
        # TypeError, so go through the helper instead of formatting directly.
        print(f"   Validation Loss: {self._format_metric(val_loss)}")
        print(f"   Validation Binary IoU: {self._format_metric(val_binary_iou)}")

# Callback that prints the validation metrics at the end of each epoch
eval_callback = EvaluateAndPrintMetrics(validation_data=(X_val, Y_val))

# Training pipeline: slice the arrays, shuffle with a fixed seed, then batch
dataset_train = (
    tf.data.Dataset.from_tensor_slices((X_train, Y_train))
    .shuffle(buffer_size=1024, seed=SEED)
    .batch(BATCH_SIZE)
)

# Validation pipeline: slice and batch only (no shuffling)
dataset_val = tf.data.Dataset.from_tensor_slices((X_val, Y_val)).batch(BATCH_SIZE)

# Fit for at most MAX_EPOCHS epochs with all three callbacks attached
history = model.fit(
    dataset_train,
    epochs=MAX_EPOCHS,
    validation_data=dataset_val,
    callbacks=[reduce_lr, early_stopping, eval_callback],
)


Epoch 1/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - Binary_IoU: 0.4200 - loss: 1.2711   Epoch 1:
   Validation Loss: 0.8902
   Validation Binary IoU: 0.5187
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 3s/step - Binary_IoU: 0.4194 - loss: 1.2673 - val_Binary_IoU: 0.5187 - val_loss: 0.8902 - learning_rate: 0.0010
Epoch 2/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 756ms/step - Binary_IoU: 0.5045 - loss: 0.8965Epoch 2:
   Validation Loss: 0.8426
   Validation Binary IoU: 0.5590
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 822ms/step - Binary_IoU: 0.5025 - loss: 0.8968 - val_Binary_IoU: 0.5590 - val_loss: 0.8426 - learning_rate: 0.0010
Epoch 3/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 756ms/step - Binary_IoU: 0.5454 - loss: 0.8056Epoch 3:
   Validation Loss: 0.8517
   Validation Binary IoU: 0.5504
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 822m

# Save model

In [None]:
# Save the model to an HDF5 file and report the file name that was actually written
# (the message previously named a different file than the one saved).
model.save("okila.h5")
print("Model has been saved to okila.h5")


# Import model

In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras import layers
import tensorflow as tf

# Define your custom layer
class KANLayer(layers.Layer):
    """Custom layer holding one trainable weight matrix applied with ``tf.tensordot``.

    The layer creates a single weight ``activation_funcs`` of shape
    ``(output_dim, input_dim)`` and its forward pass is
    ``tf.tensordot(inputs, activation_funcs, axes=1)``: a linear contraction
    with no bias and no non-linearity (it is not an element-wise operation).

    NOTE(review): with ``axes=1`` the last axis of ``inputs`` is contracted with
    the FIRST axis of the weight (size ``output_dim``), so the result's last axis
    has size ``input_dim``. That agrees with the parameter names only when
    ``input_dim == output_dim`` -- confirm against how the layer is used. The
    weight shape and the forward op are intentionally left unchanged so that
    previously saved weights still load.

    Args:
        input_dim: Size of the second axis of the weight matrix.
        output_dim: Size of the first axis of the weight matrix.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``, ``trainable``).
    """

    def __init__(self, input_dim, output_dim, **kwargs):
        super().__init__(**kwargs)
        # Remember the constructor arguments so get_config() can report them.
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.activation_funcs = self.add_weight(
            shape=(output_dim, input_dim),
            initializer="he_normal",
            trainable=True,
            name="activation_funcs"
        )

    def call(self, inputs):
        """Contract ``inputs`` with the weight matrix via ``tf.tensordot(..., axes=1)``."""
        return tf.tensordot(inputs, self.activation_funcs, axes=1)

    def get_config(self):
        """Return the base layer config extended with ``input_dim`` and ``output_dim``.

        Needed so a model containing this layer can be saved and later rebuilt
        through ``custom_objects`` with the same constructor arguments.
        """
        config = super().get_config()
        config.update({"input_dim": self.input_dim, "output_dim": self.output_dim})
        return config

# Restore the saved network. KANLayer is supplied through custom_objects so the
# loader can resolve it, and compile=False skips compilation while loading.
model = load_model(
    "/kaggle/input/mobilenet_unet_lstm_kan/tensorflow2/default/1/okila.h5",
    compile=False,
    custom_objects={"KANLayer": KANLayer},
)

print("Model has been loaded successfully.")


# EVALUATE MODEL

In [None]:
import numpy as np
import tensorflow as tf
import time

# Dice Coefficient function
def dice_coefficient(y_true, y_pred):
    """Dice coefficient of two masks, smoothed by 1e-6.

    Computes ``(2 * sum(y_true * y_pred) + 1e-6) / (sum(y_true) + sum(y_pred) + 1e-6)``
    over all elements of the inputs.
    """
    eps = 1e-6
    overlap = np.sum(y_true * y_pred)
    total = np.sum(y_true) + np.sum(y_pred)
    return (2. * overlap + eps) / (total + eps)

# Run inference on the whole test set and time the call
start_time = time.time()
Y_pred = model.predict(X_test)
end_time = time.time()

# Wall-clock seconds per test image (total predict time / number of images)
num_images = X_test.shape[0]
average_time_per_image = (end_time - start_time) / num_images

# Threshold the predictions at 0.5 to obtain 0/1 labels
Y_pred = (Y_pred > 0.5).astype(np.uint8)

# Accumulators for the per-class and aggregate metrics
num_classes = 2  # Example: background and fire
total_pixels = np.prod(Y_test.shape)
iou_scores, dice_scores = [], []
pixel_accuracy = mean_accuracy = freq_weighted_iou = 0

for i in range(num_classes):
    # One-vs-rest binary masks for class i
    # NOTE(review): assumes Y_test holds exactly the values 0 and 1 and has the
    # same shape as Y_pred -- confirm against the data-loading cell.
    y_true_class = (Y_test == i).astype(np.uint8)
    y_pred_class = (Y_pred == i).astype(np.uint8)
    true_count = np.sum(y_true_class)

    # IoU = |A ∩ B| / |A ∪ B|, smoothed by 1e-6
    intersection = np.sum(y_true_class * y_pred_class)
    union = true_count + np.sum(y_pred_class) - intersection
    iou = (intersection + 1e-6) / (union + 1e-6)
    dice = dice_coefficient(y_true_class, y_pred_class)
    iou_scores.append(iou)
    dice_scores.append(dice)

    # Correctly labelled pixels of this class feed both accuracy figures
    pixel_accuracy += intersection
    mean_accuracy += intersection / (true_count + 1e-6)

    # Frequency-weighted IoU: class IoU weighted by the class's share of pixels
    freq_weighted_iou += (true_count / total_pixels) * iou

# Turn the accumulated sums into ratios / per-class averages
pixel_accuracy /= total_pixels
mean_accuracy /= num_classes

# Unweighted means over the classes
mean_iou = np.mean(iou_scores)
mean_dice = np.mean(dice_scores)

# Report
print(f"Pixel Accuracy: {pixel_accuracy:.4f}")
print(f"Mean Accuracy: {mean_accuracy:.4f}")
print(f"Mean IoU: {mean_iou:.4f}")
print(f"Frequency Weighted IoU: {freq_weighted_iou:.4f}")
print(f"Mean Dice Coefficient: {mean_dice:.4f}")
print(f"Average time per image: {average_time_per_image:.6f} seconds")


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 941ms/step
Pixel Accuracy: 0.9667
Mean Accuracy: 0.8468
Mean IoU: 0.7762
Frequency Weighted IoU: 0.9398
Mean Dice Coefficient: 0.8611
Average time per image: 0.035933 seconds


In [None]:
import numpy as np
import tensorflow as tf
import time

# Dice Coefficient function
def dice_coefficient(y_true, y_pred):
    """Smoothed Dice coefficient between two masks.

    Returns ``(2 * sum(y_true * y_pred) + 1e-6) / (sum(y_true) + sum(y_pred) + 1e-6)``.
    """
    numerator = 2. * np.sum(y_true * y_pred) + 1e-6
    denominator = np.sum(y_true) + np.sum(y_pred) + 1e-6
    return numerator / denominator

# Run inference on the whole test set and time the call
start_time = time.time()
Y_pred = model.predict(X_test)
end_time = time.time()

# Wall-clock seconds per test image
num_images = X_test.shape[0]
average_time_per_image = (end_time - start_time) / num_images

# Threshold the predictions at 0.5 to obtain 0/1 labels
Y_pred = (Y_pred > 0.5).astype(np.uint8)

# Number of classes: 2 (background and fire)
num_classes = 2

# Per-class score lists
iou_scores, dice_scores = [], []

for i in range(num_classes):
    # One-vs-rest binary masks for class i
    y_true_class = (Y_test == i).astype(np.uint8)
    y_pred_class = (Y_pred == i).astype(np.uint8)

    # Compute the IoU for this class
    intersection = np.sum(y_true_class * y_pred_class)
    union = np.sum(y_true_class) + np.sum(y_pred_class) - intersection
    iou = (intersection + 1e-6) / (union + 1e-6)

    # Compute the Dice coefficient for this class
    dice = dice_coefficient(y_true_class, y_pred_class)

    iou_scores.append(iou)
    dice_scores.append(dice)

# Unweighted means over the classes
mean_iou = np.mean(iou_scores)
mean_dice = np.mean(dice_scores)

# Report the per-class scores first, then the aggregates
for i in range(num_classes):
    print(f"Class {i}: IoU = {iou_scores[i]:.4f}, Dice = {dice_scores[i]:.4f}")
print(f"Mean IoU: {mean_iou:.4f}")
print(f"Mean Dice Coefficient: {mean_dice:.4f}")
print(f"Average time per image: {average_time_per_image:.6f} seconds")

# Print images

In [None]:
import matplotlib.pyplot as plt

# Show the input image, the ground-truth mask and the predicted mask side by side.
# The count is capped by the data actually available, so a test set with fewer
# than 250 samples no longer raises IndexError.
num_images = min(250, len(X_test), len(Y_test), len(Y_pred))  # number of images to display
for i in range(num_images):
    plt.figure(figsize=(12, 4))

    plt.subplot(1, 3, 1)
    plt.title("Original Image")
    plt.imshow(X_test[i])

    plt.subplot(1, 3, 2)
    plt.title("True Mask")
    plt.imshow(Y_test[i].squeeze(), cmap='gray')  # squeeze removes the size-1 axes

    plt.subplot(1, 3, 3)
    plt.title("Predicted Mask")
    plt.imshow(Y_pred[i].squeeze(), cmap='gray')

    plt.show()