In [1]:
!pip install escnn

Collecting escnn
  Downloading escnn-1.0.11-py3-none-any.whl.metadata (19 kB)
Collecting lie-learn (from escnn)
  Downloading lie_learn-0.0.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.3 kB)
Collecting pymanopt (from escnn)
  Downloading pymanopt-2.2.1-py3-none-any.whl.metadata (7.0 kB)
Collecting py3nj (from escnn)
  Downloading py3nj-0.2.1.tar.gz (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.0/50.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Downloading escnn-1.0.11-py3-none-any.whl (373 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m373.9/373.9 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lie_learn-0.0.2-c

In [2]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
import escnn
from escnn import gspaces
from escnn import nn as enn
from PIL import Image, ImageOps
from skimage.util import random_noise
from skimage import exposure
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, jaccard_score

`Helper functions`

In [3]:
def load_image(image_path, target_size):
    """Load an image from disk, convert its colour mode, and resize it.

    Args:
        image_path: Path of the image file to open.
        target_size: Sequence whose first two entries are the output size
            passed to ``transforms.Resize`` and whose third entry is the
            channel count. A channel count of 3 selects ``'RGB'``; any other
            value selects single-channel ``'L'``.

    Returns:
        PIL.Image.Image: The converted image resized to ``target_size[:2]``.
    """
    mode = 'RGB' if target_size[2] == 3 else 'L'
    # Open through a context manager so the file handle is released even if
    # the conversion raises; ``convert`` returns an independent image object.
    with Image.open(image_path) as source:
        image = source.convert(mode)
    # A single transform does not need a Compose wrapper.
    return transforms.Resize(target_size[:2])(image)

def load_real_data(data_dir, target_size=(256, 256)):
    """Load paired image/DEM tiles and binary labels from ``data_dir``.

    Directory layout read by this function::

        data_dir/landslide/image/<name>.png        -> label 1
        data_dir/landslide/dem/<name>.png
        data_dir/non-landslide/image/<name>.png    -> label 0
        data_dir/non-landslide/dem/<name>.png

    Only files ending in ``.png`` inside the ``image`` folders are considered;
    the DEM for each one is looked up under the same file name in ``dem``.

    Args:
        data_dir: Root directory of the dataset.
        target_size: Two-element size every tile is resized to.

    Returns:
        tuple: ``(images, dems, labels)`` where ``images`` and ``dems`` are the
        stacked ``ToTensor`` outputs of the RGB and single-channel tiles, and
        ``labels`` is a ``float32`` tensor of 1.0 (landslide) / 0.0 (non-landslide).

    Raises:
        RuntimeError: If no ``.png`` image is found under ``data_dir``.
    """
    size = tuple(target_size)  # accept lists as well as tuples
    to_tensor = transforms.ToTensor()  # build the converter once, not per sample

    images = []
    dems = []
    labels = []

    for class_name, label in (('landslide', 1), ('non-landslide', 0)):
        image_dir = os.path.join(data_dir, class_name, 'image')
        dem_dir = os.path.join(data_dir, class_name, 'dem')

        # os.listdir returns entries in arbitrary order; sorting makes the sample
        # order (and therefore any seeded split downstream) reproducible.
        for filename in sorted(os.listdir(image_dir)):
            if not filename.endswith(".png"):
                continue

            image = load_image(os.path.join(image_dir, filename), size + (3,))
            dem = load_image(os.path.join(dem_dir, filename), size + (1,))

            images.append(to_tensor(image))
            dems.append(to_tensor(dem))
            labels.append(label)

    if not images:
        # torch.stack on an empty list fails with an opaque message; say what is wrong.
        raise RuntimeError(f"No .png images found under {data_dir!r}")

    images = torch.stack(images)
    dems = torch.stack(dems)
    labels = torch.tensor(labels, dtype=torch.float32)

    return images, dems, labels

`Network components`

In [4]:
class P4ConvBlock(enn.EquivariantModule):
    """Two consecutive (3x3 R2Conv -> InnerBatchNorm -> ReLU) stages.

    The first stage maps ``in_type`` to ``out_type``; the second keeps
    ``out_type``. Padding of 1 with a 3x3 kernel is used in both convolutions.
    """

    def __init__(self, in_type, out_type):
        super().__init__()

        self.in_type = in_type
        self.out_type = out_type

        # Build the two stages in order; only the convolution's input type differs.
        stages = []
        for stage_in in (in_type, out_type):
            stages.extend([
                enn.R2Conv(stage_in, out_type, kernel_size=3, padding=1),
                enn.InnerBatchNorm(out_type),
                enn.ReLU(out_type, inplace=True),
            ])
        self.block = enn.SequentialModule(*stages)

    def forward(self, x):
        """Run the geometric tensor ``x`` through both stages."""
        return self.block(x)

    def evaluate_output_shape(self, input_shape):
        """Delegate the output-shape computation to the wrapped sequence."""
        return self.block.evaluate_output_shape(input_shape)

class P4EquivariantEncoder(nn.Module):
    """C4-rotation-equivariant two-stream (RGB + DEM) binary classifier.

    RGB and DEM inputs are processed by separate ``P4ConvBlock`` stems, their
    feature fields are concatenated and mixed by a 1x1 equivariant convolution,
    and the result passes through three more conv blocks (each followed by 2x2
    max pooling) and a bottleneck block. Spatial average pooling followed by
    group pooling yields 64 rotation-invariant features that feed a linear
    layer with a sigmoid.
    """

    def __init__(self):
        super(P4EquivariantEncoder, self).__init__()

        # Define the symmetry group: P4 = C4 (90-degree rotations) on R2
        self.r2_act = gspaces.rot2dOnR2(N=4)

        # Define input types for RGB and DEM
        self.rgb_type = enn.FieldType(self.r2_act, [self.r2_act.trivial_repr]*3)  # RGB channels transform trivially
        self.dem_type = enn.FieldType(self.r2_act, [self.r2_act.trivial_repr]*1)  # DEM channel transforms trivially

        # Define feature types with appropriate dimensions to match original model
        # First layer: RGB (3 -> 16) and DEM (1 -> 8)
        # The regular representation of C4 has dimension 4, so we need 4 copies for 16 channels
        self.feat_type_rgb = enn.FieldType(self.r2_act, [self.r2_act.regular_repr]*4)  # 4*4=16 channels
        self.feat_type_dem = enn.FieldType(self.r2_act, [self.r2_act.regular_repr]*2)  # 2*4=8 channels

        # Field type of the channel-wise concatenation of the two stems' outputs.
        # Built once here so forward() does not have to rebuild it on every call.
        self.feat_type_stacked = enn.FieldType(
            self.r2_act,
            self.feat_type_rgb.representations + self.feat_type_dem.representations
        )

        # Combined features after concatenation (16 + 8 = 24 channels, or 6 regular representations)
        self.feat_type_combined = enn.FieldType(self.r2_act, [self.r2_act.regular_repr]*6)

        # Define the rest of the feature types
        self.feat_type_c2 = enn.FieldType(self.r2_act, [self.r2_act.regular_repr]*8)   # 8*4=32 channels
        self.feat_type_c3 = enn.FieldType(self.r2_act, [self.r2_act.regular_repr]*16)  # 16*4=64 channels
        self.feat_type_c4 = enn.FieldType(self.r2_act, [self.r2_act.regular_repr]*32)  # 32*4=128 channels
        self.feat_type_bn = enn.FieldType(self.r2_act, [self.r2_act.regular_repr]*64)  # 64*4=256 channels

        # Create the network
        self.conv1_rgb = P4ConvBlock(self.rgb_type, self.feat_type_rgb)
        self.conv1_dem = P4ConvBlock(self.dem_type, self.feat_type_dem)

        # Create a 1x1 convolution to combine RGB and DEM features
        self.combine_features = enn.R2Conv(
            self.feat_type_stacked,
            self.feat_type_combined,
            kernel_size=1
        )

        self.conv2 = P4ConvBlock(self.feat_type_combined, self.feat_type_c2)
        self.conv3 = P4ConvBlock(self.feat_type_c2, self.feat_type_c3)
        self.conv4 = P4ConvBlock(self.feat_type_c3, self.feat_type_c4)
        self.bottleneck = P4ConvBlock(self.feat_type_c4, self.feat_type_bn)

        # 2x2 max-pooling after conv2/conv3/conv4. Registered once as submodules
        # instead of being re-instantiated inside every forward pass. They hold no
        # parameters, so previously saved state_dicts still load unchanged.
        self.pool2 = enn.PointwiseMaxPool(self.feat_type_c2, kernel_size=2)
        self.pool3 = enn.PointwiseMaxPool(self.feat_type_c3, kernel_size=2)
        self.pool4 = enn.PointwiseMaxPool(self.feat_type_c4, kernel_size=2)

        # Global pooling (invariant to transformations)
        self.spatial_pool = enn.PointwiseAdaptiveAvgPool(self.feat_type_bn, 1)
        self.group_pool = enn.GroupPooling(self.feat_type_bn)

        # Final fully connected layer
        # After group pooling, we have invariant features (256 / 4 = 64)
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x_rgb, x_dem):
        """Classify a batch of RGB/DEM pairs.

        Args:
            x_rgb: Tensor with 3 channels in dim 1 (wrapped as ``self.rgb_type``).
            x_dem: Tensor with 1 channel in dim 1 (wrapped as ``self.dem_type``).

        Returns:
            torch.Tensor: Sigmoid outputs with a trailing dimension of size 1
            (one value per sample).
        """
        # Wrap inputs as GeometricTensors
        x_rgb = enn.GeometricTensor(x_rgb, self.rgb_type)
        x_dem = enn.GeometricTensor(x_dem, self.dem_type)

        # Process RGB and DEM separately
        c1_rgb = self.conv1_rgb(x_rgb)
        c1_dem = self.conv1_dem(x_dem)

        # Concatenate the features along the channel axis and re-wrap them with
        # the precomputed stacked field type.
        stacked_tensor = torch.cat([c1_rgb.tensor, c1_dem.tensor], dim=1)
        stacked_geo = enn.GeometricTensor(stacked_tensor, self.feat_type_stacked)

        # Apply 1x1 convolution to map to the combined feature type
        combined = self.combine_features(stacked_geo)

        # Continue with the remaining layers
        c2 = self.conv2(combined)
        p2 = self.pool2(c2)

        c3 = self.conv3(p2)
        p3 = self.pool3(c3)

        c4 = self.conv4(p3)
        p4 = self.pool4(c4)

        bn = self.bottleneck(p4)

        # Global pooling (first spatial, then group)
        spatially_pooled = self.spatial_pool(bn)
        group_pooled = self.group_pool(spatially_pooled)

        # Pass to the fully connected layer
        return self.fc(group_pooled.tensor)

In [5]:
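# Optional layer-by-layer summary (needs the `torchinfo` package and must be
# run after `device` has been defined in the next cell).
# from torchinfo import summary

# model = P4EquivariantEncoder().to(device)
# summary(model, input_size=[(1, 3, 256, 256), (1, 1, 256, 256)])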

In [6]:
# Seed every RNG used in this notebook before any weights are initialised or
# batches are shuffled, so a "Restart & Run All" is as repeatable as the
# backend allows.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Prefer the GPU when one is available; fall back to the CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [7]:
# Location of the Bijie dataset under Kaggle's input directory.
data_dir = '/kaggle/input/bijie/Bijie_dataset'
# Load every tile into memory, resized to 256x256, together with its label.
images, dems, labels = load_real_data(data_dir, target_size=(256, 256))

In [8]:
# Sanity check: number of DEM tiles that were loaded.
len(dems)

2773

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed random_state keeps the
# partition identical between runs for a given sample order.
(
    X_train_img, X_test_img,
    X_train_dem, X_test_dem,
    y_train, y_test,
) = train_test_split(images, dems, labels, test_size=0.2, random_state=42)

# Quick shape/size report for the resulting partitions.
print(X_train_img.shape, X_train_dem[0].shape, y_train[0].shape)
print("Training data size:", len(X_train_img))
print("Testing data size:", len(X_test_img))

torch.Size([2218, 3, 256, 256]) torch.Size([1, 256, 256]) torch.Size([])
Training data size: 2218
Testing data size: 555


In [10]:
class CustomDataset(Dataset):
    """Map-style dataset pairing each image with its DEM and its target.

    Args:
        images: Indexable collection of image samples.
        dems: Indexable collection of DEM samples, aligned with ``images``.
        masks: Indexable collection of per-sample targets, aligned with
            ``images``. Returned under the ``'label'`` key.

    Raises:
        ValueError: If the three collections do not have the same length.
    """

    def __init__(self, images, dems, masks):
        # Fail fast on misaligned inputs instead of surfacing an IndexError (or
        # silently mis-paired samples) in the middle of training.
        sizes = (len(images), len(dems), len(masks))
        if len(set(sizes)) != 1:
            raise ValueError(
                f"images, dems and masks must have the same length, got {sizes}"
            )

        self.images = images
        self.dems = dems
        self.masks = masks

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a dict with keys 'image', 'dem', 'label'."""
        return {
            'image': self.images[idx],
            'dem': self.dems[idx],
            'label': self.masks[idx]
        }

# Wrap each partition in a dataset object.
train_dataset = CustomDataset(images=X_train_img, dems=X_train_dem, masks=y_train)
val_dataset = CustomDataset(images=X_test_img, dems=X_test_dem, masks=y_test)

# Batches of 16; only the training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=16, shuffle=False)

In [11]:
# Instantiate the network and move its parameters to the selected device.
model = P4EquivariantEncoder().to(device)
# Number of passes over the training set.
epochs = 200
# Lowest validation loss seen so far; +inf so the first finite loss is checkpointed.
best_val_loss = float('inf')
# File the best weights are written to.
# NOTE(review): the name says "unet" but the model is P4EquivariantEncoder - consider renaming.
best_model_path = 'best_unet_model.pth'

In [12]:
# Add up the element count of every parameter tensor registered on the model.
total_params = 0
for param in model.parameters():
    total_params += param.numel()
print(f"Total parameters: {total_params}")

Total parameters: 197759


In [13]:
# Mean-squared error between the sigmoid output and the 0/1 label.
criterion = nn.MSELoss()

optimizer = optim.Adam(model.parameters(), lr=0.001)
# Halve the learning rate after 5 epochs without validation-loss improvement.
# `verbose=True` is deprecated in recent PyTorch releases, so learning-rate
# changes are reported explicitly after each scheduler step instead.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    train_loss = 0.0

    for batch in train_loader:
        # Batch-local names: do NOT overwrite the notebook-level `images`,
        # `dems` and `labels` tensors that earlier cells depend on.
        batch_images = batch['image'].to(device)
        batch_dems = batch['dem'].to(device)
        batch_labels = batch['label'].to(device)

        optimizer.zero_grad()
        # The model returns one value per sample with a trailing dim of 1; drop it
        # so the shape matches the label vector.
        outputs = model(batch_images, batch_dems).squeeze(1)

        loss = criterion(outputs, batch_labels)

        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Average of the per-batch mean losses.
    train_loss /= len(train_loader)

    # ---- validation pass ----
    model.eval()
    val_loss = 0.0
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for batch in val_loader:
            batch_images = batch['image'].to(device)
            batch_dems = batch['dem'].to(device)
            batch_labels = batch['label'].to(device)

            outputs = model(batch_images, batch_dems).squeeze(1)

            loss = criterion(outputs, batch_labels)
            val_loss += loss.item()

            # Threshold the sigmoid output at 0.5 to obtain hard class predictions.
            binary_preds = (outputs > 0.5).float()
            all_preds.append(binary_preds.cpu().numpy().astype(int).flatten())
            all_targets.append(batch_labels.cpu().numpy().astype(int).flatten())

    val_loss /= len(val_loader)

    all_preds = np.concatenate(all_preds)
    all_targets = np.concatenate(all_targets)

    # zero_division=0 on every ratio metric: an epoch with no positive predictions
    # reports 0 consistently instead of emitting warnings for some metrics only.
    accuracy = accuracy_score(all_targets, all_preds)
    precision = precision_score(all_targets, all_preds, zero_division=0)
    recall = recall_score(all_targets, all_preds, zero_division=0)
    f1 = f1_score(all_targets, all_preds, zero_division=0)
    iou = jaccard_score(all_targets, all_preds, zero_division=0)

    print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss}, Val Loss: {val_loss}, Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, IoU: {iou}, F1: {f1}")

    # Step the plateau scheduler and report when it lowers the learning rate.
    lr_before = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    lr_after = optimizer.param_groups[0]['lr']
    if lr_after < lr_before:
        print(f"Reducing learning rate from {lr_before} to {lr_after}")

    # Checkpoint whenever the validation loss improves.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), best_model_path)
        print(f"Saved best model with val loss: {best_val_loss}")

print("Training complete. Best model saved to", best_model_path)



Epoch 1/200, Train Loss: 0.17103432435140337, Val Loss: 0.13089523507016046, Accuracy: 0.8252252252252252, Precision: 0.6878980891719745, Recall: 0.6923076923076923, IoU: 0.526829268292683, F1: 0.6900958466453673
Saved best model with val loss: 0.13089523507016046
Epoch 2/200, Train Loss: 0.10831466201934026, Val Loss: 0.08284721390477248, Accuracy: 0.8846846846846846, Precision: 0.8770491803278688, Recall: 0.6858974358974359, IoU: 0.6257309941520468, F1: 0.7697841726618705
Saved best model with val loss: 0.08284721390477248
Epoch 3/200, Train Loss: 0.08265241983232738, Val Loss: 0.10230220194373812, Accuracy: 0.8504504504504504, Precision: 0.9866666666666667, Recall: 0.47435897435897434, IoU: 0.4713375796178344, F1: 0.6406926406926406
Epoch 4/200, Train Loss: 0.06196160069097289, Val Loss: 0.05031318539487464, Accuracy: 0.9423423423423424, Precision: 0.9025974025974026, Recall: 0.8910256410256411, IoU: 0.8128654970760234, F1: 0.8967741935483873
Saved best model with val loss: 0.050313