In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os

BASE_DIR = "/content/drive/MyDrive/Landslide"

TRAIN_ZIP = os.path.join(BASE_DIR, "train_data.zip")
TEST_ZIP  = os.path.join(BASE_DIR, "test_data.zip")

TRAIN_CSV = os.path.join(BASE_DIR, "Train.csv")
TEST_CSV  = os.path.join(BASE_DIR, "Test.csv")
SUB_CSV   = os.path.join(BASE_DIR, "SampleSubmission.csv")

EXTRACT_DIR = "/content/landslide_data"

os.makedirs(EXTRACT_DIR, exist_ok=True)


In [None]:
import zipfile

def unzip(zip_path, out_dir):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(out_dir)

unzip(TRAIN_ZIP, EXTRACT_DIR)
unzip(TEST_ZIP, EXTRACT_DIR)

print("Extraction complete.")


Extraction complete.


In [None]:
import pandas as pd

train_df = pd.read_csv(TRAIN_CSV)
test_df  = pd.read_csv(TEST_CSV)
sub_df   = pd.read_csv(SUB_CSV)

print("Train CSV shape:", train_df.shape)
print("Test CSV shape :", test_df.shape)
print("Submission shape:", sub_df.shape)

train_df.head()


Train CSV shape: (7147, 2)
Test CSV shape : (5398, 1)
Submission shape: (5398, 2)


Unnamed: 0,ID,label
0,ID_HUD1ST,1
1,ID_KGE2HY,1
2,ID_VHV9BL,1
3,ID_ZT0VEJ,0
4,ID_5NFXVY,0


In [None]:
print(train_df.columns)



Index(['ID', 'label'], dtype='object')


In [None]:
label_col = [c for c in train_df.columns if "label" in c.lower() or "landslide" in c.lower()][0]

train_df[label_col].value_counts()


Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
0,5892
1,1255


In [None]:
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset


In [None]:
class LandslideDataset(Dataset):
    def __init__(self, csv_path, image_dir, is_train=True):
        """
        csv_path : path to Train.csv or Test.csv
        image_dir: directory containing .npy files
        is_train : whether labels are present
        """
        self.df = pd.read_csv(csv_path)
        self.image_dir = image_dir
        self.is_train = is_train

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        image_id = row["ID"]
        img_path = os.path.join(self.image_dir, image_id + ".npy")

        # Load numpy image
        img = np.load(img_path)  # (H, W, C)

        # Safety check (paper uses 12 bands)
        assert img.ndim == 3, "Image must be HWC"
        assert img.shape[2] == 12, f"Expected 12 channels, got {img.shape[2]}"

        # Convert to torch tensor (C, H, W)
        img = torch.from_numpy(img).permute(2, 0, 1).float()

        if self.is_train:
            label = torch.tensor(row["label"], dtype=torch.long)
            return img, label
        else:
            return img, image_id


In [None]:
TRAIN_IMG_DIR = "/content/landslide_data/train"
TEST_IMG_DIR  = "/content/landslide_data/test"

train_dataset = LandslideDataset(
    csv_path=TRAIN_CSV,
    image_dir=TRAIN_IMG_DIR,
    is_train=True
)

test_dataset = LandslideDataset(
    csv_path=TEST_CSV,
    image_dir=TEST_IMG_DIR,
    is_train=False
)

print("Train samples:", len(train_dataset))
print("Test samples :", len(test_dataset))


Train samples: 7147
Test samples : 5398


In [None]:
TRAIN_IMG_DIR = "/content/landslide_data/train_data"
TEST_IMG_DIR  = "/content/landslide_data/test_data"


In [None]:
class LandslideDataset(Dataset):
    def __init__(self, csv_path, image_dir, is_train=True):
        self.df = pd.read_csv(csv_path)
        self.image_dir = image_dir
        self.is_train = is_train

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        image_id = row["ID"]
        img_path = os.path.join(self.image_dir, image_id + ".npy")

        if not os.path.exists(img_path):
            raise FileNotFoundError(f"Missing image: {img_path}")

        # Load image (H, W, C)
        img = np.load(img_path)

        # Paper requirement: 12 bands
        assert img.ndim == 3, "Expected HWC image"
        assert img.shape[2] == 12, f"Expected 12 channels, got {img.shape[2]}"

        # Convert to torch tensor (C, H, W)
        img = torch.from_numpy(img).permute(2, 0, 1).float()

        if self.is_train:
            label = torch.tensor(row["label"], dtype=torch.long)
            return img, label
        else:
            return img, image_id


In [None]:
train_dataset = LandslideDataset(
    csv_path=TRAIN_CSV,
    image_dir=TRAIN_IMG_DIR,
    is_train=True
)

test_dataset = LandslideDataset(
    csv_path=TEST_CSV,
    image_dir=TEST_IMG_DIR,
    is_train=False
)

print("Train samples:", len(train_dataset))
print("Test samples :", len(test_dataset))


Train samples: 7147
Test samples : 5398


In [None]:
img, label = train_dataset[0]

print("Image tensor shape:", img.shape)
print("Label:", label.item())
print("Min / Max:", img.min().item(), img.max().item())


Image tensor shape: torch.Size([12, 64, 64])
Label: 1
Min / Max: -50.600467681884766 6372.0


In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from skimage.metrics import structural_similarity as ssim
from scipy.stats import beta


In [None]:
# Filter landslide samples
landslide_df = train_dataset.df[train_dataset.df["label"] == 1].reset_index(drop=True)

print("Original landslide samples:", len(landslide_df))


Original landslide samples: 1255


In [None]:
landslide_images = []
landslide_ids = []

for _, row in tqdm(landslide_df.iterrows(), total=len(landslide_df)):
    img_id = row["ID"]
    img = np.load(os.path.join(TRAIN_IMG_DIR, img_id + ".npy"))
    landslide_images.append(img)
    landslide_ids.append(img_id)

landslide_images = np.array(landslide_images)  # (N, H, W, C)

print("Loaded landslide images:", landslide_images.shape)


100%|██████████| 1255/1255 [00:07<00:00, 177.25it/s]


Loaded landslide images: (1255, 64, 64, 12)


In [None]:
def multiband_ssim(img1, img2):
    ssim_vals = []
    for c in range(img1.shape[2]):
        ssim_c = ssim(
            img1[:, :, c],
            img2[:, :, c],
            data_range=img1[:, :, c].max() - img1[:, :, c].min()
        )
        ssim_vals.append(ssim_c)
    return np.mean(ssim_vals)


In [None]:
k = 5
neighbors = []

print("Computing SSIM-based neighbors...")

for i in tqdm(range(len(landslide_images))):
    similarities = []
    for j in range(len(landslide_images)):
        if i == j:
            continue
        score = multiband_ssim(landslide_images[i], landslide_images[j])
        similarities.append((score, j))

    similarities.sort(reverse=True, key=lambda x: x[0])
    top_k = [idx for _, idx in similarities[:k]]
    neighbors.append(top_k)


Computing SSIM-based neighbors...


100%|██████████| 1255/1255 [3:10:51<00:00,  9.12s/it]


In [None]:
ALPHA = 2
BETA = 2
LAMBDA_MIN = 0.1
LAMBDA_MAX = 0.9

NUM_SYNTHETIC = 6275  # paper value

synthetic_images = []
synthetic_ids = []

rng = np.random.default_rng(seed=42)

print("Generating synthetic images...")

for n in tqdm(range(NUM_SYNTHETIC)):
    anchor_idx = rng.integers(0, len(landslide_images))
    neighbor_idx = rng.choice(neighbors[anchor_idx])

    anchor = landslide_images[anchor_idx]
    neighbor = landslide_images[neighbor_idx]

    lam = beta.rvs(ALPHA, BETA)
    lam = np.clip(lam, LAMBDA_MIN, LAMBDA_MAX)

    synthetic = lam * anchor + (1 - lam) * neighbor

    syn_id = f"SMOTE_{n:05d}"
    synthetic_images.append(synthetic.astype(np.float32))
    synthetic_ids.append(syn_id)


Generating synthetic images...


100%|██████████| 6275/6275 [00:05<00:00, 1083.61it/s]


In [None]:
SMOTE_DIR = os.path.join(TRAIN_IMG_DIR, "smote")
os.makedirs(SMOTE_DIR, exist_ok=True)

for img, img_id in zip(synthetic_images, synthetic_ids):
    np.save(os.path.join(SMOTE_DIR, img_id + ".npy"), img)


In [None]:
smote_df = pd.DataFrame({
    "ID": synthetic_ids,
    "label": 1
})

augmented_train_df = pd.concat(
    [train_dataset.df, smote_df],
    ignore_index=True
)

print("New training size:", len(augmented_train_df))
print("New class distribution:")
print(augmented_train_df["label"].value_counts())


In [None]:
AUG_TRAIN_CSV = "/content/drive/MyDrive/Landslide/Train_SMOTE.csv"
augmented_train_df.to_csv(AUG_TRAIN_CSV, index=False)

print("Saved augmented CSV:", AUG_TRAIN_CSV)


In [None]:
import torch
import torch.nn.functional as F
import random
import torchvision.transforms.functional as TF


In [None]:
def one_hot(labels, num_classes=2):
    return F.one_hot(labels, num_classes=num_classes).float()


In [None]:
def mixup(images, labels, alpha=1.0):
    lam = np.random.beta(alpha, alpha)

    batch_size = images.size(0)
    index = torch.randperm(batch_size)

    mixed_images = lam * images + (1 - lam) * images[index]
    mixed_labels = lam * labels + (1 - lam) * labels[index]

    return mixed_images, mixed_labels


In [None]:
def rand_bbox(size, lam):
    _, _, H, W = size
    cut_rat = np.sqrt(1. - lam)
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    cx = np.random.randint(W)
    cy = np.random.randint(H)

    x1 = np.clip(cx - cut_w // 2, 0, W)
    y1 = np.clip(cy - cut_h // 2, 0, H)
    x2 = np.clip(cx + cut_w // 2, 0, W)
    y2 = np.clip(cy + cut_h // 2, 0, H)

    return x1, y1, x2, y2


In [None]:
def cutmix(images, labels, alpha=1.0):
    lam = np.random.beta(alpha, alpha)

    batch_size = images.size(0)
    index = torch.randperm(batch_size)

    x1, y1, x2, y2 = rand_bbox(images.size(), lam)
    images[:, :, y1:y2, x1:x2] = images[index, :, y1:y2, x1:x2]

    lam = 1 - ((x2 - x1) * (y2 - y1) / (images.size(-1) * images.size(-2)))
    mixed_labels = lam * labels + (1 - lam) * labels[index]

    return images, mixed_labels

In [None]:
def random_color_geom_transform(img):
    # img: (C, H, W)
    if random.random() < 0.5:
        img = TF.adjust_brightness(img, brightness_factor=random.uniform(0.8, 1.2))
    if random.random() < 0.5:
        img = TF.adjust_contrast(img, contrast_factor=random.uniform(0.8, 1.2))

    if random.random() < 0.5:
        angle = random.uniform(-15, 15)
        img = TF.rotate(img, angle)

    if random.random() < 0.5:
        img = TF.hflip(img)

    if random.random() < 0.5:
        img = TF.vflip(img)

    return img


In [None]:
def apply_online_augmentation(images, labels):
    # images: (B, C, H, W)
    # labels: (B,) hard labels

    # Convert to soft labels
    labels = one_hot(labels)

    # Per-image color & geometry
    aug_images = []
    for img in images:
        aug_images.append(random_color_geom_transform(img))
    images = torch.stack(aug_images)

    # MixUp or CutMix (random choice)
    r = random.random()
    if r < 0.5:
        images, labels = mixup(images, labels, alpha=1.0)
    else:
        images, labels = cutmix(images, labels, alpha=1.0)

    return images, labels


In [None]:
!pip install -q timm


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import timm
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR


In [None]:
class ResizeTo256(nn.Module):
    def forward(self, x):
        return F.interpolate(
            x, size=(256, 256),
            mode="bilinear", align_corners=False
        )


In [None]:
class EfficientNetV2_Landslide(nn.Module):
    def __init__(self, num_classes=2):
        super().__init__()

        self.resize = ResizeTo256()

        self.backbone = timm.create_model(
            "efficientnetv2_l",
            pretrained=True,
            num_classes=0  # remove FC
        )

        # Modify first conv to accept 12 channels
        old_conv = self.backbone.conv_stem
        self.backbone.conv_stem = nn.Conv2d(
            in_channels=12,
            out_channels=old_conv.out_channels,
            kernel_size=old_conv.kernel_size,
            stride=old_conv.stride,
            padding=old_conv.padding,
            bias=False
        )

        # Initialize weights (ImageNet-style)
        nn.init.kaiming_normal_(self.backbone.conv_stem.weight)

        self.classifier = nn.Linear(
            self.backbone.num_features, num_classes
        )

    def forward(self, x):
        x = self.resize(x)
        features = self.backbone(x)
        logits = self.classifier(features)
        return logits, features


In [None]:
def kl_divergence_loss(logits, soft_targets):
    log_probs = F.log_softmax(logits, dim=1)
    return F.kl_div(log_probs, soft_targets, reduction="batchmean")


In [None]:
BATCH_SIZE = 36

train_smote_dataset = LandslideDataset(
    csv_path=AUG_TRAIN_CSV,
    image_dir=TRAIN_IMG_DIR,
    is_train=True
)

train_loader = DataLoader(
    train_smote_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    pin_memory=True
)


In [None]:
import os

SMOTE_DIR = "/content/landslide_data/train_data/smote"

files = os.listdir(SMOTE_DIR)
print("Number of SMOTE files:", len(files))
print("Sample files:", files[:5])


Number of SMOTE files: 6275
Sample files: ['SMOTE_05531.npy', 'SMOTE_03115.npy', 'SMOTE_01287.npy', 'SMOTE_05098.npy', 'SMOTE_05013.npy']


In [None]:
import pandas as pd

aug_df = pd.read_csv("/content/drive/MyDrive/Landslide/Train_SMOTE.csv")

print("Total rows:", len(aug_df))
print(aug_df.tail())


Total rows: 13422
                ID  label
13417  SMOTE_06270      1
13418  SMOTE_06271      1
13419  SMOTE_06272      1
13420  SMOTE_06273      1
13421  SMOTE_06274      1


In [None]:
# find one SMOTE sample
smote_row = aug_df[aug_df["ID"].str.startswith("SMOTE_")].iloc[0]
print(smote_row)

# try loading it
smote_img = np.load(
    os.path.join("/content/landslide_data/train_data/smote",
                 smote_row["ID"] + ".npy")
)

print("SMOTE image shape:", smote_img.shape)


ID       SMOTE_00000
label              1
Name: 7147, dtype: object
SMOTE image shape: (64, 64, 12)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader
import timm


In [None]:
class ResizeTo256(nn.Module):
    def forward(self, x):
        return F.interpolate(
            x, size=(256, 256),
            mode="bilinear", align_corners=False
        )


In [None]:
class EfficientNetV2_Landslide(nn.Module):
    def __init__(self, num_classes=2):
        super().__init__()

        self.resize = ResizeTo256()

        self.backbone = timm.create_model(
            "tf_efficientnetv2_l",
            pretrained=True,
            num_classes=0
        )

        old_conv = self.backbone.conv_stem
        self.backbone.conv_stem = nn.Conv2d(
            in_channels=12,
            out_channels=old_conv.out_channels,
            kernel_size=old_conv.kernel_size,
            stride=old_conv.stride,
            padding=old_conv.padding,
            bias=False
        )
        nn.init.kaiming_normal_(self.backbone.conv_stem.weight)

        self.classifier = nn.Linear(
            self.backbone.num_features, num_classes
        )

    def forward(self, x):
        x = self.resize(x)
        features = self.backbone(x)
        logits = self.classifier(features)
        return logits, features


In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model = EfficientNetV2_Landslide().to(device)

optimizer = Adam(model.parameters(), lr=3e-4)

scheduler = CosineAnnealingLR(
    optimizer,
    T_max=50
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/476M [00:00<?, ?B/s]

In [None]:
train_smote_dataset = LandslideDataset(
    csv_path="/content/drive/MyDrive/Landslide/Train_SMOTE.csv",
    image_dir="/content/landslide_data/train_data",
    is_train=True
)

train_loader = DataLoader(
    train_smote_dataset,
    batch_size=36,
    shuffle=True,
    num_workers=2,
    pin_memory=True
)


In [None]:
class LandslideDataset(Dataset):
    def __init__(self, csv_path, image_dir, is_train=True):
        self.df = pd.read_csv(csv_path)
        self.image_dir = image_dir
        self.smote_dir = os.path.join(image_dir, "smote")
        self.is_train = is_train

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image_id = row["ID"]

        # Decide path
        if image_id.startswith("SMOTE_"):
            img_path = os.path.join(self.smote_dir, image_id + ".npy")
        else:
            img_path = os.path.join(self.image_dir, image_id + ".npy")

        if not os.path.exists(img_path):
            raise FileNotFoundError(f"Missing image: {img_path}")

        img = np.load(img_path)

        # Safety checks
        assert img.ndim == 3, "Expected HWC image"
        assert img.shape[2] == 12, f"Expected 12 channels, got {img.shape[2]}"

        img = torch.from_numpy(img).permute(2, 0, 1).float()

        if self.is_train:
            label = torch.tensor(row["label"], dtype=torch.long)
            return img, label
        else:
            return img, image_id


In [None]:
train_smote_dataset = LandslideDataset(
    csv_path="/content/drive/MyDrive/Landslide/Train_SMOTE.csv",
    image_dir="/content/landslide_data/train_data",
    is_train=True
)

train_loader = DataLoader(
    train_smote_dataset,
    batch_size=36,
    shuffle=True,
    num_workers=2,
    pin_memory=True
)


In [None]:
def random_intensity_transform(img):
    # img: (C, H, W)
    if random.random() < 0.5:
        brightness = torch.empty(1).uniform_(0.8, 1.2).item()
        img = img * brightness

    if random.random() < 0.5:
        contrast = torch.empty(1).uniform_(0.8, 1.2).item()
        mean = img.mean(dim=(1, 2), keepdim=True)
        img = (img - mean) * contrast + mean

    if random.random() < 0.5:
        noise = torch.randn_like(img) * 0.01
        img = img + noise

    return img


In [None]:
def random_geometry_transform(img):
    # Horizontal flip
    if random.random() < 0.5:
        img = torch.flip(img, dims=[2])

    # Vertical flip
    if random.random() < 0.5:
        img = torch.flip(img, dims=[1])

    # 90-degree rotations
    if random.random() < 0.5:
        k = random.choice([1, 2, 3])
        img = torch.rot90(img, k, dims=[1, 2])

    return img


In [None]:
def apply_online_augmentation(images, labels):
    """
    images: (B, C, H, W)
    labels: (B,)
    """
    # Convert to soft labels
    labels = one_hot(labels)

    aug_images = []
    for img in images:
        img = random_intensity_transform(img)
        img = random_geometry_transform(img)
        aug_images.append(img)

    images = torch.stack(aug_images)

    # MixUp or CutMix
    if random.random() < 0.5:
        images, labels = mixup(images, labels, alpha=1.0)
    else:
        images, labels = cutmix(images, labels, alpha=1.0)

    return images, labels


In [None]:
# ===============================
# 1. Imports
# ===============================
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader
import timm
import numpy as np
import pandas as pd
import random
import os

# ===============================
# 2. Device
# ===============================
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# ===============================
# 3. Dataset (FINAL FIXED VERSION)
# ===============================
class LandslideDataset(torch.utils.data.Dataset):
    def __init__(self, csv_path, image_dir, is_train=True):
        self.df = pd.read_csv(csv_path)
        self.image_dir = image_dir
        self.smote_dir = os.path.join(image_dir, "smote")
        self.is_train = is_train

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image_id = row["ID"]

        if image_id.startswith("SMOTE_"):
            img_path = os.path.join(self.smote_dir, image_id + ".npy")
        else:
            img_path = os.path.join(self.image_dir, image_id + ".npy")

        img = np.load(img_path)
        img = torch.from_numpy(img).permute(2, 0, 1).float()

        if self.is_train:
            return img, torch.tensor(row["label"], dtype=torch.long)
        else:
            return img, image_id

# ===============================
# 4. Online Augmentation (SAFE)
# ===============================
def one_hot(labels, num_classes=2):
    return F.one_hot(labels, num_classes).float()

def random_intensity_transform(img):
    if random.random() < 0.5:
        img = img * random.uniform(0.8, 1.2)
    if random.random() < 0.5:
        mean = img.mean(dim=(1,2), keepdim=True)
        img = (img - mean) * random.uniform(0.8, 1.2) + mean
    if random.random() < 0.5:
        img = img + torch.randn_like(img) * 0.01
    return img

def random_geometry_transform(img):
    if random.random() < 0.5:
        img = torch.flip(img, [2])
    if random.random() < 0.5:
        img = torch.flip(img, [1])
    if random.random() < 0.5:
        img = torch.rot90(img, random.choice([1,2,3]), [1,2])
    return img

def mixup(images, labels, alpha=1.0):
    lam = np.random.beta(alpha, alpha)
    idx = torch.randperm(images.size(0))
    return lam*images + (1-lam)*images[idx], lam*labels + (1-lam)*labels[idx]

def cutmix(images, labels, alpha=1.0):
    lam = np.random.beta(alpha, alpha)
    B, C, H, W = images.size()
    idx = torch.randperm(B)
    cx, cy = np.random.randint(W), np.random.randint(H)
    w, h = int(W*np.sqrt(1-lam)), int(H*np.sqrt(1-lam))
    x1, x2 = max(cx-w//2,0), min(cx+w//2,W)
    y1, y2 = max(cy-h//2,0), min(cy+h//2,H)
    images[:, :, y1:y2, x1:x2] = images[idx, :, y1:y2, x1:x2]
    lam = 1 - (x2-x1)*(y2-y1)/(H*W)
    return images, lam*labels + (1-lam)*labels[idx]

def apply_online_augmentation(images, labels):
    labels = one_hot(labels)
    imgs = []
    for img in images:
        img = random_intensity_transform(img)
        img = random_geometry_transform(img)
        imgs.append(img)
    images = torch.stack(imgs)
    if random.random() < 0.5:
        return mixup(images, labels)
    else:
        return cutmix(images, labels)

# ===============================
# 5. Model
# ===============================
class ResizeTo256(nn.Module):
    def forward(self, x):
        return F.interpolate(x, (256,256), mode="bilinear", align_corners=False)

class EfficientNetV2_Landslide(nn.Module):
    def __init__(self):
        super().__init__()
        self.resize = ResizeTo256()
        self.backbone = timm.create_model(
            "tf_efficientnetv2_l",
            pretrained=True,
            num_classes=0
        )
        old = self.backbone.conv_stem
        self.backbone.conv_stem = nn.Conv2d(
            12, old.out_channels,
            kernel_size=old.kernel_size,
            stride=old.stride,
            padding=old.padding,
            bias=False
        )
        self.classifier = nn.Linear(self.backbone.num_features, 2)

    def forward(self, x):
        x = self.resize(x)
        f = self.backbone(x)
        return self.classifier(f), f

# ===============================
# 6. Build everything
# ===============================
model = EfficientNetV2_Landslide().to(device)
optimizer = Adam(model.parameters(), lr=3e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=50)

train_dataset = LandslideDataset(
    "/content/drive/MyDrive/Landslide/Train_SMOTE.csv",
    "/content/landslide_data/train_data",
    True
)

train_loader = DataLoader(train_dataset, batch_size=36, shuffle=True)

print("✅ Model, dataset, loader READY")


Using device: cpu


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


✅ Model, dataset, loader READY


In [None]:
EPOCHS = 50
model.train()

for epoch in range(EPOCHS):
    total_loss = 0
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        imgs, soft_labels = apply_online_augmentation(imgs, labels)
        soft_labels = soft_labels.to(device)

        optimizer.zero_grad()
        logits, _ = model(imgs)
        loss = F.kl_div(
            F.log_softmax(logits, dim=1),
            soft_labels,
            reduction="batchmean"
        )
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    scheduler.step()
    print(f"[Epoch {epoch+1:02d}/50] Loss: {total_loss/len(train_loader):.4f}")


Drive upload

In [None]:
DRIVE_BASE = "/content/drive/MyDrive/Landslide"
DRIVE_DATA = f"{DRIVE_BASE}/landslide_data"

print("Drive base exists:", DRIVE_BASE)


Drive base exists: /content/drive/MyDrive/Landslide


In [None]:
!cp -r /content/landslide_data /content/drive/MyDrive/Landslide/


In [None]:
import os

for path in [
    f"{DRIVE_DATA}/train_data",
    f"{DRIVE_DATA}/train_data/smote",
    f"{DRIVE_DATA}/test_data"
]:
    print(path, "exists:", os.path.exists(path),
          "| files:", len(os.listdir(path)) if os.path.exists(path) else "N/A")


/content/drive/MyDrive/Landslide/landslide_data/train_data exists: True | files: 7148
/content/drive/MyDrive/Landslide/landslide_data/train_data/smote exists: True | files: 6275
/content/drive/MyDrive/Landslide/landslide_data/test_data exists: True | files: 5397


In [None]:
import pandas as pd

csv_path = f"{DRIVE_BASE}/Train_SMOTE.csv"
df = pd.read_csv(csv_path)
print("Train_SMOTE.csv shape:", df.shape)


Train_SMOTE.csv shape: (13422, 2)


In [None]:
CKPT_DIR = f"{DRIVE_BASE}/checkpoints"
os.makedirs(CKPT_DIR, exist_ok=True)
print("Checkpoint dir ready:", CKPT_DIR)


Checkpoint dir ready: /content/drive/MyDrive/Landslide/checkpoints


reset runtime

In [None]:
# =====================================
# 1. Mount Drive
# =====================================
from google.colab import drive
drive.mount('/content/drive')

# =====================================
# 2. Imports
# =====================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
import timm
import numpy as np
import pandas as pd
import random
import os

# =====================================
# 3. Device check
# =====================================
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)
print("GPU:", torch.cuda.get_device_name(0) if device=="cuda" else "None")

# =====================================
# 4. Paths (FROM DRIVE ONLY)
# =====================================
DATA_ROOT = "/content/drive/MyDrive/Landslide/landslide_data"
TRAIN_CSV = "/content/drive/MyDrive/Landslide/Train_SMOTE.csv"

# =====================================
# 5. Dataset (FINAL, FIXED)
# =====================================
class LandslideDataset(torch.utils.data.Dataset):
    def __init__(self, csv_path, image_dir, is_train=True):
        self.df = pd.read_csv(csv_path)
        self.image_dir = image_dir
        self.smote_dir = os.path.join(image_dir, "train_data/smote")
        self.orig_dir = os.path.join(image_dir, "train_data")
        self.is_train = is_train

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_id = row["ID"]

        if img_id.startswith("SMOTE_"):
            path = os.path.join(self.smote_dir, img_id + ".npy")
        else:
            path = os.path.join(self.orig_dir, img_id + ".npy")

        img = np.load(path)
        img = torch.from_numpy(img).permute(2,0,1).float()

        if self.is_train:
            return img, torch.tensor(row["label"], dtype=torch.long)
        else:
            return img, img_id

# =====================================
# 6. Online Augmentation (MULTISPECTRAL SAFE)
# =====================================
def one_hot(labels, num_classes=2):
    return F.one_hot(labels, num_classes).float()

def random_intensity(img):
    if random.random() < 0.5:
        img = img * random.uniform(0.8, 1.2)
    if random.random() < 0.5:
        mean = img.mean(dim=(1,2), keepdim=True)
        img = (img - mean) * random.uniform(0.8, 1.2) + mean
    if random.random() < 0.5:
        img = img + torch.randn_like(img) * 0.01
    return img

def random_geom(img):
    if random.random() < 0.5:
        img = torch.flip(img, [2])
    if random.random() < 0.5:
        img = torch.flip(img, [1])
    if random.random() < 0.5:
        img = torch.rot90(img, random.choice([1,2,3]), [1,2])
    return img

def mixup(images, labels, alpha=1.0):
    lam = np.random.beta(alpha, alpha)
    idx = torch.randperm(images.size(0))
    return lam*images + (1-lam)*images[idx], lam*labels + (1-lam)*labels[idx]

def cutmix(images, labels, alpha=1.0):
    lam = np.random.beta(alpha, alpha)
    B, C, H, W = images.size()
    idx = torch.randperm(B)
    cx, cy = np.random.randint(W), np.random.randint(H)
    w, h = int(W*np.sqrt(1-lam)), int(H*np.sqrt(1-lam))
    x1, x2 = max(cx-w//2,0), min(cx+w//2,W)
    y1, y2 = max(cy-h//2,0), min(cy+h//2,H)
    images[:,:,y1:y2,x1:x2] = images[idx,:,y1:y2,x1:x2]
    lam = 1 - (x2-x1)*(y2-y1)/(H*W)
    return images, lam*labels + (1-lam)*labels[idx]

def apply_aug(images, labels):
    labels = one_hot(labels)
    imgs = []
    for img in images:
        img = random_intensity(img)
        img = random_geom(img)
        imgs.append(img)
    images = torch.stack(imgs)
    return mixup(images, labels) if random.random()<0.5 else cutmix(images, labels)

# =====================================
# 7. Model
# =====================================
class ResizeTo256(nn.Module):
    def forward(self, x):
        return F.interpolate(x, (256,256), mode="bilinear", align_corners=False)

class EfficientNetV2_Landslide(nn.Module):
    def __init__(self):
        super().__init__()
        self.resize = ResizeTo256()
        self.backbone = timm.create_model("tf_efficientnetv2_l", pretrained=True, num_classes=0)
        old = self.backbone.conv_stem
        self.backbone.conv_stem = nn.Conv2d(
            12, old.out_channels,
            kernel_size=old.kernel_size,
            stride=old.stride,
            padding=old.padding,
            bias=False
        )
        self.classifier = nn.Linear(self.backbone.num_features, 2)

    def forward(self, x):
        x = self.resize(x)
        f = self.backbone(x)
        return self.classifier(f), f

# =====================================
# 8. Build training objects
# =====================================
model = EfficientNetV2_Landslide().to(device)
optimizer = Adam(model.parameters(), lr=3e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=50)

train_ds = LandslideDataset(TRAIN_CSV, DATA_ROOT, True)
train_loader = DataLoader(train_ds, batch_size=36, shuffle=True, num_workers=0)

print("✅ Resume setup complete")


Mounted at /content/drive
Using device: cuda
GPU: Tesla T4


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/476M [00:00<?, ?B/s]

✅ Resume setup complete


In [None]:
class LandslideDataset(torch.utils.data.Dataset):
    def __init__(self, csv_path, data_root, is_train=True):
        self.df = pd.read_csv(csv_path)
        self.data_root = data_root
        self.is_train = is_train

        self.orig_dir = os.path.join(data_root, "train_data")
        self.smote_dir = os.path.join(data_root, "train_data", "smote")

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_id = row["ID"]

        # Try SMOTE first, then original
        candidates = [
            os.path.join(self.smote_dir, img_id + ".npy"),
            os.path.join(self.orig_dir, img_id + ".npy"),
        ]

        for path in candidates:
            if os.path.exists(path):
                img = np.load(path)
                img = torch.from_numpy(img).permute(2, 0, 1).float()
                if self.is_train:
                    return img, torch.tensor(row["label"], dtype=torch.long)
                else:
                    return img, img_id

        raise FileNotFoundError(f"Image not found for ID: {img_id}")


In [None]:
train_ds = LandslideDataset(
    csv_path="/content/drive/MyDrive/Landslide/Train_SMOTE.csv",
    data_root="/content/drive/MyDrive/Landslide/landslide_data",
    is_train=True
)

train_loader = DataLoader(
    train_ds,
    batch_size=36,
    shuffle=True,
    num_workers=0,   # IMPORTANT for Colab stability
    pin_memory=False
)

print("✅ Dataset & DataLoader rebuilt")


✅ Dataset & DataLoader rebuilt


In [None]:
import os

DATA_ROOT = "/content/drive/MyDrive/Landslide/landslide_data"
ORIG_DIR = os.path.join(DATA_ROOT, "train_data")
SMOTE_DIR = os.path.join(DATA_ROOT, "train_data", "smote")

existing_ids = set()

# Original images
for f in os.listdir(ORIG_DIR):
    if f.endswith(".npy"):
        existing_ids.add(f.replace(".npy", ""))

# SMOTE images
for f in os.listdir(SMOTE_DIR):
    if f.endswith(".npy"):
        existing_ids.add(f.replace(".npy", ""))

print("Total existing image IDs:", len(existing_ids))


Total existing image IDs: 7564


In [None]:
import pandas as pd

csv_path = "/content/drive/MyDrive/Landslide/Train_SMOTE.csv"
df = pd.read_csv(csv_path)

print("Original CSV rows:", len(df))

df_clean = df[df["ID"].isin(existing_ids)].reset_index(drop=True)

print("Clean CSV rows:", len(df_clean))
print("Removed rows:", len(df) - len(df_clean))


Original CSV rows: 13422
Clean CSV rows: 7564
Removed rows: 5858


In [None]:
CLEAN_CSV = "/content/drive/MyDrive/Landslide/Train_SMOTE_CLEAN.csv"
df_clean.to_csv(CLEAN_CSV, index=False)

print("✅ Clean CSV saved:", CLEAN_CSV)


✅ Clean CSV saved: /content/drive/MyDrive/Landslide/Train_SMOTE_CLEAN.csv


In [None]:
train_ds = LandslideDataset(
    csv_path=CLEAN_CSV,
    data_root="/content/drive/MyDrive/Landslide/landslide_data",
    is_train=True
)

train_loader = DataLoader(
    train_ds,
    batch_size=36,
    shuffle=True,
    num_workers=0,
    pin_memory=False
)

print("Dataset size:", len(train_ds))


Dataset size: 7564


In [None]:
imgs, labels = next(iter(train_loader))
print("Batch OK:", imgs.shape, labels.shape)


Batch OK: torch.Size([36, 12, 64, 64]) torch.Size([36])


Due to computational lmits ...changing the batch size yet keeping the same logic

In [None]:
# =========================================================
# FINAL SAFE TRAINING CELL (COLAB GPU READY)
# =========================================================

# -----------------
# Imports
# -----------------
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
import timm
import numpy as np
import pandas as pd
import random
import os

# -----------------
# Device
# -----------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)
print("GPU:", torch.cuda.get_device_name(0) if device=="cuda" else "None")

torch.cuda.empty_cache()

# -----------------
# Paths (FROM DRIVE)
# -----------------
DATA_ROOT = "/content/drive/MyDrive/Landslide/landslide_data"
TRAIN_CSV = "/content/drive/MyDrive/Landslide/Train_SMOTE_CLEAN.csv"

# -----------------
# Dataset (ROBUST)
# -----------------
class LandslideDataset(torch.utils.data.Dataset):
    def __init__(self, csv_path, data_root):
        self.df = pd.read_csv(csv_path)
        self.orig_dir = os.path.join(data_root, "train_data")
        self.smote_dir = os.path.join(data_root, "train_data", "smote")

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_id = row["ID"]

        candidates = [
            os.path.join(self.smote_dir, img_id + ".npy"),
            os.path.join(self.orig_dir, img_id + ".npy"),
        ]

        for p in candidates:
            if os.path.exists(p):
                img = np.load(p)
                img = torch.from_numpy(img).permute(2,0,1).float()
                return img, torch.tensor(row["label"], dtype=torch.long)

        raise FileNotFoundError(img_id)

# -----------------
# Online Augmentation (MULTISPECTRAL SAFE)
# -----------------
def one_hot(labels, num_classes=2):
    return F.one_hot(labels, num_classes).float()

def random_intensity(img):
    if random.random() < 0.5:
        img = img * random.uniform(0.8, 1.2)
    if random.random() < 0.5:
        mean = img.mean(dim=(1,2), keepdim=True)
        img = (img - mean) * random.uniform(0.8, 1.2) + mean
    if random.random() < 0.5:
        img = img + torch.randn_like(img) * 0.01
    return img

def random_geom(img):
    if random.random() < 0.5:
        img = torch.flip(img, [2])
    if random.random() < 0.5:
        img = torch.flip(img, [1])
    if random.random() < 0.5:
        img = torch.rot90(img, random.choice([1,2,3]), [1,2])
    return img

def mixup(images, labels, alpha=1.0):
    lam = np.random.beta(alpha, alpha)
    idx = torch.randperm(images.size(0))
    return lam*images + (1-lam)*images[idx], lam*labels + (1-lam)*labels[idx]

def cutmix(images, labels, alpha=1.0):
    lam = np.random.beta(alpha, alpha)
    B, C, H, W = images.size()
    idx = torch.randperm(B)
    cx, cy = np.random.randint(W), np.random.randint(H)
    w, h = int(W*np.sqrt(1-lam)), int(H*np.sqrt(1-lam))
    x1, x2 = max(cx-w//2,0), min(cx+w//2,W)
    y1, y2 = max(cy-h//2,0), min(cy+h//2,H)
    images[:,:,y1:y2,x1:x2] = images[idx,:,y1:y2,x1:x2]
    lam = 1 - (x2-x1)*(y2-y1)/(H*W)
    return images, lam*labels + (1-lam)*labels[idx]

def apply_aug(images, labels):
    labels = one_hot(labels)
    imgs = []
    for img in images:
        img = random_intensity(img)
        img = random_geom(img)
        imgs.append(img)
    images = torch.stack(imgs)
    return mixup(images, labels) if random.random()<0.5 else cutmix(images, labels)

# -----------------
# Model
# -----------------
class ResizeTo256(nn.Module):
    def forward(self, x):
        return F.interpolate(x, (256,256), mode="bilinear", align_corners=False)

class EfficientNetV2_Landslide(nn.Module):
    def __init__(self):
        super().__init__()
        self.resize = ResizeTo256()
        self.backbone = timm.create_model(
            "tf_efficientnetv2_l",
            pretrained=True,
            num_classes=0
        )
        old = self.backbone.conv_stem
        self.backbone.conv_stem = nn.Conv2d(
            12, old.out_channels,
            kernel_size=old.kernel_size,
            stride=old.stride,
            padding=old.padding,
            bias=False
        )
        self.classifier = nn.Linear(self.backbone.num_features, 2)

    def forward(self, x):
        x = self.resize(x)
        f = self.backbone(x)
        return self.classifier(f), f

# -----------------
# Build Training Objects
# -----------------
BATCH_SIZE = 8   # OOM-safe
EPOCHS = 50

train_ds = LandslideDataset(TRAIN_CSV, DATA_ROOT)
train_loader = DataLoader(
    train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
    pin_memory=False
)

model = EfficientNetV2_Landslide().to(device)
optimizer = Adam(model.parameters(), lr=3e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

scaler = torch.cuda.amp.GradScaler()

print("✅ Training setup ready")

# -----------------
# Training Loop (AMP ENABLED)
# -----------------
model.train()

for epoch in range(EPOCHS):
    total_loss = 0.0

    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        imgs, soft_labels = apply_aug(imgs, labels)
        soft_labels = soft_labels.to(device)

        optimizer.zero_grad()

        with torch.cuda.amp.autocast():
            logits, _ = model(imgs)
            loss = F.kl_div(
                F.log_softmax(logits, dim=1),
                soft_labels,
                reduction="batchmean"
            )

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()

    scheduler.step()
    print(f"[Epoch {epoch+1:02d}/{EPOCHS}] Loss: {total_loss/len(train_loader):.4f}")

print("🎉 TRAINING COMPLETE")


Using device: cuda
GPU: Tesla T4


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 2.12 MiB is free. Process 12666 has 14.74 GiB memory in use. Of the allocated memory 14.52 GiB is allocated by PyTorch, and 98.50 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

Again restarting the sesssion

In [1]:
# ===============================
# GPU CLEAN START CELL
# ===============================

from google.colab import drive
drive.mount('/content/drive')

import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch
print("CUDA available:", torch.cuda.is_available())
print("GPU:", torch.cuda.get_device_name(0))

torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
torch.cuda.synchronize()

print("Free GPU memory (MB):",
      torch.cuda.mem_get_info()[0] // 1024 // 1024)



Mounted at /content/drive
CUDA available: True
GPU: Tesla T4
Free GPU memory (MB): 14992


In [None]:
# =========================================================
# FINAL SAFE TRAINING CELL (COLAB GPU READY)
# =========================================================

# -----------------
# Imports
# -----------------
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
import timm
import numpy as np
import pandas as pd
import random
import os

# -----------------
# Device
# -----------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)
print("GPU:", torch.cuda.get_device_name(0) if device=="cuda" else "None")

torch.cuda.empty_cache()

# -----------------
# Paths (FROM DRIVE)
# -----------------
DATA_ROOT = "/content/drive/MyDrive/Landslide/landslide_data"
TRAIN_CSV = "/content/drive/MyDrive/Landslide/Train_SMOTE_CLEAN.csv"

# -----------------
# Dataset (ROBUST)
# -----------------
class LandslideDataset(torch.utils.data.Dataset):
    def __init__(self, csv_path, data_root):
        self.df = pd.read_csv(csv_path)
        self.orig_dir = os.path.join(data_root, "train_data")
        self.smote_dir = os.path.join(data_root, "train_data", "smote")

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_id = row["ID"]

        candidates = [
            os.path.join(self.smote_dir, img_id + ".npy"),
            os.path.join(self.orig_dir, img_id + ".npy"),
        ]

        for p in candidates:
            if os.path.exists(p):
                img = np.load(p)
                img = torch.from_numpy(img).permute(2,0,1).float()
                return img, torch.tensor(row["label"], dtype=torch.long)

        raise FileNotFoundError(img_id)

# -----------------
# Online Augmentation (MULTISPECTRAL SAFE)
# -----------------
def one_hot(labels, num_classes=2):
    return F.one_hot(labels, num_classes).float()

def random_intensity(img):
    if random.random() < 0.5:
        img = img * random.uniform(0.8, 1.2)
    if random.random() < 0.5:
        mean = img.mean(dim=(1,2), keepdim=True)
        img = (img - mean) * random.uniform(0.8, 1.2) + mean
    if random.random() < 0.5:
        img = img + torch.randn_like(img) * 0.01
    return img

def random_geom(img):
    if random.random() < 0.5:
        img = torch.flip(img, [2])
    if random.random() < 0.5:
        img = torch.flip(img, [1])
    if random.random() < 0.5:
        img = torch.rot90(img, random.choice([1,2,3]), [1,2])
    return img

def mixup(images, labels, alpha=1.0):
    lam = np.random.beta(alpha, alpha)
    idx = torch.randperm(images.size(0))
    return lam*images + (1-lam)*images[idx], lam*labels + (1-lam)*labels[idx]

def cutmix(images, labels, alpha=1.0):
    lam = np.random.beta(alpha, alpha)
    B, C, H, W = images.size()
    idx = torch.randperm(B)
    cx, cy = np.random.randint(W), np.random.randint(H)
    w, h = int(W*np.sqrt(1-lam)), int(H*np.sqrt(1-lam))
    x1, x2 = max(cx-w//2,0), min(cx+w//2,W)
    y1, y2 = max(cy-h//2,0), min(cy+h//2,H)
    images[:,:,y1:y2,x1:x2] = images[idx,:,y1:y2,x1:x2]
    lam = 1 - (x2-x1)*(y2-y1)/(H*W)
    return images, lam*labels + (1-lam)*labels[idx]

def apply_aug(images, labels):
    labels = one_hot(labels)
    imgs = []
    for img in images:
        img = random_intensity(img)
        img = random_geom(img)
        imgs.append(img)
    images = torch.stack(imgs)
    return mixup(images, labels) if random.random()<0.5 else cutmix(images, labels)

# -----------------
# Model
# -----------------
class ResizeTo256(nn.Module):
    def forward(self, x):
        return F.interpolate(x, (256,256), mode="bilinear", align_corners=False)

class EfficientNetV2_Landslide(nn.Module):
    def __init__(self):
        super().__init__()
        self.resize = ResizeTo256()
        self.backbone = timm.create_model(
            "tf_efficientnetv2_l",
            pretrained=True,
            num_classes=0
        )
        old = self.backbone.conv_stem
        self.backbone.conv_stem = nn.Conv2d(
            12, old.out_channels,
            kernel_size=old.kernel_size,
            stride=old.stride,
            padding=old.padding,
            bias=False
        )
        self.classifier = nn.Linear(self.backbone.num_features, 2)

    def forward(self, x):
        x = self.resize(x)
        f = self.backbone(x)
        return self.classifier(f), f

# -----------------
# Build Training Objects
# -----------------
BATCH_SIZE = 8   # OOM-safe
EPOCHS = 50

train_ds = LandslideDataset(TRAIN_CSV, DATA_ROOT)
train_loader = DataLoader(
    train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
    pin_memory=False
)

model = EfficientNetV2_Landslide().to(device)
optimizer = Adam(model.parameters(), lr=3e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

scaler = torch.cuda.amp.GradScaler()

print("✅ Training setup ready")

# -----------------
# Training Loop (AMP ENABLED)
# -----------------
model.train()

for epoch in range(EPOCHS):
    total_loss = 0.0

    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        imgs, soft_labels = apply_aug(imgs, labels)
        soft_labels = soft_labels.to(device)

        optimizer.zero_grad()

        with torch.cuda.amp.autocast():
            logits, _ = model(imgs)
            loss = F.kl_div(
                F.log_softmax(logits, dim=1),
                soft_labels,
                reduction="batchmean"
            )

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()

    scheduler.step()
    print(f"[Epoch {epoch+1:02d}/{EPOCHS}] Loss: {total_loss/len(train_loader):.4f}")

print("🎉 TRAINING COMPLETE")


Using device: cuda
GPU: Tesla T4


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/476M [00:00<?, ?B/s]

  scaler = torch.cuda.amp.GradScaler()


✅ Training setup ready


  with torch.cuda.amp.autocast():


In [None]:
# =====================================================
# SAVE MODEL, METRICS, AND GRAPHS (ALL FORMATS)
# =====================================================

import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from torch.utils.data import DataLoader
import os

# -----------------------
# Paths
# -----------------------
SAVE_DIR = "/content/drive/MyDrive/Landslide/final_outputs"
os.makedirs(SAVE_DIR, exist_ok=True)

# -----------------------
# 1. SAVE MODEL WEIGHTS
# -----------------------

# PyTorch native
torch.save(model.state_dict(), f"{SAVE_DIR}/efficientnetv2_landslide.pth")

# Full model (picklable)
torch.save(model, f"{SAVE_DIR}/efficientnetv2_landslide_full.pt")

# TorchScript
scripted = torch.jit.script(model)
scripted.save(f"{SAVE_DIR}/efficientnetv2_landslide_scripted.pt")

print("✅ Model saved in .pth, .pt, TorchScript formats")

# -----------------------
# 2. EXPORT TO H5 (Keras-style container)
# -----------------------
# Save weights as numpy → H5 container
import h5py

with h5py.File(f"{SAVE_DIR}/efficientnetv2_landslide_weights.h5", "w") as f:
    for k, v in model.state_dict().items():
        f.create_dataset(k, data=v.cpu().numpy())

print("✅ Model weights saved in .h5 format")

# -----------------------
# 3. EVALUATION METRICS (TRAIN SET)
# -----------------------

model.eval()
all_preds = []
all_labels = []

eval_loader = DataLoader(
    train_ds,
    batch_size=8,
    shuffle=False,
    num_workers=0
)

with torch.no_grad():
    for imgs, labels in eval_loader:
        imgs = imgs.to(device)
        logits, _ = model(imgs)
        preds = torch.argmax(logits, dim=1).cpu().numpy()

        all_preds.extend(preds)
        all_labels.extend(labels.numpy())

acc = accuracy_score(all_labels, all_preds)
f1  = f1_score(all_labels, all_preds)
prec = precision_score(all_labels, all_preds)
rec  = recall_score(all_labels, all_preds)

metrics = {
    "accuracy": acc,
    "f1_score": f1,
    "precision": prec,
    "recall": rec
}

pd.DataFrame([metrics]).to_csv(f"{SAVE_DIR}/metrics.csv", index=False)

print("✅ Metrics saved:", metrics)

# -----------------------
# 4. SAVE METRICS AS TEXT
# -----------------------
with open(f"{SAVE_DIR}/metrics.txt", "w") as f:
    for k, v in metrics.items():
        f.write(f"{k}: {v:.4f}\n")

# -----------------------
# 5. SAVE TRAINING LOSS GRAPH
# -----------------------
# If you logged loss per epoch, replace this with your loss list
# Otherwise, plot dummy placeholder

loss_history = []  # OPTIONAL: fill if you logged loss per epoch

if len(loss_history) > 0:
    plt.figure(figsize=(6,4))
    plt.plot(loss_history, label="Training Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid(True)
    plt.savefig(f"{SAVE_DIR}/training_loss.png")
    plt.close()
else:
    with open(f"{SAVE_DIR}/training_loss.txt", "w") as f:
        f.write("Loss history not logged during training.\n")

print("✅ Graphs and logs saved")

# -----------------------
# 6. FINAL CONFIRMATION
# -----------------------
print("\n🎉 EVERYTHING SAVED SUCCESSFULLY")
print("📂 Location:", SAVE_DIR)
