****

In [2]:
# 1. Setup & Extract Zip File
from google.colab import drive
import os, shutil
# Mount Google Drive (forcing a remount) so the dataset archives are reachable.
drive.mount('/content/drive', force_remount=True)
DRIVE_DIR = "/content/drive/My Drive"

# (archive name on Drive, extraction target, nested data folder created up front)
_ARCHIVES = (
    ("train_data.zip", "/content/train_data", "/content/train_data/train_data"),
    ("test_data.zip", "/content/test_data", "/content/test_data/test_data"),
)

# Create the nested folders first; exist_ok keeps the cell re-runnable.
for _zip_name, _target, _nested in _ARCHIVES:
    os.makedirs(_nested, exist_ok=True)

# Then unpack each archive from Drive into its top-level target folder.
for _zip_name, _target, _nested in _ARCHIVES:
    shutil.unpack_archive(os.path.join(DRIVE_DIR, _zip_name), _target)



Mounted at /content/drive


In [3]:
# Install Required Packages
!pip install -q timm scikit-learn tqdm catboost
!pip install ipywidgets


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m111.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m90.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m59.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

**TRAIN DATA VISUALIZATION**

In [4]:
# Load (or reload, if already loaded) the IPython autoreload extension.
# NOTE(review): no `%autoreload <mode>` call is visible in this notebook —
# confirm whether a mode was meant to be set.
%reload_ext autoreload
# NOTE(review): presumably intended to enable ipywidgets rendering for the
# viewers below — confirm this module is a loadable IPython extension here.
%reload_ext widgetsnbextension

In [5]:
# === 1. Imports and Paths ===
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output

# === 2. Set Paths ===
TRAIN_DIR = "/content/train_data/train_data"  # folder searched for <ID>.npy files
TRAIN_CSV = "/content/Train.csv"              # table read for the "ID" / "label" columns

# === 3. Load CSV and Shuffle ===
# Shuffle once on load (unseeded, so the order differs between runs) and
# rebuild a clean 0..n-1 index.
df_full = pd.read_csv(TRAIN_CSV).sample(frac=1).reset_index(drop=True)
filtered_df = df_full.copy()  # rows currently browsable; replaced when the filter changes

# === 4. Viewer State ===
index = 0  # position of the displayed row inside filtered_df

# === 5. UI Widgets ===
next_button = widgets.Button(description="Next")
back_button = widgets.Button(description="Back")
label_toggle = widgets.ToggleButtons(
    options=["Show Label", "Hide Label"],
    value="Show Label",
)
channel_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=11,
    description="Channel:",
    continuous_update=False,
)
label_filter = widgets.ToggleButtons(
    description="Filter:",
    options=["All", "Landslide only", "No Landslide only"],
    value="All",
)
output_area = widgets.Output()  # every figure/message of the viewer is drawn in here

# === 6. Show Image Function ===
def show_image(idx):
    """Render the sample at position ``idx`` of ``filtered_df`` into ``output_area``.

    Left panel: a colour image built from the first three channels, min-max
    scaled to [0, 1] (an "RGB approximation"). Right panel: the channel picked
    with ``channel_slider``, in grayscale. The left title shows the sample ID
    and, when ``label_toggle`` is set to "Show Label", the label.

    Parameters
    ----------
    idx : int
        Zero-based row position in the module-level ``filtered_df``.

    Returns
    -------
    None
        Output is drawn into ``output_area``. A message is printed there
        instead when ``idx`` is out of range or the ``.npy`` file is missing.
    """
    # Reject both ends of the range. A negative idx would otherwise be taken
    # as "count from the end" by iloc and raise IndexError on an empty frame
    # (the Next callback can produce -1 when the active filter matches nothing).
    if idx < 0 or idx >= len(filtered_df):
        with output_area:
            clear_output(wait=True)
            print("No more images to show.")
        return

    row = filtered_df.iloc[idx]
    img_id = row["ID"]
    label = row["label"]

    img_path = os.path.join(TRAIN_DIR, f"{img_id}.npy")
    if not os.path.exists(img_path):
        with output_area:
            clear_output(wait=True)
            print(f"File not found: {img_path}")
        return

    img = np.load(img_path)  # expected layout (H, W, C); original note says (H, W, 12)
    n_channels = img.shape[-1]

    # RGB approximation: first three channels, or channel 0 replicated when
    # fewer than three are available.
    if n_channels >= 3:
        rgb_img = img[:, :, :3]
    else:
        rgb_img = np.stack([img[:, :, 0]] * 3, axis=-1)
    # Scale to [0, 1] in both cases; imshow clips RGB data outside its valid
    # range, so unscaled values would render incorrectly. The epsilon avoids a
    # division by zero on a constant image.
    rgb_img = (rgb_img - rgb_img.min()) / (rgb_img.max() - rgb_img.min() + 1e-6)

    # Draw
    with output_area:
        clear_output(wait=True)
        fig, axes = plt.subplots(1, 2, figsize=(8, 4))

        axes[0].imshow(rgb_img)
        title = f"ID: {img_id}"
        if label_toggle.value == "Show Label":
            title += f" | Label: {'Landslide' if label == 1 else 'No Landslide'}"
        axes[0].set_title(title)
        axes[0].axis("off")

        # The slider allows values up to 11; clamp so an array with fewer
        # channels does not raise IndexError.
        ch = min(channel_slider.value, n_channels - 1)
        axes[1].imshow(img[:, :, ch], cmap="gray")
        axes[1].set_title(f"Channel {ch}")
        axes[1].axis("off")

        plt.tight_layout()
        plt.show()

# === 7. Filter Function ===
def apply_filter_and_refresh():
    """Rebuild ``filtered_df`` from ``df_full`` according to ``label_filter``,
    reset the viewer position to 0 and redraw."""
    global filtered_df, index

    # Map the filter choice to the label value to keep; "All" maps to None.
    wanted_label = {"Landslide only": 1, "No Landslide only": 0}.get(label_filter.value)

    if wanted_label is None:
        filtered_df = df_full.copy()
    else:
        keep_mask = df_full["label"] == wanted_label
        filtered_df = df_full[keep_mask].reset_index(drop=True)

    index = 0
    show_image(index)

# === 8. Navigation Callbacks ===
def on_next_clicked(b):
    """Click handler for the Next button: advance one row and redraw.

    The position is clamped to ``[0, len(filtered_df) - 1]``. The lower bound
    matters when the active filter matches no rows: ``len(filtered_df) - 1``
    is then -1, and without the clamp the viewer would be asked to show row -1.

    Parameters
    ----------
    b : the button instance passed by ipywidgets (unused).
    """
    global index
    last_position = len(filtered_df) - 1
    index = max(0, min(index + 1, last_position))
    show_image(index)

def on_back_clicked(b):
    """Click handler for the Back button: step one row back (never below 0) and redraw."""
    global index
    previous = index - 1
    index = previous if previous > 0 else 0
    show_image(index)

def on_toggle_changed(change):
    """Observer for ``label_toggle``: redraw the current image so the title
    reflects the new show/hide choice. ``change`` is unused."""
    show_image(index)

def on_channel_changed(change):
    """Observer for ``channel_slider``: redraw the current image with the
    newly selected channel. ``change`` is unused."""
    show_image(index)

def on_filter_changed(change):
    """Observer for ``label_filter``: rebuild the filtered rows and restart
    the viewer at position 0. ``change`` is unused."""
    apply_filter_and_refresh()

# === 9. Bind Events ===
# Buttons react to clicks.
for _button, _handler in (
    (next_button, on_next_clicked),
    (back_button, on_back_clicked),
):
    _button.on_click(_handler)

# Value widgets trigger a redraw whenever their "value" trait changes.
for _widget, _handler in (
    (label_toggle, on_toggle_changed),
    (channel_slider, on_channel_changed),
    (label_filter, on_filter_changed),
):
    _widget.observe(_handler, names="value")

# === 10. Display Viewer ===
# Heading, then the row of controls, then the area the figures are drawn in.
title_html = widgets.HTML(value="""
    <h3 style='
        margin-left: 20%;
        font-size: 24px;
        color: seagreen;
        font-weight: bold;
        margin-bottom: 10px;
    '>
        🌍 TRAINING DATA VISUALIZATION
    </h3>
""")
control_row = widgets.HBox([
    back_button,
    next_button,
    label_toggle,
    channel_slider,
    label_filter,
])
for _element in (title_html, control_row, output_area):
    display(_element)

# Draw the first image right away so the viewer does not start empty.
show_image(index)

# === 11. Reload Function ===
def reload_viewer(new_index=0, new_csv_path=None, keep_filter=True):
    """
    Reloads the viewer.
    - new_index: position (within the filtered rows) of the image to start at;
      clamped to the valid range
    - new_csv_path: optional path to new CSV; when given it is read, shuffled
      and becomes the new df_full
    - keep_filter: if False, resets filter to 'All'
    """
    global index, df_full, filtered_df

    if new_csv_path is not None:
        df_full = pd.read_csv(new_csv_path)
        df_full = df_full.sample(frac=1).reset_index(drop=True)

    if not keep_filter:
        label_filter.value = "All"

    # Rebuild filtered_df from the (possibly new) df_full. This call also
    # resets index to 0 and draws row 0, so the requested start position must
    # be applied afterwards — assigning it before this call would be lost.
    apply_filter_and_refresh()

    # Clamp into the valid range; an empty filtered_df yields 0, which
    # show_image reports as "no more images".
    start_at = max(0, min(new_index, len(filtered_df) - 1))
    if start_at != index:
        index = start_at
        show_image(index)


HTML(value="\n    <h3 style='\n        margin-left: 20%;\n        font-size: 24px;\n        color: seagreen;\n…

HBox(children=(Button(description='Back', style=ButtonStyle()), Button(description='Next', style=ButtonStyle()…

Output()

**MY MODEL PREDICTIONS**

In [7]:
# === [1] Imports ===
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from skimage.feature import graycomatrix, graycoprops
from scipy.ndimage import sobel
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from sklearn.ensemble import HistGradientBoostingClassifier

# === [2] Feature Extraction Function ===
def extract_features(df, data_dir):
    """Build one row of hand-crafted features per sample listed in ``df``.

    For every ID in ``df['ID']`` the array ``<data_dir>/<ID>.npy`` is loaded
    and summarised into a flat vector, in this order:
      * per-channel mean, std, min, max, median, 25th and 75th percentile;
      * mean/std of (channels 4-7 minus channels 0-3);
      * mean/std of (channels 10-11 minus channels 8-9);
      * mean/std/var of a normalised difference of channels 3 & 2 ("pre") and
        of channels 7 & 6 ("post"), plus mean/std of post minus pre;
      * correlation between channels 3 & 2 and between channels 7 & 6;
      * four GLCM texture properties and a Sobel edge density of channel 3.

    NOTE(review): the variable names suggest channels 0-3 / 4-7 are pre/post
    optical bands and 8-9 / 10-11 pre/post SAR bands — confirm against the
    dataset description.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``ID`` column.
    data_dir : str
        Folder holding one ``<ID>.npy`` file per sample.

    Returns
    -------
    pandas.DataFrame
        One row per ID, in the order of ``df['ID']``, with integer column names.
    """
    features = []
    for sid in tqdm(df['ID'], desc="Extracting"):
        arr = np.load(os.path.join(data_dir, f"{sid}.npy"))  # original note: shape (64, 64, 12)

        # Per-channel summary statistics over the two spatial axes.
        means = arr.mean((0, 1))
        stds = arr.std((0, 1))
        mins = arr.min((0, 1))
        maxs = arr.max((0, 1))
        medians = np.median(arr, (0, 1))
        p25 = np.percentile(arr, 25, (0, 1))
        p75 = np.percentile(arr, 75, (0, 1))

        # Channel-group differences.
        odiff = arr[..., 4:8] - arr[..., 0:4]
        od_means = odiff.mean((0, 1))
        od_stds = odiff.std((0, 1))

        sar_diff = arr[..., 10:12] - arr[..., 8:10]
        sd_means = sar_diff.mean((0, 1))
        sd_stds = sar_diff.std((0, 1))

        # Normalised differences; the epsilon guards against a zero denominator.
        ndvi_pre = (arr[..., 3] - arr[..., 2]) / (arr[..., 3] + arr[..., 2] + 1e-6)
        ndvi_post = (arr[..., 7] - arr[..., 6]) / (arr[..., 7] + arr[..., 6] + 1e-6)
        ndvi_diff = ndvi_post - ndvi_pre
        # corrcoef yields NaN when either channel is constant; left as-is.
        corr_pre = np.corrcoef(arr[..., 3].flatten(), arr[..., 2].flatten())[0, 1]
        corr_post = np.corrcoef(arr[..., 7].flatten(), arr[..., 6].flatten())[0, 1]

        # Quantise channel 3 to 0..255 for the co-occurrence matrix. A constant
        # channel has zero range; dividing by it would produce NaN before the
        # uint8 cast, so use an all-zero image in that case.
        band = arr[..., 3]
        band_range = np.ptp(band)
        if band_range > 0:
            ch = ((band - band.min()) / band_range * 255).astype(np.uint8)
        else:
            ch = np.zeros(band.shape, dtype=np.uint8)
        glcm = graycomatrix(ch, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
        glcm_feats = [graycoprops(glcm, prop)[0, 0] for prop in ['contrast', 'dissimilarity', 'homogeneity', 'energy']]

        # Fraction of pixels whose Sobel response on channel 3 exceeds 0.1.
        edge = sobel(arr[..., 3])
        edge_density = (edge > 0.1).sum() / edge.size

        # Keep the order stable: columns are positional, not named.
        features.append(np.hstack([
            means, stds, mins, maxs, medians, p25, p75,
            od_means, od_stds,
            sd_means, sd_stds,
            ndvi_pre.mean(), ndvi_pre.std(), ndvi_pre.var(),
            ndvi_post.mean(), ndvi_post.std(), ndvi_post.var(),
            ndvi_diff.mean(), ndvi_diff.std(),
            corr_pre, corr_post,
            glcm_feats, edge_density
        ]))

    return pd.DataFrame(features)

# === [3] Load Data ===
# Folders holding the per-sample .npy arrays.
TRAIN_DIR, TEST_DIR = "/content/train_data/train_data", "/content/test_data/test_data"

# ID tables; the training table also provides the "label" target column.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in ("/content/Train.csv", "/content/Test.csv"))
y = train_df['label']

# Feature matrices; rows follow the ID order of the respective table.
X_train = extract_features(train_df, TRAIN_DIR)
X_test = extract_features(test_df, TEST_DIR)

# === [4] Train HistGradientBoosting with CV ===
FOLDS = 5
skf = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=42)

# oof: out-of-fold probability for every training row.
# preds: test probability averaged over the fold models.
oof = np.zeros(len(X_train))
preds = np.zeros(len(X_test))

print("\n🔁 Training HistGradientBoosting with CV...")
for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y)):
    print(f"Fold {fold+1}")
    X_tr, y_tr = X_train.iloc[train_idx], y.iloc[train_idx]
    X_val, y_val = X_train.iloc[val_idx], y.iloc[val_idx]

    model = HistGradientBoostingClassifier(random_state=42, max_iter=500)
    model.fit(X_tr, y_tr)

    # Column 1 of predict_proba is the probability of the positive class.
    val_proba = model.predict_proba(X_val)[:, 1]
    test_proba = model.predict_proba(X_test)[:, 1]
    oof[val_idx] = val_proba
    preds += test_proba / FOLDS

# === [5] Apply Fixed Threshold 0.39 ===
# NOTE(review): how 0.39 was chosen is not shown in this notebook.
final_threshold = 0.39
oof_labels = (oof > final_threshold).astype(int)
final_preds = (preds > final_threshold).astype(int)
f1_cv = f1_score(y, oof_labels)

print(f"\n✅ Final F1 Score (CV) = {f1_cv:.4f} @ Threshold = {final_threshold}")
print(f"🧮 Total predicted landslides in test set: {final_preds.sum()}")

# === [6] Save Submission ===
# Written relative to the current working directory.
submission = pd.DataFrame({"ID": test_df["ID"], "label": final_preds})
submission.to_csv("submission_histgb_8803.csv", index=False)
print("📦 Saved: submission_histgb_8803.csv")


Extracting: 100%|██████████| 7147/7147 [01:19<00:00, 90.01it/s] 
Extracting: 100%|██████████| 5398/5398 [00:57<00:00, 94.11it/s] 



🔁 Training HistGradientBoosting with CV...
Fold 1
Fold 2
Fold 3
Fold 4
Fold 5

✅ Final F1 Score (CV) = 0.8595 @ Threshold = 0.39
🧮 Total predicted landslides in test set: 666
📦 Saved: submission_histgb_8803.csv


**MY PREDICTIONS VISUALIZATION**

In [8]:
# === 1. Imports ===
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output

# === 2. Paths ===
# NOTE(review): the training cell writes this file with a relative path, so
# this absolute path assumes the working directory is /content — confirm.
PRED_CSV = "/content/submission_histgb_8803.csv"
TEST_DIR = "/content/test_data/test_data"  # folder searched for <ID>.npy files

# === 3. Load predictions ===
# Shuffle once on load (unseeded) and rebuild a clean 0..n-1 index.
# NOTE: filtered_df, index and the widget names below are the same module-level
# names used by the training viewer cell, so running this cell rebinds them.
df_pred = pd.read_csv(PRED_CSV).sample(frac=1).reset_index(drop=True)
filtered_df = df_pred.copy()  # rows currently browsable; replaced when the filter changes

# === 4. Viewer State ===
index = 0  # position of the displayed row inside filtered_df

# === 5. UI Widgets ===
next_button = widgets.Button(description="Next")
back_button = widgets.Button(description="Back")
channel_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=11,
    description="Channel:",
    continuous_update=False,
)
label_filter = widgets.ToggleButtons(
    description="Filter:",
    options=["All", "Landslide only", "No Landslide only"],
    value="All",
)
output_area = widgets.Output()  # every figure/message of the viewer is drawn in here

# === 6. Display Function ===
def show_image(idx):
    """Render the test sample at position ``idx`` of ``filtered_df`` with its prediction.

    Left panel: a colour image built from the first three channels, min-max
    scaled to [0, 1], titled with the sample ID and the predicted class.
    Right panel: the channel picked with ``channel_slider``, in grayscale.

    NOTE: this redefines the module-level ``show_image`` from the training
    viewer cell. That viewer's callbacks look the name up at call time, so
    once this cell has run they call this function too.

    Parameters
    ----------
    idx : int
        Zero-based row position in the module-level ``filtered_df``.

    Returns
    -------
    None
        Output is drawn into ``output_area``. A message is printed there
        instead when ``idx`` is out of range or the ``.npy`` file is missing.
    """
    # Reject both ends of the range. A negative idx would otherwise be taken
    # as "count from the end" by iloc and raise IndexError on an empty frame
    # (the Next callback can produce -1 when the active filter matches nothing).
    if idx < 0 or idx >= len(filtered_df):
        with output_area:
            clear_output(wait=True)
            print("No more images.")
        return

    row = filtered_df.iloc[idx]
    img_id = row["ID"]
    pred = row["label"]

    img_path = os.path.join(TEST_DIR, f"{img_id}.npy")
    if not os.path.exists(img_path):
        with output_area:
            clear_output(wait=True)
            print(f"Image not found: {img_path}")
        return

    img = np.load(img_path)  # expected layout (H, W, C) — TODO confirm
    n_channels = img.shape[-1]

    # First three channels, or channel 0 replicated when fewer are available.
    if n_channels >= 3:
        rgb = img[:, :, :3]
    else:
        rgb = np.stack([img[:, :, 0]] * 3, axis=-1)
    # Scale to [0, 1] in both cases; imshow clips RGB data outside its valid
    # range. The epsilon avoids a division by zero on a constant image.
    rgb = (rgb - rgb.min()) / (rgb.max() - rgb.min() + 1e-6)

    with output_area:
        clear_output(wait=True)
        fig, axes = plt.subplots(1, 2, figsize=(8, 4))

        axes[0].imshow(rgb)
        axes[0].set_title(f"ID: {img_id}\nPrediction: {'Landslide' if pred == 1 else 'No Landslide'}")
        axes[0].axis("off")

        # The slider allows values up to 11; clamp so an array with fewer
        # channels does not raise IndexError.
        ch = min(channel_slider.value, n_channels - 1)
        axes[1].imshow(img[:, :, ch], cmap="gray")
        axes[1].set_title(f"Channel {ch}")
        axes[1].axis("off")

        plt.tight_layout()
        plt.show()

# === 7. Filter Logic ===
def apply_filter_and_refresh():
    """Rebuild ``filtered_df`` from ``df_pred`` according to ``label_filter``,
    reset the viewer position to 0 and redraw."""
    global filtered_df, index

    # Map the filter choice to the predicted label to keep; "All" maps to None.
    wanted_label = {"Landslide only": 1, "No Landslide only": 0}.get(label_filter.value)

    if wanted_label is None:
        filtered_df = df_pred.copy()
    else:
        keep_mask = df_pred["label"] == wanted_label
        filtered_df = df_pred[keep_mask].reset_index(drop=True)

    index = 0
    show_image(index)

# === 8. Callbacks ===
def on_next(b):
    """Click handler for the Next button: advance one row and redraw.

    The position is clamped to ``[0, len(filtered_df) - 1]``. The lower bound
    matters when the active filter matches no rows: ``len(filtered_df) - 1``
    is then -1, and without the clamp the viewer would be asked to show row -1.

    Parameters
    ----------
    b : the button instance passed by ipywidgets (unused).
    """
    global index
    last_position = len(filtered_df) - 1
    index = max(0, min(index + 1, last_position))
    show_image(index)

def on_back(b):
    """Click handler for the Back button: step one row back (never below 0) and redraw."""
    global index
    previous = index - 1
    index = previous if previous > 0 else 0
    show_image(index)

def on_channel_change(change):
    """Observer for ``channel_slider``: redraw the current image with the
    newly selected channel. ``change`` is unused."""
    show_image(index)

def on_filter_change(change):
    """Observer for ``label_filter``: rebuild the filtered rows and restart
    the viewer at position 0. ``change`` is unused."""
    apply_filter_and_refresh()

# === 9. Bind Events ===
# Buttons react to clicks.
for _button, _handler in ((next_button, on_next), (back_button, on_back)):
    _button.on_click(_handler)

# Value widgets trigger a redraw whenever their "value" trait changes.
for _widget, _handler in (
    (channel_slider, on_channel_change),
    (label_filter, on_filter_change),
):
    _widget.observe(_handler, names="value")

# === 10. Launch Viewer (title offset 20% from the left) ===
title_html = widgets.HTML(value="""
    <h3 style='
        margin-left: 20%;
        font-size: 24px;
        color: seagreen;
        font-weight: bold;
        margin-bottom: 10px;
    '>
        🌍 MY PREDICTIONS VISUALIZATION
    </h3>
""")

# Heading, then the row of controls, then the area the figures are drawn in.
for _element in (
    title_html,
    widgets.HBox([back_button, next_button, channel_slider, label_filter]),
    output_area,
):
    display(_element)

# Draw the first image right away so the viewer does not start empty.
show_image(index)

HTML(value="\n    <h3 style='\n        margin-left: 20%;\n        font-size: 24px;\n        color: seagreen;\n…

HBox(children=(Button(description='Back', style=ButtonStyle()), Button(description='Next', style=ButtonStyle()…

Output()