In [1]:
# Notebook config (run this first cell)
from pathlib import Path
import os, sys

# Project root. The default is the original machine-specific location; set the
# LY_PROJECT_DIR environment variable to run the notebook from another folder
# or machine without editing this cell.
BASE = Path(os.environ.get("LY_PROJECT_DIR", r"C:\Users\Lenovo\Desktop\LY MAJOR PROJECT"))

# Data sub-folders that this cell only names (it does not create them).
DATA = BASE / "data"
IMAGES = DATA / "images"
POP = DATA / "population"
ROADS = DATA / "roads"
BOUNDARIES = DATA / "boundaries"

# Folders that are created below if they are missing.
PROCESSED = DATA / "processed"
PATCHES = DATA / "patches"
OUTPUTS = BASE / "outputs"

# Create folders (safe): parents=True builds missing ancestors and
# exist_ok=True makes re-running this cell a no-op.
for p in [PROCESSED, PATCHES, OUTPUTS, OUTPUTS/"maps", OUTPUTS/"charts"]:
    p.mkdir(parents=True, exist_ok=True)

print("Base:", BASE)
print("Processed folder:", PROCESSED)


Base: C:\Users\Lenovo\Desktop\LY MAJOR PROJECT
Processed folder: C:\Users\Lenovo\Desktop\LY MAJOR PROJECT\data\processed


In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
import os

data_path = PROCESSED / "features_stack.npz"

# np.load on an .npz archive returns a lazy NpzFile that keeps the file open.
# The context manager closes the handle as soon as the array has been read
# into memory; `data` stays bound afterwards but refers to a closed archive.
with np.load(data_path) as data:
    print("Available keys:", list(data.keys()))

    # Load feature data
    features = data["features"]  # shape: (H, W, Bands)

print("Features shape:", features.shape)


Available keys: ['features']
Features shape: (3451, 3508, 7)


In [3]:
# -----------------------------
# 1. Parameters
# -----------------------------
PATCH_SIZE = 64      # side length of each square patch, in pixels
train_ratio = 0.7    # fraction of patches used for training
val_ratio = 0.15     # fraction of patches used for validation
test_ratio = 0.15    # fraction of patches used for testing

# Fail fast on a mistyped configuration. The split step derives its sizes from
# `1 - train_ratio` and `test_ratio / (test_ratio + val_ratio)`, so ratios that
# do not sum to 1 would silently produce proportions other than those above.
if PATCH_SIZE <= 0:
    raise ValueError(f"PATCH_SIZE must be positive, got {PATCH_SIZE}")
if min(train_ratio, val_ratio, test_ratio) <= 0:
    raise ValueError("train_ratio, val_ratio and test_ratio must all be positive")
if abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-9:
    raise ValueError(
        "train_ratio + val_ratio + test_ratio must equal 1, got "
        f"{train_ratio + val_ratio + test_ratio}"
    )


In [4]:
# -----------------------------
# 2. Patch Creation
# -----------------------------
def create_patches(features, patch_size=64):
    """Cut a (H, W, Bands) raster into non-overlapping square patches.

    Patches are taken on a regular grid anchored at the top-left corner and
    returned in row-major order (all patches of the first patch-row, then the
    second, ...). Rows and columns left over when H or W is not a multiple of
    ``patch_size`` are discarded.

    Args:
        features: NumPy array of shape (H, W, Bands).
        patch_size: Side length of each patch in pixels; must be positive.

    Returns:
        A new array of shape (N, patch_size, patch_size, Bands) with
        N = (H // patch_size) * (W // patch_size) and the dtype of
        ``features``. When the raster is smaller than one patch, N is 0 and
        the result is still 4-dimensional.

    Raises:
        ValueError: If ``patch_size`` is not positive.
    """
    if patch_size <= 0:
        raise ValueError(f"patch_size must be a positive integer, got {patch_size}")

    H, W, B = features.shape
    n_rows, n_cols = H // patch_size, W // patch_size

    # Drop the partial tiles at the bottom/right edge, then split each spatial
    # axis into (tile index, offset inside tile).
    trimmed = features[:n_rows * patch_size, :n_cols * patch_size, :]
    tiles = trimmed.reshape(n_rows, patch_size, n_cols, patch_size, B)

    # Bring the two tile indices to the front. The explicit copy guarantees
    # the result never shares memory with `features`, so later in-place edits
    # of the patches cannot alter the source raster.
    patches = tiles.transpose(0, 2, 1, 3, 4).copy()
    patches = patches.reshape(-1, patch_size, patch_size, B)

    print(f"✅ Created {patches.shape[0]} patches of size {patch_size}x{patch_size} with {B} bands.")
    return patches

# Tile the full feature raster; `patches` is the array the following cells
# normalize and split.
patches = create_patches(features, PATCH_SIZE)
print("Patches shape:", patches.shape)


✅ Created 2862 patches of size 64x64 with 7 bands.
Patches shape: (2862, 64, 64, 7)


In [5]:
# -----------------------------
# 3. Normalize per band
# -----------------------------
def normalize_patches(patches):
    """Min-max scale each band using statistics taken over all patches.

    For every index along the last axis (the band), the minimum and maximum
    are computed over the entire array, i.e. across every patch and pixel and
    not per individual patch, and the band is rescaled to roughly [0, 1]. A
    small epsilon in the denominator avoids division by zero, so a constant
    band maps to 0 and the maximum lands just below 1.

    NaN values are ignored when computing the statistics and stay NaN in the
    output, so a few missing pixels do not turn a whole band into NaN.

    Args:
        patches: Array whose last axis indexes bands, e.g.
            (N, patch_size, patch_size, Bands).

    Returns:
        A float32 copy of ``patches`` with each band rescaled; the input
        array is left untouched.
    """
    # astype always copies here, so the in-place writes below are safe.
    patches = patches.astype("float32")

    # min/max of a zero-size array raises; nothing to scale in that case.
    if patches.size == 0:
        return patches

    for b in range(patches.shape[-1]):
        band = patches[..., b]
        min_val, max_val = np.nanmin(band), np.nanmax(band)
        patches[..., b] = (band - min_val) / (max_val - min_val + 1e-6)
    return patches

# Rebinds `patches` to the scaled array returned by normalize_patches.
# NOTE(review): this runs before the train/val/test split, so the min/max used
# for scaling are computed from patches that later end up in val/test too.
patches = normalize_patches(patches)
print("Normalized range:", patches.min(), patches.max())


Normalized range: 0.0 1.0


In [6]:
# -----------------------------
# 4. Train/Val/Test Split
# -----------------------------
# Stage 1: peel off the training set; the remainder forms a holdout pool.
holdout_fraction = 1-train_ratio
X_train, X_temp = train_test_split(
    patches, test_size=holdout_fraction, random_state=42
)

# Stage 2: divide the holdout pool between validation and test according to
# their relative weights.
test_share_of_holdout = test_ratio/(test_ratio+val_ratio)
X_val, X_test = train_test_split(
    X_temp, test_size=test_share_of_holdout, random_state=42
)

# Report the size of every split.
for split_label, split_array in (("Train:", X_train), ("Val:", X_val), ("Test:", X_test)):
    print(split_label, split_array.shape)


Train: (2003, 64, 64, 7)
Val: (429, 64, 64, 7)
Test: (430, 64, 64, 7)


In [7]:
# -----------------------------
# 5. Save to Disk
# -----------------------------
# Destination folder for the split archives; created if it is missing.
output_dir = PROCESSED
output_dir.mkdir(parents=True, exist_ok=True)

# One compressed archive per split, each storing its patches under the key "X".
for archive_name, split_patches in (
    ("train.npz", X_train),
    ("val.npz", X_val),
    ("test.npz", X_test),
):
    np.savez_compressed(output_dir / archive_name, X=split_patches)

print("✅ Saved train/val/test patch datasets in:", output_dir)


✅ Saved train/val/test patch datasets in: C:\Users\Lenovo\Desktop\LY MAJOR PROJECT\data\processed
