In [1]:
import os
import shutil
import zipfile

import numpy as np
import pandas as pd
from scipy.optimize import linear_sum_assignment

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.applications import (
    ResNet152, MobileNetV3Large, VGG19, Xception
)


  if not hasattr(np, "object"):


In [2]:
# Location of the dataset archive and of the folders it is unpacked into.
ZIP_PATH = "remotesensing.zip"

WORKING_DIR = "RS_working"
DATA_DIR = os.path.join(WORKING_DIR, "gamma-correct")
MIXED_DIR = os.path.join(WORKING_DIR, "mixed_images")

# Create both output folders up front; exist_ok keeps the cell re-runnable.
for folder in (WORKING_DIR, MIXED_DIR):
    os.makedirs(folder, exist_ok=True)

# Unpack every member of the archive into the working directory.
with zipfile.ZipFile(ZIP_PATH, 'r') as archive:
    archive.extractall(WORKING_DIR)

print("‚úÖ Dataset unzipped completely")


‚úÖ Dataset unzipped completely


In [3]:
# Flatten the per-class folder tree under DATA_DIR into MIXED_DIR and record,
# for every copied image, the file name it has in MIXED_DIR and its class.
image_records = []
image_counter = 0

# Lower-cased names already written to MIXED_DIR. Two class folders may hold
# files with the same name; copying both under that name would silently
# overwrite the first image and leave two records pointing at one file.
# Lower-casing also covers case-insensitive file systems.
used_names = set()

print("üìÇ Dataset root detected:", DATA_DIR)

for class_name in sorted(os.listdir(DATA_DIR)):
    class_path = os.path.join(DATA_DIR, class_name)

    # Only sub-directories are treated as classes.
    if not os.path.isdir(class_path):
        continue

    print(f"‚û°Ô∏è Processing class: {class_name}")

    for image_name in sorted(os.listdir(class_path)):
        if not image_name.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        # Keep the original name whenever it is still free (so datasets
        # without duplicates behave exactly as before); otherwise prefix it
        # with the class name to make it unique.
        mixed_name = image_name
        if mixed_name.lower() in used_names:
            mixed_name = f"{class_name}_{image_name}"
            if mixed_name.lower() in used_names:
                raise ValueError(
                    f"Cannot find a unique name for {image_name!r} "
                    f"from class {class_name!r}"
                )
        used_names.add(mixed_name.lower())

        image_counter += 1
        src_path = os.path.join(class_path, image_name)
        dst_path = os.path.join(MIXED_DIR, mixed_name)

        shutil.copy(src_path, dst_path)

        image_records.append({
            "image_name": mixed_name,
            "label": class_name
        })

print(f"‚úÖ Mixed images created: {image_counter}")


üìÇ Dataset root detected: RS_working\gamma-correct
‚û°Ô∏è Processing class: Field
‚û°Ô∏è Processing class: Forest
‚û°Ô∏è Processing class: Grass
‚û°Ô∏è Processing class: Industry
‚û°Ô∏è Processing class: Parking
‚û°Ô∏è Processing class: Resident
‚û°Ô∏è Processing class: River
‚úÖ Mixed images created: 2800


In [4]:
# Tabulate the collected image records and write them to labels.csv inside
# WORKING_DIR (without the DataFrame index column).
LABELS_CSV = os.path.join(WORKING_DIR, "labels.csv")

labels_df = pd.DataFrame(data=image_records)
labels_df.to_csv(path_or_buf=LABELS_CSV, index=False)

print("‚úÖ labels.csv saved successfully")


‚úÖ labels.csv saved successfully


In [5]:
# Image size every file is resized to, and the number of images per batch.
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Pixel values are multiplied by 1/255 as images are loaded.
datagen = ImageDataGenerator(rescale=1.0 / 255)

# Shuffled batches of images with categorical labels; the file names in
# labels_df["image_name"] are resolved relative to MIXED_DIR.
generator = datagen.flow_from_dataframe(
    labels_df,
    MIXED_DIR,
    x_col="image_name",
    y_col="label",
    class_mode="categorical",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
)


Found 2800 validated image filenames belonging to 7 classes.


In [6]:
def build_model(base_model_fn, num_classes, input_shape=(224, 224, 3)):
    """Build and compile an image classifier on top of a backbone network.

    Args:
        base_model_fn: Callable (e.g. ``ResNet152``) that accepts the keyword
            arguments ``include_top``, ``weights`` and ``input_shape`` and
            returns a model exposing ``.input`` and ``.output``.
        num_classes: Number of units in the final softmax layer.
        input_shape: Shape of a single input image. Defaults to
            ``(224, 224, 3)``, the value that used to be hard-coded, so
            existing two-argument calls behave exactly as before.

    Returns:
        A compiled ``Model`` (Adam optimizer, categorical cross-entropy loss,
        accuracy metric). Its last three layers are, in order: global average
        pooling, a 256-unit ReLU dense layer, and the softmax output.
    """
    # weights=None: the backbone is created without loading pretrained weights.
    base_model = base_model_fn(
        include_top=False,
        weights=None,
        input_shape=input_shape
    )

    # Classification head: pooled backbone features -> Dense(256) -> softmax.
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(256, activation='relu')(x)
    output = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=output)

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


In [None]:
from tensorflow.keras.applications import EfficientNetB0

# Number of distinct classes and the integer code of each row's label
# (codes follow the categories of the 'label' column).
label_categories = labels_df['label'].astype('category')
NUM_CLASSES = labels_df['label'].nunique()
true_labels = label_categories.cat.codes.values

# Backbones to compare, keyed by the name used in logs and output file names.
# NOTE(review): Keras documents built-in input rescaling for some backbones
# (MobileNetV3, EfficientNet) - confirm that feeding them images already
# scaled by 1/255 is intended.
MODELS = {
    "ResNet152": ResNet152,
    "MobileNetV3": MobileNetV3Large,
    "VGG19": VGG19,
    "Xception": Xception,
    "EfficientNetB0": EfficientNetB0,
}

# Filled later with one feature matrix per model name.
feature_results = dict()


In [None]:
# Folder that receives the saved arrays; created if it does not exist yet.
SAVE_DIR = os.path.join(WORKING_DIR, "saved_features")
os.makedirs(SAVE_DIR, exist_ok=True)

# Integer code for each row of labels_df, taken from the categories of the
# 'label' column.
true_labels = labels_df['label'].astype('category').cat.codes.values

# Store the codes as true_labels.npy inside SAVE_DIR.
true_labels_path = os.path.join(SAVE_DIR, "true_labels.npy")
np.save(true_labels_path, true_labels)

print("‚úÖ True labels saved")


In [None]:
# Train every backbone in MODELS, then extract and save one feature matrix per
# model whose rows are aligned with `true_labels` (i.e. labels_df row order).
SAVE_DIR = os.path.join(WORKING_DIR, "saved_features")
os.makedirs(SAVE_DIR, exist_ok=True)

true_labels = labels_df['label'].astype('category').cat.codes.values
np.save(os.path.join(SAVE_DIR, "true_labels.npy"), true_labels)

# The training `generator` shuffles, so predicting from it returns rows in a
# random order that cannot be compared with `true_labels`. Feature extraction
# therefore uses its own generator that walks labels_df in order.
feature_generator = datagen.flow_from_dataframe(
    dataframe=labels_df,
    directory=MIXED_DIR,
    x_col="image_name",
    y_col="label",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False
)

feature_results = {}

for model_name, model_fn in MODELS.items():
    print(f"\nüöÄ Training {model_name}")

    # Drop the state left behind by the previous model so that memory does
    # not accumulate across the (large) backbones.
    tf.keras.backend.clear_session()

    # ---------- BUILD MODEL ----------
    model = build_model(model_fn, NUM_CLASSES)

    # ---------- TRAIN ----------
    model.fit(
        generator,
        epochs=5,
        verbose=1
    )

    # ---------- FEATURE EXTRACTION (Dense-256) ----------
    # build_model ends with [..., GlobalAveragePooling2D, Dense(256), softmax],
    # so the Dense-256 layer is layers[-2]; layers[-3] is the pooling layer.
    feature_extractor = Model(
        inputs=model.input,
        outputs=model.layers[-2].output
    )

    features = feature_extractor.predict(feature_generator)

    # Fail loudly if some images were skipped by the generator: the rows
    # would no longer correspond one-to-one to true_labels.
    if features.shape[0] != len(true_labels):
        raise ValueError(
            f"{model_name}: got {features.shape[0]} feature rows "
            f"for {len(true_labels)} labels"
        )

    # ---------- SAVE FEATURES ----------
    feature_path = os.path.join(SAVE_DIR, f"{model_name}_features.npy")
    np.save(feature_path, features)

    feature_results[model_name] = features

    print(f"‚úÖ Features saved for {model_name}: {features.shape}")



üöÄ Training ResNet152
Epoch 1/5


In [None]:
def _align_clusters_to_classes(cluster_ids, class_ids, n_labels):
    """Rename cluster ids so that they agree with class ids as often as possible.

    KMeans numbers its clusters arbitrarily, so cluster 0 has no relation to
    class 0. This builds the cluster-by-class contingency table and picks the
    one-to-one cluster -> class mapping that maximises the number of matches.

    Args:
        cluster_ids: 1-D integer array of cluster ids in ``[0, n_labels)``.
        class_ids: 1-D integer array of class ids in ``[0, n_labels)``, same
            length as ``cluster_ids``.
        n_labels: Number of clusters (equal to the number of classes).

    Returns:
        1-D integer array: ``cluster_ids`` translated into class ids.
    """
    cluster_ids = np.asarray(cluster_ids, dtype=np.int64)
    class_ids = np.asarray(class_ids, dtype=np.int64)

    # contingency[c, k] = number of samples in cluster c with true class k.
    contingency = np.zeros((n_labels, n_labels), dtype=np.int64)
    np.add.at(contingency, (cluster_ids, class_ids), 1)

    matched_clusters, matched_classes = linear_sum_assignment(
        contingency, maximize=True
    )
    cluster_to_class = np.empty(n_labels, dtype=np.int64)
    cluster_to_class[matched_clusters] = matched_classes
    return cluster_to_class[cluster_ids]


# For each model: standardise the features, keep the principal components that
# explain 95% of the variance, cluster them into NUM_CLASSES groups and count
# how many images end up with a label different from their true one.
results = []

for model_name, features in feature_results.items():
    print(f"\nüîç Clustering using {model_name}")

    if len(features) != len(true_labels):
        raise ValueError(
            f"{model_name}: {len(features)} feature rows "
            f"but {len(true_labels)} labels"
        )

    scaler = StandardScaler()
    features_scaled = scaler.fit_transform(features)

    pca = PCA(n_components=0.95)
    features_pca = pca.fit_transform(features_scaled)

    # n_init is set explicitly because its default differs between
    # scikit-learn versions.
    kmeans = KMeans(
        n_clusters=NUM_CLASSES,
        n_init=10,
        random_state=42
    )

    cluster_labels = kmeans.fit_predict(features_pca)

    # Raw cluster ids are an arbitrary permutation of 0..NUM_CLASSES-1, so
    # they are mapped onto class ids before being compared with the truth.
    aligned_labels = _align_clusters_to_classes(
        cluster_labels, true_labels, NUM_CLASSES
    )

    changed = int(np.sum(aligned_labels != true_labels))
    change_percentage = (changed / len(true_labels)) * 100

    results.append({
        "Model": model_name,
        "Total Images": len(true_labels),
        "Changed Labels": changed,
        "Change %": round(change_percentage, 2)
    })


In [None]:
# One row per model, built from the dictionaries collected in `results`,
# shown with the notebook's rich table rendering.
results_df = pd.DataFrame(data=results)

print("\nüìä FINAL CLUSTERING RESULTS")
display(results_df)
