In [1]:
%pip install tensorflow keras numpy pandas matplotlib scikit-learn keras-tuner building_footprint_segmentation

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting building_footprint_segmentation
  Downloading building_footprint_segmentation-0.2.4-py3-none-any.whl.metadata (5.0 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading building_footprint_segmentation-0.2.4-py3-none-any.whl (35 kB)
Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, building_footprint_segmentation, keras-tuner
Successfully installed building_footprint_segmentation-0.2.4 keras-tuner-1.4.7 kt-legacy-1.0.5


In [2]:
# Mount Google Drive at /content/drive. The dataset and model paths used by the
# later cells all live under /content/drive/MyDrive, so this cell has to run first.
# NOTE(review): force_remount=True presumably re-mounts even when the drive is
# already mounted -- confirm against the google.colab documentation.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [7]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import random
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array

# Additional imports for result visualization
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Function to classify a single image
def classify_image(image_path, classification_model, class_labels, target_size=(224, 224)):
    """Classify one image file and return its most probable label.

    The image is read with OpenCV, converted from BGR to RGB, resized to
    ``target_size``, passed through the DenseNet ``preprocess_input`` and handed
    to ``classification_model.predict`` as a batch of one.

    Args:
        image_path: Path of the image file to classify.
        classification_model: Object exposing a Keras-style ``predict`` method.
            The first output row is indexed by position, so it is assumed to hold
            one score per entry of ``class_labels`` -- confirm against the model.
        class_labels: Sequence of class names, ordered like the model output.
        target_size: ``(width, height)`` given to ``cv2.resize``. Defaults to
            ``(224, 224)``, the value that used to be hard-coded.

    Returns:
        ``(top_class, probability)`` for the highest-scoring class, or
        ``(None, None)`` when anything fails; the error is printed, not raised.
    """
    try:
        # Load the image (cv2.imread returns None instead of raising on failure)
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Error reading image {image_path}")

        # Convert to RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Resize to match the input size of the classifier
        image_resized = cv2.resize(image, tuple(target_size))

        # Convert to array, add the batch axis and preprocess
        image_array = img_to_array(image_resized)
        image_array = np.expand_dims(image_array, axis=0)
        image_array = preprocess_input(image_array)

        # Predict using the classification model. verbose=0 keeps predict() from
        # printing a progress bar for every single image, which otherwise buries
        # the per-image result lines printed by the caller.
        predictions = classification_model.predict(image_array, verbose=0)
        probabilities = predictions[0]
        top_class_index = np.argmax(probabilities)
        top_class = class_labels[top_class_index]
        probability = probabilities[top_class_index]

        return top_class, probability
    except Exception as e:
        # Best effort: one unreadable image must not abort a whole evaluation run.
        print(f"Failed to classify {os.path.basename(image_path)}: {e}")
        return None, None

# Process each image: classify it and store results
def process_single_image(image_path, actual_class, classification_model, class_labels, visualize=False):
    """Classify one image, print the outcome and optionally display the image.

    Args:
        image_path: Path of the image file.
        actual_class: Ground-truth label the prediction is compared against.
        classification_model: Model forwarded to ``classify_image``.
        class_labels: Class names forwarded to ``classify_image``.
        visualize: When true, the image is shown with predicted/actual labels.

    Returns:
        ``(correct, predicted_class)``. ``correct`` is True only when the
        predicted label equals ``actual_class``. ``(False, None)`` is returned
        when classification fails or an exception is raised in this function.
    """
    try:
        predicted_class, probability = classify_image(image_path, classification_model, class_labels)

        # A None label is how classify_image reports a failure.
        if predicted_class is None:
            return False, None

        print(f"Image: {os.path.basename(image_path)} | Predicted: {predicted_class} ({probability:.4f}) | Actual: {actual_class}")

        is_match = predicted_class == actual_class

        # The file is read again only when a plot was requested; an unreadable
        # file simply skips the plot.
        picture = cv2.imread(image_path) if visualize else None
        if picture is not None:
            picture = cv2.cvtColor(picture, cv2.COLOR_BGR2RGB)
            plt.figure(figsize=(6, 6))
            plt.imshow(picture)
            plt.title(f"Predicted: {predicted_class} ({probability:.2f})\nActual: {actual_class}")
            plt.axis('off')
            plt.show()

        return is_match, predicted_class
    except Exception as e:
        print(f"Failed to process {os.path.basename(image_path)}: {e}")
        return False, None

# Main function to process all images in the class folders
def _collect_sampled_images(data_folder, class_labels, samples_per_class, rng):
    """Return shuffled (image_path, class_name) pairs, sampling each class folder."""
    known_classes = set(class_labels)
    image_paths = []
    # Sorted listings make the sample depend only on the random state, not on
    # whatever order the file system happens to return entries in.
    for class_name in sorted(os.listdir(data_folder)):
        class_folder = os.path.join(data_folder, class_name)
        if not os.path.isdir(class_folder):
            continue
        if class_name not in known_classes:
            # The model can never predict this name, and the per-class counters
            # have no slot for it, so it is excluded up front and reported.
            print(f"Skipping folder '{class_name}': not listed in class_labels.")
            continue
        images_in_class = sorted(
            os.path.join(class_folder, f) for f in os.listdir(class_folder)
            if f.lower().endswith(('.tif', '.jpg', '.png', '.jpeg'))
        )
        if samples_per_class is not None and len(images_in_class) > samples_per_class:
            images_in_class = rng.sample(images_in_class, samples_per_class)
        image_paths.extend((img_path, class_name) for img_path in images_in_class)

    rng.shuffle(image_paths)
    return image_paths


def _print_accuracy_summary(class_labels, correct_per_class, total_per_class):
    """Print overall and per-class accuracy from the per-class counters."""
    total_correct = sum(correct_per_class.values())
    total_predictions = sum(total_per_class.values())
    accuracy = (total_correct / total_predictions) * 100 if total_predictions > 0 else 0.0

    print(f"\nTotal correct predictions: {total_correct}")
    print(f"Total predictions: {total_predictions}")
    print(f"Overall Accuracy: {accuracy:.2f}%")

    print("\nAccuracy per class:")
    for class_name in class_labels:
        correct = correct_per_class[class_name]
        total = total_per_class[class_name]
        if total > 0:
            class_accuracy = (correct / total) * 100
            print(f"{class_name}: {class_accuracy:.2f}% ({correct}/{total})")
        else:
            print(f"{class_name}: No predictions.")


def _report_confusion(y_true, y_pred, class_labels):
    """Plot the confusion matrix, print the classification report and the misses."""
    if not (y_true and y_pred):
        print("No predictions to generate confusion matrix and classification report.")
        return

    cm = confusion_matrix(y_true, y_pred, labels=class_labels)
    plt.figure(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_labels, yticklabels=class_labels)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')
    plt.show()

    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, labels=class_labels))

    print("\nMisclassified Samples:")
    for actual, predicted in zip(y_true, y_pred):
        if actual != predicted:
            print(f'Actual: {actual}, Predicted: {predicted}')


def process_images_in_folders(data_folder, classification_model, class_labels, visualize=False,
                              samples_per_class=20, seed=None):
    """Evaluate the classifier on a random sample of images from each class folder.

    ``data_folder`` is expected to contain one sub-folder per class, named
    exactly like the matching entry of ``class_labels``. Sub-folders whose name
    is not in ``class_labels`` are skipped with a printed warning, so the overall
    accuracy, the per-class accuracy and the confusion matrix are always computed
    from the same set of images.

    Args:
        data_folder: Directory holding the class sub-folders.
        classification_model: Model forwarded to ``process_single_image``.
        class_labels: Class names, ordered like the model output.
        visualize: Forwarded to ``process_single_image``.
        samples_per_class: Maximum number of images drawn per class (default 20,
            the value that used to be hard-coded). ``None`` uses every image.
        seed: Optional seed for a private random generator, making the sample
            reproducible. ``None`` (default) uses the global ``random`` state.

    Returns:
        None. Results are printed and plotted.
    """
    # Both random.Random instances and the random module expose sample/shuffle.
    rng = random.Random(seed) if seed is not None else random

    correct_predictions_per_class = {class_name: 0 for class_name in class_labels}
    total_predictions_per_class = {class_name: 0 for class_name in class_labels}

    # Actual and predicted labels, collected for the confusion matrix
    y_true = []
    y_pred = []

    image_paths = _collect_sampled_images(data_folder, class_labels, samples_per_class, rng)

    # Process images sequentially
    for image_path, actual_class in tqdm(image_paths, desc="Processing Images"):
        try:
            correct, predicted_class = process_single_image(
                image_path, actual_class, classification_model, class_labels, visualize=visualize)
        except Exception as e:
            print(f"Failed to process {image_path}: {e}")
            continue

        # A None prediction means the image could not be classified; leave it
        # out of every statistic.
        if predicted_class is None:
            continue

        total_predictions_per_class[actual_class] += 1
        if correct:
            correct_predictions_per_class[actual_class] += 1
        y_true.append(actual_class)
        y_pred.append(predicted_class)

    _print_accuracy_summary(class_labels, correct_predictions_per_class, total_predictions_per_class)
    _report_confusion(y_true, y_pred, class_labels)

if __name__ == "__main__":
    # Data directory containing class folders
    data_folder = r'/content/drive/MyDrive/ML/Model/val'  # Update the path to your data folder

    # Define class labels (ensure these are in the correct order corresponding to your model's output)
    class_labels = ['Commercial', 'High', 'Hospital', 'Industrial', 'Multi', 'School', 'Single']  # Replace with your actual class names

    # Path of the pre-trained classification model
    classification_model_path = r"/content/drive/MyDrive/Madhu RA Work Folder/Densenet201_combined_2.h5"  # Update the path to your model

    # Load the classification model
    try:
        classification_model = load_model(classification_model_path)
        print("Classification model loaded successfully.")
    except Exception as e:
        print(f"Error loading classification model: {e}")
        # `exit` is a helper meant for interactive shells and is not a reliable
        # way to stop a notebook cell; raising SystemExit guarantees that the
        # code below never runs with `classification_model` undefined.
        raise SystemExit(1)

    # Set to True to visualize all images
    visualize = True  # Set to False to disable visualization

    # Process all images in the data folder
    process_images_in_folders(
        data_folder,
        classification_model,
        class_labels,
        visualize=visualize
    )

Output hidden; open in https://colab.research.google.com to view.

In [15]:
import os
import hashlib
import logging

# Configure logging for duplicate checking.
# force=True replaces any handlers already attached to the root logger. Without
# it basicConfig() silently does nothing when a handler is already installed
# (hosted notebook runtimes and some imported libraries may install one), and
# duplicate_check.log would never be written.
logging.basicConfig(
    filename='duplicate_check.log',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s',
    force=True,
)
# Dataset roots that are scanned for duplicate images.
existing_train_dir = '/content/drive/MyDrive/RA Satellite Imagery Work/Abduls RA Work Folder/SAT_Images/Phase 2'
new_data_dir = '/content/drive/MyDrive/ML/Model/val'
t_data_dir = '/content/drive/MyDrive/ML/Model/test'
# Example using imagehash
from PIL import Image
import imagehash

def find_duplicate_images_perceptual(data_directories):
    """Find images that share a perceptual hash across several directory trees.

    Every image file below each directory is hashed with ``imagehash.phash``.
    The first file seen for a hash is kept as the reference; any later file with
    the same hash is reported as its duplicate. Equal perceptual hashes mean the
    images look alike to the hash, so a reported pair is a candidate to review
    rather than proof of byte-identical files.

    Args:
        data_directories: Iterable of directory paths to walk recursively.
            Paths that are not directories are logged and skipped.

    Returns:
        List of ``(duplicate_path, first_seen_path)`` tuples.
    """
    image_suffixes = ('.tif', '.tiff', '.jpg', '.jpeg', '.png', '.bmp', '.gif')
    hash_dict = {}
    duplicates = []
    total_images = 0
    unreadable_images = 0
    # Resolved paths already hashed, so a file reachable through two of the
    # given directories is not reported as a duplicate of itself.
    seen_paths = set()

    for data_directory in data_directories:
        if not os.path.isdir(data_directory):
            logging.warning(f"Data directory '{data_directory}' does not exist. Skipping.")
            continue
        for root, dirs, files in os.walk(data_directory):
            # Sorting in place makes the walk order, and therefore which file of
            # a pair counts as the reference, deterministic.
            dirs.sort()
            for filename in sorted(files):
                if not filename.lower().endswith(image_suffixes):
                    continue
                filepath = os.path.join(root, filename)
                resolved = os.path.realpath(filepath)
                if resolved in seen_paths:
                    continue
                seen_paths.add(resolved)
                total_images += 1
                try:
                    # The context manager closes the underlying file handle even
                    # when hashing fails.
                    with Image.open(filepath) as image:
                        filehash = str(imagehash.phash(image))
                except Exception as e:
                    unreadable_images += 1
                    logging.error(f"Error processing file {filepath}: {e}")
                    continue

                if filehash in hash_dict:
                    duplicates.append((filepath, hash_dict[filehash]))
                    logging.info(f"Duplicate found: {filepath} and {hash_dict[filehash]}")
                else:
                    hash_dict[filehash] = filepath

    print(f"Total images scanned: {total_images}")
    print(f"Total duplicates found: {len(duplicates)}")
    if unreadable_images:
        # Failures are otherwise visible only in the log file.
        print(f"Images that could not be hashed: {unreadable_images} (see log for details)")

    if duplicates:
        print("\nList of duplicate images:")
        for dup in duplicates:
            print(f"Duplicate pair:\n  {dup[0]}\n  {dup[1]}\n")
    else:
        print("No duplicate images found.")

    return duplicates

if __name__ == "__main__":
    # Specify your data directories
    # NOTE: despite its name, train_dirs also holds the val and test folders, so
    # matches are detected across all three locations, not only within training data.
    train_dirs = [existing_train_dir, new_data_dir, t_data_dir]

    # Progress goes to the notebook output; start/end markers go to the logger.
    print("Checking for duplicate images in the dataset...")
    logging.info("Starting duplicate image check.")
    # Kept in a module-level name so the list of pairs can be inspected afterwards.
    duplicates = find_duplicate_images_perceptual(train_dirs)
    logging.info("Duplicate image check completed.")


Checking for duplicate images in the dataset...
Total images scanned: 1676
Total duplicates found: 133

List of duplicate images:
Duplicate pair:
  /content/drive/MyDrive/RA Satellite Imagery Work/Abduls RA Work Folder/SAT_Images/Phase 2/Industrial/37.775766_-97.410278_IN_KS_244.tif
  /content/drive/MyDrive/RA Satellite Imagery Work/Abduls RA Work Folder/SAT_Images/Phase 2/Industrial/37.782362_-97.409057_IN_KS_245.tif

Duplicate pair:
  /content/drive/MyDrive/ML/Model/val/Hospital/41.741104_72.198011_HL_CT_183.tif.tif
  /content/drive/MyDrive/RA Satellite Imagery Work/Abduls RA Work Folder/SAT_Images/Phase 2/Hospital/41.741104_72.198011_HL_CT_183.tif.tif

Duplicate pair:
  /content/drive/MyDrive/ML/Model/val/Hospital/40.182136_105.126654_HL_CO_148.tif.tif
  /content/drive/MyDrive/RA Satellite Imagery Work/Abduls RA Work Folder/SAT_Images/Phase 2/Hospital/40.182136_105.126654_HL_CO_148.tif.tif

Duplicate pair:
  /content/drive/MyDrive/ML/Model/val/Hospital/41.111597_73.422624_HL_CT_187.

In [9]:
# imagehash provides the perceptual hash used by the duplicate-image check.
# This install has to run before the cell that imports imagehash.
%pip install imagehash

Collecting imagehash
  Downloading ImageHash-4.3.1-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting PyWavelets (from imagehash)
  Downloading pywavelets-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Downloading ImageHash-4.3.1-py2.py3-none-any.whl (296 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.5/296.5 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pywavelets-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m66.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyWavelets, imagehash
Successfully installed PyWavelets-1.7.0 imagehash-4.3.1


In [17]:
import numpy as np

# Define the confusion matrix. Each row is treated as one actual class: the row
# sum is used below as that class's sample count.
# The array is deliberately NOT named `confusion_matrix`: that name is the
# function imported from sklearn.metrics, and overwriting it with an array makes
# any later re-run of the evaluation cell fail.
confusion_counts = np.array([
    [8, 0, 0, 1, 1, 0, 0],
    [0,15, 0, 0, 0, 0, 0],
    [0, 0, 9, 0, 1, 0, 0],
    [0, 0, 0,11, 0, 0, 0],
    [1, 2, 0, 0, 7, 0, 1],
    [0, 0, 0, 0, 2, 8, 0],
    [0, 0, 0, 0, 0, 1, 9]
])

# Define the class labels (one per row/column, in matrix order)
labels = ['Commercial', 'High', 'Hospital', 'Industrial', 'Multi', 'Schools', 'Single']

assert confusion_counts.shape == (len(labels), len(labels)), "matrix must be square and match labels"

# Per-class accuracy = diagonal entry / row total, as a percentage.
# Rows without any samples yield NaN instead of triggering a division by zero.
row_totals = confusion_counts.sum(axis=1)
class_fractions = np.full(len(labels), np.nan)
np.divide(np.diag(confusion_counts), row_totals, out=class_fractions, where=row_totals > 0)

# Dictionary mapping each class label to its accuracy in percent
per_class_accuracy = {
    name: float(fraction * 100) for name, fraction in zip(labels, class_fractions)
}

# Display the per-class accuracy
for label in labels:
    print(f"Accuracy for class '{label}': {per_class_accuracy[label]:.2f}%")


Accuracy for class 'Commercial': 80.00%
Accuracy for class 'High': 100.00%
Accuracy for class 'Hospital': 90.00%
Accuracy for class 'Industrial': 100.00%
Accuracy for class 'Multi': 63.64%
Accuracy for class 'Schools': 80.00%
Accuracy for class 'Single': 90.00%
