In [1]:
import tensorflow as tf
# Print the GPUs TensorFlow can see; an empty list means none were detected.
print("Is GPU available?", tf.config.list_physical_devices('GPU'))


E0000 00:00:1734858923.384925    1456 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1734858923.442019    1456 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Is GPU available? [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


### define directories

In [2]:

# Paths and dataset settings shared by the cells below
dataset_path = "../data/selected_categories/EuroSAT"  # raw images, one sub-folder per category
test_data_path = "../data/test_data"
preprocessed_dir = "../data/preprocessed_data_imSize513"  # per-category .npy arrays are written here
# The position of a name in this list is used as that category's integer label.
categories = ["Forest", "Residential", "Highway", "AnnualCrop", "HerbaceousVegetation", "Industrial"]
image_size = 513  # Resize to 513*513 -- better to use in pre-trained DeepLabV3


### Preprocess one category at a time so the whole dataset never has to fit in memory at once

In [None]:
import os
import cv2
import numpy as np


os.makedirs(preprocessed_dir, exist_ok=True)

# Preprocessing: handle one category at a time so only that category's images
# are held in memory before they are written to disk.
for category in categories:
    category_path = os.path.join(dataset_path, category)
    output_path = os.path.join(preprocessed_dir, f"{category}.npy")

    print(f"Processing category: {category}")
    images = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the saved
    # array (and any seeded split made from it later) reproducible between runs.
    for file in sorted(os.listdir(category_path)):
        file_path = os.path.join(category_path, file)
        try:
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread reports an unreadable / non-image file by returning
                # None rather than raising, so check for it explicitly.
                print(f"Error processing file {file_path}: could not be read as an image")
                continue
            # OpenCV decodes colour images in BGR channel order; convert to RGB
            # so the stored arrays display correctly with matplotlib.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (image_size, image_size))
            # Cast before dividing: uint8 / 255.0 would produce float64 and
            # double the memory and disk footprint of every image.
            img = img.astype(np.float32) / 255.0  # Normalize to [0, 1]
            images.append(img)
        except Exception as e:
            # Best effort: report the file and carry on with the rest.
            print(f"Error processing file {file_path}: {e}")

    # Save preprocessed images to disk
    np.save(output_path, np.array(images))
    print(f"Saved preprocessed images for {category} to {output_path}")


Processing category: Forest


In [None]:
# Inspect the array written by the preprocessing cell. Build the path from
# preprocessed_dir so this check always looks at the current preprocessing run
# instead of a hard-coded directory.
data = np.load(os.path.join(preprocessed_dir, "AnnualCrop.npy"))
print(data.shape)  # Output: (N, image_size, image_size, 3)
print(data[0])  # First image as a NumPy array with pixel values in [0, 1]


In [None]:
# View the sample as a rendered image rather than as raw pixel values

import matplotlib.pyplot as plt

# Render the first sample on its own axes, titled and without tick marks
fig, ax = plt.subplots()
ax.imshow(data[0])
ax.set_title("AnnualCrop Image")
ax.axis("off")
plt.show()


### Load Preprocessed Data

### Split each category's .npy file into train/validation/test sets and save every split to disk.

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
import os



# Per-category split function
def batch_split_and_save(preprocessed_dir, categories, test_size=0.2, val_size=0.2,
                         output_dir=None, random_state=42):
    """Split each category's image array into train/val/test files on disk.

    Categories are processed one at a time, so only a single category's
    images are loaded at once.

    Args:
        preprocessed_dir: Directory holding one ``<category>.npy`` image array
            per category.
        categories: Category names. The position of a name in this sequence
            is the integer label given to all of its images.
        test_size: ``test_size`` for the first split (test vs. train+val).
        val_size: ``test_size`` for the second split. It is applied to the
            train+val remainder, not to the whole category, so the defaults
            give roughly 64% / 16% / 20% train / val / test.
        output_dir: Directory the split files are written to. Defaults to
            ``preprocessed_dir``; any other directory is created if needed.
        random_state: Seed passed to both splits so they are reproducible.

    For every category this writes ``<category>_<split>.npy`` and
    ``<category>_<split>_labels.npy`` for ``split`` in train, val and test.
    """
    if output_dir is None:
        output_dir = preprocessed_dir
    else:
        os.makedirs(output_dir, exist_ok=True)

    for label, category in enumerate(categories):
        file_path = os.path.join(preprocessed_dir, f"{category}.npy")
        print(f"Processing {category}")

        # Load the category data; every image in it shares the same label
        images = np.load(file_path)
        labels = np.full(len(images), label)

        # Split into Train+Validation and Test
        X_train_val, X_test, y_train_val, y_test = train_test_split(
            images, labels, test_size=test_size, random_state=random_state
        )

        # Split Train+Validation into Training and Validation
        X_train, X_val, y_train, y_val = train_test_split(
            X_train_val, y_train_val, test_size=val_size, random_state=random_state
        )

        # Save splits for this category
        splits = {
            "train": (X_train, y_train),
            "val": (X_val, y_val),
            "test": (X_test, y_test),
        }
        for split_name, (split_images, split_labels) in splits.items():
            np.save(os.path.join(output_dir, f"{category}_{split_name}.npy"), split_images)
            np.save(os.path.join(output_dir, f"{category}_{split_name}_labels.npy"), split_labels)
        print(f"Saved splits for {category}")

# Echo the directory being split, then write the train/val/test files for every category.
print("preprocessed_dir", preprocessed_dir)
batch_split_and_save(preprocessed_dir, categories)


In [None]:
### check the size of the train, val and test for one Category

import numpy as np
import os

# Only the shapes are needed, so memory-map each file (mmap_mode="r") instead
# of reading whole image arrays into RAM just to look at their dimensions.
for split in ("train", "val", "test"):
    split_path = os.path.join(preprocessed_dir, f"AnnualCrop_{split}.npy")
    print(np.load(split_path, mmap_mode="r").shape)


In [None]:
#display sample of one image per category

import numpy as np
import matplotlib.pyplot as plt
import os
import random

# Function to visualize one random image from each category
def display_one_per_category(preprocessed_dir, categories):
    """Show one randomly chosen training image per category in a single row.

    Args:
        preprocessed_dir: Directory containing ``<category>_train.npy`` files.
        categories: Category names; one subplot is drawn for each, in order.
    """
    plt.figure(figsize=(15, 5))
    for i, category in enumerate(categories):
        train_file = os.path.join(preprocessed_dir, f"{category}_train.npy")

        # Memory-map the file: only the one image that gets displayed is read
        # from disk, instead of loading the category's whole training array.
        category_images = np.load(train_file, mmap_mode="r")

        # Randomly pick an image and copy just that slice into memory
        random_idx = random.randrange(len(category_images))
        image = np.array(category_images[random_idx])

        # Display the image
        plt.subplot(1, len(categories), i + 1)
        plt.imshow(image)
        plt.title(category)
        plt.axis("off")
    plt.show()

# Draw one randomly selected training image for each entry in `categories`
display_one_per_category(preprocessed_dir, categories)


In [None]:
import os
import numpy as np


# Running totals over the per-category image arrays
total_images = 0
memory_bytes = 0

# Ensure the folder exists
if os.path.exists(preprocessed_dir):
    # Count only the "<category>.npy" image arrays. The directory can also hold
    # the *_train / *_val / *_test splits and *_labels files written by
    # batch_split_and_save; adding those in would count each image several times.
    for category in categories:
        file_path = os.path.join(preprocessed_dir, f"{category}.npy")
        try:
            # mmap_mode="r" exposes shape and dtype without loading pixel data
            category_array = np.load(file_path, mmap_mode="r")
        except (OSError, ValueError) as e:
            print(f"Error processing file {category}.npy: {e}")
            continue
        total_images += category_array.shape[0]  # Add the number of images
        memory_bytes += category_array.nbytes  # Size once fully loaded
        del category_array  # release the mapping before the next file
    print(f"Total number of images in '{preprocessed_dir}': {total_images}")
else:
    print(f"Directory '{preprocessed_dir}' does not exist. Please check the path.")

# Memory usage calculation
# nbytes comes from each array's real shape and dtype, so the estimate stays
# correct whatever image size was used, and the notebook-wide `image_size`
# setting is no longer rebound to a tuple here.
memory_gb = memory_bytes / (1024 ** 3)

print(f"Estimated memory usage: {memory_gb:.2f} GB")


In [None]:
import psutil

# Ask psutil for the machine's memory statistics
memory = psutil.virtual_memory()

# Convert the total from bytes using 1024**3 bytes per unit
bytes_per_gb = 1024 ** 3
total_memory_gb = memory.total / bytes_per_gb

print(f"Total System Memory: {total_memory_gb:.2f} GB")


# Turn the per-image class labels into per-pixel label maps for segmentation
# NOTE(review): X_train / y_train / y_val / y_test are not defined by any
# earlier cell shown in this notebook — they must be loaded before this runs.
def _labels_to_label_maps(labels, height, width):
    """Expand a 1-D vector of per-image labels to shape (N, height, width, 1).

    Args:
        labels: 1-D array-like with one label per image.
        height: Number of rows in each label map.
        width: Number of columns in each label map.

    Returns:
        A read-only broadcast view in which every pixel of map ``i`` equals
        ``labels[i]``.

    Raises:
        ValueError: If ``labels`` is not 1-D (for example when this cell is
            run a second time on already-expanded labels).
    """
    labels = np.asarray(labels)
    if labels.ndim != 1:
        raise ValueError(
            f"expected a 1-D label vector, got shape {labels.shape}; "
            "was this cell already run?"
        )
    # NumPy aligns shapes from the last axis, so the sample axis must come
    # first with singleton axes after it: (N,) -> (N, 1, 1, 1). A shape of
    # (N, 1) cannot be broadcast to (N, H, W, 1).
    per_sample = labels.reshape(-1, 1, 1, 1)
    # broadcast_to returns a view, so no per-pixel copy is allocated.
    return np.broadcast_to(per_sample, (labels.shape[0], height, width, 1))


# Expand each label to match the spatial dimensions of the image
# assumes X_train is laid out as (N, H, W, C) and that the validation and test
# images share its height and width — TODO confirm
label_height, label_width = X_train.shape[1:3]
y_train = _labels_to_label_maps(y_train, label_height, label_width)
y_val = _labels_to_label_maps(y_val, label_height, label_width)
y_test = _labels_to_label_maps(y_test, label_height, label_width)

print("New y_train shape:", y_train.shape)  # Should be (n_train, H, W, 1)
print("New y_val shape:", y_val.shape)  # Should be (n_val, H, W, 1)
print("New y_test shape:", y_test.shape)  # Should be (n_test, H, W, 1)


print("X_train shape:", X_train.shape)  # Should be (n_train, H, W, 3)
print("y_train shape:", y_train.shape)  # Should be (n_train, H, W, 1)


In [None]:
### Balancing Classes
### Why This Matters:

### If your dataset has significantly more images for some categories (e.g., Forest) than others (e.g., Highway), the model may become biased toward the dominant classes. Balancing ensures fair contributions from all categories.
### Code to Check Class Balance

import numpy as np
import os


# Check training set balance
# The label files sit in preprocessed_dir, next to the image splits that
# batch_split_and_save writes; each holds one label per training image.
class_counts = {
    category: len(np.load(os.path.join(preprocessed_dir, f"{category}_train_labels.npy")))
    for category in categories
}

# Print class distribution
for category, count in class_counts.items():
    print(f"{category}: {count} images")


In [2]:
import numpy as np


# Load one category's training images together with their labels
training_dir = "../data/training_data_224"
images = np.load(training_dir + "/AnnualCrop_train.npy")
labels = np.load(training_dir + "/AnnualCrop_train_labels.npy")

# The leading dimension of both arrays should match (one label per image)
print(f"Images shape: {images.shape}, Labels shape: {labels.shape}")
print(f"First 5 labels: {labels[:5]}")

# Labels are printed once more on their own for a quick look at their shape
print("Shape of labels:", labels.shape)

# Value ranges: first the image pixels, then the label values
for suffix, values in (("", images), (" for Labels", labels)):
    print(f"Min pixel value{suffix}: {values.min()}")
    print(f"Max pixel value{suffix}: {values.max()}")


#cv2.imshow('Sample Image', images[0])
#cv2.waitKey(0)
#cv2.destroyAllWindows()


Images shape: (1920, 224, 224, 3), Labels shape: (1920,)
First 5 labels: [3 3 3 3 3]
Shape of labels: (1920,)
Min pixel value: 0.10196078568696976
Max pixel value: 1.0
Min pixel value for Labels: 3
Max pixel value for Labels: 3
